blob: 39f1a8bc17d467e5a3183ad8e6e6387766dea19f [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
3/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00008#include "bytes_methods.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz6968b052007-02-27 19:02:19 +000010static PyBytesObject *nullbytes = NULL;
11
12void
13PyBytes_Fini(void)
14{
15 Py_CLEAR(nullbytes);
16}
17
18int
19PyBytes_Init(void)
20{
21 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
22 if (nullbytes == NULL)
23 return 0;
24 nullbytes->ob_bytes = NULL;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000025 Py_Size(nullbytes) = nullbytes->ob_alloc = 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000026 nullbytes->ob_exports = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +000027 return 1;
28}
29
30/* end nullbytes support */
31
Guido van Rossumad7d8d12007-04-13 01:39:34 +000032/* Helpers */
33
34static int
35_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000036{
Gregory P. Smith60d241f2007-10-16 06:31:30 +000037 long face_value;
38
39 if (PyInt_Check(arg)) {
40 face_value = PyInt_AsLong(arg);
41 if (face_value < 0 || face_value >= 256) {
42 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
43 return 0;
44 }
45 } else {
46 PyErr_Format(PyExc_TypeError, "an integer is required");
Neal Norwitz6968b052007-02-27 19:02:19 +000047 return 0;
48 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +000049
50 *value = face_value;
Neal Norwitz6968b052007-02-27 19:02:19 +000051 return 1;
52}
53
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000054static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000055bytes_getbuffer(PyBytesObject *obj, Py_buffer *view, int flags)
Guido van Rossum75d38e92007-08-24 17:33:11 +000056{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000057 int ret;
58 void *ptr;
59 if (view == NULL) {
60 obj->ob_exports++;
61 return 0;
62 }
Guido van Rossum75d38e92007-08-24 17:33:11 +000063 if (obj->ob_bytes == NULL)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000064 ptr = "";
65 else
66 ptr = obj->ob_bytes;
67 ret = PyBuffer_FillInfo(view, ptr, Py_Size(obj), 0, flags);
68 if (ret >= 0) {
69 obj->ob_exports++;
70 }
71 return ret;
72}
73
74static void
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000075bytes_releasebuffer(PyBytesObject *obj, Py_buffer *view)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000076{
77 obj->ob_exports--;
78}
79
Neal Norwitz2bad9702007-08-27 06:19:22 +000080static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000081_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000082{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000083 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000084
Gregory P. Smith60d241f2007-10-16 06:31:30 +000085 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000086 {
87 PyErr_Format(PyExc_TypeError,
88 "Type %.100s doesn't support the buffer API",
89 Py_Type(obj)->tp_name);
90 return -1;
91 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000092
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000093 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
94 return -1;
95 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000096}
97
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000098/* Direct API functions */
99
100PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000101PyBytes_FromObject(PyObject *input)
102{
103 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
104 input, NULL);
105}
106
107PyObject *
108PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000109{
110 PyBytesObject *new;
Neal Norwitz61ec0d32007-10-26 06:44:10 +0000111 Py_ssize_t alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000112
Guido van Rossumd624f182006-04-24 13:47:05 +0000113 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000114
115 new = PyObject_New(PyBytesObject, &PyBytes_Type);
116 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000117 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000118
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000119 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000120 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000121 alloc = 0;
122 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000123 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000124 alloc = size + 1;
125 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +0000126 if (new->ob_bytes == NULL) {
127 Py_DECREF(new);
Neal Norwitz16596dd2007-08-30 05:44:54 +0000128 return PyErr_NoMemory();
Guido van Rossumd624f182006-04-24 13:47:05 +0000129 }
130 if (bytes != NULL)
131 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000132 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133 }
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000134 Py_Size(new) = size;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000135 new->ob_alloc = alloc;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000136 new->ob_exports = 0;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000137
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000138 return (PyObject *)new;
139}
140
141Py_ssize_t
142PyBytes_Size(PyObject *self)
143{
144 assert(self != NULL);
145 assert(PyBytes_Check(self));
146
Guido van Rossum20188312006-05-05 15:15:40 +0000147 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000148}
149
150char *
151PyBytes_AsString(PyObject *self)
152{
153 assert(self != NULL);
154 assert(PyBytes_Check(self));
155
Guido van Rossum20188312006-05-05 15:15:40 +0000156 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000157}
158
159int
160PyBytes_Resize(PyObject *self, Py_ssize_t size)
161{
162 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000163 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000164
165 assert(self != NULL);
166 assert(PyBytes_Check(self));
167 assert(size >= 0);
168
Guido van Rossuma0867f72006-05-05 04:34:18 +0000169 if (size < alloc / 2) {
170 /* Major downsize; resize down to exact size */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000171 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000172 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000173 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000174 /* Within allocated size; quick exit */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000175 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000176 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000177 return 0;
178 }
179 else if (size <= alloc * 1.125) {
180 /* Moderate upsize; overallocate similar to list_resize() */
181 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
182 }
183 else {
184 /* Major upsize; resize up to exact size */
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000185 alloc = size + 1;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000186 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000187
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000188 if (((PyBytesObject *)self)->ob_exports > 0) {
189 /*
Guido van Rossuma74184e2007-08-29 04:05:57 +0000190 fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
191 ((PyBytesObject *)self)->ob_bytes);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000192 */
193 PyErr_SetString(PyExc_BufferError,
Guido van Rossuma74184e2007-08-29 04:05:57 +0000194 "Existing exports of data: object cannot be re-sized");
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000195 return -1;
196 }
197
Guido van Rossuma0867f72006-05-05 04:34:18 +0000198 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000200 PyErr_NoMemory();
201 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000202 }
203
Guido van Rossumd624f182006-04-24 13:47:05 +0000204 ((PyBytesObject *)self)->ob_bytes = sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000205 Py_Size(self) = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000206 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000207 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
208
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000209 return 0;
210}
211
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000212PyObject *
213PyBytes_Concat(PyObject *a, PyObject *b)
214{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000215 Py_ssize_t size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000216 Py_buffer va, vb;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000217 PyBytesObject *result;
218
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000219 va.len = -1;
220 vb.len = -1;
221 if (_getbuffer(a, &va) < 0 ||
222 _getbuffer(b, &vb) < 0) {
Guido van Rossum75d38e92007-08-24 17:33:11 +0000223 if (va.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000224 PyObject_ReleaseBuffer(a, &va);
225 if (vb.len != -1)
226 PyObject_ReleaseBuffer(b, &vb);
227 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
228 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
229 return NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000230 }
231
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000232 size = va.len + vb.len;
233 if (size < 0) {
234 PyObject_ReleaseBuffer(a, &va);
235 PyObject_ReleaseBuffer(b, &vb);
236 return PyErr_NoMemory();
237 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000238
239 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
240 if (result != NULL) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000241 memcpy(result->ob_bytes, va.buf, va.len);
242 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000243 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000244
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000245 PyObject_ReleaseBuffer(a, &va);
246 PyObject_ReleaseBuffer(b, &vb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000247 return (PyObject *)result;
248}
249
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000250/* Functions stuffed into the type object */
251
252static Py_ssize_t
253bytes_length(PyBytesObject *self)
254{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000255 return Py_Size(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256}
257
258static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000259bytes_concat(PyBytesObject *self, PyObject *other)
260{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000261 return PyBytes_Concat((PyObject *)self, other);
Guido van Rossumd624f182006-04-24 13:47:05 +0000262}
263
264static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000265bytes_iconcat(PyBytesObject *self, PyObject *other)
266{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000267 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000268 Py_ssize_t size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000269 Py_buffer vo;
Guido van Rossum13e57212006-04-27 22:54:26 +0000270
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000271 if (_getbuffer(other, &vo) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000272 PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
273 Py_Type(self)->tp_name);
274 return NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000275 }
276
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000277 mysize = Py_Size(self);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000278 size = mysize + vo.len;
279 if (size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000280 PyObject_ReleaseBuffer(other, &vo);
281 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000282 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000283 if (size < self->ob_alloc) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000284 Py_Size(self) = size;
285 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000286 }
287 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000288 PyObject_ReleaseBuffer(other, &vo);
289 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000290 }
291 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
292 PyObject_ReleaseBuffer(other, &vo);
Guido van Rossum13e57212006-04-27 22:54:26 +0000293 Py_INCREF(self);
294 return (PyObject *)self;
295}
296
297static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000298bytes_repeat(PyBytesObject *self, Py_ssize_t count)
299{
300 PyBytesObject *result;
301 Py_ssize_t mysize;
302 Py_ssize_t size;
303
304 if (count < 0)
305 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000306 mysize = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000307 size = mysize * count;
308 if (count != 0 && size / count != mysize)
309 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000310 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000311 if (result != NULL && size != 0) {
312 if (mysize == 1)
313 memset(result->ob_bytes, self->ob_bytes[0], size);
314 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000315 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000316 for (i = 0; i < count; i++)
317 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
318 }
319 }
320 return (PyObject *)result;
321}
322
323static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000324bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
325{
326 Py_ssize_t mysize;
327 Py_ssize_t size;
328
329 if (count < 0)
330 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000331 mysize = Py_Size(self);
Guido van Rossum13e57212006-04-27 22:54:26 +0000332 size = mysize * count;
333 if (count != 0 && size / count != mysize)
334 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000335 if (size < self->ob_alloc) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000336 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000337 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000338 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000339 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000340 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000341
Guido van Rossum13e57212006-04-27 22:54:26 +0000342 if (mysize == 1)
343 memset(self->ob_bytes, self->ob_bytes[0], size);
344 else {
345 Py_ssize_t i;
346 for (i = 1; i < count; i++)
347 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
348 }
349
350 Py_INCREF(self);
351 return (PyObject *)self;
352}
353
354static int
355bytes_substring(PyBytesObject *self, PyBytesObject *other)
356{
357 Py_ssize_t i;
358
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000359 if (Py_Size(other) == 1) {
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000360 return memchr(self->ob_bytes, other->ob_bytes[0],
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000361 Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000362 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000363 if (Py_Size(other) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000364 return 1; /* Edge case */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000365 for (i = 0; i + Py_Size(other) <= Py_Size(self); i++) {
Guido van Rossum13e57212006-04-27 22:54:26 +0000366 /* XXX Yeah, yeah, lots of optimizations possible... */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000367 if (memcmp(self->ob_bytes + i, other->ob_bytes, Py_Size(other)) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000368 return 1;
369 }
370 return 0;
371}
372
373static int
374bytes_contains(PyBytesObject *self, PyObject *value)
375{
376 Py_ssize_t ival;
377
378 if (PyBytes_Check(value))
379 return bytes_substring(self, (PyBytesObject *)value);
380
Thomas Woutersd204a712006-08-22 13:41:17 +0000381 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000382 if (ival == -1 && PyErr_Occurred())
383 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000384 if (ival < 0 || ival >= 256) {
385 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
386 return -1;
387 }
388
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000389 return memchr(self->ob_bytes, ival, Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000390}
391
392static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000393bytes_getitem(PyBytesObject *self, Py_ssize_t i)
394{
395 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000396 i += Py_Size(self);
397 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000398 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
399 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000400 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000401 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
402}
403
404static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000405bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000406{
Thomas Wouters376446d2006-12-19 08:30:14 +0000407 if (PyIndex_Check(item)) {
408 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000409
Thomas Wouters376446d2006-12-19 08:30:14 +0000410 if (i == -1 && PyErr_Occurred())
411 return NULL;
412
413 if (i < 0)
414 i += PyBytes_GET_SIZE(self);
415
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000416 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000417 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
418 return NULL;
419 }
420 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
421 }
422 else if (PySlice_Check(item)) {
423 Py_ssize_t start, stop, step, slicelength, cur, i;
424 if (PySlice_GetIndicesEx((PySliceObject *)item,
425 PyBytes_GET_SIZE(self),
426 &start, &stop, &step, &slicelength) < 0) {
427 return NULL;
428 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000429
Thomas Wouters376446d2006-12-19 08:30:14 +0000430 if (slicelength <= 0)
431 return PyBytes_FromStringAndSize("", 0);
432 else if (step == 1) {
433 return PyBytes_FromStringAndSize(self->ob_bytes + start,
434 slicelength);
435 }
436 else {
437 char *source_buf = PyBytes_AS_STRING(self);
438 char *result_buf = (char *)PyMem_Malloc(slicelength);
439 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000440
Thomas Wouters376446d2006-12-19 08:30:14 +0000441 if (result_buf == NULL)
442 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000443
Thomas Wouters376446d2006-12-19 08:30:14 +0000444 for (cur = start, i = 0; i < slicelength;
445 cur += step, i++) {
446 result_buf[i] = source_buf[cur];
447 }
448 result = PyBytes_FromStringAndSize(result_buf, slicelength);
449 PyMem_Free(result_buf);
450 return result;
451 }
452 }
453 else {
454 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
455 return NULL;
456 }
457}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000458
Guido van Rossumd624f182006-04-24 13:47:05 +0000459static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000460bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000461 PyObject *values)
462{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000463 Py_ssize_t avail, needed;
464 void *bytes;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000465 Py_buffer vbytes;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000466 int res = 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000467
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000468 vbytes.len = -1;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000469 if (values == (PyObject *)self) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000470 /* Make a copy and call this function recursively */
Guido van Rossumd624f182006-04-24 13:47:05 +0000471 int err;
472 values = PyBytes_FromObject(values);
473 if (values == NULL)
474 return -1;
475 err = bytes_setslice(self, lo, hi, values);
476 Py_DECREF(values);
477 return err;
478 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000479 if (values == NULL) {
480 /* del b[lo:hi] */
481 bytes = NULL;
482 needed = 0;
483 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000484 else {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000485 if (_getbuffer(values, &vbytes) < 0) {
486 PyErr_Format(PyExc_TypeError,
487 "can't set bytes slice from %.100s",
488 Py_Type(values)->tp_name);
489 return -1;
490 }
491 needed = vbytes.len;
492 bytes = vbytes.buf;
Guido van Rossumd624f182006-04-24 13:47:05 +0000493 }
494
495 if (lo < 0)
496 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000497 if (hi < lo)
498 hi = lo;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000499 if (hi > Py_Size(self))
500 hi = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000501
502 avail = hi - lo;
503 if (avail < 0)
504 lo = hi = avail = 0;
505
506 if (avail != needed) {
507 if (avail > needed) {
508 /*
509 0 lo hi old_size
510 | |<----avail----->|<-----tomove------>|
511 | |<-needed->|<-----tomove------>|
512 0 lo new_hi new_size
513 */
514 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000515 Py_Size(self) - hi);
Guido van Rossumd624f182006-04-24 13:47:05 +0000516 }
Guido van Rossuma74184e2007-08-29 04:05:57 +0000517 /* XXX(nnorwitz): need to verify this can't overflow! */
Thomas Wouters376446d2006-12-19 08:30:14 +0000518 if (PyBytes_Resize((PyObject *)self,
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000519 Py_Size(self) + needed - avail) < 0) {
520 res = -1;
521 goto finish;
522 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000523 if (avail < needed) {
524 /*
525 0 lo hi old_size
526 | |<-avail->|<-----tomove------>|
527 | |<----needed---->|<-----tomove------>|
528 0 lo new_hi new_size
529 */
530 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000531 Py_Size(self) - lo - needed);
Guido van Rossumd624f182006-04-24 13:47:05 +0000532 }
533 }
534
535 if (needed > 0)
536 memcpy(self->ob_bytes + lo, bytes, needed);
537
Guido van Rossum75d38e92007-08-24 17:33:11 +0000538
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000539 finish:
Guido van Rossum75d38e92007-08-24 17:33:11 +0000540 if (vbytes.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000541 PyObject_ReleaseBuffer(values, &vbytes);
542 return res;
Guido van Rossumd624f182006-04-24 13:47:05 +0000543}
544
545static int
546bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
547{
548 Py_ssize_t ival;
549
550 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000551 i += Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000552
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000553 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000554 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
555 return -1;
556 }
557
558 if (value == NULL)
559 return bytes_setslice(self, i, i+1, NULL);
560
Thomas Woutersd204a712006-08-22 13:41:17 +0000561 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000562 if (ival == -1 && PyErr_Occurred())
563 return -1;
564
565 if (ival < 0 || ival >= 256) {
566 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
567 return -1;
568 }
569
570 self->ob_bytes[i] = ival;
571 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000572}
573
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000574static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000575bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
576{
577 Py_ssize_t start, stop, step, slicelen, needed;
578 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000579
Thomas Wouters376446d2006-12-19 08:30:14 +0000580 if (PyIndex_Check(item)) {
581 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
582
583 if (i == -1 && PyErr_Occurred())
584 return -1;
585
586 if (i < 0)
587 i += PyBytes_GET_SIZE(self);
588
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000589 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000590 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
591 return -1;
592 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000593
Thomas Wouters376446d2006-12-19 08:30:14 +0000594 if (values == NULL) {
595 /* Fall through to slice assignment */
596 start = i;
597 stop = i + 1;
598 step = 1;
599 slicelen = 1;
600 }
601 else {
602 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
603 if (ival == -1 && PyErr_Occurred())
604 return -1;
605 if (ival < 0 || ival >= 256) {
606 PyErr_SetString(PyExc_ValueError,
607 "byte must be in range(0, 256)");
608 return -1;
609 }
610 self->ob_bytes[i] = (char)ival;
611 return 0;
612 }
613 }
614 else if (PySlice_Check(item)) {
615 if (PySlice_GetIndicesEx((PySliceObject *)item,
616 PyBytes_GET_SIZE(self),
617 &start, &stop, &step, &slicelen) < 0) {
618 return -1;
619 }
620 }
621 else {
622 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
623 return -1;
624 }
625
626 if (values == NULL) {
627 bytes = NULL;
628 needed = 0;
629 }
630 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
631 /* Make a copy an call this function recursively */
632 int err;
633 values = PyBytes_FromObject(values);
634 if (values == NULL)
635 return -1;
636 err = bytes_ass_subscript(self, item, values);
637 Py_DECREF(values);
638 return err;
639 }
640 else {
641 assert(PyBytes_Check(values));
642 bytes = ((PyBytesObject *)values)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000643 needed = Py_Size(values);
Thomas Wouters376446d2006-12-19 08:30:14 +0000644 }
645 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
646 if ((step < 0 && start < stop) ||
647 (step > 0 && start > stop))
648 stop = start;
649 if (step == 1) {
650 if (slicelen != needed) {
651 if (slicelen > needed) {
652 /*
653 0 start stop old_size
654 | |<---slicelen--->|<-----tomove------>|
655 | |<-needed->|<-----tomove------>|
656 0 lo new_hi new_size
657 */
658 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000659 Py_Size(self) - stop);
Thomas Wouters376446d2006-12-19 08:30:14 +0000660 }
661 if (PyBytes_Resize((PyObject *)self,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000662 Py_Size(self) + needed - slicelen) < 0)
Thomas Wouters376446d2006-12-19 08:30:14 +0000663 return -1;
664 if (slicelen < needed) {
665 /*
666 0 lo hi old_size
667 | |<-avail->|<-----tomove------>|
668 | |<----needed---->|<-----tomove------>|
669 0 lo new_hi new_size
670 */
671 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000672 Py_Size(self) - start - needed);
Thomas Wouters376446d2006-12-19 08:30:14 +0000673 }
674 }
675
676 if (needed > 0)
677 memcpy(self->ob_bytes + start, bytes, needed);
678
679 return 0;
680 }
681 else {
682 if (needed == 0) {
683 /* Delete slice */
684 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000685
Thomas Wouters376446d2006-12-19 08:30:14 +0000686 if (step < 0) {
687 stop = start + 1;
688 start = stop + step * (slicelen - 1) - 1;
689 step = -step;
690 }
691 for (cur = start, i = 0;
692 i < slicelen; cur += step, i++) {
693 Py_ssize_t lim = step - 1;
694
695 if (cur + step >= PyBytes_GET_SIZE(self))
696 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000697
Thomas Wouters376446d2006-12-19 08:30:14 +0000698 memmove(self->ob_bytes + cur - i,
699 self->ob_bytes + cur + 1, lim);
700 }
701 /* Move the tail of the bytes, in one chunk */
702 cur = start + slicelen*step;
703 if (cur < PyBytes_GET_SIZE(self)) {
704 memmove(self->ob_bytes + cur - slicelen,
705 self->ob_bytes + cur,
706 PyBytes_GET_SIZE(self) - cur);
707 }
708 if (PyBytes_Resize((PyObject *)self,
709 PyBytes_GET_SIZE(self) - slicelen) < 0)
710 return -1;
711
712 return 0;
713 }
714 else {
715 /* Assign slice */
716 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000717
Thomas Wouters376446d2006-12-19 08:30:14 +0000718 if (needed != slicelen) {
719 PyErr_Format(PyExc_ValueError,
720 "attempt to assign bytes of size %zd "
721 "to extended slice of size %zd",
722 needed, slicelen);
723 return -1;
724 }
725 for (cur = start, i = 0; i < slicelen; cur += step, i++)
726 self->ob_bytes[cur] = bytes[i];
727 return 0;
728 }
729 }
730}
731
732static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000733bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
734{
Guido van Rossumd624f182006-04-24 13:47:05 +0000735 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000736 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000737 const char *encoding = NULL;
738 const char *errors = NULL;
739 Py_ssize_t count;
740 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000741 PyObject *(*iternext)(PyObject *);
742
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000743 if (Py_Size(self) != 0) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000744 /* Empty previous contents (yes, do this first of all!) */
745 if (PyBytes_Resize((PyObject *)self, 0) < 0)
746 return -1;
747 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000748
Guido van Rossumd624f182006-04-24 13:47:05 +0000749 /* Parse arguments */
750 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
751 &arg, &encoding, &errors))
752 return -1;
753
754 /* Make a quick exit if no first argument */
755 if (arg == NULL) {
756 if (encoding != NULL || errors != NULL) {
757 PyErr_SetString(PyExc_TypeError,
758 "encoding or errors without sequence argument");
759 return -1;
760 }
761 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000762 }
763
Guido van Rossumd624f182006-04-24 13:47:05 +0000764 if (PyUnicode_Check(arg)) {
765 /* Encode via the codec registry */
Guido van Rossum4355a472007-05-04 05:00:04 +0000766 PyObject *encoded, *new;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000767 if (encoding == NULL) {
768 PyErr_SetString(PyExc_TypeError,
769 "string argument without an encoding");
770 return -1;
771 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000772 encoded = PyCodec_Encode(arg, encoding, errors);
773 if (encoded == NULL)
774 return -1;
Guido van Rossum4355a472007-05-04 05:00:04 +0000775 if (!PyBytes_Check(encoded) && !PyString_Check(encoded)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000776 PyErr_Format(PyExc_TypeError,
Guido van Rossum4355a472007-05-04 05:00:04 +0000777 "encoder did not return a str8 or bytes object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000778 Py_Type(encoded)->tp_name);
Guido van Rossumd624f182006-04-24 13:47:05 +0000779 Py_DECREF(encoded);
780 return -1;
781 }
Guido van Rossuma74184e2007-08-29 04:05:57 +0000782 new = bytes_iconcat(self, encoded);
783 Py_DECREF(encoded);
784 if (new == NULL)
785 return -1;
786 Py_DECREF(new);
787 return 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000788 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000789
Guido van Rossumd624f182006-04-24 13:47:05 +0000790 /* If it's not unicode, there can't be encoding or errors */
791 if (encoding != NULL || errors != NULL) {
792 PyErr_SetString(PyExc_TypeError,
793 "encoding or errors without a string argument");
794 return -1;
795 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000796
Guido van Rossumd624f182006-04-24 13:47:05 +0000797 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000798 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000799 if (count == -1 && PyErr_Occurred())
800 PyErr_Clear();
801 else {
802 if (count < 0) {
803 PyErr_SetString(PyExc_ValueError, "negative count");
804 return -1;
805 }
806 if (count > 0) {
807 if (PyBytes_Resize((PyObject *)self, count))
808 return -1;
809 memset(self->ob_bytes, 0, count);
810 }
811 return 0;
812 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000813
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000814 /* Use the modern buffer interface */
815 if (PyObject_CheckBuffer(arg)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000816 Py_ssize_t size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000817 Py_buffer view;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000818 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000819 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000820 size = view.len;
821 if (PyBytes_Resize((PyObject *)self, size) < 0) goto fail;
822 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
823 goto fail;
824 PyObject_ReleaseBuffer(arg, &view);
Guido van Rossumd624f182006-04-24 13:47:05 +0000825 return 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000826 fail:
827 PyObject_ReleaseBuffer(arg, &view);
828 return -1;
Guido van Rossumd624f182006-04-24 13:47:05 +0000829 }
830
831 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000832
833 /* Get the iterator */
834 it = PyObject_GetIter(arg);
835 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000836 return -1;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000837 iternext = *Py_Type(it)->tp_iternext;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000838
839 /* Run the iterator to exhaustion */
840 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000841 PyObject *item;
842 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000843
Guido van Rossumd624f182006-04-24 13:47:05 +0000844 /* Get the next item */
845 item = iternext(it);
846 if (item == NULL) {
847 if (PyErr_Occurred()) {
848 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
849 goto error;
850 PyErr_Clear();
851 }
852 break;
853 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000854
Guido van Rossumd624f182006-04-24 13:47:05 +0000855 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000856 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000857 Py_DECREF(item);
858 if (value == -1 && PyErr_Occurred())
859 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000860
Guido van Rossumd624f182006-04-24 13:47:05 +0000861 /* Range check */
862 if (value < 0 || value >= 256) {
863 PyErr_SetString(PyExc_ValueError,
864 "bytes must be in range(0, 256)");
865 goto error;
866 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000867
Guido van Rossumd624f182006-04-24 13:47:05 +0000868 /* Append the byte */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000869 if (Py_Size(self) < self->ob_alloc)
870 Py_Size(self)++;
871 else if (PyBytes_Resize((PyObject *)self, Py_Size(self)+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000872 goto error;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000873 self->ob_bytes[Py_Size(self)-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000874 }
875
876 /* Clean up and return success */
877 Py_DECREF(it);
878 return 0;
879
880 error:
881 /* Error handling when it != NULL */
882 Py_DECREF(it);
883 return -1;
884}
885
Georg Brandlee91be42007-02-24 19:41:35 +0000886/* Mostly copied from string_repr, but without the
887 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000888static PyObject *
889bytes_repr(PyBytesObject *self)
890{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000891 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000892 size_t newsize = 3 + 4 * Py_Size(self);
Georg Brandlee91be42007-02-24 19:41:35 +0000893 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000894 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(self)) {
Georg Brandlee91be42007-02-24 19:41:35 +0000895 PyErr_SetString(PyExc_OverflowError,
896 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000897 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000898 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000899 v = PyUnicode_FromUnicode(NULL, newsize);
Georg Brandlee91be42007-02-24 19:41:35 +0000900 if (v == NULL) {
901 return NULL;
902 }
903 else {
904 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000905 register Py_UNICODE c;
906 register Py_UNICODE *p;
Georg Brandlee91be42007-02-24 19:41:35 +0000907 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000908
Walter Dörwald1ab83302007-05-18 17:15:44 +0000909 p = PyUnicode_AS_UNICODE(v);
Georg Brandlee91be42007-02-24 19:41:35 +0000910 *p++ = 'b';
911 *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000912 for (i = 0; i < Py_Size(self); i++) {
Georg Brandlee91be42007-02-24 19:41:35 +0000913 /* There's at least enough room for a hex escape
914 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000915 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Georg Brandlee91be42007-02-24 19:41:35 +0000916 c = self->ob_bytes[i];
917 if (c == quote || c == '\\')
918 *p++ = '\\', *p++ = c;
919 else if (c == '\t')
920 *p++ = '\\', *p++ = 't';
921 else if (c == '\n')
922 *p++ = '\\', *p++ = 'n';
923 else if (c == '\r')
924 *p++ = '\\', *p++ = 'r';
925 else if (c == 0)
Guido van Rossum57b93ad2007-05-08 19:09:34 +0000926 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
Georg Brandlee91be42007-02-24 19:41:35 +0000927 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000928 *p++ = '\\';
929 *p++ = 'x';
930 *p++ = hexdigits[(c & 0xf0) >> 4];
931 *p++ = hexdigits[c & 0xf];
Georg Brandlee91be42007-02-24 19:41:35 +0000932 }
933 else
934 *p++ = c;
935 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000936 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Georg Brandlee91be42007-02-24 19:41:35 +0000937 *p++ = quote;
938 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000939 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
940 Py_DECREF(v);
941 return NULL;
942 }
Georg Brandlee91be42007-02-24 19:41:35 +0000943 return v;
944 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000945}
946
947static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000948bytes_str(PyBytesObject *self)
949{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000950 return PyString_FromStringAndSize(self->ob_bytes, Py_Size(self));
Guido van Rossumd624f182006-04-24 13:47:05 +0000951}
952
953static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000954bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000955{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000956 Py_ssize_t self_size, other_size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000957 Py_buffer self_bytes, other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000958 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000959 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000960 int cmp;
961
Jeremy Hylton18c3ff82007-08-29 18:47:16 +0000962 /* Bytes can be compared to anything that supports the (binary)
963 buffer API. Except that a comparison with Unicode is always an
964 error, even if the comparison is for equality. */
965 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
966 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
Guido van Rossum1e35e762007-10-09 17:21:10 +0000967 Py_INCREF(Py_NotImplemented);
968 return Py_NotImplemented;
Jeremy Hylton18c3ff82007-08-29 18:47:16 +0000969 }
Guido van Rossumebea9be2007-04-09 00:49:13 +0000970
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000971 self_size = _getbuffer(self, &self_bytes);
972 if (self_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000973 PyErr_Clear();
Guido van Rossumebea9be2007-04-09 00:49:13 +0000974 Py_INCREF(Py_NotImplemented);
975 return Py_NotImplemented;
976 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000977
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000978 other_size = _getbuffer(other, &other_bytes);
979 if (other_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000980 PyErr_Clear();
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000981 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossumd624f182006-04-24 13:47:05 +0000982 Py_INCREF(Py_NotImplemented);
983 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000984 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000985
986 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000987 /* Shortcut: if the lengths differ, the objects differ */
988 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000989 }
990 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000991 minsize = self_size;
992 if (other_size < minsize)
993 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000994
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000995 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000996 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000997
Guido van Rossumd624f182006-04-24 13:47:05 +0000998 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000999 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +00001000 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +00001001 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +00001002 cmp = 1;
1003 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001004
Guido van Rossumd624f182006-04-24 13:47:05 +00001005 switch (op) {
1006 case Py_LT: cmp = cmp < 0; break;
1007 case Py_LE: cmp = cmp <= 0; break;
1008 case Py_EQ: cmp = cmp == 0; break;
1009 case Py_NE: cmp = cmp != 0; break;
1010 case Py_GT: cmp = cmp > 0; break;
1011 case Py_GE: cmp = cmp >= 0; break;
1012 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001013 }
1014
1015 res = cmp ? Py_True : Py_False;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001016 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossum75d38e92007-08-24 17:33:11 +00001017 PyObject_ReleaseBuffer(other, &other_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001018 Py_INCREF(res);
1019 return res;
1020}
1021
1022static void
1023bytes_dealloc(PyBytesObject *self)
1024{
Guido van Rossumd624f182006-04-24 13:47:05 +00001025 if (self->ob_bytes != 0) {
1026 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001027 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001028 Py_Type(self)->tp_free((PyObject *)self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001029}
1030
Neal Norwitz6968b052007-02-27 19:02:19 +00001031
1032/* -------------------------------------------------------------------- */
1033/* Methods */
1034
1035#define STRINGLIB_CHAR char
1036#define STRINGLIB_CMP memcmp
1037#define STRINGLIB_LEN PyBytes_GET_SIZE
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001038#define STRINGLIB_STR PyBytes_AS_STRING
Neal Norwitz6968b052007-02-27 19:02:19 +00001039#define STRINGLIB_NEW PyBytes_FromStringAndSize
1040#define STRINGLIB_EMPTY nullbytes
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001041#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
1042#define STRINGLIB_MUTABLE 1
Neal Norwitz6968b052007-02-27 19:02:19 +00001043
1044#include "stringlib/fastsearch.h"
1045#include "stringlib/count.h"
1046#include "stringlib/find.h"
1047#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001048#include "stringlib/ctype.h"
1049#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001050
1051
1052/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1053were copied from the old char* style string object. */
1054
1055Py_LOCAL_INLINE(void)
1056_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1057{
1058 if (*end > len)
1059 *end = len;
1060 else if (*end < 0)
1061 *end += len;
1062 if (*end < 0)
1063 *end = 0;
1064 if (*start < 0)
1065 *start += len;
1066 if (*start < 0)
1067 *start = 0;
1068}
1069
1070
1071Py_LOCAL_INLINE(Py_ssize_t)
1072bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1073{
1074 PyObject *subobj;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00001075 Py_buffer subbuf;
Neal Norwitz6968b052007-02-27 19:02:19 +00001076 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Guido van Rossum06b8b022007-08-31 13:48:41 +00001077 Py_ssize_t res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001078
1079 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1080 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1081 return -2;
Guido van Rossum06b8b022007-08-31 13:48:41 +00001082 if (_getbuffer(subobj, &subbuf) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001083 return -2;
Neal Norwitz6968b052007-02-27 19:02:19 +00001084 if (dir > 0)
Guido van Rossum06b8b022007-08-31 13:48:41 +00001085 res = stringlib_find_slice(
Neal Norwitz6968b052007-02-27 19:02:19 +00001086 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossum06b8b022007-08-31 13:48:41 +00001087 subbuf.buf, subbuf.len, start, end);
Neal Norwitz6968b052007-02-27 19:02:19 +00001088 else
Guido van Rossum06b8b022007-08-31 13:48:41 +00001089 res = stringlib_rfind_slice(
Neal Norwitz6968b052007-02-27 19:02:19 +00001090 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossum06b8b022007-08-31 13:48:41 +00001091 subbuf.buf, subbuf.len, start, end);
1092 PyObject_ReleaseBuffer(subobj, &subbuf);
1093 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001094}
1095
Neal Norwitz6968b052007-02-27 19:02:19 +00001096PyDoc_STRVAR(find__doc__,
1097"B.find(sub [,start [,end]]) -> int\n\
1098\n\
1099Return the lowest index in B where subsection sub is found,\n\
1100such that sub is contained within s[start,end]. Optional\n\
1101arguments start and end are interpreted as in slice notation.\n\
1102\n\
1103Return -1 on failure.");
1104
1105static PyObject *
1106bytes_find(PyBytesObject *self, PyObject *args)
1107{
1108 Py_ssize_t result = bytes_find_internal(self, args, +1);
1109 if (result == -2)
1110 return NULL;
1111 return PyInt_FromSsize_t(result);
1112}
1113
1114PyDoc_STRVAR(count__doc__,
1115"B.count(sub[, start[, end]]) -> int\n\
1116\n\
1117Return the number of non-overlapping occurrences of subsection sub in\n\
1118bytes B[start:end]. Optional arguments start and end are interpreted\n\
1119as in slice notation.");
1120
1121static PyObject *
1122bytes_count(PyBytesObject *self, PyObject *args)
1123{
1124 PyObject *sub_obj;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001125 const char *str = PyBytes_AS_STRING(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00001126 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001127 Py_buffer vsub;
1128 PyObject *count_obj;
Neal Norwitz6968b052007-02-27 19:02:19 +00001129
1130 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1131 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1132 return NULL;
1133
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001134 if (_getbuffer(sub_obj, &vsub) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001135 return NULL;
1136
Martin v. Löwis5b222132007-06-10 09:51:05 +00001137 _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Neal Norwitz6968b052007-02-27 19:02:19 +00001138
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001139 count_obj = PyInt_FromSsize_t(
1140 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
Neal Norwitz6968b052007-02-27 19:02:19 +00001141 );
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001142 PyObject_ReleaseBuffer(sub_obj, &vsub);
1143 return count_obj;
Neal Norwitz6968b052007-02-27 19:02:19 +00001144}
1145
1146
1147PyDoc_STRVAR(index__doc__,
1148"B.index(sub [,start [,end]]) -> int\n\
1149\n\
1150Like B.find() but raise ValueError when the subsection is not found.");
1151
1152static PyObject *
1153bytes_index(PyBytesObject *self, PyObject *args)
1154{
1155 Py_ssize_t result = bytes_find_internal(self, args, +1);
1156 if (result == -2)
1157 return NULL;
1158 if (result == -1) {
1159 PyErr_SetString(PyExc_ValueError,
1160 "subsection not found");
1161 return NULL;
1162 }
1163 return PyInt_FromSsize_t(result);
1164}
1165
1166
1167PyDoc_STRVAR(rfind__doc__,
1168"B.rfind(sub [,start [,end]]) -> int\n\
1169\n\
1170Return the highest index in B where subsection sub is found,\n\
1171such that sub is contained within s[start,end]. Optional\n\
1172arguments start and end are interpreted as in slice notation.\n\
1173\n\
1174Return -1 on failure.");
1175
1176static PyObject *
1177bytes_rfind(PyBytesObject *self, PyObject *args)
1178{
1179 Py_ssize_t result = bytes_find_internal(self, args, -1);
1180 if (result == -2)
1181 return NULL;
1182 return PyInt_FromSsize_t(result);
1183}
1184
1185
1186PyDoc_STRVAR(rindex__doc__,
1187"B.rindex(sub [,start [,end]]) -> int\n\
1188\n\
1189Like B.rfind() but raise ValueError when the subsection is not found.");
1190
1191static PyObject *
1192bytes_rindex(PyBytesObject *self, PyObject *args)
1193{
1194 Py_ssize_t result = bytes_find_internal(self, args, -1);
1195 if (result == -2)
1196 return NULL;
1197 if (result == -1) {
1198 PyErr_SetString(PyExc_ValueError,
1199 "subsection not found");
1200 return NULL;
1201 }
1202 return PyInt_FromSsize_t(result);
1203}
1204
1205
1206/* Matches the end (direction >= 0) or start (direction < 0) of self
1207 * against substr, using the start and end arguments. Returns
1208 * -1 on error, 0 if not found and 1 if found.
1209 */
1210Py_LOCAL(int)
1211_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1212 Py_ssize_t end, int direction)
1213{
1214 Py_ssize_t len = PyBytes_GET_SIZE(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00001215 const char* str;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001216 Py_buffer vsubstr;
Guido van Rossum40d20bc2007-10-22 00:09:51 +00001217 int rv = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001218
Neal Norwitz6968b052007-02-27 19:02:19 +00001219 str = PyBytes_AS_STRING(self);
1220
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001221 if (_getbuffer(substr, &vsubstr) < 0)
1222 return -1;
1223
Neal Norwitz6968b052007-02-27 19:02:19 +00001224 _adjust_indices(&start, &end, len);
1225
1226 if (direction < 0) {
1227 /* startswith */
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001228 if (start+vsubstr.len > len) {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001229 goto done;
1230 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001231 } else {
1232 /* endswith */
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001233 if (end-start < vsubstr.len || start > len) {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001234 goto done;
1235 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001236
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001237 if (end-vsubstr.len > start)
1238 start = end - vsubstr.len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001239 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001240 if (end-start >= vsubstr.len)
1241 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1242
1243done:
1244 PyObject_ReleaseBuffer(substr, &vsubstr);
1245 return rv;
Neal Norwitz6968b052007-02-27 19:02:19 +00001246}
1247
1248
1249PyDoc_STRVAR(startswith__doc__,
1250"B.startswith(prefix[, start[, end]]) -> bool\n\
1251\n\
1252Return True if B starts with the specified prefix, False otherwise.\n\
1253With optional start, test B beginning at that position.\n\
1254With optional end, stop comparing B at that position.\n\
1255prefix can also be a tuple of strings to try.");
1256
1257static PyObject *
1258bytes_startswith(PyBytesObject *self, PyObject *args)
1259{
1260 Py_ssize_t start = 0;
1261 Py_ssize_t end = PY_SSIZE_T_MAX;
1262 PyObject *subobj;
1263 int result;
1264
1265 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1266 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1267 return NULL;
1268 if (PyTuple_Check(subobj)) {
1269 Py_ssize_t i;
1270 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1271 result = _bytes_tailmatch(self,
1272 PyTuple_GET_ITEM(subobj, i),
1273 start, end, -1);
1274 if (result == -1)
1275 return NULL;
1276 else if (result) {
1277 Py_RETURN_TRUE;
1278 }
1279 }
1280 Py_RETURN_FALSE;
1281 }
1282 result = _bytes_tailmatch(self, subobj, start, end, -1);
1283 if (result == -1)
1284 return NULL;
1285 else
1286 return PyBool_FromLong(result);
1287}
1288
1289PyDoc_STRVAR(endswith__doc__,
1290"B.endswith(suffix[, start[, end]]) -> bool\n\
1291\n\
1292Return True if B ends with the specified suffix, False otherwise.\n\
1293With optional start, test B beginning at that position.\n\
1294With optional end, stop comparing B at that position.\n\
1295suffix can also be a tuple of strings to try.");
1296
1297static PyObject *
1298bytes_endswith(PyBytesObject *self, PyObject *args)
1299{
1300 Py_ssize_t start = 0;
1301 Py_ssize_t end = PY_SSIZE_T_MAX;
1302 PyObject *subobj;
1303 int result;
1304
1305 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1306 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1307 return NULL;
1308 if (PyTuple_Check(subobj)) {
1309 Py_ssize_t i;
1310 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1311 result = _bytes_tailmatch(self,
1312 PyTuple_GET_ITEM(subobj, i),
1313 start, end, +1);
1314 if (result == -1)
1315 return NULL;
1316 else if (result) {
1317 Py_RETURN_TRUE;
1318 }
1319 }
1320 Py_RETURN_FALSE;
1321 }
1322 result = _bytes_tailmatch(self, subobj, start, end, +1);
1323 if (result == -1)
1324 return NULL;
1325 else
1326 return PyBool_FromLong(result);
1327}
1328
1329
Neal Norwitz6968b052007-02-27 19:02:19 +00001330PyDoc_STRVAR(translate__doc__,
1331"B.translate(table [,deletechars]) -> bytes\n\
1332\n\
1333Return a copy of the bytes B, where all characters occurring\n\
1334in the optional argument deletechars are removed, and the\n\
1335remaining characters have been mapped through the given\n\
1336translation table, which must be a bytes of length 256.");
1337
1338static PyObject *
1339bytes_translate(PyBytesObject *self, PyObject *args)
1340{
1341 register char *input, *output;
1342 register const char *table;
1343 register Py_ssize_t i, c, changed = 0;
1344 PyObject *input_obj = (PyObject*)self;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001345 const char *output_start;
1346 Py_ssize_t inlen;
Neal Norwitz6968b052007-02-27 19:02:19 +00001347 PyObject *result;
1348 int trans_table[256];
1349 PyObject *tableobj, *delobj = NULL;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001350 Py_buffer vtable, vdel;
Neal Norwitz6968b052007-02-27 19:02:19 +00001351
1352 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1353 &tableobj, &delobj))
1354 return NULL;
1355
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001356 if (_getbuffer(tableobj, &vtable) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001357 return NULL;
1358
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001359 if (vtable.len != 256) {
Neal Norwitz6968b052007-02-27 19:02:19 +00001360 PyErr_SetString(PyExc_ValueError,
1361 "translation table must be 256 characters long");
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001362 result = NULL;
1363 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001364 }
1365
1366 if (delobj != NULL) {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001367 if (_getbuffer(delobj, &vdel) < 0) {
1368 result = NULL;
1369 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001370 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001371 }
1372 else {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001373 vdel.buf = NULL;
1374 vdel.len = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001375 }
1376
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001377 table = (const char *)vtable.buf;
Neal Norwitz6968b052007-02-27 19:02:19 +00001378 inlen = PyBytes_GET_SIZE(input_obj);
1379 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1380 if (result == NULL)
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001381 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001382 output_start = output = PyBytes_AsString(result);
1383 input = PyBytes_AS_STRING(input_obj);
1384
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001385 if (vdel.len == 0) {
Neal Norwitz6968b052007-02-27 19:02:19 +00001386 /* If no deletions are required, use faster code */
1387 for (i = inlen; --i >= 0; ) {
1388 c = Py_CHARMASK(*input++);
1389 if (Py_CHARMASK((*output++ = table[c])) != c)
1390 changed = 1;
1391 }
1392 if (changed || !PyBytes_CheckExact(input_obj))
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001393 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001394 Py_DECREF(result);
1395 Py_INCREF(input_obj);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001396 result = input_obj;
1397 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001398 }
1399
1400 for (i = 0; i < 256; i++)
1401 trans_table[i] = Py_CHARMASK(table[i]);
1402
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001403 for (i = 0; i < vdel.len; i++)
1404 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001405
1406 for (i = inlen; --i >= 0; ) {
1407 c = Py_CHARMASK(*input++);
1408 if (trans_table[c] != -1)
1409 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1410 continue;
1411 changed = 1;
1412 }
1413 if (!changed && PyBytes_CheckExact(input_obj)) {
1414 Py_DECREF(result);
1415 Py_INCREF(input_obj);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001416 result = input_obj;
1417 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001418 }
1419 /* Fix the size of the resulting string */
1420 if (inlen > 0)
1421 PyBytes_Resize(result, output - output_start);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001422
1423done:
1424 PyObject_ReleaseBuffer(tableobj, &vtable);
1425 if (delobj != NULL)
1426 PyObject_ReleaseBuffer(delobj, &vdel);
Neal Norwitz6968b052007-02-27 19:02:19 +00001427 return result;
1428}
1429
1430
/* Search directions.  REVERSE is parenthesized so it stays a single
   operand wherever it is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c in the first target_len bytes
   of target, or NULL when there is none (thin wrapper around memchr). */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* Don't call if length < 2 */
/* True when the length bytes at target+offset equal pattern.  The first
   and last bytes are compared directly and memcmp() covers the remaining
   length-1 bytes after the first; hence the length >= 2 requirement.
   Arguments are parenthesized so expressions may be passed, but they are
   evaluated more than once: do not pass expressions with side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[(offset)] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1444
1445
1446/* Bytes ops must return a string. */
1447/* If the object is subclass of bytes, create a copy */
1448Py_LOCAL(PyBytesObject *)
1449return_self(PyBytesObject *self)
1450{
1451 if (PyBytes_CheckExact(self)) {
1452 Py_INCREF(self);
1453 return (PyBytesObject *)self;
1454 }
1455 return (PyBytesObject *)PyBytes_FromStringAndSize(
1456 PyBytes_AS_STRING(self),
1457 PyBytes_GET_SIZE(self));
1458}
1459
1460Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitz61ec0d32007-10-26 06:44:10 +00001461countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Neal Norwitz6968b052007-02-27 19:02:19 +00001462{
1463 Py_ssize_t count=0;
1464 const char *start=target;
1465 const char *end=target+target_len;
1466
1467 while ( (start=findchar(start, end-start, c)) != NULL ) {
1468 count++;
1469 if (count >= maxcount)
1470 break;
1471 start += 1;
1472 }
1473 return count;
1474}
1475
1476Py_LOCAL(Py_ssize_t)
1477findstring(const char *target, Py_ssize_t target_len,
1478 const char *pattern, Py_ssize_t pattern_len,
1479 Py_ssize_t start,
1480 Py_ssize_t end,
1481 int direction)
1482{
1483 if (start < 0) {
1484 start += target_len;
1485 if (start < 0)
1486 start = 0;
1487 }
1488 if (end > target_len) {
1489 end = target_len;
1490 } else if (end < 0) {
1491 end += target_len;
1492 if (end < 0)
1493 end = 0;
1494 }
1495
1496 /* zero-length substrings always match at the first attempt */
1497 if (pattern_len == 0)
1498 return (direction > 0) ? start : end;
1499
1500 end -= pattern_len;
1501
1502 if (direction < 0) {
1503 for (; end >= start; end--)
1504 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1505 return end;
1506 } else {
1507 for (; start <= end; start++)
1508 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1509 return start;
1510 }
1511 return -1;
1512}
1513
1514Py_LOCAL_INLINE(Py_ssize_t)
1515countstring(const char *target, Py_ssize_t target_len,
1516 const char *pattern, Py_ssize_t pattern_len,
1517 Py_ssize_t start,
1518 Py_ssize_t end,
1519 int direction, Py_ssize_t maxcount)
1520{
1521 Py_ssize_t count=0;
1522
1523 if (start < 0) {
1524 start += target_len;
1525 if (start < 0)
1526 start = 0;
1527 }
1528 if (end > target_len) {
1529 end = target_len;
1530 } else if (end < 0) {
1531 end += target_len;
1532 if (end < 0)
1533 end = 0;
1534 }
1535
1536 /* zero-length substrings match everywhere */
1537 if (pattern_len == 0 || maxcount == 0) {
1538 if (target_len+1 < maxcount)
1539 return target_len+1;
1540 return maxcount;
1541 }
1542
1543 end -= pattern_len;
1544 if (direction < 0) {
1545 for (; (end >= start); end--)
1546 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1547 count++;
1548 if (--maxcount <= 0) break;
1549 end -= pattern_len-1;
1550 }
1551 } else {
1552 for (; (start <= end); start++)
1553 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1554 count++;
1555 if (--maxcount <= 0)
1556 break;
1557 start += pattern_len-1;
1558 }
1559 }
1560 return count;
1561}
1562
1563
1564/* Algorithms for different cases of string replacement */
1565
1566/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1567Py_LOCAL(PyBytesObject *)
1568replace_interleave(PyBytesObject *self,
1569 const char *to_s, Py_ssize_t to_len,
1570 Py_ssize_t maxcount)
1571{
1572 char *self_s, *result_s;
1573 Py_ssize_t self_len, result_len;
1574 Py_ssize_t count, i, product;
1575 PyBytesObject *result;
1576
1577 self_len = PyBytes_GET_SIZE(self);
1578
1579 /* 1 at the end plus 1 after every character */
1580 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001581 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001582 count = maxcount;
1583
1584 /* Check for overflow */
1585 /* result_len = count * to_len + self_len; */
1586 product = count * to_len;
1587 if (product / to_len != count) {
1588 PyErr_SetString(PyExc_OverflowError,
1589 "replace string is too long");
1590 return NULL;
1591 }
1592 result_len = product + self_len;
1593 if (result_len < 0) {
1594 PyErr_SetString(PyExc_OverflowError,
1595 "replace string is too long");
1596 return NULL;
1597 }
1598
1599 if (! (result = (PyBytesObject *)
1600 PyBytes_FromStringAndSize(NULL, result_len)) )
1601 return NULL;
1602
1603 self_s = PyBytes_AS_STRING(self);
1604 result_s = PyBytes_AS_STRING(result);
1605
1606 /* TODO: special case single character, which doesn't need memcpy */
1607
1608 /* Lay the first one down (guaranteed this will occur) */
1609 Py_MEMCPY(result_s, to_s, to_len);
1610 result_s += to_len;
1611 count -= 1;
1612
1613 for (i=0; i<count; i++) {
1614 *result_s++ = *self_s++;
1615 Py_MEMCPY(result_s, to_s, to_len);
1616 result_s += to_len;
1617 }
1618
1619 /* Copy the rest of the original string */
1620 Py_MEMCPY(result_s, self_s, self_len-i);
1621
1622 return result;
1623}
1624
1625/* Special case for deleting a single character */
1626/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1627Py_LOCAL(PyBytesObject *)
1628replace_delete_single_character(PyBytesObject *self,
1629 char from_c, Py_ssize_t maxcount)
1630{
1631 char *self_s, *result_s;
1632 char *start, *next, *end;
1633 Py_ssize_t self_len, result_len;
1634 Py_ssize_t count;
1635 PyBytesObject *result;
1636
1637 self_len = PyBytes_GET_SIZE(self);
1638 self_s = PyBytes_AS_STRING(self);
1639
1640 count = countchar(self_s, self_len, from_c, maxcount);
1641 if (count == 0) {
1642 return return_self(self);
1643 }
1644
1645 result_len = self_len - count; /* from_len == 1 */
1646 assert(result_len>=0);
1647
1648 if ( (result = (PyBytesObject *)
1649 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1650 return NULL;
1651 result_s = PyBytes_AS_STRING(result);
1652
1653 start = self_s;
1654 end = self_s + self_len;
1655 while (count-- > 0) {
1656 next = findchar(start, end-start, from_c);
1657 if (next == NULL)
1658 break;
1659 Py_MEMCPY(result_s, start, next-start);
1660 result_s += (next-start);
1661 start = next+1;
1662 }
1663 Py_MEMCPY(result_s, start, end-start);
1664
1665 return result;
1666}
1667
1668/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1669
1670Py_LOCAL(PyBytesObject *)
1671replace_delete_substring(PyBytesObject *self,
1672 const char *from_s, Py_ssize_t from_len,
1673 Py_ssize_t maxcount)
1674{
1675 char *self_s, *result_s;
1676 char *start, *next, *end;
1677 Py_ssize_t self_len, result_len;
1678 Py_ssize_t count, offset;
1679 PyBytesObject *result;
1680
1681 self_len = PyBytes_GET_SIZE(self);
1682 self_s = PyBytes_AS_STRING(self);
1683
1684 count = countstring(self_s, self_len,
1685 from_s, from_len,
1686 0, self_len, 1,
1687 maxcount);
1688
1689 if (count == 0) {
1690 /* no matches */
1691 return return_self(self);
1692 }
1693
1694 result_len = self_len - (count * from_len);
1695 assert (result_len>=0);
1696
1697 if ( (result = (PyBytesObject *)
1698 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1699 return NULL;
1700
1701 result_s = PyBytes_AS_STRING(result);
1702
1703 start = self_s;
1704 end = self_s + self_len;
1705 while (count-- > 0) {
1706 offset = findstring(start, end-start,
1707 from_s, from_len,
1708 0, end-start, FORWARD);
1709 if (offset == -1)
1710 break;
1711 next = start + offset;
1712
1713 Py_MEMCPY(result_s, start, next-start);
1714
1715 result_s += (next-start);
1716 start = next+from_len;
1717 }
1718 Py_MEMCPY(result_s, start, end-start);
1719 return result;
1720}
1721
1722/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1723Py_LOCAL(PyBytesObject *)
1724replace_single_character_in_place(PyBytesObject *self,
1725 char from_c, char to_c,
1726 Py_ssize_t maxcount)
1727{
1728 char *self_s, *result_s, *start, *end, *next;
1729 Py_ssize_t self_len;
1730 PyBytesObject *result;
1731
1732 /* The result string will be the same size */
1733 self_s = PyBytes_AS_STRING(self);
1734 self_len = PyBytes_GET_SIZE(self);
1735
1736 next = findchar(self_s, self_len, from_c);
1737
1738 if (next == NULL) {
1739 /* No matches; return the original bytes */
1740 return return_self(self);
1741 }
1742
1743 /* Need to make a new bytes */
1744 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1745 if (result == NULL)
1746 return NULL;
1747 result_s = PyBytes_AS_STRING(result);
1748 Py_MEMCPY(result_s, self_s, self_len);
1749
1750 /* change everything in-place, starting with this one */
1751 start = result_s + (next-self_s);
1752 *start = to_c;
1753 start++;
1754 end = result_s + self_len;
1755
1756 while (--maxcount > 0) {
1757 next = findchar(start, end-start, from_c);
1758 if (next == NULL)
1759 break;
1760 *next = to_c;
1761 start = next+1;
1762 }
1763
1764 return result;
1765}
1766
1767/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1768Py_LOCAL(PyBytesObject *)
1769replace_substring_in_place(PyBytesObject *self,
1770 const char *from_s, Py_ssize_t from_len,
1771 const char *to_s, Py_ssize_t to_len,
1772 Py_ssize_t maxcount)
1773{
1774 char *result_s, *start, *end;
1775 char *self_s;
1776 Py_ssize_t self_len, offset;
1777 PyBytesObject *result;
1778
1779 /* The result bytes will be the same size */
1780
1781 self_s = PyBytes_AS_STRING(self);
1782 self_len = PyBytes_GET_SIZE(self);
1783
1784 offset = findstring(self_s, self_len,
1785 from_s, from_len,
1786 0, self_len, FORWARD);
1787 if (offset == -1) {
1788 /* No matches; return the original bytes */
1789 return return_self(self);
1790 }
1791
1792 /* Need to make a new bytes */
1793 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1794 if (result == NULL)
1795 return NULL;
1796 result_s = PyBytes_AS_STRING(result);
1797 Py_MEMCPY(result_s, self_s, self_len);
1798
1799 /* change everything in-place, starting with this one */
1800 start = result_s + offset;
1801 Py_MEMCPY(start, to_s, from_len);
1802 start += from_len;
1803 end = result_s + self_len;
1804
1805 while ( --maxcount > 0) {
1806 offset = findstring(start, end-start,
1807 from_s, from_len,
1808 0, end-start, FORWARD);
1809 if (offset==-1)
1810 break;
1811 Py_MEMCPY(start+offset, to_s, from_len);
1812 start += offset+from_len;
1813 }
1814
1815 return result;
1816}
1817
1818/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1819Py_LOCAL(PyBytesObject *)
1820replace_single_character(PyBytesObject *self,
1821 char from_c,
1822 const char *to_s, Py_ssize_t to_len,
1823 Py_ssize_t maxcount)
1824{
1825 char *self_s, *result_s;
1826 char *start, *next, *end;
1827 Py_ssize_t self_len, result_len;
1828 Py_ssize_t count, product;
1829 PyBytesObject *result;
1830
1831 self_s = PyBytes_AS_STRING(self);
1832 self_len = PyBytes_GET_SIZE(self);
1833
1834 count = countchar(self_s, self_len, from_c, maxcount);
1835 if (count == 0) {
1836 /* no matches, return unchanged */
1837 return return_self(self);
1838 }
1839
1840 /* use the difference between current and new, hence the "-1" */
1841 /* result_len = self_len + count * (to_len-1) */
1842 product = count * (to_len-1);
1843 if (product / (to_len-1) != count) {
1844 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1845 return NULL;
1846 }
1847 result_len = self_len + product;
1848 if (result_len < 0) {
1849 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1850 return NULL;
1851 }
1852
1853 if ( (result = (PyBytesObject *)
1854 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1855 return NULL;
1856 result_s = PyBytes_AS_STRING(result);
1857
1858 start = self_s;
1859 end = self_s + self_len;
1860 while (count-- > 0) {
1861 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001862 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001863 break;
1864
1865 if (next == start) {
1866 /* replace with the 'to' */
1867 Py_MEMCPY(result_s, to_s, to_len);
1868 result_s += to_len;
1869 start += 1;
1870 } else {
1871 /* copy the unchanged old then the 'to' */
1872 Py_MEMCPY(result_s, start, next-start);
1873 result_s += (next-start);
1874 Py_MEMCPY(result_s, to_s, to_len);
1875 result_s += to_len;
1876 start = next+1;
1877 }
1878 }
1879 /* Copy the remainder of the remaining bytes */
1880 Py_MEMCPY(result_s, start, end-start);
1881
1882 return result;
1883}
1884
1885/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1886Py_LOCAL(PyBytesObject *)
1887replace_substring(PyBytesObject *self,
1888 const char *from_s, Py_ssize_t from_len,
1889 const char *to_s, Py_ssize_t to_len,
1890 Py_ssize_t maxcount)
1891{
1892 char *self_s, *result_s;
1893 char *start, *next, *end;
1894 Py_ssize_t self_len, result_len;
1895 Py_ssize_t count, offset, product;
1896 PyBytesObject *result;
1897
1898 self_s = PyBytes_AS_STRING(self);
1899 self_len = PyBytes_GET_SIZE(self);
1900
1901 count = countstring(self_s, self_len,
1902 from_s, from_len,
1903 0, self_len, FORWARD, maxcount);
1904 if (count == 0) {
1905 /* no matches, return unchanged */
1906 return return_self(self);
1907 }
1908
1909 /* Check for overflow */
1910 /* result_len = self_len + count * (to_len-from_len) */
1911 product = count * (to_len-from_len);
1912 if (product / (to_len-from_len) != count) {
1913 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1914 return NULL;
1915 }
1916 result_len = self_len + product;
1917 if (result_len < 0) {
1918 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1919 return NULL;
1920 }
1921
1922 if ( (result = (PyBytesObject *)
1923 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1924 return NULL;
1925 result_s = PyBytes_AS_STRING(result);
1926
1927 start = self_s;
1928 end = self_s + self_len;
1929 while (count-- > 0) {
1930 offset = findstring(start, end-start,
1931 from_s, from_len,
1932 0, end-start, FORWARD);
1933 if (offset == -1)
1934 break;
1935 next = start+offset;
1936 if (next == start) {
1937 /* replace with the 'to' */
1938 Py_MEMCPY(result_s, to_s, to_len);
1939 result_s += to_len;
1940 start += from_len;
1941 } else {
1942 /* copy the unchanged old then the 'to' */
1943 Py_MEMCPY(result_s, start, next-start);
1944 result_s += (next-start);
1945 Py_MEMCPY(result_s, to_s, to_len);
1946 result_s += to_len;
1947 start = next+from_len;
1948 }
1949 }
1950 /* Copy the remainder of the remaining bytes */
1951 Py_MEMCPY(result_s, start, end-start);
1952
1953 return result;
1954}
1955
1956
1957Py_LOCAL(PyBytesObject *)
1958replace(PyBytesObject *self,
1959 const char *from_s, Py_ssize_t from_len,
1960 const char *to_s, Py_ssize_t to_len,
1961 Py_ssize_t maxcount)
1962{
1963 if (maxcount < 0) {
1964 maxcount = PY_SSIZE_T_MAX;
1965 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1966 /* nothing to do; return the original bytes */
1967 return return_self(self);
1968 }
1969
1970 if (maxcount == 0 ||
1971 (from_len == 0 && to_len == 0)) {
1972 /* nothing to do; return the original bytes */
1973 return return_self(self);
1974 }
1975
1976 /* Handle zero-length special cases */
1977
1978 if (from_len == 0) {
1979 /* insert the 'to' bytes everywhere. */
1980 /* >>> "Python".replace("", ".") */
1981 /* '.P.y.t.h.o.n.' */
1982 return replace_interleave(self, to_s, to_len, maxcount);
1983 }
1984
1985 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1986 /* point for an empty self bytes to generate a non-empty bytes */
1987 /* Special case so the remaining code always gets a non-empty bytes */
1988 if (PyBytes_GET_SIZE(self) == 0) {
1989 return return_self(self);
1990 }
1991
1992 if (to_len == 0) {
1993 /* delete all occurances of 'from' bytes */
1994 if (from_len == 1) {
1995 return replace_delete_single_character(
1996 self, from_s[0], maxcount);
1997 } else {
1998 return replace_delete_substring(self, from_s, from_len, maxcount);
1999 }
2000 }
2001
2002 /* Handle special case where both bytes have the same length */
2003
2004 if (from_len == to_len) {
2005 if (from_len == 1) {
2006 return replace_single_character_in_place(
2007 self,
2008 from_s[0],
2009 to_s[0],
2010 maxcount);
2011 } else {
2012 return replace_substring_in_place(
2013 self, from_s, from_len, to_s, to_len, maxcount);
2014 }
2015 }
2016
2017 /* Otherwise use the more generic algorithms */
2018 if (from_len == 1) {
2019 return replace_single_character(self, from_s[0],
2020 to_s, to_len, maxcount);
2021 } else {
2022 /* len('from')>=2, len('to')>=1 */
2023 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2024 }
2025}
2026
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002027
Neal Norwitz6968b052007-02-27 19:02:19 +00002028PyDoc_STRVAR(replace__doc__,
2029"B.replace (old, new[, count]) -> bytes\n\
2030\n\
2031Return a copy of bytes B with all occurrences of subsection\n\
2032old replaced by new. If the optional argument count is\n\
2033given, only the first count occurrences are replaced.");
2034
2035static PyObject *
2036bytes_replace(PyBytesObject *self, PyObject *args)
2037{
2038 Py_ssize_t count = -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002039 PyObject *from, *to, *res;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002040 Py_buffer vfrom, vto;
Neal Norwitz6968b052007-02-27 19:02:19 +00002041
2042 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2043 return NULL;
2044
Guido van Rossuma74184e2007-08-29 04:05:57 +00002045 if (_getbuffer(from, &vfrom) < 0)
2046 return NULL;
2047 if (_getbuffer(to, &vto) < 0) {
2048 PyObject_ReleaseBuffer(from, &vfrom);
2049 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002050 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002051
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002052 res = (PyObject *)replace((PyBytesObject *) self,
Guido van Rossuma74184e2007-08-29 04:05:57 +00002053 vfrom.buf, vfrom.len,
2054 vto.buf, vto.len, count);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002055
Guido van Rossuma74184e2007-08-29 04:05:57 +00002056 PyObject_ReleaseBuffer(from, &vfrom);
2057 PyObject_ReleaseBuffer(to, &vto);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002058 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00002059}
2060
2061
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesized so that an expression argument keeps its
   meaning inside the comparison and the addition. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* The macros below are not self-contained: they expect the enclosing
   function to declare `PyObject *str` and `PyObject *list`, to provide an
   `onError` label, and (SPLIT_ADD / FIX_PREALLOC_SIZE) a `count` variable
   holding the number of items stored so far. */

/* Create the slice data[left:right] and append it to `list`. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Create the slice data[left:right] and store it as item number `count`:
   directly into a list slot while count < MAX_PREALLOC, by appending
   afterwards.  Increments `count`. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Neal Norwitz6968b052007-02-27 19:02:19 +00002106
2107
2108Py_LOCAL_INLINE(PyObject *)
2109split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2110{
Guido van Rossum8f950672007-09-10 16:53:45 +00002111 register Py_ssize_t i, j, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002112 PyObject *str;
2113 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2114
2115 if (list == NULL)
2116 return NULL;
2117
2118 i = j = 0;
2119 while ((j < len) && (maxcount-- > 0)) {
Guido van Rossum8f950672007-09-10 16:53:45 +00002120 for(; j < len; j++) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002121 /* I found that using memchr makes no difference */
2122 if (s[j] == ch) {
2123 SPLIT_ADD(s, i, j);
2124 i = j = j + 1;
2125 break;
2126 }
2127 }
2128 }
2129 if (i <= len) {
2130 SPLIT_ADD(s, i, len);
2131 }
2132 FIX_PREALLOC_SIZE(list);
2133 return list;
2134
2135 onError:
2136 Py_DECREF(list);
2137 return NULL;
2138}
2139
Guido van Rossum8f950672007-09-10 16:53:45 +00002140
2141Py_LOCAL_INLINE(PyObject *)
2142split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2143{
2144 register Py_ssize_t i, j, count = 0;
2145 PyObject *str;
2146 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2147
2148 if (list == NULL)
2149 return NULL;
2150
2151 for (i = j = 0; i < len; ) {
2152 /* find a token */
2153 while (i < len && ISSPACE(s[i]))
2154 i++;
2155 j = i;
2156 while (i < len && !ISSPACE(s[i]))
2157 i++;
2158 if (j < i) {
2159 if (maxcount-- <= 0)
2160 break;
2161 SPLIT_ADD(s, j, i);
2162 while (i < len && ISSPACE(s[i]))
2163 i++;
2164 j = i;
2165 }
2166 }
2167 if (j < len) {
2168 SPLIT_ADD(s, j, len);
2169 }
2170 FIX_PREALLOC_SIZE(list);
2171 return list;
2172
2173 onError:
2174 Py_DECREF(list);
2175 return NULL;
2176}
2177
Neal Norwitz6968b052007-02-27 19:02:19 +00002178PyDoc_STRVAR(split__doc__,
Guido van Rossum8f950672007-09-10 16:53:45 +00002179"B.split([sep [, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002180\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002181Return a list of the bytes in the string B, using sep as the delimiter.\n\
2182If sep is not given, B is split on ASCII whitespace charcters\n\
2183(space, tab, return, newline, formfeed, vertical tab).\n\
2184If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002185
2186static PyObject *
2187bytes_split(PyBytesObject *self, PyObject *args)
2188{
2189 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
Guido van Rossum8f950672007-09-10 16:53:45 +00002190 Py_ssize_t maxsplit = -1, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002191 const char *s = PyBytes_AS_STRING(self), *sub;
Guido van Rossum8f950672007-09-10 16:53:45 +00002192 PyObject *list, *str, *subobj = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002193 Py_buffer vsub;
Neal Norwitz6968b052007-02-27 19:02:19 +00002194#ifdef USE_FAST
2195 Py_ssize_t pos;
2196#endif
2197
Guido van Rossum8f950672007-09-10 16:53:45 +00002198 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Neal Norwitz6968b052007-02-27 19:02:19 +00002199 return NULL;
2200 if (maxsplit < 0)
2201 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum8f950672007-09-10 16:53:45 +00002202
2203 if (subobj == Py_None)
2204 return split_whitespace(s, len, maxsplit);
2205
2206 if (_getbuffer(subobj, &vsub) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00002207 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002208 sub = vsub.buf;
2209 n = vsub.len;
Neal Norwitz6968b052007-02-27 19:02:19 +00002210
2211 if (n == 0) {
2212 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum8f950672007-09-10 16:53:45 +00002213 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002214 return NULL;
2215 }
Guido van Rossum8f950672007-09-10 16:53:45 +00002216 if (n == 1)
Neal Norwitz6968b052007-02-27 19:02:19 +00002217 return split_char(s, len, sub[0], maxsplit);
2218
2219 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum8f950672007-09-10 16:53:45 +00002220 if (list == NULL) {
2221 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002222 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002223 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002224
2225#ifdef USE_FAST
2226 i = j = 0;
2227 while (maxsplit-- > 0) {
2228 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2229 if (pos < 0)
2230 break;
2231 j = i+pos;
2232 SPLIT_ADD(s, i, j);
2233 i = j + n;
2234 }
2235#else
2236 i = j = 0;
2237 while ((j+n <= len) && (maxsplit-- > 0)) {
2238 for (; j+n <= len; j++) {
2239 if (Py_STRING_MATCH(s, j, sub, n)) {
2240 SPLIT_ADD(s, i, j);
2241 i = j = j + n;
2242 break;
2243 }
2244 }
2245 }
2246#endif
2247 SPLIT_ADD(s, i, len);
2248 FIX_PREALLOC_SIZE(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002249 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002250 return list;
2251
2252 onError:
2253 Py_DECREF(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002254 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002255 return NULL;
2256}
2257
2258PyDoc_STRVAR(partition__doc__,
2259"B.partition(sep) -> (head, sep, tail)\n\
2260\n\
2261Searches for the separator sep in B, and returns the part before it,\n\
2262the separator itself, and the part after it. If the separator is not\n\
2263found, returns B and two empty bytes.");
2264
2265static PyObject *
2266bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2267{
2268 PyObject *bytesep, *result;
2269
2270 bytesep = PyBytes_FromObject(sep_obj);
2271 if (! bytesep)
2272 return NULL;
2273
2274 result = stringlib_partition(
2275 (PyObject*) self,
2276 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002277 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002278 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2279 );
2280
2281 Py_DECREF(bytesep);
2282 return result;
2283}
2284
2285PyDoc_STRVAR(rpartition__doc__,
2286"B.rpartition(sep) -> (tail, sep, head)\n\
2287\n\
2288Searches for the separator sep in B, starting at the end of B, and returns\n\
2289the part before it, the separator itself, and the part after it. If the\n\
2290separator is not found, returns two empty bytes and B.");
2291
2292static PyObject *
2293bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2294{
2295 PyObject *bytesep, *result;
2296
2297 bytesep = PyBytes_FromObject(sep_obj);
2298 if (! bytesep)
2299 return NULL;
2300
2301 result = stringlib_rpartition(
2302 (PyObject*) self,
2303 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002304 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002305 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2306 );
2307
2308 Py_DECREF(bytesep);
2309 return result;
2310}
2311
2312Py_LOCAL_INLINE(PyObject *)
2313rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2314{
2315 register Py_ssize_t i, j, count=0;
2316 PyObject *str;
2317 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2318
2319 if (list == NULL)
2320 return NULL;
2321
2322 i = j = len - 1;
2323 while ((i >= 0) && (maxcount-- > 0)) {
2324 for (; i >= 0; i--) {
2325 if (s[i] == ch) {
2326 SPLIT_ADD(s, i + 1, j + 1);
2327 j = i = i - 1;
2328 break;
2329 }
2330 }
2331 }
2332 if (j >= -1) {
2333 SPLIT_ADD(s, 0, j + 1);
2334 }
2335 FIX_PREALLOC_SIZE(list);
2336 if (PyList_Reverse(list) < 0)
2337 goto onError;
2338
2339 return list;
2340
2341 onError:
2342 Py_DECREF(list);
2343 return NULL;
2344}
2345
Guido van Rossum8f950672007-09-10 16:53:45 +00002346Py_LOCAL_INLINE(PyObject *)
2347rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2348{
2349 register Py_ssize_t i, j, count = 0;
2350 PyObject *str;
2351 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2352
2353 if (list == NULL)
2354 return NULL;
2355
2356 for (i = j = len - 1; i >= 0; ) {
2357 /* find a token */
2358 while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
2359 i--;
2360 j = i;
2361 while (i >= 0 && !Py_UNICODE_ISSPACE(s[i]))
2362 i--;
2363 if (j > i) {
2364 if (maxcount-- <= 0)
2365 break;
2366 SPLIT_ADD(s, i + 1, j + 1);
2367 while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
2368 i--;
2369 j = i;
2370 }
2371 }
2372 if (j >= 0) {
2373 SPLIT_ADD(s, 0, j + 1);
2374 }
2375 FIX_PREALLOC_SIZE(list);
2376 if (PyList_Reverse(list) < 0)
2377 goto onError;
2378
2379 return list;
2380
2381 onError:
2382 Py_DECREF(list);
2383 return NULL;
2384}
2385
Neal Norwitz6968b052007-02-27 19:02:19 +00002386PyDoc_STRVAR(rsplit__doc__,
2387"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2388\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002389Return a list of the sections in the byte B, using sep as the delimiter,\n\
2390starting at the end of the bytes and working to the front.\n\
2391If sep is not given, B is split on ASCII whitespace characters\n\
2392(space, tab, return, newline, formfeed, vertical tab).\n\
2393If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002394
2395static PyObject *
2396bytes_rsplit(PyBytesObject *self, PyObject *args)
2397{
2398 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
Guido van Rossum8f950672007-09-10 16:53:45 +00002399 Py_ssize_t maxsplit = -1, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002400 const char *s = PyBytes_AS_STRING(self), *sub;
Guido van Rossum8f950672007-09-10 16:53:45 +00002401 PyObject *list, *str, *subobj = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002402 Py_buffer vsub;
Neal Norwitz6968b052007-02-27 19:02:19 +00002403
Guido van Rossum8f950672007-09-10 16:53:45 +00002404 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Neal Norwitz6968b052007-02-27 19:02:19 +00002405 return NULL;
2406 if (maxsplit < 0)
2407 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum8f950672007-09-10 16:53:45 +00002408
2409 if (subobj == Py_None)
2410 return rsplit_whitespace(s, len, maxsplit);
2411
2412 if (_getbuffer(subobj, &vsub) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00002413 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002414 sub = vsub.buf;
2415 n = vsub.len;
Neal Norwitz6968b052007-02-27 19:02:19 +00002416
2417 if (n == 0) {
2418 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum8f950672007-09-10 16:53:45 +00002419 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002420 return NULL;
2421 }
2422 else if (n == 1)
2423 return rsplit_char(s, len, sub[0], maxsplit);
2424
2425 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum8f950672007-09-10 16:53:45 +00002426 if (list == NULL) {
2427 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002428 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002429 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002430
2431 j = len;
2432 i = j - n;
2433
2434 while ( (i >= 0) && (maxsplit-- > 0) ) {
2435 for (; i>=0; i--) {
2436 if (Py_STRING_MATCH(s, i, sub, n)) {
2437 SPLIT_ADD(s, i + n, j);
2438 j = i;
2439 i -= n;
2440 break;
2441 }
2442 }
2443 }
2444 SPLIT_ADD(s, 0, j);
2445 FIX_PREALLOC_SIZE(list);
2446 if (PyList_Reverse(list) < 0)
2447 goto onError;
Guido van Rossum8f950672007-09-10 16:53:45 +00002448 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002449 return list;
2450
2451onError:
2452 Py_DECREF(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002453 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002454 return NULL;
2455}
2456
2457PyDoc_STRVAR(extend__doc__,
2458"B.extend(iterable int) -> None\n\
2459\n\
2460Append all the elements from the iterator or sequence to the\n\
2461end of the bytes.");
2462static PyObject *
2463bytes_extend(PyBytesObject *self, PyObject *arg)
2464{
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002465 /* XXX(gps): The docstring says any iterable int will do but the
2466 * bytes_setslice code only accepts something supporting PEP 3118.
2467 * A list or tuple of 0 <= int <= 255 is supposed to work. */
2468 /* bug being tracked on: http://bugs.python.org/issue1283 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002469 if (bytes_setslice(self, Py_Size(self), Py_Size(self), arg) == -1)
Neal Norwitz6968b052007-02-27 19:02:19 +00002470 return NULL;
2471 Py_RETURN_NONE;
2472}
2473
2474
2475PyDoc_STRVAR(reverse__doc__,
2476"B.reverse() -> None\n\
2477\n\
2478Reverse the order of the values in bytes in place.");
2479static PyObject *
2480bytes_reverse(PyBytesObject *self, PyObject *unused)
2481{
2482 char swap, *head, *tail;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002483 Py_ssize_t i, j, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002484
2485 j = n / 2;
2486 head = self->ob_bytes;
2487 tail = head + n - 1;
2488 for (i = 0; i < j; i++) {
2489 swap = *head;
2490 *head++ = *tail;
2491 *tail-- = swap;
2492 }
2493
2494 Py_RETURN_NONE;
2495}
2496
2497PyDoc_STRVAR(insert__doc__,
2498"B.insert(index, int) -> None\n\
2499\n\
2500Insert a single item into the bytes before the given index.");
2501static PyObject *
2502bytes_insert(PyBytesObject *self, PyObject *args)
2503{
2504 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002505 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002506
2507 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2508 return NULL;
2509
2510 if (n == PY_SSIZE_T_MAX) {
2511 PyErr_SetString(PyExc_OverflowError,
2512 "cannot add more objects to bytes");
2513 return NULL;
2514 }
2515 if (value < 0 || value >= 256) {
2516 PyErr_SetString(PyExc_ValueError,
2517 "byte must be in range(0, 256)");
2518 return NULL;
2519 }
2520 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2521 return NULL;
2522
2523 if (where < 0) {
2524 where += n;
2525 if (where < 0)
2526 where = 0;
2527 }
2528 if (where > n)
2529 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002530 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002531 self->ob_bytes[where] = value;
2532
2533 Py_RETURN_NONE;
2534}
2535
2536PyDoc_STRVAR(append__doc__,
2537"B.append(int) -> None\n\
2538\n\
2539Append a single item to the end of the bytes.");
2540static PyObject *
2541bytes_append(PyBytesObject *self, PyObject *arg)
2542{
2543 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002544 Py_ssize_t n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002545
2546 if (! _getbytevalue(arg, &value))
2547 return NULL;
2548 if (n == PY_SSIZE_T_MAX) {
2549 PyErr_SetString(PyExc_OverflowError,
2550 "cannot add more objects to bytes");
2551 return NULL;
2552 }
2553 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2554 return NULL;
2555
2556 self->ob_bytes[n] = value;
2557
2558 Py_RETURN_NONE;
2559}
2560
2561PyDoc_STRVAR(pop__doc__,
2562"B.pop([index]) -> int\n\
2563\n\
2564Remove and return a single item from the bytes. If no index\n\
2565argument is give, will pop the last value.");
2566static PyObject *
2567bytes_pop(PyBytesObject *self, PyObject *args)
2568{
2569 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002570 Py_ssize_t where = -1, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002571
2572 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2573 return NULL;
2574
2575 if (n == 0) {
2576 PyErr_SetString(PyExc_OverflowError,
2577 "cannot pop an empty bytes");
2578 return NULL;
2579 }
2580 if (where < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002581 where += Py_Size(self);
2582 if (where < 0 || where >= Py_Size(self)) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002583 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2584 return NULL;
2585 }
2586
2587 value = self->ob_bytes[where];
2588 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2589 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2590 return NULL;
2591
2592 return PyInt_FromLong(value);
2593}
2594
2595PyDoc_STRVAR(remove__doc__,
2596"B.remove(int) -> None\n\
2597\n\
2598Remove the first occurance of a value in bytes");
2599static PyObject *
2600bytes_remove(PyBytesObject *self, PyObject *arg)
2601{
2602 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002603 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002604
2605 if (! _getbytevalue(arg, &value))
2606 return NULL;
2607
2608 for (where = 0; where < n; where++) {
2609 if (self->ob_bytes[where] == value)
2610 break;
2611 }
2612 if (where == n) {
2613 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2614 return NULL;
2615 }
2616
2617 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2618 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2619 return NULL;
2620
2621 Py_RETURN_NONE;
2622}
2623
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002624/* XXX These two helpers could be optimized if argsize == 1 */
2625
Neal Norwitz2bad9702007-08-27 06:19:22 +00002626static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002627lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2628 void *argptr, Py_ssize_t argsize)
2629{
2630 Py_ssize_t i = 0;
2631 while (i < mysize && memchr(argptr, myptr[i], argsize))
2632 i++;
2633 return i;
2634}
2635
Neal Norwitz2bad9702007-08-27 06:19:22 +00002636static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002637rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2638 void *argptr, Py_ssize_t argsize)
2639{
2640 Py_ssize_t i = mysize - 1;
2641 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2642 i--;
2643 return i + 1;
2644}
2645
2646PyDoc_STRVAR(strip__doc__,
Guido van Rossum8f950672007-09-10 16:53:45 +00002647"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002648\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002649Strip leading and trailing bytes contained in the argument.\n\
2650If the argument is omitted, strip ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002651static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002652bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002653{
2654 Py_ssize_t left, right, mysize, argsize;
2655 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002656 PyObject *arg = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002657 Py_buffer varg;
Guido van Rossum8f950672007-09-10 16:53:45 +00002658 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002659 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002660 if (arg == Py_None) {
2661 argptr = "\t\n\r\f\v ";
2662 argsize = 6;
2663 }
2664 else {
2665 if (_getbuffer(arg, &varg) < 0)
2666 return NULL;
2667 argptr = varg.buf;
2668 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002669 }
2670 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002671 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002672 left = lstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumeb29e9a2007-08-08 21:55:33 +00002673 if (left == mysize)
2674 right = left;
2675 else
2676 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossum8f950672007-09-10 16:53:45 +00002677 if (arg != Py_None)
2678 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002679 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2680}
2681
2682PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum8f950672007-09-10 16:53:45 +00002683"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002684\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002685Strip leading bytes contained in the argument.\n\
2686If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002687static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002688bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002689{
2690 Py_ssize_t left, right, mysize, argsize;
2691 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002692 PyObject *arg = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002693 Py_buffer varg;
Guido van Rossum8f950672007-09-10 16:53:45 +00002694 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002695 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002696 if (arg == Py_None) {
2697 argptr = "\t\n\r\f\v ";
2698 argsize = 6;
2699 }
2700 else {
2701 if (_getbuffer(arg, &varg) < 0)
2702 return NULL;
2703 argptr = varg.buf;
2704 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002705 }
2706 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002707 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002708 left = lstrip_helper(myptr, mysize, argptr, argsize);
2709 right = mysize;
Guido van Rossum8f950672007-09-10 16:53:45 +00002710 if (arg != Py_None)
2711 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002712 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2713}
2714
2715PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum8f950672007-09-10 16:53:45 +00002716"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002717\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002718Strip trailing bytes contained in the argument.\n\
2719If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002720static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002721bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002722{
2723 Py_ssize_t left, right, mysize, argsize;
2724 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002725 PyObject *arg = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002726 Py_buffer varg;
Guido van Rossum8f950672007-09-10 16:53:45 +00002727 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002728 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002729 if (arg == Py_None) {
2730 argptr = "\t\n\r\f\v ";
2731 argsize = 6;
2732 }
2733 else {
2734 if (_getbuffer(arg, &varg) < 0)
2735 return NULL;
2736 argptr = varg.buf;
2737 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002738 }
2739 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002740 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002741 left = 0;
2742 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossum8f950672007-09-10 16:53:45 +00002743 if (arg != Py_None)
2744 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002745 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2746}
Neal Norwitz6968b052007-02-27 19:02:19 +00002747
Guido van Rossumd624f182006-04-24 13:47:05 +00002748PyDoc_STRVAR(decode_doc,
2749"B.decode([encoding[,errors]]) -> unicode obect.\n\
2750\n\
2751Decodes B using the codec registered for encoding. encoding defaults\n\
2752to the default encoding. errors may be given to set a different error\n\
2753handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2754a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2755as well as any other name registerd with codecs.register_error that is\n\
2756able to handle UnicodeDecodeErrors.");
2757
2758static PyObject *
2759bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002760{
Guido van Rossumd624f182006-04-24 13:47:05 +00002761 const char *encoding = NULL;
2762 const char *errors = NULL;
2763
2764 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2765 return NULL;
2766 if (encoding == NULL)
2767 encoding = PyUnicode_GetDefaultEncoding();
2768 return PyCodec_Decode(self, encoding, errors);
2769}
2770
Guido van Rossuma0867f72006-05-05 04:34:18 +00002771PyDoc_STRVAR(alloc_doc,
2772"B.__alloc__() -> int\n\
2773\n\
2774Returns the number of bytes actually allocated.");
2775
2776static PyObject *
2777bytes_alloc(PyBytesObject *self)
2778{
2779 return PyInt_FromSsize_t(self->ob_alloc);
2780}
2781
Guido van Rossum20188312006-05-05 15:15:40 +00002782PyDoc_STRVAR(join_doc,
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002783"B.join(iterable_of_bytes) -> bytes\n\
Guido van Rossum20188312006-05-05 15:15:40 +00002784\n\
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002785Concatenates any number of bytes objects, with B in between each pair.\n\
2786Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Guido van Rossum20188312006-05-05 15:15:40 +00002787
2788static PyObject *
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002789bytes_join(PyBytesObject *self, PyObject *it)
Guido van Rossum20188312006-05-05 15:15:40 +00002790{
2791 PyObject *seq;
Martin v. Löwis5d7428b2007-07-21 18:47:48 +00002792 Py_ssize_t mysize = Py_Size(self);
Guido van Rossum20188312006-05-05 15:15:40 +00002793 Py_ssize_t i;
2794 Py_ssize_t n;
2795 PyObject **items;
2796 Py_ssize_t totalsize = 0;
2797 PyObject *result;
2798 char *dest;
2799
2800 seq = PySequence_Fast(it, "can only join an iterable");
2801 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002802 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002803 n = PySequence_Fast_GET_SIZE(seq);
2804 items = PySequence_Fast_ITEMS(seq);
2805
2806 /* Compute the total size, and check that they are all bytes */
2807 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002808 PyObject *obj = items[i];
2809 if (!PyBytes_Check(obj)) {
2810 PyErr_Format(PyExc_TypeError,
2811 "can only join an iterable of bytes "
2812 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002813 /* XXX %ld isn't right on Win64 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002814 (long)i, Py_Type(obj)->tp_name);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002815 goto error;
2816 }
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002817 if (i > 0)
2818 totalsize += mysize;
Georg Brandlb3f568f2007-02-27 08:49:18 +00002819 totalsize += PyBytes_GET_SIZE(obj);
2820 if (totalsize < 0) {
2821 PyErr_NoMemory();
2822 goto error;
2823 }
Guido van Rossum20188312006-05-05 15:15:40 +00002824 }
2825
2826 /* Allocate the result, and copy the bytes */
2827 result = PyBytes_FromStringAndSize(NULL, totalsize);
2828 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002829 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002830 dest = PyBytes_AS_STRING(result);
2831 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002832 PyObject *obj = items[i];
2833 Py_ssize_t size = PyBytes_GET_SIZE(obj);
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002834 if (i > 0) {
2835 memcpy(dest, self->ob_bytes, mysize);
2836 dest += mysize;
2837 }
Georg Brandlb3f568f2007-02-27 08:49:18 +00002838 memcpy(dest, PyBytes_AS_STRING(obj), size);
2839 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002840 }
2841
2842 /* Done */
2843 Py_DECREF(seq);
2844 return result;
2845
2846 /* Error handling */
2847 error:
2848 Py_DECREF(seq);
2849 return NULL;
2850}
2851
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002852PyDoc_STRVAR(fromhex_doc,
2853"bytes.fromhex(string) -> bytes\n\
2854\n\
2855Create a bytes object from a string of hexadecimal numbers.\n\
2856Spaces between two numbers are accepted. Example:\n\
2857bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2858
2859static int
2860hex_digit_to_int(int c)
2861{
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002862 if (ISDIGIT(c))
Georg Brandlb3f568f2007-02-27 08:49:18 +00002863 return c - '0';
2864 else {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002865 if (ISUPPER(c))
2866 c = TOLOWER(c);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002867 if (c >= 'a' && c <= 'f')
2868 return c - 'a' + 10;
2869 }
2870 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002871}
2872
2873static PyObject *
2874bytes_fromhex(PyObject *cls, PyObject *args)
2875{
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002876 PyObject *newbytes, *hexobj;
2877 char *buf;
2878 unsigned char *hex;
2879 Py_ssize_t byteslen, i, j;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002880 int top, bot;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002881 Py_buffer vhex;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002882
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002883 if (!PyArg_ParseTuple(args, "O:fromhex", &hexobj))
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002884 return NULL;
2885
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002886 if (_getbuffer(hexobj, &vhex) < 0)
2887 return NULL;
2888
2889 byteslen = vhex.len / 2; /* max length if there are no spaces */
2890 hex = vhex.buf;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002891
2892 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002893 if (!newbytes) {
2894 PyObject_ReleaseBuffer(hexobj, &vhex);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002895 return NULL;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002896 }
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002897 buf = PyBytes_AS_STRING(newbytes);
2898
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002899 for (i = j = 0; i < vhex.len; i += 2) {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002900 /* skip over spaces in the input */
2901 while (Py_CHARMASK(hex[i]) == ' ')
2902 i++;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002903 if (i >= vhex.len)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002904 break;
2905 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2906 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2907 if (top == -1 || bot == -1) {
2908 PyErr_Format(PyExc_ValueError,
2909 "non-hexadecimal number string '%c%c' found in "
2910 "fromhex() arg at position %zd",
2911 hex[i], hex[i+1], i);
2912 goto error;
2913 }
2914 buf[j++] = (top << 4) + bot;
2915 }
2916 if (PyBytes_Resize(newbytes, j) < 0)
2917 goto error;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002918 PyObject_ReleaseBuffer(hexobj, &vhex);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002919 return newbytes;
2920
2921 error:
2922 Py_DECREF(newbytes);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002923 PyObject_ReleaseBuffer(hexobj, &vhex);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002924 return NULL;
2925}
2926
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002927PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2928
2929static PyObject *
2930bytes_reduce(PyBytesObject *self)
2931{
Martin v. Löwis9c121062007-08-05 20:26:11 +00002932 PyObject *latin1;
2933 if (self->ob_bytes)
Guido van Rossuma74184e2007-08-29 04:05:57 +00002934 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
2935 Py_Size(self), NULL);
Martin v. Löwis9c121062007-08-05 20:26:11 +00002936 else
Guido van Rossuma74184e2007-08-29 04:05:57 +00002937 latin1 = PyUnicode_FromString("");
Martin v. Löwis9c121062007-08-05 20:26:11 +00002938 return Py_BuildValue("(O(Ns))", Py_Type(self), latin1, "latin-1");
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002939}
2940
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002941static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002942 (lenfunc)bytes_length, /* sq_length */
2943 (binaryfunc)bytes_concat, /* sq_concat */
2944 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2945 (ssizeargfunc)bytes_getitem, /* sq_item */
2946 0, /* sq_slice */
2947 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2948 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002949 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002950 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2951 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002952};
2953
2954static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002955 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00002956 (binaryfunc)bytes_subscript,
2957 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002958};
2959
2960static PyBufferProcs bytes_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002961 (getbufferproc)bytes_getbuffer,
2962 (releasebufferproc)bytes_releasebuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002963};
2964
2965static PyMethodDef
2966bytes_methods[] = {
Neal Norwitz6968b052007-02-27 19:02:19 +00002967 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2968 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2969 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2970 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2971 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2972 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
2973 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2974 startswith__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002975 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2976 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2977 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2978 _Py_capitalize__doc__},
2979 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2980 _Py_swapcase__doc__},
2981 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,_Py_islower__doc__},
2982 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,_Py_isupper__doc__},
2983 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,_Py_isspace__doc__},
2984 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,_Py_isdigit__doc__},
2985 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,_Py_istitle__doc__},
2986 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,_Py_isalpha__doc__},
2987 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,_Py_isalnum__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00002988 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2989 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
2990 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2991 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
2992 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2993 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2994 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
2995 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
2996 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
2997 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
2998 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
2999 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
Guido van Rossum8f950672007-09-10 16:53:45 +00003000 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
3001 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
3002 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Guido van Rossumd624f182006-04-24 13:47:05 +00003003 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00003004 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003005 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
3006 fromhex_doc},
Guido van Rossumcd6ae682007-05-09 19:52:16 +00003007 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003008 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3009 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3010 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3011 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3012 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
3013 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3014 expandtabs__doc__},
3015 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3016 splitlines__doc__},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003017 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00003018 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003019};
3020
3021PyDoc_STRVAR(bytes_doc,
3022"bytes([iterable]) -> new array of bytes.\n\
3023\n\
3024If an argument is given it must be an iterable yielding ints in range(256).");
3025
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003026static PyObject *bytes_iter(PyObject *seq);
3027
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003028PyTypeObject PyBytes_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003029 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003030 "bytes",
3031 sizeof(PyBytesObject),
3032 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00003033 (destructor)bytes_dealloc, /* tp_dealloc */
3034 0, /* tp_print */
3035 0, /* tp_getattr */
3036 0, /* tp_setattr */
3037 0, /* tp_compare */
3038 (reprfunc)bytes_repr, /* tp_repr */
3039 0, /* tp_as_number */
3040 &bytes_as_sequence, /* tp_as_sequence */
3041 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00003042 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00003043 0, /* tp_call */
3044 (reprfunc)bytes_str, /* tp_str */
3045 PyObject_GenericGetAttr, /* tp_getattro */
3046 0, /* tp_setattro */
3047 &bytes_as_buffer, /* tp_as_buffer */
Georg Brandlb3f568f2007-02-27 08:49:18 +00003048 /* bytes is 'final' or 'sealed' */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003049 Py_TPFLAGS_DEFAULT, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00003050 bytes_doc, /* tp_doc */
3051 0, /* tp_traverse */
3052 0, /* tp_clear */
3053 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3054 0, /* tp_weaklistoffset */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003055 bytes_iter, /* tp_iter */
Guido van Rossumd624f182006-04-24 13:47:05 +00003056 0, /* tp_iternext */
3057 bytes_methods, /* tp_methods */
3058 0, /* tp_members */
3059 0, /* tp_getset */
3060 0, /* tp_base */
3061 0, /* tp_dict */
3062 0, /* tp_descr_get */
3063 0, /* tp_descr_set */
3064 0, /* tp_dictoffset */
3065 (initproc)bytes_init, /* tp_init */
3066 PyType_GenericAlloc, /* tp_alloc */
3067 PyType_GenericNew, /* tp_new */
3068 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003069};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003070
3071/*********************** Bytes Iterator ****************************/
3072
3073typedef struct {
3074 PyObject_HEAD
3075 Py_ssize_t it_index;
3076 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3077} bytesiterobject;
3078
3079static void
3080bytesiter_dealloc(bytesiterobject *it)
3081{
3082 _PyObject_GC_UNTRACK(it);
3083 Py_XDECREF(it->it_seq);
3084 PyObject_GC_Del(it);
3085}
3086
3087static int
3088bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
3089{
3090 Py_VISIT(it->it_seq);
3091 return 0;
3092}
3093
3094static PyObject *
3095bytesiter_next(bytesiterobject *it)
3096{
3097 PyBytesObject *seq;
3098 PyObject *item;
3099
3100 assert(it != NULL);
3101 seq = it->it_seq;
3102 if (seq == NULL)
3103 return NULL;
3104 assert(PyBytes_Check(seq));
3105
3106 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3107 item = PyInt_FromLong(
3108 (unsigned char)seq->ob_bytes[it->it_index]);
3109 if (item != NULL)
3110 ++it->it_index;
3111 return item;
3112 }
3113
3114 Py_DECREF(seq);
3115 it->it_seq = NULL;
3116 return NULL;
3117}
3118
3119static PyObject *
3120bytesiter_length_hint(bytesiterobject *it)
3121{
3122 Py_ssize_t len = 0;
3123 if (it->it_seq)
3124 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3125 return PyInt_FromSsize_t(len);
3126}
3127
3128PyDoc_STRVAR(length_hint_doc,
3129 "Private method returning an estimate of len(list(it)).");
3130
3131static PyMethodDef bytesiter_methods[] = {
3132 {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
3133 length_hint_doc},
3134 {NULL, NULL} /* sentinel */
3135};
3136
3137PyTypeObject PyBytesIter_Type = {
3138 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3139 "bytesiterator", /* tp_name */
3140 sizeof(bytesiterobject), /* tp_basicsize */
3141 0, /* tp_itemsize */
3142 /* methods */
3143 (destructor)bytesiter_dealloc, /* tp_dealloc */
3144 0, /* tp_print */
3145 0, /* tp_getattr */
3146 0, /* tp_setattr */
3147 0, /* tp_compare */
3148 0, /* tp_repr */
3149 0, /* tp_as_number */
3150 0, /* tp_as_sequence */
3151 0, /* tp_as_mapping */
3152 0, /* tp_hash */
3153 0, /* tp_call */
3154 0, /* tp_str */
3155 PyObject_GenericGetAttr, /* tp_getattro */
3156 0, /* tp_setattro */
3157 0, /* tp_as_buffer */
3158 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3159 0, /* tp_doc */
3160 (traverseproc)bytesiter_traverse, /* tp_traverse */
3161 0, /* tp_clear */
3162 0, /* tp_richcompare */
3163 0, /* tp_weaklistoffset */
3164 PyObject_SelfIter, /* tp_iter */
3165 (iternextfunc)bytesiter_next, /* tp_iternext */
3166 bytesiter_methods, /* tp_methods */
3167 0,
3168};
3169
3170static PyObject *
3171bytes_iter(PyObject *seq)
3172{
3173 bytesiterobject *it;
3174
3175 if (!PyBytes_Check(seq)) {
3176 PyErr_BadInternalCall();
3177 return NULL;
3178 }
3179 it = PyObject_GC_New(bytesiterobject, &PyBytesIter_Type);
3180 if (it == NULL)
3181 return NULL;
3182 it->it_index = 0;
3183 Py_INCREF(seq);
3184 it->it_seq = (PyBytesObject *)seq;
3185 _PyObject_GC_TRACK(it);
3186 return (PyObject *)it;
3187}