blob: 1486c5e395c3bd468f37fa322560eb4f4858b593 [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
3/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz6968b052007-02-27 19:02:19 +00009/* The nullbytes are used by the stringlib during partition.
10 * If partition is removed from bytes, nullbytes and its helper
11 * Init/Fini should also be removed.
12 */
13static PyBytesObject *nullbytes = NULL;
14
15void
16PyBytes_Fini(void)
17{
18 Py_CLEAR(nullbytes);
19}
20
21int
22PyBytes_Init(void)
23{
24 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
25 if (nullbytes == NULL)
26 return 0;
27 nullbytes->ob_bytes = NULL;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000028 Py_Size(nullbytes) = nullbytes->ob_alloc = 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000029 nullbytes->ob_exports = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +000030 return 1;
31}
32
33/* end nullbytes support */
34
Guido van Rossumad7d8d12007-04-13 01:39:34 +000035/* Helpers */
36
37static int
38_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000039{
40 PyObject *intarg = PyNumber_Int(arg);
41 if (! intarg)
42 return 0;
43 *value = PyInt_AsLong(intarg);
44 Py_DECREF(intarg);
45 if (*value < 0 || *value >= 256) {
46 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
47 return 0;
48 }
49 return 1;
50}
51
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000052static int
53bytes_getbuffer(PyBytesObject *obj, PyBuffer *view, int flags)
Guido van Rossum75d38e92007-08-24 17:33:11 +000054{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000055 int ret;
56 void *ptr;
57 if (view == NULL) {
58 obj->ob_exports++;
59 return 0;
60 }
Guido van Rossum75d38e92007-08-24 17:33:11 +000061 if (obj->ob_bytes == NULL)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000062 ptr = "";
63 else
64 ptr = obj->ob_bytes;
65 ret = PyBuffer_FillInfo(view, ptr, Py_Size(obj), 0, flags);
66 if (ret >= 0) {
67 obj->ob_exports++;
68 }
69 return ret;
70}
71
72static void
73bytes_releasebuffer(PyBytesObject *obj, PyBuffer *view)
74{
75 obj->ob_exports--;
76}
77
Neal Norwitz2bad9702007-08-27 06:19:22 +000078static Py_ssize_t
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000079_getbuffer(PyObject *obj, PyBuffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000080{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000081 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000082
83 if (buffer == NULL ||
84 PyUnicode_Check(obj) ||
Guido van Rossuma74184e2007-08-29 04:05:57 +000085 buffer->bf_getbuffer == NULL)
86 {
87 PyErr_Format(PyExc_TypeError,
88 "Type %.100s doesn't support the buffer API",
89 Py_Type(obj)->tp_name);
90 return -1;
91 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000092
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000093 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
94 return -1;
95 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000096}
97
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000098/* Direct API functions */
99
100PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000101PyBytes_FromObject(PyObject *input)
102{
103 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
104 input, NULL);
105}
106
107PyObject *
108PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000109{
110 PyBytesObject *new;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000111 int alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000112
Guido van Rossumd624f182006-04-24 13:47:05 +0000113 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000114
115 new = PyObject_New(PyBytesObject, &PyBytes_Type);
116 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000117 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000118
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000119 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000120 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000121 alloc = 0;
122 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000123 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000124 alloc = size + 1;
125 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +0000126 if (new->ob_bytes == NULL) {
127 Py_DECREF(new);
Neal Norwitz16596dd2007-08-30 05:44:54 +0000128 return PyErr_NoMemory();
Guido van Rossumd624f182006-04-24 13:47:05 +0000129 }
130 if (bytes != NULL)
131 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000132 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133 }
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000134 Py_Size(new) = size;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000135 new->ob_alloc = alloc;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000136 new->ob_exports = 0;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000137
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000138 return (PyObject *)new;
139}
140
141Py_ssize_t
142PyBytes_Size(PyObject *self)
143{
144 assert(self != NULL);
145 assert(PyBytes_Check(self));
146
Guido van Rossum20188312006-05-05 15:15:40 +0000147 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000148}
149
150char *
151PyBytes_AsString(PyObject *self)
152{
153 assert(self != NULL);
154 assert(PyBytes_Check(self));
155
Guido van Rossum20188312006-05-05 15:15:40 +0000156 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000157}
158
159int
160PyBytes_Resize(PyObject *self, Py_ssize_t size)
161{
162 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000163 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000164
165 assert(self != NULL);
166 assert(PyBytes_Check(self));
167 assert(size >= 0);
168
Guido van Rossuma0867f72006-05-05 04:34:18 +0000169 if (size < alloc / 2) {
170 /* Major downsize; resize down to exact size */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000171 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000172 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000173 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000174 /* Within allocated size; quick exit */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000175 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000176 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000177 return 0;
178 }
179 else if (size <= alloc * 1.125) {
180 /* Moderate upsize; overallocate similar to list_resize() */
181 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
182 }
183 else {
184 /* Major upsize; resize up to exact size */
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000185 alloc = size + 1;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000186 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000187
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000188 if (((PyBytesObject *)self)->ob_exports > 0) {
189 /*
Guido van Rossuma74184e2007-08-29 04:05:57 +0000190 fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
191 ((PyBytesObject *)self)->ob_bytes);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000192 */
193 PyErr_SetString(PyExc_BufferError,
Guido van Rossuma74184e2007-08-29 04:05:57 +0000194 "Existing exports of data: object cannot be re-sized");
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000195 return -1;
196 }
197
Guido van Rossuma0867f72006-05-05 04:34:18 +0000198 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000200 PyErr_NoMemory();
201 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000202 }
203
Guido van Rossumd624f182006-04-24 13:47:05 +0000204 ((PyBytesObject *)self)->ob_bytes = sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000205 Py_Size(self) = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000206 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000207 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
208
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000209 return 0;
210}
211
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000212PyObject *
213PyBytes_Concat(PyObject *a, PyObject *b)
214{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000215 Py_ssize_t size;
216 PyBuffer va, vb;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000217 PyBytesObject *result;
218
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000219 va.len = -1;
220 vb.len = -1;
221 if (_getbuffer(a, &va) < 0 ||
222 _getbuffer(b, &vb) < 0) {
Guido van Rossum75d38e92007-08-24 17:33:11 +0000223 if (va.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000224 PyObject_ReleaseBuffer(a, &va);
225 if (vb.len != -1)
226 PyObject_ReleaseBuffer(b, &vb);
227 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
228 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
229 return NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000230 }
231
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000232 size = va.len + vb.len;
233 if (size < 0) {
234 PyObject_ReleaseBuffer(a, &va);
235 PyObject_ReleaseBuffer(b, &vb);
236 return PyErr_NoMemory();
237 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000238
239 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
240 if (result != NULL) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000241 memcpy(result->ob_bytes, va.buf, va.len);
242 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000243 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000244
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000245 PyObject_ReleaseBuffer(a, &va);
246 PyObject_ReleaseBuffer(b, &vb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000247 return (PyObject *)result;
248}
249
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000250/* Functions stuffed into the type object */
251
252static Py_ssize_t
253bytes_length(PyBytesObject *self)
254{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000255 return Py_Size(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256}
257
258static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000259bytes_concat(PyBytesObject *self, PyObject *other)
260{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000261 return PyBytes_Concat((PyObject *)self, other);
Guido van Rossumd624f182006-04-24 13:47:05 +0000262}
263
264static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000265bytes_iconcat(PyBytesObject *self, PyObject *other)
266{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000267 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000268 Py_ssize_t size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000269 PyBuffer vo;
Guido van Rossum13e57212006-04-27 22:54:26 +0000270
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000271 if (_getbuffer(other, &vo) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000272 PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
273 Py_Type(self)->tp_name);
274 return NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000275 }
276
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000277 mysize = Py_Size(self);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000278 size = mysize + vo.len;
279 if (size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000280 PyObject_ReleaseBuffer(other, &vo);
281 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000282 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000283 if (size < self->ob_alloc) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000284 Py_Size(self) = size;
285 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000286 }
287 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000288 PyObject_ReleaseBuffer(other, &vo);
289 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000290 }
291 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
292 PyObject_ReleaseBuffer(other, &vo);
Guido van Rossum13e57212006-04-27 22:54:26 +0000293 Py_INCREF(self);
294 return (PyObject *)self;
295}
296
297static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000298bytes_repeat(PyBytesObject *self, Py_ssize_t count)
299{
300 PyBytesObject *result;
301 Py_ssize_t mysize;
302 Py_ssize_t size;
303
304 if (count < 0)
305 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000306 mysize = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000307 size = mysize * count;
308 if (count != 0 && size / count != mysize)
309 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000310 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000311 if (result != NULL && size != 0) {
312 if (mysize == 1)
313 memset(result->ob_bytes, self->ob_bytes[0], size);
314 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000315 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000316 for (i = 0; i < count; i++)
317 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
318 }
319 }
320 return (PyObject *)result;
321}
322
323static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000324bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
325{
326 Py_ssize_t mysize;
327 Py_ssize_t size;
328
329 if (count < 0)
330 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000331 mysize = Py_Size(self);
Guido van Rossum13e57212006-04-27 22:54:26 +0000332 size = mysize * count;
333 if (count != 0 && size / count != mysize)
334 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000335 if (size < self->ob_alloc) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000336 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000337 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000338 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000339 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000340 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000341
Guido van Rossum13e57212006-04-27 22:54:26 +0000342 if (mysize == 1)
343 memset(self->ob_bytes, self->ob_bytes[0], size);
344 else {
345 Py_ssize_t i;
346 for (i = 1; i < count; i++)
347 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
348 }
349
350 Py_INCREF(self);
351 return (PyObject *)self;
352}
353
354static int
355bytes_substring(PyBytesObject *self, PyBytesObject *other)
356{
357 Py_ssize_t i;
358
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000359 if (Py_Size(other) == 1) {
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000360 return memchr(self->ob_bytes, other->ob_bytes[0],
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000361 Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000362 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000363 if (Py_Size(other) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000364 return 1; /* Edge case */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000365 for (i = 0; i + Py_Size(other) <= Py_Size(self); i++) {
Guido van Rossum13e57212006-04-27 22:54:26 +0000366 /* XXX Yeah, yeah, lots of optimizations possible... */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000367 if (memcmp(self->ob_bytes + i, other->ob_bytes, Py_Size(other)) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000368 return 1;
369 }
370 return 0;
371}
372
373static int
374bytes_contains(PyBytesObject *self, PyObject *value)
375{
376 Py_ssize_t ival;
377
378 if (PyBytes_Check(value))
379 return bytes_substring(self, (PyBytesObject *)value);
380
Thomas Woutersd204a712006-08-22 13:41:17 +0000381 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000382 if (ival == -1 && PyErr_Occurred())
383 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000384 if (ival < 0 || ival >= 256) {
385 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
386 return -1;
387 }
388
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000389 return memchr(self->ob_bytes, ival, Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000390}
391
392static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000393bytes_getitem(PyBytesObject *self, Py_ssize_t i)
394{
395 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000396 i += Py_Size(self);
397 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000398 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
399 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000400 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000401 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
402}
403
404static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000405bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000406{
Thomas Wouters376446d2006-12-19 08:30:14 +0000407 if (PyIndex_Check(item)) {
408 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000409
Thomas Wouters376446d2006-12-19 08:30:14 +0000410 if (i == -1 && PyErr_Occurred())
411 return NULL;
412
413 if (i < 0)
414 i += PyBytes_GET_SIZE(self);
415
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000416 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000417 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
418 return NULL;
419 }
420 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
421 }
422 else if (PySlice_Check(item)) {
423 Py_ssize_t start, stop, step, slicelength, cur, i;
424 if (PySlice_GetIndicesEx((PySliceObject *)item,
425 PyBytes_GET_SIZE(self),
426 &start, &stop, &step, &slicelength) < 0) {
427 return NULL;
428 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000429
Thomas Wouters376446d2006-12-19 08:30:14 +0000430 if (slicelength <= 0)
431 return PyBytes_FromStringAndSize("", 0);
432 else if (step == 1) {
433 return PyBytes_FromStringAndSize(self->ob_bytes + start,
434 slicelength);
435 }
436 else {
437 char *source_buf = PyBytes_AS_STRING(self);
438 char *result_buf = (char *)PyMem_Malloc(slicelength);
439 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000440
Thomas Wouters376446d2006-12-19 08:30:14 +0000441 if (result_buf == NULL)
442 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000443
Thomas Wouters376446d2006-12-19 08:30:14 +0000444 for (cur = start, i = 0; i < slicelength;
445 cur += step, i++) {
446 result_buf[i] = source_buf[cur];
447 }
448 result = PyBytes_FromStringAndSize(result_buf, slicelength);
449 PyMem_Free(result_buf);
450 return result;
451 }
452 }
453 else {
454 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
455 return NULL;
456 }
457}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000458
Guido van Rossumd624f182006-04-24 13:47:05 +0000459static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000460bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000461 PyObject *values)
462{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000463 Py_ssize_t avail, needed;
464 void *bytes;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000465 PyBuffer vbytes;
466 int res = 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000467
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000468 vbytes.len = -1;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000469 if (values == (PyObject *)self) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000470 /* Make a copy and call this function recursively */
Guido van Rossumd624f182006-04-24 13:47:05 +0000471 int err;
472 values = PyBytes_FromObject(values);
473 if (values == NULL)
474 return -1;
475 err = bytes_setslice(self, lo, hi, values);
476 Py_DECREF(values);
477 return err;
478 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000479 if (values == NULL) {
480 /* del b[lo:hi] */
481 bytes = NULL;
482 needed = 0;
483 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000484 else {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000485 if (_getbuffer(values, &vbytes) < 0) {
486 PyErr_Format(PyExc_TypeError,
487 "can't set bytes slice from %.100s",
488 Py_Type(values)->tp_name);
489 return -1;
490 }
491 needed = vbytes.len;
492 bytes = vbytes.buf;
Guido van Rossumd624f182006-04-24 13:47:05 +0000493 }
494
495 if (lo < 0)
496 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000497 if (hi < lo)
498 hi = lo;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000499 if (hi > Py_Size(self))
500 hi = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000501
502 avail = hi - lo;
503 if (avail < 0)
504 lo = hi = avail = 0;
505
506 if (avail != needed) {
507 if (avail > needed) {
508 /*
509 0 lo hi old_size
510 | |<----avail----->|<-----tomove------>|
511 | |<-needed->|<-----tomove------>|
512 0 lo new_hi new_size
513 */
514 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000515 Py_Size(self) - hi);
Guido van Rossumd624f182006-04-24 13:47:05 +0000516 }
Guido van Rossuma74184e2007-08-29 04:05:57 +0000517 /* XXX(nnorwitz): need to verify this can't overflow! */
Thomas Wouters376446d2006-12-19 08:30:14 +0000518 if (PyBytes_Resize((PyObject *)self,
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000519 Py_Size(self) + needed - avail) < 0) {
520 res = -1;
521 goto finish;
522 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000523 if (avail < needed) {
524 /*
525 0 lo hi old_size
526 | |<-avail->|<-----tomove------>|
527 | |<----needed---->|<-----tomove------>|
528 0 lo new_hi new_size
529 */
530 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000531 Py_Size(self) - lo - needed);
Guido van Rossumd624f182006-04-24 13:47:05 +0000532 }
533 }
534
535 if (needed > 0)
536 memcpy(self->ob_bytes + lo, bytes, needed);
537
Guido van Rossum75d38e92007-08-24 17:33:11 +0000538
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000539 finish:
Guido van Rossum75d38e92007-08-24 17:33:11 +0000540 if (vbytes.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000541 PyObject_ReleaseBuffer(values, &vbytes);
542 return res;
Guido van Rossumd624f182006-04-24 13:47:05 +0000543}
544
545static int
546bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
547{
548 Py_ssize_t ival;
549
550 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000551 i += Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000552
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000553 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000554 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
555 return -1;
556 }
557
558 if (value == NULL)
559 return bytes_setslice(self, i, i+1, NULL);
560
Thomas Woutersd204a712006-08-22 13:41:17 +0000561 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000562 if (ival == -1 && PyErr_Occurred())
563 return -1;
564
565 if (ival < 0 || ival >= 256) {
566 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
567 return -1;
568 }
569
570 self->ob_bytes[i] = ival;
571 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000572}
573
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000574static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000575bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
576{
577 Py_ssize_t start, stop, step, slicelen, needed;
578 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000579
Thomas Wouters376446d2006-12-19 08:30:14 +0000580 if (PyIndex_Check(item)) {
581 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
582
583 if (i == -1 && PyErr_Occurred())
584 return -1;
585
586 if (i < 0)
587 i += PyBytes_GET_SIZE(self);
588
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000589 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000590 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
591 return -1;
592 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000593
Thomas Wouters376446d2006-12-19 08:30:14 +0000594 if (values == NULL) {
595 /* Fall through to slice assignment */
596 start = i;
597 stop = i + 1;
598 step = 1;
599 slicelen = 1;
600 }
601 else {
602 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
603 if (ival == -1 && PyErr_Occurred())
604 return -1;
605 if (ival < 0 || ival >= 256) {
606 PyErr_SetString(PyExc_ValueError,
607 "byte must be in range(0, 256)");
608 return -1;
609 }
610 self->ob_bytes[i] = (char)ival;
611 return 0;
612 }
613 }
614 else if (PySlice_Check(item)) {
615 if (PySlice_GetIndicesEx((PySliceObject *)item,
616 PyBytes_GET_SIZE(self),
617 &start, &stop, &step, &slicelen) < 0) {
618 return -1;
619 }
620 }
621 else {
622 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
623 return -1;
624 }
625
626 if (values == NULL) {
627 bytes = NULL;
628 needed = 0;
629 }
630 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
631 /* Make a copy an call this function recursively */
632 int err;
633 values = PyBytes_FromObject(values);
634 if (values == NULL)
635 return -1;
636 err = bytes_ass_subscript(self, item, values);
637 Py_DECREF(values);
638 return err;
639 }
640 else {
641 assert(PyBytes_Check(values));
642 bytes = ((PyBytesObject *)values)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000643 needed = Py_Size(values);
Thomas Wouters376446d2006-12-19 08:30:14 +0000644 }
645 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
646 if ((step < 0 && start < stop) ||
647 (step > 0 && start > stop))
648 stop = start;
649 if (step == 1) {
650 if (slicelen != needed) {
651 if (slicelen > needed) {
652 /*
653 0 start stop old_size
654 | |<---slicelen--->|<-----tomove------>|
655 | |<-needed->|<-----tomove------>|
656 0 lo new_hi new_size
657 */
658 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000659 Py_Size(self) - stop);
Thomas Wouters376446d2006-12-19 08:30:14 +0000660 }
661 if (PyBytes_Resize((PyObject *)self,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000662 Py_Size(self) + needed - slicelen) < 0)
Thomas Wouters376446d2006-12-19 08:30:14 +0000663 return -1;
664 if (slicelen < needed) {
665 /*
666 0 lo hi old_size
667 | |<-avail->|<-----tomove------>|
668 | |<----needed---->|<-----tomove------>|
669 0 lo new_hi new_size
670 */
671 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000672 Py_Size(self) - start - needed);
Thomas Wouters376446d2006-12-19 08:30:14 +0000673 }
674 }
675
676 if (needed > 0)
677 memcpy(self->ob_bytes + start, bytes, needed);
678
679 return 0;
680 }
681 else {
682 if (needed == 0) {
683 /* Delete slice */
684 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000685
Thomas Wouters376446d2006-12-19 08:30:14 +0000686 if (step < 0) {
687 stop = start + 1;
688 start = stop + step * (slicelen - 1) - 1;
689 step = -step;
690 }
691 for (cur = start, i = 0;
692 i < slicelen; cur += step, i++) {
693 Py_ssize_t lim = step - 1;
694
695 if (cur + step >= PyBytes_GET_SIZE(self))
696 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000697
Thomas Wouters376446d2006-12-19 08:30:14 +0000698 memmove(self->ob_bytes + cur - i,
699 self->ob_bytes + cur + 1, lim);
700 }
701 /* Move the tail of the bytes, in one chunk */
702 cur = start + slicelen*step;
703 if (cur < PyBytes_GET_SIZE(self)) {
704 memmove(self->ob_bytes + cur - slicelen,
705 self->ob_bytes + cur,
706 PyBytes_GET_SIZE(self) - cur);
707 }
708 if (PyBytes_Resize((PyObject *)self,
709 PyBytes_GET_SIZE(self) - slicelen) < 0)
710 return -1;
711
712 return 0;
713 }
714 else {
715 /* Assign slice */
716 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000717
Thomas Wouters376446d2006-12-19 08:30:14 +0000718 if (needed != slicelen) {
719 PyErr_Format(PyExc_ValueError,
720 "attempt to assign bytes of size %zd "
721 "to extended slice of size %zd",
722 needed, slicelen);
723 return -1;
724 }
725 for (cur = start, i = 0; i < slicelen; cur += step, i++)
726 self->ob_bytes[cur] = bytes[i];
727 return 0;
728 }
729 }
730}
731
732static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000733bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
734{
Guido van Rossumd624f182006-04-24 13:47:05 +0000735 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000736 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000737 const char *encoding = NULL;
738 const char *errors = NULL;
739 Py_ssize_t count;
740 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000741 PyObject *(*iternext)(PyObject *);
742
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000743 if (Py_Size(self) != 0) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000744 /* Empty previous contents (yes, do this first of all!) */
745 if (PyBytes_Resize((PyObject *)self, 0) < 0)
746 return -1;
747 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000748
Guido van Rossumd624f182006-04-24 13:47:05 +0000749 /* Parse arguments */
750 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
751 &arg, &encoding, &errors))
752 return -1;
753
754 /* Make a quick exit if no first argument */
755 if (arg == NULL) {
756 if (encoding != NULL || errors != NULL) {
757 PyErr_SetString(PyExc_TypeError,
758 "encoding or errors without sequence argument");
759 return -1;
760 }
761 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000762 }
763
Guido van Rossumd624f182006-04-24 13:47:05 +0000764 if (PyUnicode_Check(arg)) {
765 /* Encode via the codec registry */
Guido van Rossum4355a472007-05-04 05:00:04 +0000766 PyObject *encoded, *new;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000767 if (encoding == NULL) {
768 PyErr_SetString(PyExc_TypeError,
769 "string argument without an encoding");
770 return -1;
771 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000772 encoded = PyCodec_Encode(arg, encoding, errors);
773 if (encoded == NULL)
774 return -1;
Guido van Rossum4355a472007-05-04 05:00:04 +0000775 if (!PyBytes_Check(encoded) && !PyString_Check(encoded)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000776 PyErr_Format(PyExc_TypeError,
Guido van Rossum4355a472007-05-04 05:00:04 +0000777 "encoder did not return a str8 or bytes object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000778 Py_Type(encoded)->tp_name);
Guido van Rossumd624f182006-04-24 13:47:05 +0000779 Py_DECREF(encoded);
780 return -1;
781 }
Guido van Rossuma74184e2007-08-29 04:05:57 +0000782 new = bytes_iconcat(self, encoded);
783 Py_DECREF(encoded);
784 if (new == NULL)
785 return -1;
786 Py_DECREF(new);
787 return 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000788 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000789
Guido van Rossumd624f182006-04-24 13:47:05 +0000790 /* If it's not unicode, there can't be encoding or errors */
791 if (encoding != NULL || errors != NULL) {
792 PyErr_SetString(PyExc_TypeError,
793 "encoding or errors without a string argument");
794 return -1;
795 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000796
Guido van Rossumd624f182006-04-24 13:47:05 +0000797 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000798 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000799 if (count == -1 && PyErr_Occurred())
800 PyErr_Clear();
801 else {
802 if (count < 0) {
803 PyErr_SetString(PyExc_ValueError, "negative count");
804 return -1;
805 }
806 if (count > 0) {
807 if (PyBytes_Resize((PyObject *)self, count))
808 return -1;
809 memset(self->ob_bytes, 0, count);
810 }
811 return 0;
812 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000813
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000814 /* Use the modern buffer interface */
815 if (PyObject_CheckBuffer(arg)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000816 Py_ssize_t size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000817 PyBuffer view;
818 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000819 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000820 size = view.len;
821 if (PyBytes_Resize((PyObject *)self, size) < 0) goto fail;
822 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
823 goto fail;
824 PyObject_ReleaseBuffer(arg, &view);
Guido van Rossumd624f182006-04-24 13:47:05 +0000825 return 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000826 fail:
827 PyObject_ReleaseBuffer(arg, &view);
828 return -1;
Guido van Rossumd624f182006-04-24 13:47:05 +0000829 }
830
831 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000832
833 /* Get the iterator */
834 it = PyObject_GetIter(arg);
835 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000836 return -1;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000837 iternext = *Py_Type(it)->tp_iternext;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000838
839 /* Run the iterator to exhaustion */
840 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000841 PyObject *item;
842 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000843
Guido van Rossumd624f182006-04-24 13:47:05 +0000844 /* Get the next item */
845 item = iternext(it);
846 if (item == NULL) {
847 if (PyErr_Occurred()) {
848 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
849 goto error;
850 PyErr_Clear();
851 }
852 break;
853 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000854
Guido van Rossumd624f182006-04-24 13:47:05 +0000855 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000856 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000857 Py_DECREF(item);
858 if (value == -1 && PyErr_Occurred())
859 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000860
Guido van Rossumd624f182006-04-24 13:47:05 +0000861 /* Range check */
862 if (value < 0 || value >= 256) {
863 PyErr_SetString(PyExc_ValueError,
864 "bytes must be in range(0, 256)");
865 goto error;
866 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000867
Guido van Rossumd624f182006-04-24 13:47:05 +0000868 /* Append the byte */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000869 if (Py_Size(self) < self->ob_alloc)
870 Py_Size(self)++;
871 else if (PyBytes_Resize((PyObject *)self, Py_Size(self)+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000872 goto error;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000873 self->ob_bytes[Py_Size(self)-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000874 }
875
876 /* Clean up and return success */
877 Py_DECREF(it);
878 return 0;
879
880 error:
881 /* Error handling when it != NULL */
882 Py_DECREF(it);
883 return -1;
884}
885
Georg Brandlee91be42007-02-24 19:41:35 +0000886/* Mostly copied from string_repr, but without the
887 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000888static PyObject *
889bytes_repr(PyBytesObject *self)
890{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000891 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000892 size_t newsize = 3 + 4 * Py_Size(self);
Georg Brandlee91be42007-02-24 19:41:35 +0000893 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000894 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(self)) {
Georg Brandlee91be42007-02-24 19:41:35 +0000895 PyErr_SetString(PyExc_OverflowError,
896 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000897 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000898 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000899 v = PyUnicode_FromUnicode(NULL, newsize);
Georg Brandlee91be42007-02-24 19:41:35 +0000900 if (v == NULL) {
901 return NULL;
902 }
903 else {
904 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000905 register Py_UNICODE c;
906 register Py_UNICODE *p;
Georg Brandlee91be42007-02-24 19:41:35 +0000907 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000908
Walter Dörwald1ab83302007-05-18 17:15:44 +0000909 p = PyUnicode_AS_UNICODE(v);
Georg Brandlee91be42007-02-24 19:41:35 +0000910 *p++ = 'b';
911 *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000912 for (i = 0; i < Py_Size(self); i++) {
Georg Brandlee91be42007-02-24 19:41:35 +0000913 /* There's at least enough room for a hex escape
914 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000915 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Georg Brandlee91be42007-02-24 19:41:35 +0000916 c = self->ob_bytes[i];
917 if (c == quote || c == '\\')
918 *p++ = '\\', *p++ = c;
919 else if (c == '\t')
920 *p++ = '\\', *p++ = 't';
921 else if (c == '\n')
922 *p++ = '\\', *p++ = 'n';
923 else if (c == '\r')
924 *p++ = '\\', *p++ = 'r';
925 else if (c == 0)
Guido van Rossum57b93ad2007-05-08 19:09:34 +0000926 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
Georg Brandlee91be42007-02-24 19:41:35 +0000927 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000928 *p++ = '\\';
929 *p++ = 'x';
930 *p++ = hexdigits[(c & 0xf0) >> 4];
931 *p++ = hexdigits[c & 0xf];
Georg Brandlee91be42007-02-24 19:41:35 +0000932 }
933 else
934 *p++ = c;
935 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000936 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Georg Brandlee91be42007-02-24 19:41:35 +0000937 *p++ = quote;
938 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000939 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
940 Py_DECREF(v);
941 return NULL;
942 }
Georg Brandlee91be42007-02-24 19:41:35 +0000943 return v;
944 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000945}
946
947static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000948bytes_str(PyBytesObject *self)
949{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000950 return PyString_FromStringAndSize(self->ob_bytes, Py_Size(self));
Guido van Rossumd624f182006-04-24 13:47:05 +0000951}
952
953static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000954bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000955{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000956 Py_ssize_t self_size, other_size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000957 PyBuffer self_bytes, other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000958 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000959 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000960 int cmp;
961
Jeremy Hylton18c3ff82007-08-29 18:47:16 +0000962 /* Bytes can be compared to anything that supports the (binary)
963 buffer API. Except that a comparison with Unicode is always an
964 error, even if the comparison is for equality. */
965 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
966 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
967 PyErr_SetString(PyExc_TypeError, "can't compare bytes and str");
968 return NULL;
969 }
Guido van Rossumebea9be2007-04-09 00:49:13 +0000970
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000971 self_size = _getbuffer(self, &self_bytes);
972 if (self_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000973 PyErr_Clear();
Guido van Rossumebea9be2007-04-09 00:49:13 +0000974 Py_INCREF(Py_NotImplemented);
975 return Py_NotImplemented;
976 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000977
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000978 other_size = _getbuffer(other, &other_bytes);
979 if (other_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000980 PyErr_Clear();
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000981 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossumd624f182006-04-24 13:47:05 +0000982 Py_INCREF(Py_NotImplemented);
983 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000984 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000985
986 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000987 /* Shortcut: if the lengths differ, the objects differ */
988 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000989 }
990 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000991 minsize = self_size;
992 if (other_size < minsize)
993 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000994
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000995 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000996 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000997
Guido van Rossumd624f182006-04-24 13:47:05 +0000998 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000999 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +00001000 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +00001001 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +00001002 cmp = 1;
1003 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001004
Guido van Rossumd624f182006-04-24 13:47:05 +00001005 switch (op) {
1006 case Py_LT: cmp = cmp < 0; break;
1007 case Py_LE: cmp = cmp <= 0; break;
1008 case Py_EQ: cmp = cmp == 0; break;
1009 case Py_NE: cmp = cmp != 0; break;
1010 case Py_GT: cmp = cmp > 0; break;
1011 case Py_GE: cmp = cmp >= 0; break;
1012 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001013 }
1014
1015 res = cmp ? Py_True : Py_False;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001016 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossum75d38e92007-08-24 17:33:11 +00001017 PyObject_ReleaseBuffer(other, &other_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001018 Py_INCREF(res);
1019 return res;
1020}
1021
1022static void
1023bytes_dealloc(PyBytesObject *self)
1024{
Guido van Rossumd624f182006-04-24 13:47:05 +00001025 if (self->ob_bytes != 0) {
1026 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001027 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001028 Py_Type(self)->tp_free((PyObject *)self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001029}
1030
Neal Norwitz6968b052007-02-27 19:02:19 +00001031
1032/* -------------------------------------------------------------------- */
1033/* Methods */
1034
1035#define STRINGLIB_CHAR char
1036#define STRINGLIB_CMP memcmp
1037#define STRINGLIB_LEN PyBytes_GET_SIZE
1038#define STRINGLIB_NEW PyBytes_FromStringAndSize
1039#define STRINGLIB_EMPTY nullbytes
1040
1041#include "stringlib/fastsearch.h"
1042#include "stringlib/count.h"
1043#include "stringlib/find.h"
1044#include "stringlib/partition.h"
1045
1046
1047/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1048were copied from the old char* style string object. */
1049
1050Py_LOCAL_INLINE(void)
1051_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1052{
1053 if (*end > len)
1054 *end = len;
1055 else if (*end < 0)
1056 *end += len;
1057 if (*end < 0)
1058 *end = 0;
1059 if (*start < 0)
1060 *start += len;
1061 if (*start < 0)
1062 *start = 0;
1063}
1064
1065
1066Py_LOCAL_INLINE(Py_ssize_t)
1067bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1068{
1069 PyObject *subobj;
Guido van Rossum06b8b022007-08-31 13:48:41 +00001070 PyBuffer subbuf;
Neal Norwitz6968b052007-02-27 19:02:19 +00001071 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Guido van Rossum06b8b022007-08-31 13:48:41 +00001072 Py_ssize_t res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001073
1074 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1075 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1076 return -2;
Guido van Rossum06b8b022007-08-31 13:48:41 +00001077 if (_getbuffer(subobj, &subbuf) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001078 return -2;
Neal Norwitz6968b052007-02-27 19:02:19 +00001079 if (dir > 0)
Guido van Rossum06b8b022007-08-31 13:48:41 +00001080 res = stringlib_find_slice(
Neal Norwitz6968b052007-02-27 19:02:19 +00001081 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossum06b8b022007-08-31 13:48:41 +00001082 subbuf.buf, subbuf.len, start, end);
Neal Norwitz6968b052007-02-27 19:02:19 +00001083 else
Guido van Rossum06b8b022007-08-31 13:48:41 +00001084 res = stringlib_rfind_slice(
Neal Norwitz6968b052007-02-27 19:02:19 +00001085 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossum06b8b022007-08-31 13:48:41 +00001086 subbuf.buf, subbuf.len, start, end);
1087 PyObject_ReleaseBuffer(subobj, &subbuf);
1088 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001089}
1090
1091
1092PyDoc_STRVAR(find__doc__,
1093"B.find(sub [,start [,end]]) -> int\n\
1094\n\
1095Return the lowest index in B where subsection sub is found,\n\
1096such that sub is contained within s[start,end]. Optional\n\
1097arguments start and end are interpreted as in slice notation.\n\
1098\n\
1099Return -1 on failure.");
1100
1101static PyObject *
1102bytes_find(PyBytesObject *self, PyObject *args)
1103{
1104 Py_ssize_t result = bytes_find_internal(self, args, +1);
1105 if (result == -2)
1106 return NULL;
1107 return PyInt_FromSsize_t(result);
1108}
1109
1110PyDoc_STRVAR(count__doc__,
1111"B.count(sub[, start[, end]]) -> int\n\
1112\n\
1113Return the number of non-overlapping occurrences of subsection sub in\n\
1114bytes B[start:end]. Optional arguments start and end are interpreted\n\
1115as in slice notation.");
1116
1117static PyObject *
1118bytes_count(PyBytesObject *self, PyObject *args)
1119{
1120 PyObject *sub_obj;
1121 const char *str = PyBytes_AS_STRING(self), *sub;
1122 Py_ssize_t sub_len;
1123 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1124
1125 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1126 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1127 return NULL;
1128
1129 if (PyBytes_Check(sub_obj)) {
1130 sub = PyBytes_AS_STRING(sub_obj);
1131 sub_len = PyBytes_GET_SIZE(sub_obj);
1132 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001133 /* XXX --> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001134 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1135 return NULL;
1136
Martin v. Löwis5b222132007-06-10 09:51:05 +00001137 _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Neal Norwitz6968b052007-02-27 19:02:19 +00001138
1139 return PyInt_FromSsize_t(
1140 stringlib_count(str + start, end - start, sub, sub_len)
1141 );
1142}
1143
1144
1145PyDoc_STRVAR(index__doc__,
1146"B.index(sub [,start [,end]]) -> int\n\
1147\n\
1148Like B.find() but raise ValueError when the subsection is not found.");
1149
1150static PyObject *
1151bytes_index(PyBytesObject *self, PyObject *args)
1152{
1153 Py_ssize_t result = bytes_find_internal(self, args, +1);
1154 if (result == -2)
1155 return NULL;
1156 if (result == -1) {
1157 PyErr_SetString(PyExc_ValueError,
1158 "subsection not found");
1159 return NULL;
1160 }
1161 return PyInt_FromSsize_t(result);
1162}
1163
1164
1165PyDoc_STRVAR(rfind__doc__,
1166"B.rfind(sub [,start [,end]]) -> int\n\
1167\n\
1168Return the highest index in B where subsection sub is found,\n\
1169such that sub is contained within s[start,end]. Optional\n\
1170arguments start and end are interpreted as in slice notation.\n\
1171\n\
1172Return -1 on failure.");
1173
1174static PyObject *
1175bytes_rfind(PyBytesObject *self, PyObject *args)
1176{
1177 Py_ssize_t result = bytes_find_internal(self, args, -1);
1178 if (result == -2)
1179 return NULL;
1180 return PyInt_FromSsize_t(result);
1181}
1182
1183
1184PyDoc_STRVAR(rindex__doc__,
1185"B.rindex(sub [,start [,end]]) -> int\n\
1186\n\
1187Like B.rfind() but raise ValueError when the subsection is not found.");
1188
1189static PyObject *
1190bytes_rindex(PyBytesObject *self, PyObject *args)
1191{
1192 Py_ssize_t result = bytes_find_internal(self, args, -1);
1193 if (result == -2)
1194 return NULL;
1195 if (result == -1) {
1196 PyErr_SetString(PyExc_ValueError,
1197 "subsection not found");
1198 return NULL;
1199 }
1200 return PyInt_FromSsize_t(result);
1201}
1202
1203
1204/* Matches the end (direction >= 0) or start (direction < 0) of self
1205 * against substr, using the start and end arguments. Returns
1206 * -1 on error, 0 if not found and 1 if found.
1207 */
1208Py_LOCAL(int)
1209_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1210 Py_ssize_t end, int direction)
1211{
1212 Py_ssize_t len = PyBytes_GET_SIZE(self);
1213 Py_ssize_t slen;
1214 const char* sub;
1215 const char* str;
1216
1217 if (PyBytes_Check(substr)) {
1218 sub = PyBytes_AS_STRING(substr);
1219 slen = PyBytes_GET_SIZE(substr);
1220 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001221 /* XXX --> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001222 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
1223 return -1;
1224 str = PyBytes_AS_STRING(self);
1225
1226 _adjust_indices(&start, &end, len);
1227
1228 if (direction < 0) {
1229 /* startswith */
1230 if (start+slen > len)
1231 return 0;
1232 } else {
1233 /* endswith */
1234 if (end-start < slen || start > len)
1235 return 0;
1236
1237 if (end-slen > start)
1238 start = end - slen;
1239 }
1240 if (end-start >= slen)
1241 return ! memcmp(str+start, sub, slen);
1242 return 0;
1243}
1244
1245
1246PyDoc_STRVAR(startswith__doc__,
1247"B.startswith(prefix[, start[, end]]) -> bool\n\
1248\n\
1249Return True if B starts with the specified prefix, False otherwise.\n\
1250With optional start, test B beginning at that position.\n\
1251With optional end, stop comparing B at that position.\n\
1252prefix can also be a tuple of strings to try.");
1253
1254static PyObject *
1255bytes_startswith(PyBytesObject *self, PyObject *args)
1256{
1257 Py_ssize_t start = 0;
1258 Py_ssize_t end = PY_SSIZE_T_MAX;
1259 PyObject *subobj;
1260 int result;
1261
1262 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1263 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1264 return NULL;
1265 if (PyTuple_Check(subobj)) {
1266 Py_ssize_t i;
1267 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1268 result = _bytes_tailmatch(self,
1269 PyTuple_GET_ITEM(subobj, i),
1270 start, end, -1);
1271 if (result == -1)
1272 return NULL;
1273 else if (result) {
1274 Py_RETURN_TRUE;
1275 }
1276 }
1277 Py_RETURN_FALSE;
1278 }
1279 result = _bytes_tailmatch(self, subobj, start, end, -1);
1280 if (result == -1)
1281 return NULL;
1282 else
1283 return PyBool_FromLong(result);
1284}
1285
1286PyDoc_STRVAR(endswith__doc__,
1287"B.endswith(suffix[, start[, end]]) -> bool\n\
1288\n\
1289Return True if B ends with the specified suffix, False otherwise.\n\
1290With optional start, test B beginning at that position.\n\
1291With optional end, stop comparing B at that position.\n\
1292suffix can also be a tuple of strings to try.");
1293
1294static PyObject *
1295bytes_endswith(PyBytesObject *self, PyObject *args)
1296{
1297 Py_ssize_t start = 0;
1298 Py_ssize_t end = PY_SSIZE_T_MAX;
1299 PyObject *subobj;
1300 int result;
1301
1302 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1303 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1304 return NULL;
1305 if (PyTuple_Check(subobj)) {
1306 Py_ssize_t i;
1307 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1308 result = _bytes_tailmatch(self,
1309 PyTuple_GET_ITEM(subobj, i),
1310 start, end, +1);
1311 if (result == -1)
1312 return NULL;
1313 else if (result) {
1314 Py_RETURN_TRUE;
1315 }
1316 }
1317 Py_RETURN_FALSE;
1318 }
1319 result = _bytes_tailmatch(self, subobj, start, end, +1);
1320 if (result == -1)
1321 return NULL;
1322 else
1323 return PyBool_FromLong(result);
1324}
1325
1326
1327
1328PyDoc_STRVAR(translate__doc__,
1329"B.translate(table [,deletechars]) -> bytes\n\
1330\n\
1331Return a copy of the bytes B, where all characters occurring\n\
1332in the optional argument deletechars are removed, and the\n\
1333remaining characters have been mapped through the given\n\
1334translation table, which must be a bytes of length 256.");
1335
1336static PyObject *
1337bytes_translate(PyBytesObject *self, PyObject *args)
1338{
1339 register char *input, *output;
1340 register const char *table;
1341 register Py_ssize_t i, c, changed = 0;
1342 PyObject *input_obj = (PyObject*)self;
1343 const char *table1, *output_start, *del_table=NULL;
1344 Py_ssize_t inlen, tablen, dellen = 0;
1345 PyObject *result;
1346 int trans_table[256];
1347 PyObject *tableobj, *delobj = NULL;
1348
1349 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1350 &tableobj, &delobj))
1351 return NULL;
1352
1353 if (PyBytes_Check(tableobj)) {
1354 table1 = PyBytes_AS_STRING(tableobj);
1355 tablen = PyBytes_GET_SIZE(tableobj);
1356 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001357 /* XXX -> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001358 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1359 return NULL;
1360
1361 if (tablen != 256) {
1362 PyErr_SetString(PyExc_ValueError,
1363 "translation table must be 256 characters long");
1364 return NULL;
1365 }
1366
1367 if (delobj != NULL) {
1368 if (PyBytes_Check(delobj)) {
1369 del_table = PyBytes_AS_STRING(delobj);
1370 dellen = PyBytes_GET_SIZE(delobj);
1371 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001372 /* XXX -> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001373 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1374 return NULL;
1375 }
1376 else {
1377 del_table = NULL;
1378 dellen = 0;
1379 }
1380
1381 table = table1;
1382 inlen = PyBytes_GET_SIZE(input_obj);
1383 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1384 if (result == NULL)
1385 return NULL;
1386 output_start = output = PyBytes_AsString(result);
1387 input = PyBytes_AS_STRING(input_obj);
1388
1389 if (dellen == 0) {
1390 /* If no deletions are required, use faster code */
1391 for (i = inlen; --i >= 0; ) {
1392 c = Py_CHARMASK(*input++);
1393 if (Py_CHARMASK((*output++ = table[c])) != c)
1394 changed = 1;
1395 }
1396 if (changed || !PyBytes_CheckExact(input_obj))
1397 return result;
1398 Py_DECREF(result);
1399 Py_INCREF(input_obj);
1400 return input_obj;
1401 }
1402
1403 for (i = 0; i < 256; i++)
1404 trans_table[i] = Py_CHARMASK(table[i]);
1405
1406 for (i = 0; i < dellen; i++)
1407 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1408
1409 for (i = inlen; --i >= 0; ) {
1410 c = Py_CHARMASK(*input++);
1411 if (trans_table[c] != -1)
1412 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1413 continue;
1414 changed = 1;
1415 }
1416 if (!changed && PyBytes_CheckExact(input_obj)) {
1417 Py_DECREF(result);
1418 Py_INCREF(input_obj);
1419 return input_obj;
1420 }
1421 /* Fix the size of the resulting string */
1422 if (inlen > 0)
1423 PyBytes_Resize(result, output - output_start);
1424 return result;
1425}
1426
1427
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c within the first target_len
   bytes of target, or NULL if there is none (wraps memchr). */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when pattern (length bytes) occurs in target at offset: compares
   the first and last bytes directly, then the middle with memcmp.
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)        \
  ((target)[(offset)] == (pattern)[0] &&                        \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&    \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1441
1442
1443/* Bytes ops must return a string. */
1444/* If the object is subclass of bytes, create a copy */
1445Py_LOCAL(PyBytesObject *)
1446return_self(PyBytesObject *self)
1447{
1448 if (PyBytes_CheckExact(self)) {
1449 Py_INCREF(self);
1450 return (PyBytesObject *)self;
1451 }
1452 return (PyBytesObject *)PyBytes_FromStringAndSize(
1453 PyBytes_AS_STRING(self),
1454 PyBytes_GET_SIZE(self));
1455}
1456
1457Py_LOCAL_INLINE(Py_ssize_t)
1458countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1459{
1460 Py_ssize_t count=0;
1461 const char *start=target;
1462 const char *end=target+target_len;
1463
1464 while ( (start=findchar(start, end-start, c)) != NULL ) {
1465 count++;
1466 if (count >= maxcount)
1467 break;
1468 start += 1;
1469 }
1470 return count;
1471}
1472
1473Py_LOCAL(Py_ssize_t)
1474findstring(const char *target, Py_ssize_t target_len,
1475 const char *pattern, Py_ssize_t pattern_len,
1476 Py_ssize_t start,
1477 Py_ssize_t end,
1478 int direction)
1479{
1480 if (start < 0) {
1481 start += target_len;
1482 if (start < 0)
1483 start = 0;
1484 }
1485 if (end > target_len) {
1486 end = target_len;
1487 } else if (end < 0) {
1488 end += target_len;
1489 if (end < 0)
1490 end = 0;
1491 }
1492
1493 /* zero-length substrings always match at the first attempt */
1494 if (pattern_len == 0)
1495 return (direction > 0) ? start : end;
1496
1497 end -= pattern_len;
1498
1499 if (direction < 0) {
1500 for (; end >= start; end--)
1501 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1502 return end;
1503 } else {
1504 for (; start <= end; start++)
1505 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1506 return start;
1507 }
1508 return -1;
1509}
1510
1511Py_LOCAL_INLINE(Py_ssize_t)
1512countstring(const char *target, Py_ssize_t target_len,
1513 const char *pattern, Py_ssize_t pattern_len,
1514 Py_ssize_t start,
1515 Py_ssize_t end,
1516 int direction, Py_ssize_t maxcount)
1517{
1518 Py_ssize_t count=0;
1519
1520 if (start < 0) {
1521 start += target_len;
1522 if (start < 0)
1523 start = 0;
1524 }
1525 if (end > target_len) {
1526 end = target_len;
1527 } else if (end < 0) {
1528 end += target_len;
1529 if (end < 0)
1530 end = 0;
1531 }
1532
1533 /* zero-length substrings match everywhere */
1534 if (pattern_len == 0 || maxcount == 0) {
1535 if (target_len+1 < maxcount)
1536 return target_len+1;
1537 return maxcount;
1538 }
1539
1540 end -= pattern_len;
1541 if (direction < 0) {
1542 for (; (end >= start); end--)
1543 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1544 count++;
1545 if (--maxcount <= 0) break;
1546 end -= pattern_len-1;
1547 }
1548 } else {
1549 for (; (start <= end); start++)
1550 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1551 count++;
1552 if (--maxcount <= 0)
1553 break;
1554 start += pattern_len-1;
1555 }
1556 }
1557 return count;
1558}
1559
1560
1561/* Algorithms for different cases of string replacement */
1562
1563/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1564Py_LOCAL(PyBytesObject *)
1565replace_interleave(PyBytesObject *self,
1566 const char *to_s, Py_ssize_t to_len,
1567 Py_ssize_t maxcount)
1568{
1569 char *self_s, *result_s;
1570 Py_ssize_t self_len, result_len;
1571 Py_ssize_t count, i, product;
1572 PyBytesObject *result;
1573
1574 self_len = PyBytes_GET_SIZE(self);
1575
1576 /* 1 at the end plus 1 after every character */
1577 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001578 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001579 count = maxcount;
1580
1581 /* Check for overflow */
1582 /* result_len = count * to_len + self_len; */
1583 product = count * to_len;
1584 if (product / to_len != count) {
1585 PyErr_SetString(PyExc_OverflowError,
1586 "replace string is too long");
1587 return NULL;
1588 }
1589 result_len = product + self_len;
1590 if (result_len < 0) {
1591 PyErr_SetString(PyExc_OverflowError,
1592 "replace string is too long");
1593 return NULL;
1594 }
1595
1596 if (! (result = (PyBytesObject *)
1597 PyBytes_FromStringAndSize(NULL, result_len)) )
1598 return NULL;
1599
1600 self_s = PyBytes_AS_STRING(self);
1601 result_s = PyBytes_AS_STRING(result);
1602
1603 /* TODO: special case single character, which doesn't need memcpy */
1604
1605 /* Lay the first one down (guaranteed this will occur) */
1606 Py_MEMCPY(result_s, to_s, to_len);
1607 result_s += to_len;
1608 count -= 1;
1609
1610 for (i=0; i<count; i++) {
1611 *result_s++ = *self_s++;
1612 Py_MEMCPY(result_s, to_s, to_len);
1613 result_s += to_len;
1614 }
1615
1616 /* Copy the rest of the original string */
1617 Py_MEMCPY(result_s, self_s, self_len-i);
1618
1619 return result;
1620}
1621
1622/* Special case for deleting a single character */
1623/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1624Py_LOCAL(PyBytesObject *)
1625replace_delete_single_character(PyBytesObject *self,
1626 char from_c, Py_ssize_t maxcount)
1627{
1628 char *self_s, *result_s;
1629 char *start, *next, *end;
1630 Py_ssize_t self_len, result_len;
1631 Py_ssize_t count;
1632 PyBytesObject *result;
1633
1634 self_len = PyBytes_GET_SIZE(self);
1635 self_s = PyBytes_AS_STRING(self);
1636
1637 count = countchar(self_s, self_len, from_c, maxcount);
1638 if (count == 0) {
1639 return return_self(self);
1640 }
1641
1642 result_len = self_len - count; /* from_len == 1 */
1643 assert(result_len>=0);
1644
1645 if ( (result = (PyBytesObject *)
1646 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1647 return NULL;
1648 result_s = PyBytes_AS_STRING(result);
1649
1650 start = self_s;
1651 end = self_s + self_len;
1652 while (count-- > 0) {
1653 next = findchar(start, end-start, from_c);
1654 if (next == NULL)
1655 break;
1656 Py_MEMCPY(result_s, start, next-start);
1657 result_s += (next-start);
1658 start = next+1;
1659 }
1660 Py_MEMCPY(result_s, start, end-start);
1661
1662 return result;
1663}
1664
1665/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1666
1667Py_LOCAL(PyBytesObject *)
1668replace_delete_substring(PyBytesObject *self,
1669 const char *from_s, Py_ssize_t from_len,
1670 Py_ssize_t maxcount)
1671{
1672 char *self_s, *result_s;
1673 char *start, *next, *end;
1674 Py_ssize_t self_len, result_len;
1675 Py_ssize_t count, offset;
1676 PyBytesObject *result;
1677
1678 self_len = PyBytes_GET_SIZE(self);
1679 self_s = PyBytes_AS_STRING(self);
1680
1681 count = countstring(self_s, self_len,
1682 from_s, from_len,
1683 0, self_len, 1,
1684 maxcount);
1685
1686 if (count == 0) {
1687 /* no matches */
1688 return return_self(self);
1689 }
1690
1691 result_len = self_len - (count * from_len);
1692 assert (result_len>=0);
1693
1694 if ( (result = (PyBytesObject *)
1695 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1696 return NULL;
1697
1698 result_s = PyBytes_AS_STRING(result);
1699
1700 start = self_s;
1701 end = self_s + self_len;
1702 while (count-- > 0) {
1703 offset = findstring(start, end-start,
1704 from_s, from_len,
1705 0, end-start, FORWARD);
1706 if (offset == -1)
1707 break;
1708 next = start + offset;
1709
1710 Py_MEMCPY(result_s, start, next-start);
1711
1712 result_s += (next-start);
1713 start = next+from_len;
1714 }
1715 Py_MEMCPY(result_s, start, end-start);
1716 return result;
1717}
1718
1719/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1720Py_LOCAL(PyBytesObject *)
1721replace_single_character_in_place(PyBytesObject *self,
1722 char from_c, char to_c,
1723 Py_ssize_t maxcount)
1724{
1725 char *self_s, *result_s, *start, *end, *next;
1726 Py_ssize_t self_len;
1727 PyBytesObject *result;
1728
1729 /* The result string will be the same size */
1730 self_s = PyBytes_AS_STRING(self);
1731 self_len = PyBytes_GET_SIZE(self);
1732
1733 next = findchar(self_s, self_len, from_c);
1734
1735 if (next == NULL) {
1736 /* No matches; return the original bytes */
1737 return return_self(self);
1738 }
1739
1740 /* Need to make a new bytes */
1741 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1742 if (result == NULL)
1743 return NULL;
1744 result_s = PyBytes_AS_STRING(result);
1745 Py_MEMCPY(result_s, self_s, self_len);
1746
1747 /* change everything in-place, starting with this one */
1748 start = result_s + (next-self_s);
1749 *start = to_c;
1750 start++;
1751 end = result_s + self_len;
1752
1753 while (--maxcount > 0) {
1754 next = findchar(start, end-start, from_c);
1755 if (next == NULL)
1756 break;
1757 *next = to_c;
1758 start = next+1;
1759 }
1760
1761 return result;
1762}
1763
1764/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1765Py_LOCAL(PyBytesObject *)
1766replace_substring_in_place(PyBytesObject *self,
1767 const char *from_s, Py_ssize_t from_len,
1768 const char *to_s, Py_ssize_t to_len,
1769 Py_ssize_t maxcount)
1770{
1771 char *result_s, *start, *end;
1772 char *self_s;
1773 Py_ssize_t self_len, offset;
1774 PyBytesObject *result;
1775
1776 /* The result bytes will be the same size */
1777
1778 self_s = PyBytes_AS_STRING(self);
1779 self_len = PyBytes_GET_SIZE(self);
1780
1781 offset = findstring(self_s, self_len,
1782 from_s, from_len,
1783 0, self_len, FORWARD);
1784 if (offset == -1) {
1785 /* No matches; return the original bytes */
1786 return return_self(self);
1787 }
1788
1789 /* Need to make a new bytes */
1790 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1791 if (result == NULL)
1792 return NULL;
1793 result_s = PyBytes_AS_STRING(result);
1794 Py_MEMCPY(result_s, self_s, self_len);
1795
1796 /* change everything in-place, starting with this one */
1797 start = result_s + offset;
1798 Py_MEMCPY(start, to_s, from_len);
1799 start += from_len;
1800 end = result_s + self_len;
1801
1802 while ( --maxcount > 0) {
1803 offset = findstring(start, end-start,
1804 from_s, from_len,
1805 0, end-start, FORWARD);
1806 if (offset==-1)
1807 break;
1808 Py_MEMCPY(start+offset, to_s, from_len);
1809 start += offset+from_len;
1810 }
1811
1812 return result;
1813}
1814
1815/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1816Py_LOCAL(PyBytesObject *)
1817replace_single_character(PyBytesObject *self,
1818 char from_c,
1819 const char *to_s, Py_ssize_t to_len,
1820 Py_ssize_t maxcount)
1821{
1822 char *self_s, *result_s;
1823 char *start, *next, *end;
1824 Py_ssize_t self_len, result_len;
1825 Py_ssize_t count, product;
1826 PyBytesObject *result;
1827
1828 self_s = PyBytes_AS_STRING(self);
1829 self_len = PyBytes_GET_SIZE(self);
1830
1831 count = countchar(self_s, self_len, from_c, maxcount);
1832 if (count == 0) {
1833 /* no matches, return unchanged */
1834 return return_self(self);
1835 }
1836
1837 /* use the difference between current and new, hence the "-1" */
1838 /* result_len = self_len + count * (to_len-1) */
1839 product = count * (to_len-1);
1840 if (product / (to_len-1) != count) {
1841 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1842 return NULL;
1843 }
1844 result_len = self_len + product;
1845 if (result_len < 0) {
1846 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1847 return NULL;
1848 }
1849
1850 if ( (result = (PyBytesObject *)
1851 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1852 return NULL;
1853 result_s = PyBytes_AS_STRING(result);
1854
1855 start = self_s;
1856 end = self_s + self_len;
1857 while (count-- > 0) {
1858 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001859 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001860 break;
1861
1862 if (next == start) {
1863 /* replace with the 'to' */
1864 Py_MEMCPY(result_s, to_s, to_len);
1865 result_s += to_len;
1866 start += 1;
1867 } else {
1868 /* copy the unchanged old then the 'to' */
1869 Py_MEMCPY(result_s, start, next-start);
1870 result_s += (next-start);
1871 Py_MEMCPY(result_s, to_s, to_len);
1872 result_s += to_len;
1873 start = next+1;
1874 }
1875 }
1876 /* Copy the remainder of the remaining bytes */
1877 Py_MEMCPY(result_s, start, end-start);
1878
1879 return result;
1880}
1881
1882/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1883Py_LOCAL(PyBytesObject *)
1884replace_substring(PyBytesObject *self,
1885 const char *from_s, Py_ssize_t from_len,
1886 const char *to_s, Py_ssize_t to_len,
1887 Py_ssize_t maxcount)
1888{
1889 char *self_s, *result_s;
1890 char *start, *next, *end;
1891 Py_ssize_t self_len, result_len;
1892 Py_ssize_t count, offset, product;
1893 PyBytesObject *result;
1894
1895 self_s = PyBytes_AS_STRING(self);
1896 self_len = PyBytes_GET_SIZE(self);
1897
1898 count = countstring(self_s, self_len,
1899 from_s, from_len,
1900 0, self_len, FORWARD, maxcount);
1901 if (count == 0) {
1902 /* no matches, return unchanged */
1903 return return_self(self);
1904 }
1905
1906 /* Check for overflow */
1907 /* result_len = self_len + count * (to_len-from_len) */
1908 product = count * (to_len-from_len);
1909 if (product / (to_len-from_len) != count) {
1910 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1911 return NULL;
1912 }
1913 result_len = self_len + product;
1914 if (result_len < 0) {
1915 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1916 return NULL;
1917 }
1918
1919 if ( (result = (PyBytesObject *)
1920 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1921 return NULL;
1922 result_s = PyBytes_AS_STRING(result);
1923
1924 start = self_s;
1925 end = self_s + self_len;
1926 while (count-- > 0) {
1927 offset = findstring(start, end-start,
1928 from_s, from_len,
1929 0, end-start, FORWARD);
1930 if (offset == -1)
1931 break;
1932 next = start+offset;
1933 if (next == start) {
1934 /* replace with the 'to' */
1935 Py_MEMCPY(result_s, to_s, to_len);
1936 result_s += to_len;
1937 start += from_len;
1938 } else {
1939 /* copy the unchanged old then the 'to' */
1940 Py_MEMCPY(result_s, start, next-start);
1941 result_s += (next-start);
1942 Py_MEMCPY(result_s, to_s, to_len);
1943 result_s += to_len;
1944 start = next+from_len;
1945 }
1946 }
1947 /* Copy the remainder of the remaining bytes */
1948 Py_MEMCPY(result_s, start, end-start);
1949
1950 return result;
1951}
1952
1953
1954Py_LOCAL(PyBytesObject *)
1955replace(PyBytesObject *self,
1956 const char *from_s, Py_ssize_t from_len,
1957 const char *to_s, Py_ssize_t to_len,
1958 Py_ssize_t maxcount)
1959{
1960 if (maxcount < 0) {
1961 maxcount = PY_SSIZE_T_MAX;
1962 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1963 /* nothing to do; return the original bytes */
1964 return return_self(self);
1965 }
1966
1967 if (maxcount == 0 ||
1968 (from_len == 0 && to_len == 0)) {
1969 /* nothing to do; return the original bytes */
1970 return return_self(self);
1971 }
1972
1973 /* Handle zero-length special cases */
1974
1975 if (from_len == 0) {
1976 /* insert the 'to' bytes everywhere. */
1977 /* >>> "Python".replace("", ".") */
1978 /* '.P.y.t.h.o.n.' */
1979 return replace_interleave(self, to_s, to_len, maxcount);
1980 }
1981
1982 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1983 /* point for an empty self bytes to generate a non-empty bytes */
1984 /* Special case so the remaining code always gets a non-empty bytes */
1985 if (PyBytes_GET_SIZE(self) == 0) {
1986 return return_self(self);
1987 }
1988
1989 if (to_len == 0) {
1990 /* delete all occurances of 'from' bytes */
1991 if (from_len == 1) {
1992 return replace_delete_single_character(
1993 self, from_s[0], maxcount);
1994 } else {
1995 return replace_delete_substring(self, from_s, from_len, maxcount);
1996 }
1997 }
1998
1999 /* Handle special case where both bytes have the same length */
2000
2001 if (from_len == to_len) {
2002 if (from_len == 1) {
2003 return replace_single_character_in_place(
2004 self,
2005 from_s[0],
2006 to_s[0],
2007 maxcount);
2008 } else {
2009 return replace_substring_in_place(
2010 self, from_s, from_len, to_s, to_len, maxcount);
2011 }
2012 }
2013
2014 /* Otherwise use the more generic algorithms */
2015 if (from_len == 1) {
2016 return replace_single_character(self, from_s[0],
2017 to_s, to_len, maxcount);
2018 } else {
2019 /* len('from')>=2, len('to')>=1 */
2020 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2021 }
2022}
2023
2024PyDoc_STRVAR(replace__doc__,
2025"B.replace (old, new[, count]) -> bytes\n\
2026\n\
2027Return a copy of bytes B with all occurrences of subsection\n\
2028old replaced by new. If the optional argument count is\n\
2029given, only the first count occurrences are replaced.");
2030
2031static PyObject *
2032bytes_replace(PyBytesObject *self, PyObject *args)
2033{
2034 Py_ssize_t count = -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002035 PyObject *from, *to, *res;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002036 PyBuffer vfrom, vto;
Neal Norwitz6968b052007-02-27 19:02:19 +00002037
2038 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2039 return NULL;
2040
Guido van Rossuma74184e2007-08-29 04:05:57 +00002041 if (_getbuffer(from, &vfrom) < 0)
2042 return NULL;
2043 if (_getbuffer(to, &vto) < 0) {
2044 PyObject_ReleaseBuffer(from, &vfrom);
2045 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002046 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002047
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002048 res = (PyObject *)replace((PyBytesObject *) self,
Guido van Rossuma74184e2007-08-29 04:05:57 +00002049 vfrom.buf, vfrom.len,
2050 vto.buf, vto.len, count);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002051
Guido van Rossuma74184e2007-08-29 04:05:57 +00002052 PyObject_ReleaseBuffer(from, &vfrom);
2053 PyObject_ReleaseBuffer(to, &vto);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002054 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00002055}
2056
2057
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesized so that an expression argument (for
   example a conditional expression) is evaluated as a unit. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append the slice data[left:right] to `list` as a new bytes object.
   Relies on variables `str` and `list` and on a label `onError` in the
   function that expands it. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store the slice data[left:right] as item number `count` of `list`:
   directly into a preallocated slot while count < MAX_PREALLOC, by
   appending afterwards.  Relies on variables `str`, `list`, `count` and
   on a label `onError` in the function that expands it. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Neal Norwitz6968b052007-02-27 19:02:19 +00002102
2103
2104Py_LOCAL_INLINE(PyObject *)
2105split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2106{
Guido van Rossum8f950672007-09-10 16:53:45 +00002107 register Py_ssize_t i, j, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002108 PyObject *str;
2109 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2110
2111 if (list == NULL)
2112 return NULL;
2113
2114 i = j = 0;
2115 while ((j < len) && (maxcount-- > 0)) {
Guido van Rossum8f950672007-09-10 16:53:45 +00002116 for(; j < len; j++) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002117 /* I found that using memchr makes no difference */
2118 if (s[j] == ch) {
2119 SPLIT_ADD(s, i, j);
2120 i = j = j + 1;
2121 break;
2122 }
2123 }
2124 }
2125 if (i <= len) {
2126 SPLIT_ADD(s, i, len);
2127 }
2128 FIX_PREALLOC_SIZE(list);
2129 return list;
2130
2131 onError:
2132 Py_DECREF(list);
2133 return NULL;
2134}
2135
Guido van Rossum8f950672007-09-10 16:53:45 +00002136#define ISSPACE(c) (isspace(Py_CHARMASK(c)) && ((c) & 0x80) == 0)
2137
2138Py_LOCAL_INLINE(PyObject *)
2139split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2140{
2141 register Py_ssize_t i, j, count = 0;
2142 PyObject *str;
2143 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2144
2145 if (list == NULL)
2146 return NULL;
2147
2148 for (i = j = 0; i < len; ) {
2149 /* find a token */
2150 while (i < len && ISSPACE(s[i]))
2151 i++;
2152 j = i;
2153 while (i < len && !ISSPACE(s[i]))
2154 i++;
2155 if (j < i) {
2156 if (maxcount-- <= 0)
2157 break;
2158 SPLIT_ADD(s, j, i);
2159 while (i < len && ISSPACE(s[i]))
2160 i++;
2161 j = i;
2162 }
2163 }
2164 if (j < len) {
2165 SPLIT_ADD(s, j, len);
2166 }
2167 FIX_PREALLOC_SIZE(list);
2168 return list;
2169
2170 onError:
2171 Py_DECREF(list);
2172 return NULL;
2173}
2174
Neal Norwitz6968b052007-02-27 19:02:19 +00002175PyDoc_STRVAR(split__doc__,
Guido van Rossum8f950672007-09-10 16:53:45 +00002176"B.split([sep [, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002177\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002178Return a list of the bytes in the string B, using sep as the delimiter.\n\
2179If sep is not given, B is split on ASCII whitespace charcters\n\
2180(space, tab, return, newline, formfeed, vertical tab).\n\
2181If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002182
2183static PyObject *
2184bytes_split(PyBytesObject *self, PyObject *args)
2185{
2186 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
Guido van Rossum8f950672007-09-10 16:53:45 +00002187 Py_ssize_t maxsplit = -1, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002188 const char *s = PyBytes_AS_STRING(self), *sub;
Guido van Rossum8f950672007-09-10 16:53:45 +00002189 PyObject *list, *str, *subobj = Py_None;
2190 PyBuffer vsub;
Neal Norwitz6968b052007-02-27 19:02:19 +00002191#ifdef USE_FAST
2192 Py_ssize_t pos;
2193#endif
2194
Guido van Rossum8f950672007-09-10 16:53:45 +00002195 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Neal Norwitz6968b052007-02-27 19:02:19 +00002196 return NULL;
2197 if (maxsplit < 0)
2198 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum8f950672007-09-10 16:53:45 +00002199
2200 if (subobj == Py_None)
2201 return split_whitespace(s, len, maxsplit);
2202
2203 if (_getbuffer(subobj, &vsub) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00002204 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002205 sub = vsub.buf;
2206 n = vsub.len;
Neal Norwitz6968b052007-02-27 19:02:19 +00002207
2208 if (n == 0) {
2209 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum8f950672007-09-10 16:53:45 +00002210 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002211 return NULL;
2212 }
Guido van Rossum8f950672007-09-10 16:53:45 +00002213 if (n == 1)
Neal Norwitz6968b052007-02-27 19:02:19 +00002214 return split_char(s, len, sub[0], maxsplit);
2215
2216 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum8f950672007-09-10 16:53:45 +00002217 if (list == NULL) {
2218 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002219 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002220 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002221
2222#ifdef USE_FAST
2223 i = j = 0;
2224 while (maxsplit-- > 0) {
2225 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2226 if (pos < 0)
2227 break;
2228 j = i+pos;
2229 SPLIT_ADD(s, i, j);
2230 i = j + n;
2231 }
2232#else
2233 i = j = 0;
2234 while ((j+n <= len) && (maxsplit-- > 0)) {
2235 for (; j+n <= len; j++) {
2236 if (Py_STRING_MATCH(s, j, sub, n)) {
2237 SPLIT_ADD(s, i, j);
2238 i = j = j + n;
2239 break;
2240 }
2241 }
2242 }
2243#endif
2244 SPLIT_ADD(s, i, len);
2245 FIX_PREALLOC_SIZE(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002246 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002247 return list;
2248
2249 onError:
2250 Py_DECREF(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002251 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002252 return NULL;
2253}
2254
2255PyDoc_STRVAR(partition__doc__,
2256"B.partition(sep) -> (head, sep, tail)\n\
2257\n\
2258Searches for the separator sep in B, and returns the part before it,\n\
2259the separator itself, and the part after it. If the separator is not\n\
2260found, returns B and two empty bytes.");
2261
2262static PyObject *
2263bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2264{
2265 PyObject *bytesep, *result;
2266
2267 bytesep = PyBytes_FromObject(sep_obj);
2268 if (! bytesep)
2269 return NULL;
2270
2271 result = stringlib_partition(
2272 (PyObject*) self,
2273 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002274 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002275 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2276 );
2277
2278 Py_DECREF(bytesep);
2279 return result;
2280}
2281
2282PyDoc_STRVAR(rpartition__doc__,
2283"B.rpartition(sep) -> (tail, sep, head)\n\
2284\n\
2285Searches for the separator sep in B, starting at the end of B, and returns\n\
2286the part before it, the separator itself, and the part after it. If the\n\
2287separator is not found, returns two empty bytes and B.");
2288
2289static PyObject *
2290bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2291{
2292 PyObject *bytesep, *result;
2293
2294 bytesep = PyBytes_FromObject(sep_obj);
2295 if (! bytesep)
2296 return NULL;
2297
2298 result = stringlib_rpartition(
2299 (PyObject*) self,
2300 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002301 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002302 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2303 );
2304
2305 Py_DECREF(bytesep);
2306 return result;
2307}
2308
2309Py_LOCAL_INLINE(PyObject *)
2310rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2311{
2312 register Py_ssize_t i, j, count=0;
2313 PyObject *str;
2314 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2315
2316 if (list == NULL)
2317 return NULL;
2318
2319 i = j = len - 1;
2320 while ((i >= 0) && (maxcount-- > 0)) {
2321 for (; i >= 0; i--) {
2322 if (s[i] == ch) {
2323 SPLIT_ADD(s, i + 1, j + 1);
2324 j = i = i - 1;
2325 break;
2326 }
2327 }
2328 }
2329 if (j >= -1) {
2330 SPLIT_ADD(s, 0, j + 1);
2331 }
2332 FIX_PREALLOC_SIZE(list);
2333 if (PyList_Reverse(list) < 0)
2334 goto onError;
2335
2336 return list;
2337
2338 onError:
2339 Py_DECREF(list);
2340 return NULL;
2341}
2342
Guido van Rossum8f950672007-09-10 16:53:45 +00002343Py_LOCAL_INLINE(PyObject *)
2344rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2345{
2346 register Py_ssize_t i, j, count = 0;
2347 PyObject *str;
2348 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2349
2350 if (list == NULL)
2351 return NULL;
2352
2353 for (i = j = len - 1; i >= 0; ) {
2354 /* find a token */
2355 while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
2356 i--;
2357 j = i;
2358 while (i >= 0 && !Py_UNICODE_ISSPACE(s[i]))
2359 i--;
2360 if (j > i) {
2361 if (maxcount-- <= 0)
2362 break;
2363 SPLIT_ADD(s, i + 1, j + 1);
2364 while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
2365 i--;
2366 j = i;
2367 }
2368 }
2369 if (j >= 0) {
2370 SPLIT_ADD(s, 0, j + 1);
2371 }
2372 FIX_PREALLOC_SIZE(list);
2373 if (PyList_Reverse(list) < 0)
2374 goto onError;
2375
2376 return list;
2377
2378 onError:
2379 Py_DECREF(list);
2380 return NULL;
2381}
2382
Neal Norwitz6968b052007-02-27 19:02:19 +00002383PyDoc_STRVAR(rsplit__doc__,
2384"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2385\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002386Return a list of the sections in the byte B, using sep as the delimiter,\n\
2387starting at the end of the bytes and working to the front.\n\
2388If sep is not given, B is split on ASCII whitespace characters\n\
2389(space, tab, return, newline, formfeed, vertical tab).\n\
2390If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002391
2392static PyObject *
2393bytes_rsplit(PyBytesObject *self, PyObject *args)
2394{
2395 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
Guido van Rossum8f950672007-09-10 16:53:45 +00002396 Py_ssize_t maxsplit = -1, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002397 const char *s = PyBytes_AS_STRING(self), *sub;
Guido van Rossum8f950672007-09-10 16:53:45 +00002398 PyObject *list, *str, *subobj = Py_None;
2399 PyBuffer vsub;
Neal Norwitz6968b052007-02-27 19:02:19 +00002400
Guido van Rossum8f950672007-09-10 16:53:45 +00002401 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Neal Norwitz6968b052007-02-27 19:02:19 +00002402 return NULL;
2403 if (maxsplit < 0)
2404 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum8f950672007-09-10 16:53:45 +00002405
2406 if (subobj == Py_None)
2407 return rsplit_whitespace(s, len, maxsplit);
2408
2409 if (_getbuffer(subobj, &vsub) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00002410 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002411 sub = vsub.buf;
2412 n = vsub.len;
Neal Norwitz6968b052007-02-27 19:02:19 +00002413
2414 if (n == 0) {
2415 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum8f950672007-09-10 16:53:45 +00002416 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002417 return NULL;
2418 }
2419 else if (n == 1)
2420 return rsplit_char(s, len, sub[0], maxsplit);
2421
2422 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum8f950672007-09-10 16:53:45 +00002423 if (list == NULL) {
2424 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002425 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002426 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002427
2428 j = len;
2429 i = j - n;
2430
2431 while ( (i >= 0) && (maxsplit-- > 0) ) {
2432 for (; i>=0; i--) {
2433 if (Py_STRING_MATCH(s, i, sub, n)) {
2434 SPLIT_ADD(s, i + n, j);
2435 j = i;
2436 i -= n;
2437 break;
2438 }
2439 }
2440 }
2441 SPLIT_ADD(s, 0, j);
2442 FIX_PREALLOC_SIZE(list);
2443 if (PyList_Reverse(list) < 0)
2444 goto onError;
Guido van Rossum8f950672007-09-10 16:53:45 +00002445 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002446 return list;
2447
2448onError:
2449 Py_DECREF(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002450 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002451 return NULL;
2452}
2453
2454PyDoc_STRVAR(extend__doc__,
2455"B.extend(iterable int) -> None\n\
2456\n\
2457Append all the elements from the iterator or sequence to the\n\
2458end of the bytes.");
2459static PyObject *
2460bytes_extend(PyBytesObject *self, PyObject *arg)
2461{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002462 if (bytes_setslice(self, Py_Size(self), Py_Size(self), arg) == -1)
Neal Norwitz6968b052007-02-27 19:02:19 +00002463 return NULL;
2464 Py_RETURN_NONE;
2465}
2466
2467
2468PyDoc_STRVAR(reverse__doc__,
2469"B.reverse() -> None\n\
2470\n\
2471Reverse the order of the values in bytes in place.");
2472static PyObject *
2473bytes_reverse(PyBytesObject *self, PyObject *unused)
2474{
2475 char swap, *head, *tail;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002476 Py_ssize_t i, j, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002477
2478 j = n / 2;
2479 head = self->ob_bytes;
2480 tail = head + n - 1;
2481 for (i = 0; i < j; i++) {
2482 swap = *head;
2483 *head++ = *tail;
2484 *tail-- = swap;
2485 }
2486
2487 Py_RETURN_NONE;
2488}
2489
2490PyDoc_STRVAR(insert__doc__,
2491"B.insert(index, int) -> None\n\
2492\n\
2493Insert a single item into the bytes before the given index.");
2494static PyObject *
2495bytes_insert(PyBytesObject *self, PyObject *args)
2496{
2497 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002498 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002499
2500 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2501 return NULL;
2502
2503 if (n == PY_SSIZE_T_MAX) {
2504 PyErr_SetString(PyExc_OverflowError,
2505 "cannot add more objects to bytes");
2506 return NULL;
2507 }
2508 if (value < 0 || value >= 256) {
2509 PyErr_SetString(PyExc_ValueError,
2510 "byte must be in range(0, 256)");
2511 return NULL;
2512 }
2513 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2514 return NULL;
2515
2516 if (where < 0) {
2517 where += n;
2518 if (where < 0)
2519 where = 0;
2520 }
2521 if (where > n)
2522 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002523 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002524 self->ob_bytes[where] = value;
2525
2526 Py_RETURN_NONE;
2527}
2528
2529PyDoc_STRVAR(append__doc__,
2530"B.append(int) -> None\n\
2531\n\
2532Append a single item to the end of the bytes.");
2533static PyObject *
2534bytes_append(PyBytesObject *self, PyObject *arg)
2535{
2536 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002537 Py_ssize_t n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002538
2539 if (! _getbytevalue(arg, &value))
2540 return NULL;
2541 if (n == PY_SSIZE_T_MAX) {
2542 PyErr_SetString(PyExc_OverflowError,
2543 "cannot add more objects to bytes");
2544 return NULL;
2545 }
2546 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2547 return NULL;
2548
2549 self->ob_bytes[n] = value;
2550
2551 Py_RETURN_NONE;
2552}
2553
2554PyDoc_STRVAR(pop__doc__,
2555"B.pop([index]) -> int\n\
2556\n\
2557Remove and return a single item from the bytes. If no index\n\
2558argument is give, will pop the last value.");
2559static PyObject *
2560bytes_pop(PyBytesObject *self, PyObject *args)
2561{
2562 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002563 Py_ssize_t where = -1, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002564
2565 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2566 return NULL;
2567
2568 if (n == 0) {
2569 PyErr_SetString(PyExc_OverflowError,
2570 "cannot pop an empty bytes");
2571 return NULL;
2572 }
2573 if (where < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002574 where += Py_Size(self);
2575 if (where < 0 || where >= Py_Size(self)) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002576 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2577 return NULL;
2578 }
2579
2580 value = self->ob_bytes[where];
2581 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2582 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2583 return NULL;
2584
2585 return PyInt_FromLong(value);
2586}
2587
2588PyDoc_STRVAR(remove__doc__,
2589"B.remove(int) -> None\n\
2590\n\
2591Remove the first occurance of a value in bytes");
2592static PyObject *
2593bytes_remove(PyBytesObject *self, PyObject *arg)
2594{
2595 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002596 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002597
2598 if (! _getbytevalue(arg, &value))
2599 return NULL;
2600
2601 for (where = 0; where < n; where++) {
2602 if (self->ob_bytes[where] == value)
2603 break;
2604 }
2605 if (where == n) {
2606 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2607 return NULL;
2608 }
2609
2610 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2611 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2612 return NULL;
2613
2614 Py_RETURN_NONE;
2615}
2616
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002617/* XXX These two helpers could be optimized if argsize == 1 */
2618
Neal Norwitz2bad9702007-08-27 06:19:22 +00002619static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002620lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2621 void *argptr, Py_ssize_t argsize)
2622{
2623 Py_ssize_t i = 0;
2624 while (i < mysize && memchr(argptr, myptr[i], argsize))
2625 i++;
2626 return i;
2627}
2628
Neal Norwitz2bad9702007-08-27 06:19:22 +00002629static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002630rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2631 void *argptr, Py_ssize_t argsize)
2632{
2633 Py_ssize_t i = mysize - 1;
2634 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2635 i--;
2636 return i + 1;
2637}
2638
2639PyDoc_STRVAR(strip__doc__,
Guido van Rossum8f950672007-09-10 16:53:45 +00002640"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002641\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002642Strip leading and trailing bytes contained in the argument.\n\
2643If the argument is omitted, strip ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002644static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002645bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002646{
2647 Py_ssize_t left, right, mysize, argsize;
2648 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002649 PyObject *arg = Py_None;
2650 PyBuffer varg;
2651 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002652 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002653 if (arg == Py_None) {
2654 argptr = "\t\n\r\f\v ";
2655 argsize = 6;
2656 }
2657 else {
2658 if (_getbuffer(arg, &varg) < 0)
2659 return NULL;
2660 argptr = varg.buf;
2661 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002662 }
2663 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002664 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002665 left = lstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumeb29e9a2007-08-08 21:55:33 +00002666 if (left == mysize)
2667 right = left;
2668 else
2669 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossum8f950672007-09-10 16:53:45 +00002670 if (arg != Py_None)
2671 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002672 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2673}
2674
2675PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum8f950672007-09-10 16:53:45 +00002676"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002677\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002678Strip leading bytes contained in the argument.\n\
2679If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002680static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002681bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002682{
2683 Py_ssize_t left, right, mysize, argsize;
2684 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002685 PyObject *arg = Py_None;
2686 PyBuffer varg;
2687 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002688 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002689 if (arg == Py_None) {
2690 argptr = "\t\n\r\f\v ";
2691 argsize = 6;
2692 }
2693 else {
2694 if (_getbuffer(arg, &varg) < 0)
2695 return NULL;
2696 argptr = varg.buf;
2697 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002698 }
2699 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002700 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002701 left = lstrip_helper(myptr, mysize, argptr, argsize);
2702 right = mysize;
Guido van Rossum8f950672007-09-10 16:53:45 +00002703 if (arg != Py_None)
2704 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002705 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2706}
2707
2708PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum8f950672007-09-10 16:53:45 +00002709"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002710\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002711Strip trailing bytes contained in the argument.\n\
2712If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002713static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002714bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002715{
2716 Py_ssize_t left, right, mysize, argsize;
2717 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002718 PyObject *arg = Py_None;
2719 PyBuffer varg;
2720 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002721 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002722 if (arg == Py_None) {
2723 argptr = "\t\n\r\f\v ";
2724 argsize = 6;
2725 }
2726 else {
2727 if (_getbuffer(arg, &varg) < 0)
2728 return NULL;
2729 argptr = varg.buf;
2730 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002731 }
2732 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002733 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002734 left = 0;
2735 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossum8f950672007-09-10 16:53:45 +00002736 if (arg != Py_None)
2737 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002738 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2739}
Neal Norwitz6968b052007-02-27 19:02:19 +00002740
Guido van Rossumd624f182006-04-24 13:47:05 +00002741PyDoc_STRVAR(decode_doc,
2742"B.decode([encoding[,errors]]) -> unicode obect.\n\
2743\n\
2744Decodes B using the codec registered for encoding. encoding defaults\n\
2745to the default encoding. errors may be given to set a different error\n\
2746handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2747a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2748as well as any other name registerd with codecs.register_error that is\n\
2749able to handle UnicodeDecodeErrors.");
2750
2751static PyObject *
2752bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002753{
Guido van Rossumd624f182006-04-24 13:47:05 +00002754 const char *encoding = NULL;
2755 const char *errors = NULL;
2756
2757 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2758 return NULL;
2759 if (encoding == NULL)
2760 encoding = PyUnicode_GetDefaultEncoding();
2761 return PyCodec_Decode(self, encoding, errors);
2762}
2763
Guido van Rossuma0867f72006-05-05 04:34:18 +00002764PyDoc_STRVAR(alloc_doc,
2765"B.__alloc__() -> int\n\
2766\n\
2767Returns the number of bytes actually allocated.");
2768
2769static PyObject *
2770bytes_alloc(PyBytesObject *self)
2771{
2772 return PyInt_FromSsize_t(self->ob_alloc);
2773}
2774
Guido van Rossum20188312006-05-05 15:15:40 +00002775PyDoc_STRVAR(join_doc,
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002776"B.join(iterable_of_bytes) -> bytes\n\
Guido van Rossum20188312006-05-05 15:15:40 +00002777\n\
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002778Concatenates any number of bytes objects, with B in between each pair.\n\
2779Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Guido van Rossum20188312006-05-05 15:15:40 +00002780
2781static PyObject *
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002782bytes_join(PyBytesObject *self, PyObject *it)
Guido van Rossum20188312006-05-05 15:15:40 +00002783{
2784 PyObject *seq;
Martin v. Löwis5d7428b2007-07-21 18:47:48 +00002785 Py_ssize_t mysize = Py_Size(self);
Guido van Rossum20188312006-05-05 15:15:40 +00002786 Py_ssize_t i;
2787 Py_ssize_t n;
2788 PyObject **items;
2789 Py_ssize_t totalsize = 0;
2790 PyObject *result;
2791 char *dest;
2792
2793 seq = PySequence_Fast(it, "can only join an iterable");
2794 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002795 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002796 n = PySequence_Fast_GET_SIZE(seq);
2797 items = PySequence_Fast_ITEMS(seq);
2798
2799 /* Compute the total size, and check that they are all bytes */
2800 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002801 PyObject *obj = items[i];
2802 if (!PyBytes_Check(obj)) {
2803 PyErr_Format(PyExc_TypeError,
2804 "can only join an iterable of bytes "
2805 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002806 /* XXX %ld isn't right on Win64 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002807 (long)i, Py_Type(obj)->tp_name);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002808 goto error;
2809 }
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002810 if (i > 0)
2811 totalsize += mysize;
Georg Brandlb3f568f2007-02-27 08:49:18 +00002812 totalsize += PyBytes_GET_SIZE(obj);
2813 if (totalsize < 0) {
2814 PyErr_NoMemory();
2815 goto error;
2816 }
Guido van Rossum20188312006-05-05 15:15:40 +00002817 }
2818
2819 /* Allocate the result, and copy the bytes */
2820 result = PyBytes_FromStringAndSize(NULL, totalsize);
2821 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002822 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002823 dest = PyBytes_AS_STRING(result);
2824 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002825 PyObject *obj = items[i];
2826 Py_ssize_t size = PyBytes_GET_SIZE(obj);
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002827 if (i > 0) {
2828 memcpy(dest, self->ob_bytes, mysize);
2829 dest += mysize;
2830 }
Georg Brandlb3f568f2007-02-27 08:49:18 +00002831 memcpy(dest, PyBytes_AS_STRING(obj), size);
2832 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002833 }
2834
2835 /* Done */
2836 Py_DECREF(seq);
2837 return result;
2838
2839 /* Error handling */
2840 error:
2841 Py_DECREF(seq);
2842 return NULL;
2843}
2844
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002845PyDoc_STRVAR(fromhex_doc,
2846"bytes.fromhex(string) -> bytes\n\
2847\n\
2848Create a bytes object from a string of hexadecimal numbers.\n\
2849Spaces between two numbers are accepted. Example:\n\
2850bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2851
/* Convert one ASCII hexadecimal digit to its numeric value.
 *
 * c is the character code to convert.  Returns 0..15 for '0'-'9',
 * 'a'-'f' and 'A'-'F', and -1 for every other value (including negative
 * values and codes above 127).
 *
 * The ranges are tested explicitly instead of going through <ctype.h>:
 * those functions depend on the current locale and are undefined for
 * negative arguments other than EOF, so a locale that classifies some
 * non-ASCII byte as a digit could otherwise produce a bogus value.
 */
static int
hex_digit_to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
2865
2866static PyObject *
2867bytes_fromhex(PyObject *cls, PyObject *args)
2868{
2869 PyObject *newbytes;
2870 char *hex, *buf;
2871 Py_ssize_t len, byteslen, i, j;
2872 int top, bot;
2873
2874 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
2875 return NULL;
2876
2877 byteslen = len / 2; /* max length if there are no spaces */
2878
2879 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
2880 if (!newbytes)
2881 return NULL;
2882 buf = PyBytes_AS_STRING(newbytes);
2883
Guido van Rossum4355a472007-05-04 05:00:04 +00002884 for (i = j = 0; i < len; i += 2) {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002885 /* skip over spaces in the input */
2886 while (Py_CHARMASK(hex[i]) == ' ')
2887 i++;
2888 if (i >= len)
2889 break;
2890 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2891 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2892 if (top == -1 || bot == -1) {
2893 PyErr_Format(PyExc_ValueError,
2894 "non-hexadecimal number string '%c%c' found in "
2895 "fromhex() arg at position %zd",
2896 hex[i], hex[i+1], i);
2897 goto error;
2898 }
2899 buf[j++] = (top << 4) + bot;
2900 }
2901 if (PyBytes_Resize(newbytes, j) < 0)
2902 goto error;
2903 return newbytes;
2904
2905 error:
2906 Py_DECREF(newbytes);
2907 return NULL;
2908}
2909
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002910PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2911
2912static PyObject *
2913bytes_reduce(PyBytesObject *self)
2914{
Martin v. Löwis9c121062007-08-05 20:26:11 +00002915 PyObject *latin1;
2916 if (self->ob_bytes)
Guido van Rossuma74184e2007-08-29 04:05:57 +00002917 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
2918 Py_Size(self), NULL);
Martin v. Löwis9c121062007-08-05 20:26:11 +00002919 else
Guido van Rossuma74184e2007-08-29 04:05:57 +00002920 latin1 = PyUnicode_FromString("");
Martin v. Löwis9c121062007-08-05 20:26:11 +00002921 return Py_BuildValue("(O(Ns))", Py_Type(self), latin1, "latin-1");
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002922}
2923
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002924static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002925 (lenfunc)bytes_length, /* sq_length */
2926 (binaryfunc)bytes_concat, /* sq_concat */
2927 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2928 (ssizeargfunc)bytes_getitem, /* sq_item */
2929 0, /* sq_slice */
2930 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2931 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002932 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002933 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2934 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002935};
2936
2937static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002938 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00002939 (binaryfunc)bytes_subscript,
2940 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002941};
2942
2943static PyBufferProcs bytes_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002944 (getbufferproc)bytes_getbuffer,
2945 (releasebufferproc)bytes_releasebuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002946};
2947
2948static PyMethodDef
2949bytes_methods[] = {
Neal Norwitz6968b052007-02-27 19:02:19 +00002950 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2951 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2952 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2953 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2954 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2955 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
2956 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2957 startswith__doc__},
2958 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2959 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
2960 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2961 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
2962 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2963 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2964 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
2965 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
2966 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
2967 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
2968 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
2969 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
Guido van Rossum8f950672007-09-10 16:53:45 +00002970 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2971 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2972 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Guido van Rossumd624f182006-04-24 13:47:05 +00002973 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002974 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002975 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2976 fromhex_doc},
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002977 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002978 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002979 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002980};
2981
2982PyDoc_STRVAR(bytes_doc,
2983"bytes([iterable]) -> new array of bytes.\n\
2984\n\
2985If an argument is given it must be an iterable yielding ints in range(256).");
2986
2987PyTypeObject PyBytes_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002988 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002989 "bytes",
2990 sizeof(PyBytesObject),
2991 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00002992 (destructor)bytes_dealloc, /* tp_dealloc */
2993 0, /* tp_print */
2994 0, /* tp_getattr */
2995 0, /* tp_setattr */
2996 0, /* tp_compare */
2997 (reprfunc)bytes_repr, /* tp_repr */
2998 0, /* tp_as_number */
2999 &bytes_as_sequence, /* tp_as_sequence */
3000 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00003001 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00003002 0, /* tp_call */
3003 (reprfunc)bytes_str, /* tp_str */
3004 PyObject_GenericGetAttr, /* tp_getattro */
3005 0, /* tp_setattro */
3006 &bytes_as_buffer, /* tp_as_buffer */
Georg Brandlb3f568f2007-02-27 08:49:18 +00003007 /* bytes is 'final' or 'sealed' */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003008 Py_TPFLAGS_DEFAULT, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00003009 bytes_doc, /* tp_doc */
3010 0, /* tp_traverse */
3011 0, /* tp_clear */
3012 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3013 0, /* tp_weaklistoffset */
3014 0, /* tp_iter */
3015 0, /* tp_iternext */
3016 bytes_methods, /* tp_methods */
3017 0, /* tp_members */
3018 0, /* tp_getset */
3019 0, /* tp_base */
3020 0, /* tp_dict */
3021 0, /* tp_descr_get */
3022 0, /* tp_descr_set */
3023 0, /* tp_dictoffset */
3024 (initproc)bytes_init, /* tp_init */
3025 PyType_GenericAlloc, /* tp_alloc */
3026 PyType_GenericNew, /* tp_new */
3027 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003028};