blob: 930b761f2433ac0b34f4b7a6a301f5ceaf15373d [file] [log] [blame]
/* Bytes object implementation */

/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz6968b052007-02-27 19:02:19 +00009/* The nullbytes are used by the stringlib during partition.
10 * If partition is removed from bytes, nullbytes and its helper
11 * Init/Fini should also be removed.
12 */
13static PyBytesObject *nullbytes = NULL;
14
15void
16PyBytes_Fini(void)
17{
18 Py_CLEAR(nullbytes);
19}
20
21int
22PyBytes_Init(void)
23{
24 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
25 if (nullbytes == NULL)
26 return 0;
27 nullbytes->ob_bytes = NULL;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000028 Py_Size(nullbytes) = nullbytes->ob_alloc = 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000029 nullbytes->ob_exports = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +000030 return 1;
31}
32
/* end nullbytes support */
34
/* Helpers */
36
37static int
38_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000039{
40 PyObject *intarg = PyNumber_Int(arg);
41 if (! intarg)
42 return 0;
43 *value = PyInt_AsLong(intarg);
44 Py_DECREF(intarg);
45 if (*value < 0 || *value >= 256) {
46 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
47 return 0;
48 }
49 return 1;
50}
51
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000052static int
53bytes_getbuffer(PyBytesObject *obj, PyBuffer *view, int flags)
Guido van Rossum75d38e92007-08-24 17:33:11 +000054{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000055 int ret;
56 void *ptr;
57 if (view == NULL) {
58 obj->ob_exports++;
59 return 0;
60 }
Guido van Rossum75d38e92007-08-24 17:33:11 +000061 if (obj->ob_bytes == NULL)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000062 ptr = "";
63 else
64 ptr = obj->ob_bytes;
65 ret = PyBuffer_FillInfo(view, ptr, Py_Size(obj), 0, flags);
66 if (ret >= 0) {
67 obj->ob_exports++;
68 }
69 return ret;
70}
71
72static void
73bytes_releasebuffer(PyBytesObject *obj, PyBuffer *view)
74{
75 obj->ob_exports--;
76}
77
Neal Norwitz2bad9702007-08-27 06:19:22 +000078static Py_ssize_t
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000079_getbuffer(PyObject *obj, PyBuffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000080{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000081 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000082
83 if (buffer == NULL ||
84 PyUnicode_Check(obj) ||
Guido van Rossuma74184e2007-08-29 04:05:57 +000085 buffer->bf_getbuffer == NULL)
86 {
87 PyErr_Format(PyExc_TypeError,
88 "Type %.100s doesn't support the buffer API",
89 Py_Type(obj)->tp_name);
90 return -1;
91 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000092
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000093 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
94 return -1;
95 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000096}
97
/* Direct API functions */
99
100PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000101PyBytes_FromObject(PyObject *input)
102{
103 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
104 input, NULL);
105}
106
107PyObject *
108PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000109{
110 PyBytesObject *new;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000111 int alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000112
Guido van Rossumd624f182006-04-24 13:47:05 +0000113 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000114
115 new = PyObject_New(PyBytesObject, &PyBytes_Type);
116 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000117 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000118
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000119 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000120 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000121 alloc = 0;
122 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000123 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000124 alloc = size + 1;
125 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +0000126 if (new->ob_bytes == NULL) {
127 Py_DECREF(new);
Neal Norwitz16596dd2007-08-30 05:44:54 +0000128 return PyErr_NoMemory();
Guido van Rossumd624f182006-04-24 13:47:05 +0000129 }
130 if (bytes != NULL)
131 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000132 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133 }
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000134 Py_Size(new) = size;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000135 new->ob_alloc = alloc;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000136 new->ob_exports = 0;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000137
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000138 return (PyObject *)new;
139}
140
141Py_ssize_t
142PyBytes_Size(PyObject *self)
143{
144 assert(self != NULL);
145 assert(PyBytes_Check(self));
146
Guido van Rossum20188312006-05-05 15:15:40 +0000147 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000148}
149
150char *
151PyBytes_AsString(PyObject *self)
152{
153 assert(self != NULL);
154 assert(PyBytes_Check(self));
155
Guido van Rossum20188312006-05-05 15:15:40 +0000156 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000157}
158
159int
160PyBytes_Resize(PyObject *self, Py_ssize_t size)
161{
162 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000163 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000164
165 assert(self != NULL);
166 assert(PyBytes_Check(self));
167 assert(size >= 0);
168
Guido van Rossuma0867f72006-05-05 04:34:18 +0000169 if (size < alloc / 2) {
170 /* Major downsize; resize down to exact size */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000171 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000172 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000173 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000174 /* Within allocated size; quick exit */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000175 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000176 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000177 return 0;
178 }
179 else if (size <= alloc * 1.125) {
180 /* Moderate upsize; overallocate similar to list_resize() */
181 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
182 }
183 else {
184 /* Major upsize; resize up to exact size */
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000185 alloc = size + 1;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000186 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000187
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000188 if (((PyBytesObject *)self)->ob_exports > 0) {
189 /*
Guido van Rossuma74184e2007-08-29 04:05:57 +0000190 fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
191 ((PyBytesObject *)self)->ob_bytes);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000192 */
193 PyErr_SetString(PyExc_BufferError,
Guido van Rossuma74184e2007-08-29 04:05:57 +0000194 "Existing exports of data: object cannot be re-sized");
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000195 return -1;
196 }
197
Guido van Rossuma0867f72006-05-05 04:34:18 +0000198 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000200 PyErr_NoMemory();
201 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000202 }
203
Guido van Rossumd624f182006-04-24 13:47:05 +0000204 ((PyBytesObject *)self)->ob_bytes = sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000205 Py_Size(self) = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000206 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000207 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
208
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000209 return 0;
210}
211
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000212PyObject *
213PyBytes_Concat(PyObject *a, PyObject *b)
214{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000215 Py_ssize_t size;
216 PyBuffer va, vb;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000217 PyBytesObject *result;
218
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000219 va.len = -1;
220 vb.len = -1;
221 if (_getbuffer(a, &va) < 0 ||
222 _getbuffer(b, &vb) < 0) {
Guido van Rossum75d38e92007-08-24 17:33:11 +0000223 if (va.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000224 PyObject_ReleaseBuffer(a, &va);
225 if (vb.len != -1)
226 PyObject_ReleaseBuffer(b, &vb);
227 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
228 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
229 return NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000230 }
231
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000232 size = va.len + vb.len;
233 if (size < 0) {
234 PyObject_ReleaseBuffer(a, &va);
235 PyObject_ReleaseBuffer(b, &vb);
236 return PyErr_NoMemory();
237 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000238
239 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
240 if (result != NULL) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000241 memcpy(result->ob_bytes, va.buf, va.len);
242 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000243 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000244
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000245 PyObject_ReleaseBuffer(a, &va);
246 PyObject_ReleaseBuffer(b, &vb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000247 return (PyObject *)result;
248}
249
/* Functions stuffed into the type object */
251
252static Py_ssize_t
253bytes_length(PyBytesObject *self)
254{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000255 return Py_Size(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256}
257
258static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000259bytes_concat(PyBytesObject *self, PyObject *other)
260{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000261 return PyBytes_Concat((PyObject *)self, other);
Guido van Rossumd624f182006-04-24 13:47:05 +0000262}
263
264static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000265bytes_iconcat(PyBytesObject *self, PyObject *other)
266{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000267 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000268 Py_ssize_t size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000269 PyBuffer vo;
Guido van Rossum13e57212006-04-27 22:54:26 +0000270
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000271 if (_getbuffer(other, &vo) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000272 PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
273 Py_Type(self)->tp_name);
274 return NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000275 }
276
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000277 mysize = Py_Size(self);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000278 size = mysize + vo.len;
279 if (size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000280 PyObject_ReleaseBuffer(other, &vo);
281 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000282 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000283 if (size < self->ob_alloc) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000284 Py_Size(self) = size;
285 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000286 }
287 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000288 PyObject_ReleaseBuffer(other, &vo);
289 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000290 }
291 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
292 PyObject_ReleaseBuffer(other, &vo);
Guido van Rossum13e57212006-04-27 22:54:26 +0000293 Py_INCREF(self);
294 return (PyObject *)self;
295}
296
297static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000298bytes_repeat(PyBytesObject *self, Py_ssize_t count)
299{
300 PyBytesObject *result;
301 Py_ssize_t mysize;
302 Py_ssize_t size;
303
304 if (count < 0)
305 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000306 mysize = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000307 size = mysize * count;
308 if (count != 0 && size / count != mysize)
309 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000310 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000311 if (result != NULL && size != 0) {
312 if (mysize == 1)
313 memset(result->ob_bytes, self->ob_bytes[0], size);
314 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000315 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000316 for (i = 0; i < count; i++)
317 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
318 }
319 }
320 return (PyObject *)result;
321}
322
323static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000324bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
325{
326 Py_ssize_t mysize;
327 Py_ssize_t size;
328
329 if (count < 0)
330 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000331 mysize = Py_Size(self);
Guido van Rossum13e57212006-04-27 22:54:26 +0000332 size = mysize * count;
333 if (count != 0 && size / count != mysize)
334 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000335 if (size < self->ob_alloc) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000336 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000337 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000338 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000339 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000340 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000341
Guido van Rossum13e57212006-04-27 22:54:26 +0000342 if (mysize == 1)
343 memset(self->ob_bytes, self->ob_bytes[0], size);
344 else {
345 Py_ssize_t i;
346 for (i = 1; i < count; i++)
347 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
348 }
349
350 Py_INCREF(self);
351 return (PyObject *)self;
352}
353
354static int
355bytes_substring(PyBytesObject *self, PyBytesObject *other)
356{
357 Py_ssize_t i;
358
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000359 if (Py_Size(other) == 1) {
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000360 return memchr(self->ob_bytes, other->ob_bytes[0],
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000361 Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000362 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000363 if (Py_Size(other) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000364 return 1; /* Edge case */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000365 for (i = 0; i + Py_Size(other) <= Py_Size(self); i++) {
Guido van Rossum13e57212006-04-27 22:54:26 +0000366 /* XXX Yeah, yeah, lots of optimizations possible... */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000367 if (memcmp(self->ob_bytes + i, other->ob_bytes, Py_Size(other)) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000368 return 1;
369 }
370 return 0;
371}
372
373static int
374bytes_contains(PyBytesObject *self, PyObject *value)
375{
376 Py_ssize_t ival;
377
378 if (PyBytes_Check(value))
379 return bytes_substring(self, (PyBytesObject *)value);
380
Thomas Woutersd204a712006-08-22 13:41:17 +0000381 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000382 if (ival == -1 && PyErr_Occurred())
383 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000384 if (ival < 0 || ival >= 256) {
385 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
386 return -1;
387 }
388
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000389 return memchr(self->ob_bytes, ival, Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000390}
391
392static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000393bytes_getitem(PyBytesObject *self, Py_ssize_t i)
394{
395 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000396 i += Py_Size(self);
397 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000398 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
399 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000400 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000401 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
402}
403
404static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000405bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000406{
Thomas Wouters376446d2006-12-19 08:30:14 +0000407 if (PyIndex_Check(item)) {
408 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000409
Thomas Wouters376446d2006-12-19 08:30:14 +0000410 if (i == -1 && PyErr_Occurred())
411 return NULL;
412
413 if (i < 0)
414 i += PyBytes_GET_SIZE(self);
415
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000416 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000417 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
418 return NULL;
419 }
420 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
421 }
422 else if (PySlice_Check(item)) {
423 Py_ssize_t start, stop, step, slicelength, cur, i;
424 if (PySlice_GetIndicesEx((PySliceObject *)item,
425 PyBytes_GET_SIZE(self),
426 &start, &stop, &step, &slicelength) < 0) {
427 return NULL;
428 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000429
Thomas Wouters376446d2006-12-19 08:30:14 +0000430 if (slicelength <= 0)
431 return PyBytes_FromStringAndSize("", 0);
432 else if (step == 1) {
433 return PyBytes_FromStringAndSize(self->ob_bytes + start,
434 slicelength);
435 }
436 else {
437 char *source_buf = PyBytes_AS_STRING(self);
438 char *result_buf = (char *)PyMem_Malloc(slicelength);
439 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000440
Thomas Wouters376446d2006-12-19 08:30:14 +0000441 if (result_buf == NULL)
442 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000443
Thomas Wouters376446d2006-12-19 08:30:14 +0000444 for (cur = start, i = 0; i < slicelength;
445 cur += step, i++) {
446 result_buf[i] = source_buf[cur];
447 }
448 result = PyBytes_FromStringAndSize(result_buf, slicelength);
449 PyMem_Free(result_buf);
450 return result;
451 }
452 }
453 else {
454 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
455 return NULL;
456 }
457}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000458
Guido van Rossumd624f182006-04-24 13:47:05 +0000459static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000460bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000461 PyObject *values)
462{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000463 Py_ssize_t avail, needed;
464 void *bytes;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000465 PyBuffer vbytes;
466 int res = 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000467
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000468 vbytes.len = -1;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000469 if (values == (PyObject *)self) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000470 /* Make a copy and call this function recursively */
Guido van Rossumd624f182006-04-24 13:47:05 +0000471 int err;
472 values = PyBytes_FromObject(values);
473 if (values == NULL)
474 return -1;
475 err = bytes_setslice(self, lo, hi, values);
476 Py_DECREF(values);
477 return err;
478 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000479 if (values == NULL) {
480 /* del b[lo:hi] */
481 bytes = NULL;
482 needed = 0;
483 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000484 else {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000485 if (_getbuffer(values, &vbytes) < 0) {
486 PyErr_Format(PyExc_TypeError,
487 "can't set bytes slice from %.100s",
488 Py_Type(values)->tp_name);
489 return -1;
490 }
491 needed = vbytes.len;
492 bytes = vbytes.buf;
Guido van Rossumd624f182006-04-24 13:47:05 +0000493 }
494
495 if (lo < 0)
496 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000497 if (hi < lo)
498 hi = lo;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000499 if (hi > Py_Size(self))
500 hi = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000501
502 avail = hi - lo;
503 if (avail < 0)
504 lo = hi = avail = 0;
505
506 if (avail != needed) {
507 if (avail > needed) {
508 /*
509 0 lo hi old_size
510 | |<----avail----->|<-----tomove------>|
511 | |<-needed->|<-----tomove------>|
512 0 lo new_hi new_size
513 */
514 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000515 Py_Size(self) - hi);
Guido van Rossumd624f182006-04-24 13:47:05 +0000516 }
Guido van Rossuma74184e2007-08-29 04:05:57 +0000517 /* XXX(nnorwitz): need to verify this can't overflow! */
Thomas Wouters376446d2006-12-19 08:30:14 +0000518 if (PyBytes_Resize((PyObject *)self,
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000519 Py_Size(self) + needed - avail) < 0) {
520 res = -1;
521 goto finish;
522 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000523 if (avail < needed) {
524 /*
525 0 lo hi old_size
526 | |<-avail->|<-----tomove------>|
527 | |<----needed---->|<-----tomove------>|
528 0 lo new_hi new_size
529 */
530 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000531 Py_Size(self) - lo - needed);
Guido van Rossumd624f182006-04-24 13:47:05 +0000532 }
533 }
534
535 if (needed > 0)
536 memcpy(self->ob_bytes + lo, bytes, needed);
537
Guido van Rossum75d38e92007-08-24 17:33:11 +0000538
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000539 finish:
Guido van Rossum75d38e92007-08-24 17:33:11 +0000540 if (vbytes.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000541 PyObject_ReleaseBuffer(values, &vbytes);
542 return res;
Guido van Rossumd624f182006-04-24 13:47:05 +0000543}
544
545static int
546bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
547{
548 Py_ssize_t ival;
549
550 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000551 i += Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000552
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000553 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000554 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
555 return -1;
556 }
557
558 if (value == NULL)
559 return bytes_setslice(self, i, i+1, NULL);
560
Thomas Woutersd204a712006-08-22 13:41:17 +0000561 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000562 if (ival == -1 && PyErr_Occurred())
563 return -1;
564
565 if (ival < 0 || ival >= 256) {
566 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
567 return -1;
568 }
569
570 self->ob_bytes[i] = ival;
571 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000572}
573
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000574static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000575bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
576{
577 Py_ssize_t start, stop, step, slicelen, needed;
578 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000579
Thomas Wouters376446d2006-12-19 08:30:14 +0000580 if (PyIndex_Check(item)) {
581 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
582
583 if (i == -1 && PyErr_Occurred())
584 return -1;
585
586 if (i < 0)
587 i += PyBytes_GET_SIZE(self);
588
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000589 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000590 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
591 return -1;
592 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000593
Thomas Wouters376446d2006-12-19 08:30:14 +0000594 if (values == NULL) {
595 /* Fall through to slice assignment */
596 start = i;
597 stop = i + 1;
598 step = 1;
599 slicelen = 1;
600 }
601 else {
602 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
603 if (ival == -1 && PyErr_Occurred())
604 return -1;
605 if (ival < 0 || ival >= 256) {
606 PyErr_SetString(PyExc_ValueError,
607 "byte must be in range(0, 256)");
608 return -1;
609 }
610 self->ob_bytes[i] = (char)ival;
611 return 0;
612 }
613 }
614 else if (PySlice_Check(item)) {
615 if (PySlice_GetIndicesEx((PySliceObject *)item,
616 PyBytes_GET_SIZE(self),
617 &start, &stop, &step, &slicelen) < 0) {
618 return -1;
619 }
620 }
621 else {
622 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
623 return -1;
624 }
625
626 if (values == NULL) {
627 bytes = NULL;
628 needed = 0;
629 }
630 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
631 /* Make a copy an call this function recursively */
632 int err;
633 values = PyBytes_FromObject(values);
634 if (values == NULL)
635 return -1;
636 err = bytes_ass_subscript(self, item, values);
637 Py_DECREF(values);
638 return err;
639 }
640 else {
641 assert(PyBytes_Check(values));
642 bytes = ((PyBytesObject *)values)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000643 needed = Py_Size(values);
Thomas Wouters376446d2006-12-19 08:30:14 +0000644 }
645 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
646 if ((step < 0 && start < stop) ||
647 (step > 0 && start > stop))
648 stop = start;
649 if (step == 1) {
650 if (slicelen != needed) {
651 if (slicelen > needed) {
652 /*
653 0 start stop old_size
654 | |<---slicelen--->|<-----tomove------>|
655 | |<-needed->|<-----tomove------>|
656 0 lo new_hi new_size
657 */
658 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000659 Py_Size(self) - stop);
Thomas Wouters376446d2006-12-19 08:30:14 +0000660 }
661 if (PyBytes_Resize((PyObject *)self,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000662 Py_Size(self) + needed - slicelen) < 0)
Thomas Wouters376446d2006-12-19 08:30:14 +0000663 return -1;
664 if (slicelen < needed) {
665 /*
666 0 lo hi old_size
667 | |<-avail->|<-----tomove------>|
668 | |<----needed---->|<-----tomove------>|
669 0 lo new_hi new_size
670 */
671 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000672 Py_Size(self) - start - needed);
Thomas Wouters376446d2006-12-19 08:30:14 +0000673 }
674 }
675
676 if (needed > 0)
677 memcpy(self->ob_bytes + start, bytes, needed);
678
679 return 0;
680 }
681 else {
682 if (needed == 0) {
683 /* Delete slice */
684 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000685
Thomas Wouters376446d2006-12-19 08:30:14 +0000686 if (step < 0) {
687 stop = start + 1;
688 start = stop + step * (slicelen - 1) - 1;
689 step = -step;
690 }
691 for (cur = start, i = 0;
692 i < slicelen; cur += step, i++) {
693 Py_ssize_t lim = step - 1;
694
695 if (cur + step >= PyBytes_GET_SIZE(self))
696 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000697
Thomas Wouters376446d2006-12-19 08:30:14 +0000698 memmove(self->ob_bytes + cur - i,
699 self->ob_bytes + cur + 1, lim);
700 }
701 /* Move the tail of the bytes, in one chunk */
702 cur = start + slicelen*step;
703 if (cur < PyBytes_GET_SIZE(self)) {
704 memmove(self->ob_bytes + cur - slicelen,
705 self->ob_bytes + cur,
706 PyBytes_GET_SIZE(self) - cur);
707 }
708 if (PyBytes_Resize((PyObject *)self,
709 PyBytes_GET_SIZE(self) - slicelen) < 0)
710 return -1;
711
712 return 0;
713 }
714 else {
715 /* Assign slice */
716 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000717
Thomas Wouters376446d2006-12-19 08:30:14 +0000718 if (needed != slicelen) {
719 PyErr_Format(PyExc_ValueError,
720 "attempt to assign bytes of size %zd "
721 "to extended slice of size %zd",
722 needed, slicelen);
723 return -1;
724 }
725 for (cur = start, i = 0; i < slicelen; cur += step, i++)
726 self->ob_bytes[cur] = bytes[i];
727 return 0;
728 }
729 }
730}
731
732static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000733bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
734{
Guido van Rossumd624f182006-04-24 13:47:05 +0000735 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000736 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000737 const char *encoding = NULL;
738 const char *errors = NULL;
739 Py_ssize_t count;
740 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000741 PyObject *(*iternext)(PyObject *);
742
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000743 if (Py_Size(self) != 0) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000744 /* Empty previous contents (yes, do this first of all!) */
745 if (PyBytes_Resize((PyObject *)self, 0) < 0)
746 return -1;
747 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000748
Guido van Rossumd624f182006-04-24 13:47:05 +0000749 /* Parse arguments */
750 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
751 &arg, &encoding, &errors))
752 return -1;
753
754 /* Make a quick exit if no first argument */
755 if (arg == NULL) {
756 if (encoding != NULL || errors != NULL) {
757 PyErr_SetString(PyExc_TypeError,
758 "encoding or errors without sequence argument");
759 return -1;
760 }
761 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000762 }
763
Guido van Rossumd624f182006-04-24 13:47:05 +0000764 if (PyUnicode_Check(arg)) {
765 /* Encode via the codec registry */
Guido van Rossum4355a472007-05-04 05:00:04 +0000766 PyObject *encoded, *new;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000767 if (encoding == NULL) {
768 PyErr_SetString(PyExc_TypeError,
769 "string argument without an encoding");
770 return -1;
771 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000772 encoded = PyCodec_Encode(arg, encoding, errors);
773 if (encoded == NULL)
774 return -1;
Guido van Rossum4355a472007-05-04 05:00:04 +0000775 if (!PyBytes_Check(encoded) && !PyString_Check(encoded)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000776 PyErr_Format(PyExc_TypeError,
Guido van Rossum4355a472007-05-04 05:00:04 +0000777 "encoder did not return a str8 or bytes object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000778 Py_Type(encoded)->tp_name);
Guido van Rossumd624f182006-04-24 13:47:05 +0000779 Py_DECREF(encoded);
780 return -1;
781 }
Guido van Rossuma74184e2007-08-29 04:05:57 +0000782 new = bytes_iconcat(self, encoded);
783 Py_DECREF(encoded);
784 if (new == NULL)
785 return -1;
786 Py_DECREF(new);
787 return 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000788 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000789
Guido van Rossumd624f182006-04-24 13:47:05 +0000790 /* If it's not unicode, there can't be encoding or errors */
791 if (encoding != NULL || errors != NULL) {
792 PyErr_SetString(PyExc_TypeError,
793 "encoding or errors without a string argument");
794 return -1;
795 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000796
Guido van Rossumd624f182006-04-24 13:47:05 +0000797 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000798 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000799 if (count == -1 && PyErr_Occurred())
800 PyErr_Clear();
801 else {
802 if (count < 0) {
803 PyErr_SetString(PyExc_ValueError, "negative count");
804 return -1;
805 }
806 if (count > 0) {
807 if (PyBytes_Resize((PyObject *)self, count))
808 return -1;
809 memset(self->ob_bytes, 0, count);
810 }
811 return 0;
812 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000813
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000814 /* Use the modern buffer interface */
815 if (PyObject_CheckBuffer(arg)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000816 Py_ssize_t size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000817 PyBuffer view;
818 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000819 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000820 size = view.len;
821 if (PyBytes_Resize((PyObject *)self, size) < 0) goto fail;
822 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
823 goto fail;
824 PyObject_ReleaseBuffer(arg, &view);
Guido van Rossumd624f182006-04-24 13:47:05 +0000825 return 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000826 fail:
827 PyObject_ReleaseBuffer(arg, &view);
828 return -1;
Guido van Rossumd624f182006-04-24 13:47:05 +0000829 }
830
831 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000832
833 /* Get the iterator */
834 it = PyObject_GetIter(arg);
835 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000836 return -1;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000837 iternext = *Py_Type(it)->tp_iternext;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000838
839 /* Run the iterator to exhaustion */
840 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000841 PyObject *item;
842 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000843
Guido van Rossumd624f182006-04-24 13:47:05 +0000844 /* Get the next item */
845 item = iternext(it);
846 if (item == NULL) {
847 if (PyErr_Occurred()) {
848 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
849 goto error;
850 PyErr_Clear();
851 }
852 break;
853 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000854
Guido van Rossumd624f182006-04-24 13:47:05 +0000855 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000856 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000857 Py_DECREF(item);
858 if (value == -1 && PyErr_Occurred())
859 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000860
Guido van Rossumd624f182006-04-24 13:47:05 +0000861 /* Range check */
862 if (value < 0 || value >= 256) {
863 PyErr_SetString(PyExc_ValueError,
864 "bytes must be in range(0, 256)");
865 goto error;
866 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000867
Guido van Rossumd624f182006-04-24 13:47:05 +0000868 /* Append the byte */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000869 if (Py_Size(self) < self->ob_alloc)
870 Py_Size(self)++;
871 else if (PyBytes_Resize((PyObject *)self, Py_Size(self)+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000872 goto error;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000873 self->ob_bytes[Py_Size(self)-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000874 }
875
876 /* Clean up and return success */
877 Py_DECREF(it);
878 return 0;
879
880 error:
881 /* Error handling when it != NULL */
882 Py_DECREF(it);
883 return -1;
884}
885
Georg Brandlee91be42007-02-24 19:41:35 +0000886/* Mostly copied from string_repr, but without the
887 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000888static PyObject *
889bytes_repr(PyBytesObject *self)
890{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000891 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000892 size_t newsize = 3 + 4 * Py_Size(self);
Georg Brandlee91be42007-02-24 19:41:35 +0000893 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000894 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(self)) {
Georg Brandlee91be42007-02-24 19:41:35 +0000895 PyErr_SetString(PyExc_OverflowError,
896 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000897 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000898 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000899 v = PyUnicode_FromUnicode(NULL, newsize);
Georg Brandlee91be42007-02-24 19:41:35 +0000900 if (v == NULL) {
901 return NULL;
902 }
903 else {
904 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000905 register Py_UNICODE c;
906 register Py_UNICODE *p;
Georg Brandlee91be42007-02-24 19:41:35 +0000907 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000908
Walter Dörwald1ab83302007-05-18 17:15:44 +0000909 p = PyUnicode_AS_UNICODE(v);
Georg Brandlee91be42007-02-24 19:41:35 +0000910 *p++ = 'b';
911 *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000912 for (i = 0; i < Py_Size(self); i++) {
Georg Brandlee91be42007-02-24 19:41:35 +0000913 /* There's at least enough room for a hex escape
914 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000915 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Georg Brandlee91be42007-02-24 19:41:35 +0000916 c = self->ob_bytes[i];
917 if (c == quote || c == '\\')
918 *p++ = '\\', *p++ = c;
919 else if (c == '\t')
920 *p++ = '\\', *p++ = 't';
921 else if (c == '\n')
922 *p++ = '\\', *p++ = 'n';
923 else if (c == '\r')
924 *p++ = '\\', *p++ = 'r';
925 else if (c == 0)
Guido van Rossum57b93ad2007-05-08 19:09:34 +0000926 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
Georg Brandlee91be42007-02-24 19:41:35 +0000927 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000928 *p++ = '\\';
929 *p++ = 'x';
930 *p++ = hexdigits[(c & 0xf0) >> 4];
931 *p++ = hexdigits[c & 0xf];
Georg Brandlee91be42007-02-24 19:41:35 +0000932 }
933 else
934 *p++ = c;
935 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000936 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Georg Brandlee91be42007-02-24 19:41:35 +0000937 *p++ = quote;
938 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000939 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
940 Py_DECREF(v);
941 return NULL;
942 }
Georg Brandlee91be42007-02-24 19:41:35 +0000943 return v;
944 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000945}
946
947static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000948bytes_str(PyBytesObject *self)
949{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000950 return PyString_FromStringAndSize(self->ob_bytes, Py_Size(self));
Guido van Rossumd624f182006-04-24 13:47:05 +0000951}
952
953static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000954bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000955{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000956 Py_ssize_t self_size, other_size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000957 PyBuffer self_bytes, other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000958 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000959 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000960 int cmp;
961
Jeremy Hylton18c3ff82007-08-29 18:47:16 +0000962 /* Bytes can be compared to anything that supports the (binary)
963 buffer API. Except that a comparison with Unicode is always an
964 error, even if the comparison is for equality. */
965 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
966 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
967 PyErr_SetString(PyExc_TypeError, "can't compare bytes and str");
968 return NULL;
969 }
Guido van Rossumebea9be2007-04-09 00:49:13 +0000970
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000971 self_size = _getbuffer(self, &self_bytes);
972 if (self_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000973 PyErr_Clear();
Guido van Rossumebea9be2007-04-09 00:49:13 +0000974 Py_INCREF(Py_NotImplemented);
975 return Py_NotImplemented;
976 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000977
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000978 other_size = _getbuffer(other, &other_bytes);
979 if (other_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000980 PyErr_Clear();
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000981 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossumd624f182006-04-24 13:47:05 +0000982 Py_INCREF(Py_NotImplemented);
983 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000984 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000985
986 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000987 /* Shortcut: if the lengths differ, the objects differ */
988 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000989 }
990 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000991 minsize = self_size;
992 if (other_size < minsize)
993 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000994
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000995 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000996 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000997
Guido van Rossumd624f182006-04-24 13:47:05 +0000998 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000999 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +00001000 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +00001001 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +00001002 cmp = 1;
1003 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001004
Guido van Rossumd624f182006-04-24 13:47:05 +00001005 switch (op) {
1006 case Py_LT: cmp = cmp < 0; break;
1007 case Py_LE: cmp = cmp <= 0; break;
1008 case Py_EQ: cmp = cmp == 0; break;
1009 case Py_NE: cmp = cmp != 0; break;
1010 case Py_GT: cmp = cmp > 0; break;
1011 case Py_GE: cmp = cmp >= 0; break;
1012 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001013 }
1014
1015 res = cmp ? Py_True : Py_False;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001016 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossum75d38e92007-08-24 17:33:11 +00001017 PyObject_ReleaseBuffer(other, &other_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001018 Py_INCREF(res);
1019 return res;
1020}
1021
1022static void
1023bytes_dealloc(PyBytesObject *self)
1024{
Guido van Rossumd624f182006-04-24 13:47:05 +00001025 if (self->ob_bytes != 0) {
1026 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001027 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001028 Py_Type(self)->tp_free((PyObject *)self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001029}
1030
Neal Norwitz6968b052007-02-27 19:02:19 +00001031
1032/* -------------------------------------------------------------------- */
1033/* Methods */
1034
/* Configuration consumed by the stringlib headers included just below. */
#define STRINGLIB_CHAR   char                       /* element type */
#define STRINGLIB_CMP    memcmp                     /* raw comparison */
#define STRINGLIB_LEN    PyBytes_GET_SIZE           /* object length */
#define STRINGLIB_NEW    PyBytes_FromStringAndSize  /* result constructor */
#define STRINGLIB_EMPTY  nullbytes                  /* shared empty object */
1040
1041#include "stringlib/fastsearch.h"
1042#include "stringlib/count.h"
1043#include "stringlib/find.h"
1044#include "stringlib/partition.h"
1045
1046
1047/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1048were copied from the old char* style string object. */
1049
1050Py_LOCAL_INLINE(void)
1051_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1052{
1053 if (*end > len)
1054 *end = len;
1055 else if (*end < 0)
1056 *end += len;
1057 if (*end < 0)
1058 *end = 0;
1059 if (*start < 0)
1060 *start += len;
1061 if (*start < 0)
1062 *start = 0;
1063}
1064
1065
1066Py_LOCAL_INLINE(Py_ssize_t)
1067bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1068{
1069 PyObject *subobj;
Guido van Rossum06b8b022007-08-31 13:48:41 +00001070 PyBuffer subbuf;
Neal Norwitz6968b052007-02-27 19:02:19 +00001071 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Guido van Rossum06b8b022007-08-31 13:48:41 +00001072 Py_ssize_t res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001073
1074 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1075 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1076 return -2;
Guido van Rossum06b8b022007-08-31 13:48:41 +00001077 if (_getbuffer(subobj, &subbuf) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001078 return -2;
Neal Norwitz6968b052007-02-27 19:02:19 +00001079 if (dir > 0)
Guido van Rossum06b8b022007-08-31 13:48:41 +00001080 res = stringlib_find_slice(
Neal Norwitz6968b052007-02-27 19:02:19 +00001081 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossum06b8b022007-08-31 13:48:41 +00001082 subbuf.buf, subbuf.len, start, end);
Neal Norwitz6968b052007-02-27 19:02:19 +00001083 else
Guido van Rossum06b8b022007-08-31 13:48:41 +00001084 res = stringlib_rfind_slice(
Neal Norwitz6968b052007-02-27 19:02:19 +00001085 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossum06b8b022007-08-31 13:48:41 +00001086 subbuf.buf, subbuf.len, start, end);
1087 PyObject_ReleaseBuffer(subobj, &subbuf);
1088 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001089}
1090
1091
1092PyDoc_STRVAR(find__doc__,
1093"B.find(sub [,start [,end]]) -> int\n\
1094\n\
1095Return the lowest index in B where subsection sub is found,\n\
1096such that sub is contained within s[start,end]. Optional\n\
1097arguments start and end are interpreted as in slice notation.\n\
1098\n\
1099Return -1 on failure.");
1100
1101static PyObject *
1102bytes_find(PyBytesObject *self, PyObject *args)
1103{
1104 Py_ssize_t result = bytes_find_internal(self, args, +1);
1105 if (result == -2)
1106 return NULL;
1107 return PyInt_FromSsize_t(result);
1108}
1109
1110PyDoc_STRVAR(count__doc__,
1111"B.count(sub[, start[, end]]) -> int\n\
1112\n\
1113Return the number of non-overlapping occurrences of subsection sub in\n\
1114bytes B[start:end]. Optional arguments start and end are interpreted\n\
1115as in slice notation.");
1116
1117static PyObject *
1118bytes_count(PyBytesObject *self, PyObject *args)
1119{
1120 PyObject *sub_obj;
1121 const char *str = PyBytes_AS_STRING(self), *sub;
1122 Py_ssize_t sub_len;
1123 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1124
1125 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1126 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1127 return NULL;
1128
1129 if (PyBytes_Check(sub_obj)) {
1130 sub = PyBytes_AS_STRING(sub_obj);
1131 sub_len = PyBytes_GET_SIZE(sub_obj);
1132 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001133 /* XXX --> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001134 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1135 return NULL;
1136
Martin v. Löwis5b222132007-06-10 09:51:05 +00001137 _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Neal Norwitz6968b052007-02-27 19:02:19 +00001138
1139 return PyInt_FromSsize_t(
1140 stringlib_count(str + start, end - start, sub, sub_len)
1141 );
1142}
1143
1144
1145PyDoc_STRVAR(index__doc__,
1146"B.index(sub [,start [,end]]) -> int\n\
1147\n\
1148Like B.find() but raise ValueError when the subsection is not found.");
1149
1150static PyObject *
1151bytes_index(PyBytesObject *self, PyObject *args)
1152{
1153 Py_ssize_t result = bytes_find_internal(self, args, +1);
1154 if (result == -2)
1155 return NULL;
1156 if (result == -1) {
1157 PyErr_SetString(PyExc_ValueError,
1158 "subsection not found");
1159 return NULL;
1160 }
1161 return PyInt_FromSsize_t(result);
1162}
1163
1164
1165PyDoc_STRVAR(rfind__doc__,
1166"B.rfind(sub [,start [,end]]) -> int\n\
1167\n\
1168Return the highest index in B where subsection sub is found,\n\
1169such that sub is contained within s[start,end]. Optional\n\
1170arguments start and end are interpreted as in slice notation.\n\
1171\n\
1172Return -1 on failure.");
1173
1174static PyObject *
1175bytes_rfind(PyBytesObject *self, PyObject *args)
1176{
1177 Py_ssize_t result = bytes_find_internal(self, args, -1);
1178 if (result == -2)
1179 return NULL;
1180 return PyInt_FromSsize_t(result);
1181}
1182
1183
1184PyDoc_STRVAR(rindex__doc__,
1185"B.rindex(sub [,start [,end]]) -> int\n\
1186\n\
1187Like B.rfind() but raise ValueError when the subsection is not found.");
1188
1189static PyObject *
1190bytes_rindex(PyBytesObject *self, PyObject *args)
1191{
1192 Py_ssize_t result = bytes_find_internal(self, args, -1);
1193 if (result == -2)
1194 return NULL;
1195 if (result == -1) {
1196 PyErr_SetString(PyExc_ValueError,
1197 "subsection not found");
1198 return NULL;
1199 }
1200 return PyInt_FromSsize_t(result);
1201}
1202
1203
1204/* Matches the end (direction >= 0) or start (direction < 0) of self
1205 * against substr, using the start and end arguments. Returns
1206 * -1 on error, 0 if not found and 1 if found.
1207 */
1208Py_LOCAL(int)
1209_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1210 Py_ssize_t end, int direction)
1211{
1212 Py_ssize_t len = PyBytes_GET_SIZE(self);
1213 Py_ssize_t slen;
1214 const char* sub;
1215 const char* str;
1216
1217 if (PyBytes_Check(substr)) {
1218 sub = PyBytes_AS_STRING(substr);
1219 slen = PyBytes_GET_SIZE(substr);
1220 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001221 /* XXX --> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001222 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
1223 return -1;
1224 str = PyBytes_AS_STRING(self);
1225
1226 _adjust_indices(&start, &end, len);
1227
1228 if (direction < 0) {
1229 /* startswith */
1230 if (start+slen > len)
1231 return 0;
1232 } else {
1233 /* endswith */
1234 if (end-start < slen || start > len)
1235 return 0;
1236
1237 if (end-slen > start)
1238 start = end - slen;
1239 }
1240 if (end-start >= slen)
1241 return ! memcmp(str+start, sub, slen);
1242 return 0;
1243}
1244
1245
1246PyDoc_STRVAR(startswith__doc__,
1247"B.startswith(prefix[, start[, end]]) -> bool\n\
1248\n\
1249Return True if B starts with the specified prefix, False otherwise.\n\
1250With optional start, test B beginning at that position.\n\
1251With optional end, stop comparing B at that position.\n\
1252prefix can also be a tuple of strings to try.");
1253
1254static PyObject *
1255bytes_startswith(PyBytesObject *self, PyObject *args)
1256{
1257 Py_ssize_t start = 0;
1258 Py_ssize_t end = PY_SSIZE_T_MAX;
1259 PyObject *subobj;
1260 int result;
1261
1262 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1263 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1264 return NULL;
1265 if (PyTuple_Check(subobj)) {
1266 Py_ssize_t i;
1267 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1268 result = _bytes_tailmatch(self,
1269 PyTuple_GET_ITEM(subobj, i),
1270 start, end, -1);
1271 if (result == -1)
1272 return NULL;
1273 else if (result) {
1274 Py_RETURN_TRUE;
1275 }
1276 }
1277 Py_RETURN_FALSE;
1278 }
1279 result = _bytes_tailmatch(self, subobj, start, end, -1);
1280 if (result == -1)
1281 return NULL;
1282 else
1283 return PyBool_FromLong(result);
1284}
1285
1286PyDoc_STRVAR(endswith__doc__,
1287"B.endswith(suffix[, start[, end]]) -> bool\n\
1288\n\
1289Return True if B ends with the specified suffix, False otherwise.\n\
1290With optional start, test B beginning at that position.\n\
1291With optional end, stop comparing B at that position.\n\
1292suffix can also be a tuple of strings to try.");
1293
1294static PyObject *
1295bytes_endswith(PyBytesObject *self, PyObject *args)
1296{
1297 Py_ssize_t start = 0;
1298 Py_ssize_t end = PY_SSIZE_T_MAX;
1299 PyObject *subobj;
1300 int result;
1301
1302 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1303 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1304 return NULL;
1305 if (PyTuple_Check(subobj)) {
1306 Py_ssize_t i;
1307 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1308 result = _bytes_tailmatch(self,
1309 PyTuple_GET_ITEM(subobj, i),
1310 start, end, +1);
1311 if (result == -1)
1312 return NULL;
1313 else if (result) {
1314 Py_RETURN_TRUE;
1315 }
1316 }
1317 Py_RETURN_FALSE;
1318 }
1319 result = _bytes_tailmatch(self, subobj, start, end, +1);
1320 if (result == -1)
1321 return NULL;
1322 else
1323 return PyBool_FromLong(result);
1324}
1325
1326
1327
1328PyDoc_STRVAR(translate__doc__,
1329"B.translate(table [,deletechars]) -> bytes\n\
1330\n\
1331Return a copy of the bytes B, where all characters occurring\n\
1332in the optional argument deletechars are removed, and the\n\
1333remaining characters have been mapped through the given\n\
1334translation table, which must be a bytes of length 256.");
1335
1336static PyObject *
1337bytes_translate(PyBytesObject *self, PyObject *args)
1338{
1339 register char *input, *output;
1340 register const char *table;
1341 register Py_ssize_t i, c, changed = 0;
1342 PyObject *input_obj = (PyObject*)self;
1343 const char *table1, *output_start, *del_table=NULL;
1344 Py_ssize_t inlen, tablen, dellen = 0;
1345 PyObject *result;
1346 int trans_table[256];
1347 PyObject *tableobj, *delobj = NULL;
1348
1349 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1350 &tableobj, &delobj))
1351 return NULL;
1352
1353 if (PyBytes_Check(tableobj)) {
1354 table1 = PyBytes_AS_STRING(tableobj);
1355 tablen = PyBytes_GET_SIZE(tableobj);
1356 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001357 /* XXX -> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001358 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1359 return NULL;
1360
1361 if (tablen != 256) {
1362 PyErr_SetString(PyExc_ValueError,
1363 "translation table must be 256 characters long");
1364 return NULL;
1365 }
1366
1367 if (delobj != NULL) {
1368 if (PyBytes_Check(delobj)) {
1369 del_table = PyBytes_AS_STRING(delobj);
1370 dellen = PyBytes_GET_SIZE(delobj);
1371 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001372 /* XXX -> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001373 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1374 return NULL;
1375 }
1376 else {
1377 del_table = NULL;
1378 dellen = 0;
1379 }
1380
1381 table = table1;
1382 inlen = PyBytes_GET_SIZE(input_obj);
1383 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1384 if (result == NULL)
1385 return NULL;
1386 output_start = output = PyBytes_AsString(result);
1387 input = PyBytes_AS_STRING(input_obj);
1388
1389 if (dellen == 0) {
1390 /* If no deletions are required, use faster code */
1391 for (i = inlen; --i >= 0; ) {
1392 c = Py_CHARMASK(*input++);
1393 if (Py_CHARMASK((*output++ = table[c])) != c)
1394 changed = 1;
1395 }
1396 if (changed || !PyBytes_CheckExact(input_obj))
1397 return result;
1398 Py_DECREF(result);
1399 Py_INCREF(input_obj);
1400 return input_obj;
1401 }
1402
1403 for (i = 0; i < 256; i++)
1404 trans_table[i] = Py_CHARMASK(table[i]);
1405
1406 for (i = 0; i < dellen; i++)
1407 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1408
1409 for (i = inlen; --i >= 0; ) {
1410 c = Py_CHARMASK(*input++);
1411 if (trans_table[c] != -1)
1412 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1413 continue;
1414 changed = 1;
1415 }
1416 if (!changed && PyBytes_CheckExact(input_obj)) {
1417 Py_DECREF(result);
1418 Py_INCREF(input_obj);
1419 return input_obj;
1420 }
1421 /* Fix the size of the resulting string */
1422 if (inlen > 0)
1423 PyBytes_Resize(result, output - output_start);
1424 return result;
1425}
1426
1427
/* Search directions.  REVERSE is parenthesized so it stays a single
   operand wherever it is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c in the first target_len
   bytes of target, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* Non-zero if pattern (length bytes) occurs in target at offset.
   The first and last bytes are compared before the memcmp.
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1441
1442
1443/* Bytes ops must return a string. */
1444/* If the object is subclass of bytes, create a copy */
1445Py_LOCAL(PyBytesObject *)
1446return_self(PyBytesObject *self)
1447{
1448 if (PyBytes_CheckExact(self)) {
1449 Py_INCREF(self);
1450 return (PyBytesObject *)self;
1451 }
1452 return (PyBytesObject *)PyBytes_FromStringAndSize(
1453 PyBytes_AS_STRING(self),
1454 PyBytes_GET_SIZE(self));
1455}
1456
1457Py_LOCAL_INLINE(Py_ssize_t)
1458countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1459{
1460 Py_ssize_t count=0;
1461 const char *start=target;
1462 const char *end=target+target_len;
1463
1464 while ( (start=findchar(start, end-start, c)) != NULL ) {
1465 count++;
1466 if (count >= maxcount)
1467 break;
1468 start += 1;
1469 }
1470 return count;
1471}
1472
1473Py_LOCAL(Py_ssize_t)
1474findstring(const char *target, Py_ssize_t target_len,
1475 const char *pattern, Py_ssize_t pattern_len,
1476 Py_ssize_t start,
1477 Py_ssize_t end,
1478 int direction)
1479{
1480 if (start < 0) {
1481 start += target_len;
1482 if (start < 0)
1483 start = 0;
1484 }
1485 if (end > target_len) {
1486 end = target_len;
1487 } else if (end < 0) {
1488 end += target_len;
1489 if (end < 0)
1490 end = 0;
1491 }
1492
1493 /* zero-length substrings always match at the first attempt */
1494 if (pattern_len == 0)
1495 return (direction > 0) ? start : end;
1496
1497 end -= pattern_len;
1498
1499 if (direction < 0) {
1500 for (; end >= start; end--)
1501 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1502 return end;
1503 } else {
1504 for (; start <= end; start++)
1505 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1506 return start;
1507 }
1508 return -1;
1509}
1510
1511Py_LOCAL_INLINE(Py_ssize_t)
1512countstring(const char *target, Py_ssize_t target_len,
1513 const char *pattern, Py_ssize_t pattern_len,
1514 Py_ssize_t start,
1515 Py_ssize_t end,
1516 int direction, Py_ssize_t maxcount)
1517{
1518 Py_ssize_t count=0;
1519
1520 if (start < 0) {
1521 start += target_len;
1522 if (start < 0)
1523 start = 0;
1524 }
1525 if (end > target_len) {
1526 end = target_len;
1527 } else if (end < 0) {
1528 end += target_len;
1529 if (end < 0)
1530 end = 0;
1531 }
1532
1533 /* zero-length substrings match everywhere */
1534 if (pattern_len == 0 || maxcount == 0) {
1535 if (target_len+1 < maxcount)
1536 return target_len+1;
1537 return maxcount;
1538 }
1539
1540 end -= pattern_len;
1541 if (direction < 0) {
1542 for (; (end >= start); end--)
1543 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1544 count++;
1545 if (--maxcount <= 0) break;
1546 end -= pattern_len-1;
1547 }
1548 } else {
1549 for (; (start <= end); start++)
1550 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1551 count++;
1552 if (--maxcount <= 0)
1553 break;
1554 start += pattern_len-1;
1555 }
1556 }
1557 return count;
1558}
1559
1560
1561/* Algorithms for different cases of string replacement */
1562
1563/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1564Py_LOCAL(PyBytesObject *)
1565replace_interleave(PyBytesObject *self,
1566 const char *to_s, Py_ssize_t to_len,
1567 Py_ssize_t maxcount)
1568{
1569 char *self_s, *result_s;
1570 Py_ssize_t self_len, result_len;
1571 Py_ssize_t count, i, product;
1572 PyBytesObject *result;
1573
1574 self_len = PyBytes_GET_SIZE(self);
1575
1576 /* 1 at the end plus 1 after every character */
1577 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001578 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001579 count = maxcount;
1580
1581 /* Check for overflow */
1582 /* result_len = count * to_len + self_len; */
1583 product = count * to_len;
1584 if (product / to_len != count) {
1585 PyErr_SetString(PyExc_OverflowError,
1586 "replace string is too long");
1587 return NULL;
1588 }
1589 result_len = product + self_len;
1590 if (result_len < 0) {
1591 PyErr_SetString(PyExc_OverflowError,
1592 "replace string is too long");
1593 return NULL;
1594 }
1595
1596 if (! (result = (PyBytesObject *)
1597 PyBytes_FromStringAndSize(NULL, result_len)) )
1598 return NULL;
1599
1600 self_s = PyBytes_AS_STRING(self);
1601 result_s = PyBytes_AS_STRING(result);
1602
1603 /* TODO: special case single character, which doesn't need memcpy */
1604
1605 /* Lay the first one down (guaranteed this will occur) */
1606 Py_MEMCPY(result_s, to_s, to_len);
1607 result_s += to_len;
1608 count -= 1;
1609
1610 for (i=0; i<count; i++) {
1611 *result_s++ = *self_s++;
1612 Py_MEMCPY(result_s, to_s, to_len);
1613 result_s += to_len;
1614 }
1615
1616 /* Copy the rest of the original string */
1617 Py_MEMCPY(result_s, self_s, self_len-i);
1618
1619 return result;
1620}
1621
1622/* Special case for deleting a single character */
1623/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1624Py_LOCAL(PyBytesObject *)
1625replace_delete_single_character(PyBytesObject *self,
1626 char from_c, Py_ssize_t maxcount)
1627{
1628 char *self_s, *result_s;
1629 char *start, *next, *end;
1630 Py_ssize_t self_len, result_len;
1631 Py_ssize_t count;
1632 PyBytesObject *result;
1633
1634 self_len = PyBytes_GET_SIZE(self);
1635 self_s = PyBytes_AS_STRING(self);
1636
1637 count = countchar(self_s, self_len, from_c, maxcount);
1638 if (count == 0) {
1639 return return_self(self);
1640 }
1641
1642 result_len = self_len - count; /* from_len == 1 */
1643 assert(result_len>=0);
1644
1645 if ( (result = (PyBytesObject *)
1646 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1647 return NULL;
1648 result_s = PyBytes_AS_STRING(result);
1649
1650 start = self_s;
1651 end = self_s + self_len;
1652 while (count-- > 0) {
1653 next = findchar(start, end-start, from_c);
1654 if (next == NULL)
1655 break;
1656 Py_MEMCPY(result_s, start, next-start);
1657 result_s += (next-start);
1658 start = next+1;
1659 }
1660 Py_MEMCPY(result_s, start, end-start);
1661
1662 return result;
1663}
1664
1665/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1666
1667Py_LOCAL(PyBytesObject *)
1668replace_delete_substring(PyBytesObject *self,
1669 const char *from_s, Py_ssize_t from_len,
1670 Py_ssize_t maxcount)
1671{
1672 char *self_s, *result_s;
1673 char *start, *next, *end;
1674 Py_ssize_t self_len, result_len;
1675 Py_ssize_t count, offset;
1676 PyBytesObject *result;
1677
1678 self_len = PyBytes_GET_SIZE(self);
1679 self_s = PyBytes_AS_STRING(self);
1680
1681 count = countstring(self_s, self_len,
1682 from_s, from_len,
1683 0, self_len, 1,
1684 maxcount);
1685
1686 if (count == 0) {
1687 /* no matches */
1688 return return_self(self);
1689 }
1690
1691 result_len = self_len - (count * from_len);
1692 assert (result_len>=0);
1693
1694 if ( (result = (PyBytesObject *)
1695 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1696 return NULL;
1697
1698 result_s = PyBytes_AS_STRING(result);
1699
1700 start = self_s;
1701 end = self_s + self_len;
1702 while (count-- > 0) {
1703 offset = findstring(start, end-start,
1704 from_s, from_len,
1705 0, end-start, FORWARD);
1706 if (offset == -1)
1707 break;
1708 next = start + offset;
1709
1710 Py_MEMCPY(result_s, start, next-start);
1711
1712 result_s += (next-start);
1713 start = next+from_len;
1714 }
1715 Py_MEMCPY(result_s, start, end-start);
1716 return result;
1717}
1718
1719/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1720Py_LOCAL(PyBytesObject *)
1721replace_single_character_in_place(PyBytesObject *self,
1722 char from_c, char to_c,
1723 Py_ssize_t maxcount)
1724{
1725 char *self_s, *result_s, *start, *end, *next;
1726 Py_ssize_t self_len;
1727 PyBytesObject *result;
1728
1729 /* The result string will be the same size */
1730 self_s = PyBytes_AS_STRING(self);
1731 self_len = PyBytes_GET_SIZE(self);
1732
1733 next = findchar(self_s, self_len, from_c);
1734
1735 if (next == NULL) {
1736 /* No matches; return the original bytes */
1737 return return_self(self);
1738 }
1739
1740 /* Need to make a new bytes */
1741 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1742 if (result == NULL)
1743 return NULL;
1744 result_s = PyBytes_AS_STRING(result);
1745 Py_MEMCPY(result_s, self_s, self_len);
1746
1747 /* change everything in-place, starting with this one */
1748 start = result_s + (next-self_s);
1749 *start = to_c;
1750 start++;
1751 end = result_s + self_len;
1752
1753 while (--maxcount > 0) {
1754 next = findchar(start, end-start, from_c);
1755 if (next == NULL)
1756 break;
1757 *next = to_c;
1758 start = next+1;
1759 }
1760
1761 return result;
1762}
1763
1764/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1765Py_LOCAL(PyBytesObject *)
1766replace_substring_in_place(PyBytesObject *self,
1767 const char *from_s, Py_ssize_t from_len,
1768 const char *to_s, Py_ssize_t to_len,
1769 Py_ssize_t maxcount)
1770{
1771 char *result_s, *start, *end;
1772 char *self_s;
1773 Py_ssize_t self_len, offset;
1774 PyBytesObject *result;
1775
1776 /* The result bytes will be the same size */
1777
1778 self_s = PyBytes_AS_STRING(self);
1779 self_len = PyBytes_GET_SIZE(self);
1780
1781 offset = findstring(self_s, self_len,
1782 from_s, from_len,
1783 0, self_len, FORWARD);
1784 if (offset == -1) {
1785 /* No matches; return the original bytes */
1786 return return_self(self);
1787 }
1788
1789 /* Need to make a new bytes */
1790 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1791 if (result == NULL)
1792 return NULL;
1793 result_s = PyBytes_AS_STRING(result);
1794 Py_MEMCPY(result_s, self_s, self_len);
1795
1796 /* change everything in-place, starting with this one */
1797 start = result_s + offset;
1798 Py_MEMCPY(start, to_s, from_len);
1799 start += from_len;
1800 end = result_s + self_len;
1801
1802 while ( --maxcount > 0) {
1803 offset = findstring(start, end-start,
1804 from_s, from_len,
1805 0, end-start, FORWARD);
1806 if (offset==-1)
1807 break;
1808 Py_MEMCPY(start+offset, to_s, from_len);
1809 start += offset+from_len;
1810 }
1811
1812 return result;
1813}
1814
1815/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1816Py_LOCAL(PyBytesObject *)
1817replace_single_character(PyBytesObject *self,
1818 char from_c,
1819 const char *to_s, Py_ssize_t to_len,
1820 Py_ssize_t maxcount)
1821{
1822 char *self_s, *result_s;
1823 char *start, *next, *end;
1824 Py_ssize_t self_len, result_len;
1825 Py_ssize_t count, product;
1826 PyBytesObject *result;
1827
1828 self_s = PyBytes_AS_STRING(self);
1829 self_len = PyBytes_GET_SIZE(self);
1830
1831 count = countchar(self_s, self_len, from_c, maxcount);
1832 if (count == 0) {
1833 /* no matches, return unchanged */
1834 return return_self(self);
1835 }
1836
1837 /* use the difference between current and new, hence the "-1" */
1838 /* result_len = self_len + count * (to_len-1) */
1839 product = count * (to_len-1);
1840 if (product / (to_len-1) != count) {
1841 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1842 return NULL;
1843 }
1844 result_len = self_len + product;
1845 if (result_len < 0) {
1846 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1847 return NULL;
1848 }
1849
1850 if ( (result = (PyBytesObject *)
1851 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1852 return NULL;
1853 result_s = PyBytes_AS_STRING(result);
1854
1855 start = self_s;
1856 end = self_s + self_len;
1857 while (count-- > 0) {
1858 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001859 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001860 break;
1861
1862 if (next == start) {
1863 /* replace with the 'to' */
1864 Py_MEMCPY(result_s, to_s, to_len);
1865 result_s += to_len;
1866 start += 1;
1867 } else {
1868 /* copy the unchanged old then the 'to' */
1869 Py_MEMCPY(result_s, start, next-start);
1870 result_s += (next-start);
1871 Py_MEMCPY(result_s, to_s, to_len);
1872 result_s += to_len;
1873 start = next+1;
1874 }
1875 }
1876 /* Copy the remainder of the remaining bytes */
1877 Py_MEMCPY(result_s, start, end-start);
1878
1879 return result;
1880}
1881
1882/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1883Py_LOCAL(PyBytesObject *)
1884replace_substring(PyBytesObject *self,
1885 const char *from_s, Py_ssize_t from_len,
1886 const char *to_s, Py_ssize_t to_len,
1887 Py_ssize_t maxcount)
1888{
1889 char *self_s, *result_s;
1890 char *start, *next, *end;
1891 Py_ssize_t self_len, result_len;
1892 Py_ssize_t count, offset, product;
1893 PyBytesObject *result;
1894
1895 self_s = PyBytes_AS_STRING(self);
1896 self_len = PyBytes_GET_SIZE(self);
1897
1898 count = countstring(self_s, self_len,
1899 from_s, from_len,
1900 0, self_len, FORWARD, maxcount);
1901 if (count == 0) {
1902 /* no matches, return unchanged */
1903 return return_self(self);
1904 }
1905
1906 /* Check for overflow */
1907 /* result_len = self_len + count * (to_len-from_len) */
1908 product = count * (to_len-from_len);
1909 if (product / (to_len-from_len) != count) {
1910 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1911 return NULL;
1912 }
1913 result_len = self_len + product;
1914 if (result_len < 0) {
1915 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1916 return NULL;
1917 }
1918
1919 if ( (result = (PyBytesObject *)
1920 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1921 return NULL;
1922 result_s = PyBytes_AS_STRING(result);
1923
1924 start = self_s;
1925 end = self_s + self_len;
1926 while (count-- > 0) {
1927 offset = findstring(start, end-start,
1928 from_s, from_len,
1929 0, end-start, FORWARD);
1930 if (offset == -1)
1931 break;
1932 next = start+offset;
1933 if (next == start) {
1934 /* replace with the 'to' */
1935 Py_MEMCPY(result_s, to_s, to_len);
1936 result_s += to_len;
1937 start += from_len;
1938 } else {
1939 /* copy the unchanged old then the 'to' */
1940 Py_MEMCPY(result_s, start, next-start);
1941 result_s += (next-start);
1942 Py_MEMCPY(result_s, to_s, to_len);
1943 result_s += to_len;
1944 start = next+from_len;
1945 }
1946 }
1947 /* Copy the remainder of the remaining bytes */
1948 Py_MEMCPY(result_s, start, end-start);
1949
1950 return result;
1951}
1952
1953
1954Py_LOCAL(PyBytesObject *)
1955replace(PyBytesObject *self,
1956 const char *from_s, Py_ssize_t from_len,
1957 const char *to_s, Py_ssize_t to_len,
1958 Py_ssize_t maxcount)
1959{
1960 if (maxcount < 0) {
1961 maxcount = PY_SSIZE_T_MAX;
1962 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1963 /* nothing to do; return the original bytes */
1964 return return_self(self);
1965 }
1966
1967 if (maxcount == 0 ||
1968 (from_len == 0 && to_len == 0)) {
1969 /* nothing to do; return the original bytes */
1970 return return_self(self);
1971 }
1972
1973 /* Handle zero-length special cases */
1974
1975 if (from_len == 0) {
1976 /* insert the 'to' bytes everywhere. */
1977 /* >>> "Python".replace("", ".") */
1978 /* '.P.y.t.h.o.n.' */
1979 return replace_interleave(self, to_s, to_len, maxcount);
1980 }
1981
1982 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1983 /* point for an empty self bytes to generate a non-empty bytes */
1984 /* Special case so the remaining code always gets a non-empty bytes */
1985 if (PyBytes_GET_SIZE(self) == 0) {
1986 return return_self(self);
1987 }
1988
1989 if (to_len == 0) {
1990 /* delete all occurances of 'from' bytes */
1991 if (from_len == 1) {
1992 return replace_delete_single_character(
1993 self, from_s[0], maxcount);
1994 } else {
1995 return replace_delete_substring(self, from_s, from_len, maxcount);
1996 }
1997 }
1998
1999 /* Handle special case where both bytes have the same length */
2000
2001 if (from_len == to_len) {
2002 if (from_len == 1) {
2003 return replace_single_character_in_place(
2004 self,
2005 from_s[0],
2006 to_s[0],
2007 maxcount);
2008 } else {
2009 return replace_substring_in_place(
2010 self, from_s, from_len, to_s, to_len, maxcount);
2011 }
2012 }
2013
2014 /* Otherwise use the more generic algorithms */
2015 if (from_len == 1) {
2016 return replace_single_character(self, from_s[0],
2017 to_s, to_len, maxcount);
2018 } else {
2019 /* len('from')>=2, len('to')>=1 */
2020 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2021 }
2022}
2023
2024PyDoc_STRVAR(replace__doc__,
2025"B.replace (old, new[, count]) -> bytes\n\
2026\n\
2027Return a copy of bytes B with all occurrences of subsection\n\
2028old replaced by new. If the optional argument count is\n\
2029given, only the first count occurrences are replaced.");
2030
2031static PyObject *
2032bytes_replace(PyBytesObject *self, PyObject *args)
2033{
2034 Py_ssize_t count = -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002035 PyObject *from, *to, *res;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002036 PyBuffer vfrom, vto;
Neal Norwitz6968b052007-02-27 19:02:19 +00002037
2038 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2039 return NULL;
2040
Guido van Rossuma74184e2007-08-29 04:05:57 +00002041 if (_getbuffer(from, &vfrom) < 0)
2042 return NULL;
2043 if (_getbuffer(to, &vto) < 0) {
2044 PyObject_ReleaseBuffer(from, &vfrom);
2045 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002046 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002047
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002048 res = (PyObject *)replace((PyBytesObject *) self,
Guido van Rossuma74184e2007-08-29 04:05:57 +00002049 vfrom.buf, vfrom.len,
2050 vto.buf, vto.len, count);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002051
Guido van Rossuma74184e2007-08-29 04:05:57 +00002052 PyObject_ReleaseBuffer(from, &vfrom);
2053 PyObject_ReleaseBuffer(to, &vto);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002054 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00002055}
2056
2057
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument and the whole expansion are parenthesized so that the macro
   stays correct when it is handed an arbitrary expression. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Create bytes from data[left:right] and append them to the list.
   Expands to several statements that use the locals `str` and `list` and
   jump to the label `onError` of the enclosing function on failure. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but the first MAX_PREALLOC results are stored directly
   into the list slot numbered by the local `count`; later ones are appended.
   `count` is incremented each time. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Neal Norwitz6968b052007-02-27 19:02:19 +00002102
2103
2104Py_LOCAL_INLINE(PyObject *)
2105split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2106{
2107 register Py_ssize_t i, j, count=0;
2108 PyObject *str;
2109 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2110
2111 if (list == NULL)
2112 return NULL;
2113
2114 i = j = 0;
2115 while ((j < len) && (maxcount-- > 0)) {
2116 for(; j<len; j++) {
2117 /* I found that using memchr makes no difference */
2118 if (s[j] == ch) {
2119 SPLIT_ADD(s, i, j);
2120 i = j = j + 1;
2121 break;
2122 }
2123 }
2124 }
2125 if (i <= len) {
2126 SPLIT_ADD(s, i, len);
2127 }
2128 FIX_PREALLOC_SIZE(list);
2129 return list;
2130
2131 onError:
2132 Py_DECREF(list);
2133 return NULL;
2134}
2135
2136PyDoc_STRVAR(split__doc__,
2137"B.split(sep [,maxsplit]) -> list of bytes\n\
2138\n\
2139Return a list of the bytes in the string B, using sep as the\n\
2140delimiter. If maxsplit is given, at most maxsplit\n\
2141splits are done.");
2142
2143static PyObject *
2144bytes_split(PyBytesObject *self, PyObject *args)
2145{
2146 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2147 Py_ssize_t maxsplit = -1, count=0;
2148 const char *s = PyBytes_AS_STRING(self), *sub;
2149 PyObject *list, *str, *subobj;
2150#ifdef USE_FAST
2151 Py_ssize_t pos;
2152#endif
2153
2154 if (!PyArg_ParseTuple(args, "O|n:split", &subobj, &maxsplit))
2155 return NULL;
2156 if (maxsplit < 0)
2157 maxsplit = PY_SSIZE_T_MAX;
2158 if (PyBytes_Check(subobj)) {
2159 sub = PyBytes_AS_STRING(subobj);
2160 n = PyBytes_GET_SIZE(subobj);
2161 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002162 /* XXX -> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00002163 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2164 return NULL;
2165
2166 if (n == 0) {
2167 PyErr_SetString(PyExc_ValueError, "empty separator");
2168 return NULL;
2169 }
2170 else if (n == 1)
2171 return split_char(s, len, sub[0], maxsplit);
2172
2173 list = PyList_New(PREALLOC_SIZE(maxsplit));
2174 if (list == NULL)
2175 return NULL;
2176
2177#ifdef USE_FAST
2178 i = j = 0;
2179 while (maxsplit-- > 0) {
2180 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2181 if (pos < 0)
2182 break;
2183 j = i+pos;
2184 SPLIT_ADD(s, i, j);
2185 i = j + n;
2186 }
2187#else
2188 i = j = 0;
2189 while ((j+n <= len) && (maxsplit-- > 0)) {
2190 for (; j+n <= len; j++) {
2191 if (Py_STRING_MATCH(s, j, sub, n)) {
2192 SPLIT_ADD(s, i, j);
2193 i = j = j + n;
2194 break;
2195 }
2196 }
2197 }
2198#endif
2199 SPLIT_ADD(s, i, len);
2200 FIX_PREALLOC_SIZE(list);
2201 return list;
2202
2203 onError:
2204 Py_DECREF(list);
2205 return NULL;
2206}
2207
2208PyDoc_STRVAR(partition__doc__,
2209"B.partition(sep) -> (head, sep, tail)\n\
2210\n\
2211Searches for the separator sep in B, and returns the part before it,\n\
2212the separator itself, and the part after it. If the separator is not\n\
2213found, returns B and two empty bytes.");
2214
2215static PyObject *
2216bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2217{
2218 PyObject *bytesep, *result;
2219
2220 bytesep = PyBytes_FromObject(sep_obj);
2221 if (! bytesep)
2222 return NULL;
2223
2224 result = stringlib_partition(
2225 (PyObject*) self,
2226 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002227 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002228 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2229 );
2230
2231 Py_DECREF(bytesep);
2232 return result;
2233}
2234
2235PyDoc_STRVAR(rpartition__doc__,
2236"B.rpartition(sep) -> (tail, sep, head)\n\
2237\n\
2238Searches for the separator sep in B, starting at the end of B, and returns\n\
2239the part before it, the separator itself, and the part after it. If the\n\
2240separator is not found, returns two empty bytes and B.");
2241
2242static PyObject *
2243bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2244{
2245 PyObject *bytesep, *result;
2246
2247 bytesep = PyBytes_FromObject(sep_obj);
2248 if (! bytesep)
2249 return NULL;
2250
2251 result = stringlib_rpartition(
2252 (PyObject*) self,
2253 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002254 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002255 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2256 );
2257
2258 Py_DECREF(bytesep);
2259 return result;
2260}
2261
2262Py_LOCAL_INLINE(PyObject *)
2263rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2264{
2265 register Py_ssize_t i, j, count=0;
2266 PyObject *str;
2267 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2268
2269 if (list == NULL)
2270 return NULL;
2271
2272 i = j = len - 1;
2273 while ((i >= 0) && (maxcount-- > 0)) {
2274 for (; i >= 0; i--) {
2275 if (s[i] == ch) {
2276 SPLIT_ADD(s, i + 1, j + 1);
2277 j = i = i - 1;
2278 break;
2279 }
2280 }
2281 }
2282 if (j >= -1) {
2283 SPLIT_ADD(s, 0, j + 1);
2284 }
2285 FIX_PREALLOC_SIZE(list);
2286 if (PyList_Reverse(list) < 0)
2287 goto onError;
2288
2289 return list;
2290
2291 onError:
2292 Py_DECREF(list);
2293 return NULL;
2294}
2295
2296PyDoc_STRVAR(rsplit__doc__,
2297"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2298\n\
2299Return a list of the sections in the byte B, using sep as the\n\
2300delimiter, starting at the end of the bytes and working\n\
2301to the front. If maxsplit is given, at most maxsplit splits are\n\
2302done.");
2303
2304static PyObject *
2305bytes_rsplit(PyBytesObject *self, PyObject *args)
2306{
2307 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2308 Py_ssize_t maxsplit = -1, count=0;
2309 const char *s = PyBytes_AS_STRING(self), *sub;
2310 PyObject *list, *str, *subobj;
2311
2312 if (!PyArg_ParseTuple(args, "O|n:rsplit", &subobj, &maxsplit))
2313 return NULL;
2314 if (maxsplit < 0)
2315 maxsplit = PY_SSIZE_T_MAX;
2316 if (PyBytes_Check(subobj)) {
2317 sub = PyBytes_AS_STRING(subobj);
2318 n = PyBytes_GET_SIZE(subobj);
2319 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002320 /* XXX -> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00002321 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2322 return NULL;
2323
2324 if (n == 0) {
2325 PyErr_SetString(PyExc_ValueError, "empty separator");
2326 return NULL;
2327 }
2328 else if (n == 1)
2329 return rsplit_char(s, len, sub[0], maxsplit);
2330
2331 list = PyList_New(PREALLOC_SIZE(maxsplit));
2332 if (list == NULL)
2333 return NULL;
2334
2335 j = len;
2336 i = j - n;
2337
2338 while ( (i >= 0) && (maxsplit-- > 0) ) {
2339 for (; i>=0; i--) {
2340 if (Py_STRING_MATCH(s, i, sub, n)) {
2341 SPLIT_ADD(s, i + n, j);
2342 j = i;
2343 i -= n;
2344 break;
2345 }
2346 }
2347 }
2348 SPLIT_ADD(s, 0, j);
2349 FIX_PREALLOC_SIZE(list);
2350 if (PyList_Reverse(list) < 0)
2351 goto onError;
2352 return list;
2353
2354onError:
2355 Py_DECREF(list);
2356 return NULL;
2357}
2358
2359PyDoc_STRVAR(extend__doc__,
2360"B.extend(iterable int) -> None\n\
2361\n\
2362Append all the elements from the iterator or sequence to the\n\
2363end of the bytes.");
2364static PyObject *
2365bytes_extend(PyBytesObject *self, PyObject *arg)
2366{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002367 if (bytes_setslice(self, Py_Size(self), Py_Size(self), arg) == -1)
Neal Norwitz6968b052007-02-27 19:02:19 +00002368 return NULL;
2369 Py_RETURN_NONE;
2370}
2371
2372
2373PyDoc_STRVAR(reverse__doc__,
2374"B.reverse() -> None\n\
2375\n\
2376Reverse the order of the values in bytes in place.");
2377static PyObject *
2378bytes_reverse(PyBytesObject *self, PyObject *unused)
2379{
2380 char swap, *head, *tail;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002381 Py_ssize_t i, j, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002382
2383 j = n / 2;
2384 head = self->ob_bytes;
2385 tail = head + n - 1;
2386 for (i = 0; i < j; i++) {
2387 swap = *head;
2388 *head++ = *tail;
2389 *tail-- = swap;
2390 }
2391
2392 Py_RETURN_NONE;
2393}
2394
2395PyDoc_STRVAR(insert__doc__,
2396"B.insert(index, int) -> None\n\
2397\n\
2398Insert a single item into the bytes before the given index.");
2399static PyObject *
2400bytes_insert(PyBytesObject *self, PyObject *args)
2401{
2402 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002403 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002404
2405 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2406 return NULL;
2407
2408 if (n == PY_SSIZE_T_MAX) {
2409 PyErr_SetString(PyExc_OverflowError,
2410 "cannot add more objects to bytes");
2411 return NULL;
2412 }
2413 if (value < 0 || value >= 256) {
2414 PyErr_SetString(PyExc_ValueError,
2415 "byte must be in range(0, 256)");
2416 return NULL;
2417 }
2418 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2419 return NULL;
2420
2421 if (where < 0) {
2422 where += n;
2423 if (where < 0)
2424 where = 0;
2425 }
2426 if (where > n)
2427 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002428 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002429 self->ob_bytes[where] = value;
2430
2431 Py_RETURN_NONE;
2432}
2433
2434PyDoc_STRVAR(append__doc__,
2435"B.append(int) -> None\n\
2436\n\
2437Append a single item to the end of the bytes.");
2438static PyObject *
2439bytes_append(PyBytesObject *self, PyObject *arg)
2440{
2441 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002442 Py_ssize_t n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002443
2444 if (! _getbytevalue(arg, &value))
2445 return NULL;
2446 if (n == PY_SSIZE_T_MAX) {
2447 PyErr_SetString(PyExc_OverflowError,
2448 "cannot add more objects to bytes");
2449 return NULL;
2450 }
2451 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2452 return NULL;
2453
2454 self->ob_bytes[n] = value;
2455
2456 Py_RETURN_NONE;
2457}
2458
2459PyDoc_STRVAR(pop__doc__,
2460"B.pop([index]) -> int\n\
2461\n\
2462Remove and return a single item from the bytes. If no index\n\
2463argument is give, will pop the last value.");
2464static PyObject *
2465bytes_pop(PyBytesObject *self, PyObject *args)
2466{
2467 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002468 Py_ssize_t where = -1, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002469
2470 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2471 return NULL;
2472
2473 if (n == 0) {
2474 PyErr_SetString(PyExc_OverflowError,
2475 "cannot pop an empty bytes");
2476 return NULL;
2477 }
2478 if (where < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002479 where += Py_Size(self);
2480 if (where < 0 || where >= Py_Size(self)) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002481 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2482 return NULL;
2483 }
2484
2485 value = self->ob_bytes[where];
2486 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2487 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2488 return NULL;
2489
2490 return PyInt_FromLong(value);
2491}
2492
2493PyDoc_STRVAR(remove__doc__,
2494"B.remove(int) -> None\n\
2495\n\
2496Remove the first occurance of a value in bytes");
2497static PyObject *
2498bytes_remove(PyBytesObject *self, PyObject *arg)
2499{
2500 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002501 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002502
2503 if (! _getbytevalue(arg, &value))
2504 return NULL;
2505
2506 for (where = 0; where < n; where++) {
2507 if (self->ob_bytes[where] == value)
2508 break;
2509 }
2510 if (where == n) {
2511 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2512 return NULL;
2513 }
2514
2515 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2516 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2517 return NULL;
2518
2519 Py_RETURN_NONE;
2520}
2521
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002522/* XXX These two helpers could be optimized if argsize == 1 */
2523
Neal Norwitz2bad9702007-08-27 06:19:22 +00002524static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002525lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2526 void *argptr, Py_ssize_t argsize)
2527{
2528 Py_ssize_t i = 0;
2529 while (i < mysize && memchr(argptr, myptr[i], argsize))
2530 i++;
2531 return i;
2532}
2533
Neal Norwitz2bad9702007-08-27 06:19:22 +00002534static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002535rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2536 void *argptr, Py_ssize_t argsize)
2537{
2538 Py_ssize_t i = mysize - 1;
2539 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2540 i--;
2541 return i + 1;
2542}
2543
2544PyDoc_STRVAR(strip__doc__,
2545"B.strip(bytes) -> bytes\n\
2546\n\
2547Strip leading and trailing bytes contained in the argument.");
2548static PyObject *
2549bytes_strip(PyBytesObject *self, PyObject *arg)
2550{
2551 Py_ssize_t left, right, mysize, argsize;
2552 void *myptr, *argptr;
2553 if (arg == NULL || !PyBytes_Check(arg)) {
2554 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2555 return NULL;
2556 }
2557 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002558 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002559 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002560 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002561 left = lstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumeb29e9a2007-08-08 21:55:33 +00002562 if (left == mysize)
2563 right = left;
2564 else
2565 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002566 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2567}
2568
2569PyDoc_STRVAR(lstrip__doc__,
2570"B.lstrip(bytes) -> bytes\n\
2571\n\
2572Strip leading bytes contained in the argument.");
2573static PyObject *
2574bytes_lstrip(PyBytesObject *self, PyObject *arg)
2575{
2576 Py_ssize_t left, right, mysize, argsize;
2577 void *myptr, *argptr;
2578 if (arg == NULL || !PyBytes_Check(arg)) {
2579 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2580 return NULL;
2581 }
2582 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002583 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002584 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002585 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002586 left = lstrip_helper(myptr, mysize, argptr, argsize);
2587 right = mysize;
2588 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2589}
2590
2591PyDoc_STRVAR(rstrip__doc__,
2592"B.rstrip(bytes) -> bytes\n\
2593\n\
2594Strip trailing bytes contained in the argument.");
2595static PyObject *
2596bytes_rstrip(PyBytesObject *self, PyObject *arg)
2597{
2598 Py_ssize_t left, right, mysize, argsize;
2599 void *myptr, *argptr;
2600 if (arg == NULL || !PyBytes_Check(arg)) {
2601 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2602 return NULL;
2603 }
2604 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002605 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002606 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002607 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002608 left = 0;
2609 right = rstrip_helper(myptr, mysize, argptr, argsize);
2610 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2611}
Neal Norwitz6968b052007-02-27 19:02:19 +00002612
Guido van Rossumd624f182006-04-24 13:47:05 +00002613PyDoc_STRVAR(decode_doc,
2614"B.decode([encoding[,errors]]) -> unicode obect.\n\
2615\n\
2616Decodes B using the codec registered for encoding. encoding defaults\n\
2617to the default encoding. errors may be given to set a different error\n\
2618handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2619a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2620as well as any other name registerd with codecs.register_error that is\n\
2621able to handle UnicodeDecodeErrors.");
2622
2623static PyObject *
2624bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002625{
Guido van Rossumd624f182006-04-24 13:47:05 +00002626 const char *encoding = NULL;
2627 const char *errors = NULL;
2628
2629 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2630 return NULL;
2631 if (encoding == NULL)
2632 encoding = PyUnicode_GetDefaultEncoding();
2633 return PyCodec_Decode(self, encoding, errors);
2634}
2635
Guido van Rossuma0867f72006-05-05 04:34:18 +00002636PyDoc_STRVAR(alloc_doc,
2637"B.__alloc__() -> int\n\
2638\n\
2639Returns the number of bytes actually allocated.");
2640
2641static PyObject *
2642bytes_alloc(PyBytesObject *self)
2643{
2644 return PyInt_FromSsize_t(self->ob_alloc);
2645}
2646
Guido van Rossum20188312006-05-05 15:15:40 +00002647PyDoc_STRVAR(join_doc,
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002648"B.join(iterable_of_bytes) -> bytes\n\
Guido van Rossum20188312006-05-05 15:15:40 +00002649\n\
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002650Concatenates any number of bytes objects, with B in between each pair.\n\
2651Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Guido van Rossum20188312006-05-05 15:15:40 +00002652
2653static PyObject *
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002654bytes_join(PyBytesObject *self, PyObject *it)
Guido van Rossum20188312006-05-05 15:15:40 +00002655{
2656 PyObject *seq;
Martin v. Löwis5d7428b2007-07-21 18:47:48 +00002657 Py_ssize_t mysize = Py_Size(self);
Guido van Rossum20188312006-05-05 15:15:40 +00002658 Py_ssize_t i;
2659 Py_ssize_t n;
2660 PyObject **items;
2661 Py_ssize_t totalsize = 0;
2662 PyObject *result;
2663 char *dest;
2664
2665 seq = PySequence_Fast(it, "can only join an iterable");
2666 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002667 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002668 n = PySequence_Fast_GET_SIZE(seq);
2669 items = PySequence_Fast_ITEMS(seq);
2670
2671 /* Compute the total size, and check that they are all bytes */
2672 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002673 PyObject *obj = items[i];
2674 if (!PyBytes_Check(obj)) {
2675 PyErr_Format(PyExc_TypeError,
2676 "can only join an iterable of bytes "
2677 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002678 /* XXX %ld isn't right on Win64 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002679 (long)i, Py_Type(obj)->tp_name);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002680 goto error;
2681 }
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002682 if (i > 0)
2683 totalsize += mysize;
Georg Brandlb3f568f2007-02-27 08:49:18 +00002684 totalsize += PyBytes_GET_SIZE(obj);
2685 if (totalsize < 0) {
2686 PyErr_NoMemory();
2687 goto error;
2688 }
Guido van Rossum20188312006-05-05 15:15:40 +00002689 }
2690
2691 /* Allocate the result, and copy the bytes */
2692 result = PyBytes_FromStringAndSize(NULL, totalsize);
2693 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002694 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002695 dest = PyBytes_AS_STRING(result);
2696 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002697 PyObject *obj = items[i];
2698 Py_ssize_t size = PyBytes_GET_SIZE(obj);
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002699 if (i > 0) {
2700 memcpy(dest, self->ob_bytes, mysize);
2701 dest += mysize;
2702 }
Georg Brandlb3f568f2007-02-27 08:49:18 +00002703 memcpy(dest, PyBytes_AS_STRING(obj), size);
2704 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002705 }
2706
2707 /* Done */
2708 Py_DECREF(seq);
2709 return result;
2710
2711 /* Error handling */
2712 error:
2713 Py_DECREF(seq);
2714 return NULL;
2715}
2716
/* Docstring text describing bytes.fromhex(): input format (hexadecimal
   digit pairs, optionally separated by spaces) and an example. */
PyDoc_STRVAR(fromhex_doc,
"bytes.fromhex(string) -> bytes\n\
\n\
Create a bytes object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted. Example:\n\
bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2723
/* Map one hexadecimal digit character to its numeric value (0..15).
   Upper- and lower-case letters a-f are both accepted; any other
   character yields -1. */
static int
hex_digit_to_int(int c)
{
    int lowered;

    if (isdigit(c))
        return c - '0';

    lowered = isupper(c) ? tolower(c) : c;
    if ('a' <= lowered && lowered <= 'f')
        return lowered - 'a' + 10;

    return -1;
}
2737
2738static PyObject *
2739bytes_fromhex(PyObject *cls, PyObject *args)
2740{
2741 PyObject *newbytes;
2742 char *hex, *buf;
2743 Py_ssize_t len, byteslen, i, j;
2744 int top, bot;
2745
2746 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
2747 return NULL;
2748
2749 byteslen = len / 2; /* max length if there are no spaces */
2750
2751 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
2752 if (!newbytes)
2753 return NULL;
2754 buf = PyBytes_AS_STRING(newbytes);
2755
Guido van Rossum4355a472007-05-04 05:00:04 +00002756 for (i = j = 0; i < len; i += 2) {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002757 /* skip over spaces in the input */
2758 while (Py_CHARMASK(hex[i]) == ' ')
2759 i++;
2760 if (i >= len)
2761 break;
2762 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2763 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2764 if (top == -1 || bot == -1) {
2765 PyErr_Format(PyExc_ValueError,
2766 "non-hexadecimal number string '%c%c' found in "
2767 "fromhex() arg at position %zd",
2768 hex[i], hex[i+1], i);
2769 goto error;
2770 }
2771 buf[j++] = (top << 4) + bot;
2772 }
2773 if (PyBytes_Resize(newbytes, j) < 0)
2774 goto error;
2775 return newbytes;
2776
2777 error:
2778 Py_DECREF(newbytes);
2779 return NULL;
2780}
2781
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002782PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2783
2784static PyObject *
2785bytes_reduce(PyBytesObject *self)
2786{
Martin v. Löwis9c121062007-08-05 20:26:11 +00002787 PyObject *latin1;
2788 if (self->ob_bytes)
Guido van Rossuma74184e2007-08-29 04:05:57 +00002789 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
2790 Py_Size(self), NULL);
Martin v. Löwis9c121062007-08-05 20:26:11 +00002791 else
Guido van Rossuma74184e2007-08-29 04:05:57 +00002792 latin1 = PyUnicode_FromString("");
Martin v. Löwis9c121062007-08-05 20:26:11 +00002793 return Py_BuildValue("(O(Ns))", Py_Type(self), latin1, "latin-1");
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002794}
2795
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002796static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002797 (lenfunc)bytes_length, /* sq_length */
2798 (binaryfunc)bytes_concat, /* sq_concat */
2799 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2800 (ssizeargfunc)bytes_getitem, /* sq_item */
2801 0, /* sq_slice */
2802 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2803 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002804 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002805 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2806 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002807};
2808
2809static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002810 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00002811 (binaryfunc)bytes_subscript,
2812 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002813};
2814
2815static PyBufferProcs bytes_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002816 (getbufferproc)bytes_getbuffer,
2817 (releasebufferproc)bytes_releasebuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002818};
2819
2820static PyMethodDef
2821bytes_methods[] = {
Neal Norwitz6968b052007-02-27 19:02:19 +00002822 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2823 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2824 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2825 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2826 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2827 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
2828 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2829 startswith__doc__},
2830 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2831 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
2832 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2833 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
2834 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2835 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2836 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
2837 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
2838 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
2839 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
2840 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
2841 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002842 {"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__},
2843 {"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__},
2844 {"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__},
Guido van Rossumd624f182006-04-24 13:47:05 +00002845 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002846 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002847 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2848 fromhex_doc},
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002849 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002850 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002851 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002852};
2853
2854PyDoc_STRVAR(bytes_doc,
2855"bytes([iterable]) -> new array of bytes.\n\
2856\n\
2857If an argument is given it must be an iterable yielding ints in range(256).");
2858
2859PyTypeObject PyBytes_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002860 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002861 "bytes",
2862 sizeof(PyBytesObject),
2863 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00002864 (destructor)bytes_dealloc, /* tp_dealloc */
2865 0, /* tp_print */
2866 0, /* tp_getattr */
2867 0, /* tp_setattr */
2868 0, /* tp_compare */
2869 (reprfunc)bytes_repr, /* tp_repr */
2870 0, /* tp_as_number */
2871 &bytes_as_sequence, /* tp_as_sequence */
2872 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002873 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00002874 0, /* tp_call */
2875 (reprfunc)bytes_str, /* tp_str */
2876 PyObject_GenericGetAttr, /* tp_getattro */
2877 0, /* tp_setattro */
2878 &bytes_as_buffer, /* tp_as_buffer */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002879 /* bytes is 'final' or 'sealed' */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002880 Py_TPFLAGS_DEFAULT, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00002881 bytes_doc, /* tp_doc */
2882 0, /* tp_traverse */
2883 0, /* tp_clear */
2884 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2885 0, /* tp_weaklistoffset */
2886 0, /* tp_iter */
2887 0, /* tp_iternext */
2888 bytes_methods, /* tp_methods */
2889 0, /* tp_members */
2890 0, /* tp_getset */
2891 0, /* tp_base */
2892 0, /* tp_dict */
2893 0, /* tp_descr_get */
2894 0, /* tp_descr_set */
2895 0, /* tp_dictoffset */
2896 (initproc)bytes_init, /* tp_init */
2897 PyType_GenericAlloc, /* tp_alloc */
2898 PyType_GenericNew, /* tp_new */
2899 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002900};