blob: ec379281ccce8487278427b3056c277098938d06 [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
3/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz6968b052007-02-27 19:02:19 +00009/* The nullbytes are used by the stringlib during partition.
10 * If partition is removed from bytes, nullbytes and its helper
11 * Init/Fini should also be removed.
12 */
13static PyBytesObject *nullbytes = NULL;
14
15void
16PyBytes_Fini(void)
17{
18 Py_CLEAR(nullbytes);
19}
20
21int
22PyBytes_Init(void)
23{
24 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
25 if (nullbytes == NULL)
26 return 0;
27 nullbytes->ob_bytes = NULL;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000028 Py_Size(nullbytes) = nullbytes->ob_alloc = 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000029 nullbytes->ob_exports = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +000030 return 1;
31}
32
33/* end nullbytes support */
34
Guido van Rossumad7d8d12007-04-13 01:39:34 +000035/* Helpers */
36
37static int
38_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000039{
40 PyObject *intarg = PyNumber_Int(arg);
41 if (! intarg)
42 return 0;
43 *value = PyInt_AsLong(intarg);
44 Py_DECREF(intarg);
45 if (*value < 0 || *value >= 256) {
46 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
47 return 0;
48 }
49 return 1;
50}
51
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000052static int
53bytes_getbuffer(PyBytesObject *obj, PyBuffer *view, int flags)
Guido van Rossum75d38e92007-08-24 17:33:11 +000054{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000055 int ret;
56 void *ptr;
57 if (view == NULL) {
58 obj->ob_exports++;
59 return 0;
60 }
Guido van Rossum75d38e92007-08-24 17:33:11 +000061 if (obj->ob_bytes == NULL)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000062 ptr = "";
63 else
64 ptr = obj->ob_bytes;
65 ret = PyBuffer_FillInfo(view, ptr, Py_Size(obj), 0, flags);
66 if (ret >= 0) {
67 obj->ob_exports++;
68 }
69 return ret;
70}
71
72static void
73bytes_releasebuffer(PyBytesObject *obj, PyBuffer *view)
74{
75 obj->ob_exports--;
76}
77
Neal Norwitz2bad9702007-08-27 06:19:22 +000078static Py_ssize_t
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000079_getbuffer(PyObject *obj, PyBuffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000080{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000081 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000082
83 if (buffer == NULL ||
84 PyUnicode_Check(obj) ||
Guido van Rossuma74184e2007-08-29 04:05:57 +000085 buffer->bf_getbuffer == NULL)
86 {
87 PyErr_Format(PyExc_TypeError,
88 "Type %.100s doesn't support the buffer API",
89 Py_Type(obj)->tp_name);
90 return -1;
91 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000092
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000093 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
94 return -1;
95 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000096}
97
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000098/* Direct API functions */
99
100PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000101PyBytes_FromObject(PyObject *input)
102{
103 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
104 input, NULL);
105}
106
107PyObject *
108PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000109{
110 PyBytesObject *new;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000111 int alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000112
Guido van Rossumd624f182006-04-24 13:47:05 +0000113 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000114
115 new = PyObject_New(PyBytesObject, &PyBytes_Type);
116 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000117 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000118
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000119 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000120 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000121 alloc = 0;
122 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000123 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000124 alloc = size + 1;
125 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +0000126 if (new->ob_bytes == NULL) {
127 Py_DECREF(new);
Neal Norwitz16596dd2007-08-30 05:44:54 +0000128 return PyErr_NoMemory();
Guido van Rossumd624f182006-04-24 13:47:05 +0000129 }
130 if (bytes != NULL)
131 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000132 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133 }
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000134 Py_Size(new) = size;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000135 new->ob_alloc = alloc;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000136 new->ob_exports = 0;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000137
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000138 return (PyObject *)new;
139}
140
141Py_ssize_t
142PyBytes_Size(PyObject *self)
143{
144 assert(self != NULL);
145 assert(PyBytes_Check(self));
146
Guido van Rossum20188312006-05-05 15:15:40 +0000147 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000148}
149
150char *
151PyBytes_AsString(PyObject *self)
152{
153 assert(self != NULL);
154 assert(PyBytes_Check(self));
155
Guido van Rossum20188312006-05-05 15:15:40 +0000156 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000157}
158
159int
160PyBytes_Resize(PyObject *self, Py_ssize_t size)
161{
162 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000163 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000164
165 assert(self != NULL);
166 assert(PyBytes_Check(self));
167 assert(size >= 0);
168
Guido van Rossuma0867f72006-05-05 04:34:18 +0000169 if (size < alloc / 2) {
170 /* Major downsize; resize down to exact size */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000171 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000172 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000173 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000174 /* Within allocated size; quick exit */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000175 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000176 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000177 return 0;
178 }
179 else if (size <= alloc * 1.125) {
180 /* Moderate upsize; overallocate similar to list_resize() */
181 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
182 }
183 else {
184 /* Major upsize; resize up to exact size */
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000185 alloc = size + 1;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000186 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000187
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000188 if (((PyBytesObject *)self)->ob_exports > 0) {
189 /*
Guido van Rossuma74184e2007-08-29 04:05:57 +0000190 fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
191 ((PyBytesObject *)self)->ob_bytes);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000192 */
193 PyErr_SetString(PyExc_BufferError,
Guido van Rossuma74184e2007-08-29 04:05:57 +0000194 "Existing exports of data: object cannot be re-sized");
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000195 return -1;
196 }
197
Guido van Rossuma0867f72006-05-05 04:34:18 +0000198 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000200 PyErr_NoMemory();
201 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000202 }
203
Guido van Rossumd624f182006-04-24 13:47:05 +0000204 ((PyBytesObject *)self)->ob_bytes = sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000205 Py_Size(self) = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000206 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000207 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
208
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000209 return 0;
210}
211
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000212PyObject *
213PyBytes_Concat(PyObject *a, PyObject *b)
214{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000215 Py_ssize_t size;
216 PyBuffer va, vb;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000217 PyBytesObject *result;
218
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000219 va.len = -1;
220 vb.len = -1;
221 if (_getbuffer(a, &va) < 0 ||
222 _getbuffer(b, &vb) < 0) {
Guido van Rossum75d38e92007-08-24 17:33:11 +0000223 if (va.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000224 PyObject_ReleaseBuffer(a, &va);
225 if (vb.len != -1)
226 PyObject_ReleaseBuffer(b, &vb);
227 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
228 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
229 return NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000230 }
231
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000232 size = va.len + vb.len;
233 if (size < 0) {
234 PyObject_ReleaseBuffer(a, &va);
235 PyObject_ReleaseBuffer(b, &vb);
236 return PyErr_NoMemory();
237 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000238
239 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
240 if (result != NULL) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000241 memcpy(result->ob_bytes, va.buf, va.len);
242 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000243 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000244
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000245 PyObject_ReleaseBuffer(a, &va);
246 PyObject_ReleaseBuffer(b, &vb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000247 return (PyObject *)result;
248}
249
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000250/* Functions stuffed into the type object */
251
252static Py_ssize_t
253bytes_length(PyBytesObject *self)
254{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000255 return Py_Size(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256}
257
258static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000259bytes_concat(PyBytesObject *self, PyObject *other)
260{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000261 return PyBytes_Concat((PyObject *)self, other);
Guido van Rossumd624f182006-04-24 13:47:05 +0000262}
263
264static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000265bytes_iconcat(PyBytesObject *self, PyObject *other)
266{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000267 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000268 Py_ssize_t size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000269 PyBuffer vo;
Guido van Rossum13e57212006-04-27 22:54:26 +0000270
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000271 if (_getbuffer(other, &vo) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000272 PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
273 Py_Type(self)->tp_name);
274 return NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000275 }
276
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000277 mysize = Py_Size(self);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000278 size = mysize + vo.len;
279 if (size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000280 PyObject_ReleaseBuffer(other, &vo);
281 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000282 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000283 if (size < self->ob_alloc) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000284 Py_Size(self) = size;
285 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000286 }
287 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000288 PyObject_ReleaseBuffer(other, &vo);
289 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000290 }
291 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
292 PyObject_ReleaseBuffer(other, &vo);
Guido van Rossum13e57212006-04-27 22:54:26 +0000293 Py_INCREF(self);
294 return (PyObject *)self;
295}
296
297static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000298bytes_repeat(PyBytesObject *self, Py_ssize_t count)
299{
300 PyBytesObject *result;
301 Py_ssize_t mysize;
302 Py_ssize_t size;
303
304 if (count < 0)
305 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000306 mysize = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000307 size = mysize * count;
308 if (count != 0 && size / count != mysize)
309 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000310 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000311 if (result != NULL && size != 0) {
312 if (mysize == 1)
313 memset(result->ob_bytes, self->ob_bytes[0], size);
314 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000315 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000316 for (i = 0; i < count; i++)
317 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
318 }
319 }
320 return (PyObject *)result;
321}
322
323static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000324bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
325{
326 Py_ssize_t mysize;
327 Py_ssize_t size;
328
329 if (count < 0)
330 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000331 mysize = Py_Size(self);
Guido van Rossum13e57212006-04-27 22:54:26 +0000332 size = mysize * count;
333 if (count != 0 && size / count != mysize)
334 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000335 if (size < self->ob_alloc) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000336 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000337 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000338 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000339 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000340 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000341
Guido van Rossum13e57212006-04-27 22:54:26 +0000342 if (mysize == 1)
343 memset(self->ob_bytes, self->ob_bytes[0], size);
344 else {
345 Py_ssize_t i;
346 for (i = 1; i < count; i++)
347 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
348 }
349
350 Py_INCREF(self);
351 return (PyObject *)self;
352}
353
354static int
355bytes_substring(PyBytesObject *self, PyBytesObject *other)
356{
357 Py_ssize_t i;
358
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000359 if (Py_Size(other) == 1) {
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000360 return memchr(self->ob_bytes, other->ob_bytes[0],
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000361 Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000362 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000363 if (Py_Size(other) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000364 return 1; /* Edge case */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000365 for (i = 0; i + Py_Size(other) <= Py_Size(self); i++) {
Guido van Rossum13e57212006-04-27 22:54:26 +0000366 /* XXX Yeah, yeah, lots of optimizations possible... */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000367 if (memcmp(self->ob_bytes + i, other->ob_bytes, Py_Size(other)) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000368 return 1;
369 }
370 return 0;
371}
372
373static int
374bytes_contains(PyBytesObject *self, PyObject *value)
375{
376 Py_ssize_t ival;
377
378 if (PyBytes_Check(value))
379 return bytes_substring(self, (PyBytesObject *)value);
380
Thomas Woutersd204a712006-08-22 13:41:17 +0000381 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000382 if (ival == -1 && PyErr_Occurred())
383 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000384 if (ival < 0 || ival >= 256) {
385 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
386 return -1;
387 }
388
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000389 return memchr(self->ob_bytes, ival, Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000390}
391
392static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000393bytes_getitem(PyBytesObject *self, Py_ssize_t i)
394{
395 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000396 i += Py_Size(self);
397 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000398 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
399 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000400 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000401 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
402}
403
404static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000405bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000406{
Thomas Wouters376446d2006-12-19 08:30:14 +0000407 if (PyIndex_Check(item)) {
408 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000409
Thomas Wouters376446d2006-12-19 08:30:14 +0000410 if (i == -1 && PyErr_Occurred())
411 return NULL;
412
413 if (i < 0)
414 i += PyBytes_GET_SIZE(self);
415
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000416 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000417 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
418 return NULL;
419 }
420 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
421 }
422 else if (PySlice_Check(item)) {
423 Py_ssize_t start, stop, step, slicelength, cur, i;
424 if (PySlice_GetIndicesEx((PySliceObject *)item,
425 PyBytes_GET_SIZE(self),
426 &start, &stop, &step, &slicelength) < 0) {
427 return NULL;
428 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000429
Thomas Wouters376446d2006-12-19 08:30:14 +0000430 if (slicelength <= 0)
431 return PyBytes_FromStringAndSize("", 0);
432 else if (step == 1) {
433 return PyBytes_FromStringAndSize(self->ob_bytes + start,
434 slicelength);
435 }
436 else {
437 char *source_buf = PyBytes_AS_STRING(self);
438 char *result_buf = (char *)PyMem_Malloc(slicelength);
439 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000440
Thomas Wouters376446d2006-12-19 08:30:14 +0000441 if (result_buf == NULL)
442 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000443
Thomas Wouters376446d2006-12-19 08:30:14 +0000444 for (cur = start, i = 0; i < slicelength;
445 cur += step, i++) {
446 result_buf[i] = source_buf[cur];
447 }
448 result = PyBytes_FromStringAndSize(result_buf, slicelength);
449 PyMem_Free(result_buf);
450 return result;
451 }
452 }
453 else {
454 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
455 return NULL;
456 }
457}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000458
Guido van Rossumd624f182006-04-24 13:47:05 +0000459static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000460bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000461 PyObject *values)
462{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000463 Py_ssize_t avail, needed;
464 void *bytes;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000465 PyBuffer vbytes;
466 int res = 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000467
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000468 vbytes.len = -1;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000469 if (values == (PyObject *)self) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000470 /* Make a copy and call this function recursively */
Guido van Rossumd624f182006-04-24 13:47:05 +0000471 int err;
472 values = PyBytes_FromObject(values);
473 if (values == NULL)
474 return -1;
475 err = bytes_setslice(self, lo, hi, values);
476 Py_DECREF(values);
477 return err;
478 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000479 if (values == NULL) {
480 /* del b[lo:hi] */
481 bytes = NULL;
482 needed = 0;
483 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000484 else {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000485 if (_getbuffer(values, &vbytes) < 0) {
486 PyErr_Format(PyExc_TypeError,
487 "can't set bytes slice from %.100s",
488 Py_Type(values)->tp_name);
489 return -1;
490 }
491 needed = vbytes.len;
492 bytes = vbytes.buf;
Guido van Rossumd624f182006-04-24 13:47:05 +0000493 }
494
495 if (lo < 0)
496 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000497 if (hi < lo)
498 hi = lo;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000499 if (hi > Py_Size(self))
500 hi = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000501
502 avail = hi - lo;
503 if (avail < 0)
504 lo = hi = avail = 0;
505
506 if (avail != needed) {
507 if (avail > needed) {
508 /*
509 0 lo hi old_size
510 | |<----avail----->|<-----tomove------>|
511 | |<-needed->|<-----tomove------>|
512 0 lo new_hi new_size
513 */
514 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000515 Py_Size(self) - hi);
Guido van Rossumd624f182006-04-24 13:47:05 +0000516 }
Guido van Rossuma74184e2007-08-29 04:05:57 +0000517 /* XXX(nnorwitz): need to verify this can't overflow! */
Thomas Wouters376446d2006-12-19 08:30:14 +0000518 if (PyBytes_Resize((PyObject *)self,
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000519 Py_Size(self) + needed - avail) < 0) {
520 res = -1;
521 goto finish;
522 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000523 if (avail < needed) {
524 /*
525 0 lo hi old_size
526 | |<-avail->|<-----tomove------>|
527 | |<----needed---->|<-----tomove------>|
528 0 lo new_hi new_size
529 */
530 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000531 Py_Size(self) - lo - needed);
Guido van Rossumd624f182006-04-24 13:47:05 +0000532 }
533 }
534
535 if (needed > 0)
536 memcpy(self->ob_bytes + lo, bytes, needed);
537
Guido van Rossum75d38e92007-08-24 17:33:11 +0000538
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000539 finish:
Guido van Rossum75d38e92007-08-24 17:33:11 +0000540 if (vbytes.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000541 PyObject_ReleaseBuffer(values, &vbytes);
542 return res;
Guido van Rossumd624f182006-04-24 13:47:05 +0000543}
544
545static int
546bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
547{
548 Py_ssize_t ival;
549
550 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000551 i += Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000552
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000553 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000554 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
555 return -1;
556 }
557
558 if (value == NULL)
559 return bytes_setslice(self, i, i+1, NULL);
560
Thomas Woutersd204a712006-08-22 13:41:17 +0000561 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000562 if (ival == -1 && PyErr_Occurred())
563 return -1;
564
565 if (ival < 0 || ival >= 256) {
566 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
567 return -1;
568 }
569
570 self->ob_bytes[i] = ival;
571 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000572}
573
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000574static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000575bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
576{
577 Py_ssize_t start, stop, step, slicelen, needed;
578 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000579
Thomas Wouters376446d2006-12-19 08:30:14 +0000580 if (PyIndex_Check(item)) {
581 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
582
583 if (i == -1 && PyErr_Occurred())
584 return -1;
585
586 if (i < 0)
587 i += PyBytes_GET_SIZE(self);
588
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000589 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000590 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
591 return -1;
592 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000593
Thomas Wouters376446d2006-12-19 08:30:14 +0000594 if (values == NULL) {
595 /* Fall through to slice assignment */
596 start = i;
597 stop = i + 1;
598 step = 1;
599 slicelen = 1;
600 }
601 else {
602 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
603 if (ival == -1 && PyErr_Occurred())
604 return -1;
605 if (ival < 0 || ival >= 256) {
606 PyErr_SetString(PyExc_ValueError,
607 "byte must be in range(0, 256)");
608 return -1;
609 }
610 self->ob_bytes[i] = (char)ival;
611 return 0;
612 }
613 }
614 else if (PySlice_Check(item)) {
615 if (PySlice_GetIndicesEx((PySliceObject *)item,
616 PyBytes_GET_SIZE(self),
617 &start, &stop, &step, &slicelen) < 0) {
618 return -1;
619 }
620 }
621 else {
622 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
623 return -1;
624 }
625
626 if (values == NULL) {
627 bytes = NULL;
628 needed = 0;
629 }
630 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
631 /* Make a copy an call this function recursively */
632 int err;
633 values = PyBytes_FromObject(values);
634 if (values == NULL)
635 return -1;
636 err = bytes_ass_subscript(self, item, values);
637 Py_DECREF(values);
638 return err;
639 }
640 else {
641 assert(PyBytes_Check(values));
642 bytes = ((PyBytesObject *)values)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000643 needed = Py_Size(values);
Thomas Wouters376446d2006-12-19 08:30:14 +0000644 }
645 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
646 if ((step < 0 && start < stop) ||
647 (step > 0 && start > stop))
648 stop = start;
649 if (step == 1) {
650 if (slicelen != needed) {
651 if (slicelen > needed) {
652 /*
653 0 start stop old_size
654 | |<---slicelen--->|<-----tomove------>|
655 | |<-needed->|<-----tomove------>|
656 0 lo new_hi new_size
657 */
658 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000659 Py_Size(self) - stop);
Thomas Wouters376446d2006-12-19 08:30:14 +0000660 }
661 if (PyBytes_Resize((PyObject *)self,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000662 Py_Size(self) + needed - slicelen) < 0)
Thomas Wouters376446d2006-12-19 08:30:14 +0000663 return -1;
664 if (slicelen < needed) {
665 /*
666 0 lo hi old_size
667 | |<-avail->|<-----tomove------>|
668 | |<----needed---->|<-----tomove------>|
669 0 lo new_hi new_size
670 */
671 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000672 Py_Size(self) - start - needed);
Thomas Wouters376446d2006-12-19 08:30:14 +0000673 }
674 }
675
676 if (needed > 0)
677 memcpy(self->ob_bytes + start, bytes, needed);
678
679 return 0;
680 }
681 else {
682 if (needed == 0) {
683 /* Delete slice */
684 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000685
Thomas Wouters376446d2006-12-19 08:30:14 +0000686 if (step < 0) {
687 stop = start + 1;
688 start = stop + step * (slicelen - 1) - 1;
689 step = -step;
690 }
691 for (cur = start, i = 0;
692 i < slicelen; cur += step, i++) {
693 Py_ssize_t lim = step - 1;
694
695 if (cur + step >= PyBytes_GET_SIZE(self))
696 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000697
Thomas Wouters376446d2006-12-19 08:30:14 +0000698 memmove(self->ob_bytes + cur - i,
699 self->ob_bytes + cur + 1, lim);
700 }
701 /* Move the tail of the bytes, in one chunk */
702 cur = start + slicelen*step;
703 if (cur < PyBytes_GET_SIZE(self)) {
704 memmove(self->ob_bytes + cur - slicelen,
705 self->ob_bytes + cur,
706 PyBytes_GET_SIZE(self) - cur);
707 }
708 if (PyBytes_Resize((PyObject *)self,
709 PyBytes_GET_SIZE(self) - slicelen) < 0)
710 return -1;
711
712 return 0;
713 }
714 else {
715 /* Assign slice */
716 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000717
Thomas Wouters376446d2006-12-19 08:30:14 +0000718 if (needed != slicelen) {
719 PyErr_Format(PyExc_ValueError,
720 "attempt to assign bytes of size %zd "
721 "to extended slice of size %zd",
722 needed, slicelen);
723 return -1;
724 }
725 for (cur = start, i = 0; i < slicelen; cur += step, i++)
726 self->ob_bytes[cur] = bytes[i];
727 return 0;
728 }
729 }
730}
731
732static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000733bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
734{
Guido van Rossumd624f182006-04-24 13:47:05 +0000735 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000736 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000737 const char *encoding = NULL;
738 const char *errors = NULL;
739 Py_ssize_t count;
740 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000741 PyObject *(*iternext)(PyObject *);
742
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000743 if (Py_Size(self) != 0) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000744 /* Empty previous contents (yes, do this first of all!) */
745 if (PyBytes_Resize((PyObject *)self, 0) < 0)
746 return -1;
747 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000748
Guido van Rossumd624f182006-04-24 13:47:05 +0000749 /* Parse arguments */
750 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
751 &arg, &encoding, &errors))
752 return -1;
753
754 /* Make a quick exit if no first argument */
755 if (arg == NULL) {
756 if (encoding != NULL || errors != NULL) {
757 PyErr_SetString(PyExc_TypeError,
758 "encoding or errors without sequence argument");
759 return -1;
760 }
761 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000762 }
763
Guido van Rossumd624f182006-04-24 13:47:05 +0000764 if (PyUnicode_Check(arg)) {
765 /* Encode via the codec registry */
Guido van Rossum4355a472007-05-04 05:00:04 +0000766 PyObject *encoded, *new;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000767 if (encoding == NULL) {
768 PyErr_SetString(PyExc_TypeError,
769 "string argument without an encoding");
770 return -1;
771 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000772 encoded = PyCodec_Encode(arg, encoding, errors);
773 if (encoded == NULL)
774 return -1;
Guido van Rossum4355a472007-05-04 05:00:04 +0000775 if (!PyBytes_Check(encoded) && !PyString_Check(encoded)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000776 PyErr_Format(PyExc_TypeError,
Guido van Rossum4355a472007-05-04 05:00:04 +0000777 "encoder did not return a str8 or bytes object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000778 Py_Type(encoded)->tp_name);
Guido van Rossumd624f182006-04-24 13:47:05 +0000779 Py_DECREF(encoded);
780 return -1;
781 }
Guido van Rossuma74184e2007-08-29 04:05:57 +0000782 new = bytes_iconcat(self, encoded);
783 Py_DECREF(encoded);
784 if (new == NULL)
785 return -1;
786 Py_DECREF(new);
787 return 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000788 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000789
Guido van Rossumd624f182006-04-24 13:47:05 +0000790 /* If it's not unicode, there can't be encoding or errors */
791 if (encoding != NULL || errors != NULL) {
792 PyErr_SetString(PyExc_TypeError,
793 "encoding or errors without a string argument");
794 return -1;
795 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000796
Guido van Rossumd624f182006-04-24 13:47:05 +0000797 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000798 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000799 if (count == -1 && PyErr_Occurred())
800 PyErr_Clear();
801 else {
802 if (count < 0) {
803 PyErr_SetString(PyExc_ValueError, "negative count");
804 return -1;
805 }
806 if (count > 0) {
807 if (PyBytes_Resize((PyObject *)self, count))
808 return -1;
809 memset(self->ob_bytes, 0, count);
810 }
811 return 0;
812 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000813
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000814 /* Use the modern buffer interface */
815 if (PyObject_CheckBuffer(arg)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000816 Py_ssize_t size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000817 PyBuffer view;
818 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000819 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000820 size = view.len;
821 if (PyBytes_Resize((PyObject *)self, size) < 0) goto fail;
822 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
823 goto fail;
824 PyObject_ReleaseBuffer(arg, &view);
Guido van Rossumd624f182006-04-24 13:47:05 +0000825 return 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000826 fail:
827 PyObject_ReleaseBuffer(arg, &view);
828 return -1;
Guido van Rossumd624f182006-04-24 13:47:05 +0000829 }
830
831 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000832
833 /* Get the iterator */
834 it = PyObject_GetIter(arg);
835 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000836 return -1;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000837 iternext = *Py_Type(it)->tp_iternext;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000838
839 /* Run the iterator to exhaustion */
840 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000841 PyObject *item;
842 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000843
Guido van Rossumd624f182006-04-24 13:47:05 +0000844 /* Get the next item */
845 item = iternext(it);
846 if (item == NULL) {
847 if (PyErr_Occurred()) {
848 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
849 goto error;
850 PyErr_Clear();
851 }
852 break;
853 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000854
Guido van Rossumd624f182006-04-24 13:47:05 +0000855 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000856 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000857 Py_DECREF(item);
858 if (value == -1 && PyErr_Occurred())
859 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000860
Guido van Rossumd624f182006-04-24 13:47:05 +0000861 /* Range check */
862 if (value < 0 || value >= 256) {
863 PyErr_SetString(PyExc_ValueError,
864 "bytes must be in range(0, 256)");
865 goto error;
866 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000867
Guido van Rossumd624f182006-04-24 13:47:05 +0000868 /* Append the byte */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000869 if (Py_Size(self) < self->ob_alloc)
870 Py_Size(self)++;
871 else if (PyBytes_Resize((PyObject *)self, Py_Size(self)+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000872 goto error;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000873 self->ob_bytes[Py_Size(self)-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000874 }
875
876 /* Clean up and return success */
877 Py_DECREF(it);
878 return 0;
879
880 error:
881 /* Error handling when it != NULL */
882 Py_DECREF(it);
883 return -1;
884}
885
Georg Brandlee91be42007-02-24 19:41:35 +0000886/* Mostly copied from string_repr, but without the
887 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000888static PyObject *
889bytes_repr(PyBytesObject *self)
890{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000891 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000892 size_t newsize = 3 + 4 * Py_Size(self);
Georg Brandlee91be42007-02-24 19:41:35 +0000893 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000894 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(self)) {
Georg Brandlee91be42007-02-24 19:41:35 +0000895 PyErr_SetString(PyExc_OverflowError,
896 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000897 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000898 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000899 v = PyUnicode_FromUnicode(NULL, newsize);
Georg Brandlee91be42007-02-24 19:41:35 +0000900 if (v == NULL) {
901 return NULL;
902 }
903 else {
904 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000905 register Py_UNICODE c;
906 register Py_UNICODE *p;
Georg Brandlee91be42007-02-24 19:41:35 +0000907 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000908
Walter Dörwald1ab83302007-05-18 17:15:44 +0000909 p = PyUnicode_AS_UNICODE(v);
Georg Brandlee91be42007-02-24 19:41:35 +0000910 *p++ = 'b';
911 *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000912 for (i = 0; i < Py_Size(self); i++) {
Georg Brandlee91be42007-02-24 19:41:35 +0000913 /* There's at least enough room for a hex escape
914 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000915 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Georg Brandlee91be42007-02-24 19:41:35 +0000916 c = self->ob_bytes[i];
917 if (c == quote || c == '\\')
918 *p++ = '\\', *p++ = c;
919 else if (c == '\t')
920 *p++ = '\\', *p++ = 't';
921 else if (c == '\n')
922 *p++ = '\\', *p++ = 'n';
923 else if (c == '\r')
924 *p++ = '\\', *p++ = 'r';
925 else if (c == 0)
Guido van Rossum57b93ad2007-05-08 19:09:34 +0000926 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
Georg Brandlee91be42007-02-24 19:41:35 +0000927 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000928 *p++ = '\\';
929 *p++ = 'x';
930 *p++ = hexdigits[(c & 0xf0) >> 4];
931 *p++ = hexdigits[c & 0xf];
Georg Brandlee91be42007-02-24 19:41:35 +0000932 }
933 else
934 *p++ = c;
935 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000936 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Georg Brandlee91be42007-02-24 19:41:35 +0000937 *p++ = quote;
938 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000939 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
940 Py_DECREF(v);
941 return NULL;
942 }
Georg Brandlee91be42007-02-24 19:41:35 +0000943 return v;
944 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000945}
946
947static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000948bytes_str(PyBytesObject *self)
949{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000950 return PyString_FromStringAndSize(self->ob_bytes, Py_Size(self));
Guido van Rossumd624f182006-04-24 13:47:05 +0000951}
952
953static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000954bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000955{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000956 Py_ssize_t self_size, other_size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000957 PyBuffer self_bytes, other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000958 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000959 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000960 int cmp;
961
Jeremy Hylton18c3ff82007-08-29 18:47:16 +0000962 /* Bytes can be compared to anything that supports the (binary)
963 buffer API. Except that a comparison with Unicode is always an
964 error, even if the comparison is for equality. */
965 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
966 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
967 PyErr_SetString(PyExc_TypeError, "can't compare bytes and str");
968 return NULL;
969 }
Guido van Rossumebea9be2007-04-09 00:49:13 +0000970
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000971 self_size = _getbuffer(self, &self_bytes);
972 if (self_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000973 PyErr_Clear();
Guido van Rossumebea9be2007-04-09 00:49:13 +0000974 Py_INCREF(Py_NotImplemented);
975 return Py_NotImplemented;
976 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000977
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000978 other_size = _getbuffer(other, &other_bytes);
979 if (other_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000980 PyErr_Clear();
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000981 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossumd624f182006-04-24 13:47:05 +0000982 Py_INCREF(Py_NotImplemented);
983 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000984 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000985
986 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000987 /* Shortcut: if the lengths differ, the objects differ */
988 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000989 }
990 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000991 minsize = self_size;
992 if (other_size < minsize)
993 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000994
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000995 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000996 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000997
Guido van Rossumd624f182006-04-24 13:47:05 +0000998 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000999 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +00001000 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +00001001 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +00001002 cmp = 1;
1003 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001004
Guido van Rossumd624f182006-04-24 13:47:05 +00001005 switch (op) {
1006 case Py_LT: cmp = cmp < 0; break;
1007 case Py_LE: cmp = cmp <= 0; break;
1008 case Py_EQ: cmp = cmp == 0; break;
1009 case Py_NE: cmp = cmp != 0; break;
1010 case Py_GT: cmp = cmp > 0; break;
1011 case Py_GE: cmp = cmp >= 0; break;
1012 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001013 }
1014
1015 res = cmp ? Py_True : Py_False;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001016 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossum75d38e92007-08-24 17:33:11 +00001017 PyObject_ReleaseBuffer(other, &other_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001018 Py_INCREF(res);
1019 return res;
1020}
1021
1022static void
1023bytes_dealloc(PyBytesObject *self)
1024{
Guido van Rossumd624f182006-04-24 13:47:05 +00001025 if (self->ob_bytes != 0) {
1026 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001027 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001028 Py_Type(self)->tp_free((PyObject *)self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001029}
1030
Neal Norwitz6968b052007-02-27 19:02:19 +00001031
1032/* -------------------------------------------------------------------- */
1033/* Methods */
1034
1035#define STRINGLIB_CHAR char
1036#define STRINGLIB_CMP memcmp
1037#define STRINGLIB_LEN PyBytes_GET_SIZE
1038#define STRINGLIB_NEW PyBytes_FromStringAndSize
1039#define STRINGLIB_EMPTY nullbytes
1040
1041#include "stringlib/fastsearch.h"
1042#include "stringlib/count.h"
1043#include "stringlib/find.h"
1044#include "stringlib/partition.h"
1045
1046
1047/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1048were copied from the old char* style string object. */
1049
1050Py_LOCAL_INLINE(void)
1051_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1052{
1053 if (*end > len)
1054 *end = len;
1055 else if (*end < 0)
1056 *end += len;
1057 if (*end < 0)
1058 *end = 0;
1059 if (*start < 0)
1060 *start += len;
1061 if (*start < 0)
1062 *start = 0;
1063}
1064
1065
1066Py_LOCAL_INLINE(Py_ssize_t)
1067bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1068{
1069 PyObject *subobj;
1070 const char *sub;
1071 Py_ssize_t sub_len;
1072 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1073
1074 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1075 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1076 return -2;
1077 if (PyBytes_Check(subobj)) {
1078 sub = PyBytes_AS_STRING(subobj);
1079 sub_len = PyBytes_GET_SIZE(subobj);
1080 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001081 /* XXX --> use the modern buffer interface */
Guido van Rossuma74184e2007-08-29 04:05:57 +00001082 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) {
Neal Norwitz6968b052007-02-27 19:02:19 +00001083 /* XXX - the "expected a character buffer object" is pretty
1084 confusing for a non-expert. remap to something else ? */
1085 return -2;
Guido van Rossuma74184e2007-08-29 04:05:57 +00001086 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001087
1088 if (dir > 0)
1089 return stringlib_find_slice(
1090 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1091 sub, sub_len, start, end);
1092 else
1093 return stringlib_rfind_slice(
1094 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1095 sub, sub_len, start, end);
1096}
1097
1098
1099PyDoc_STRVAR(find__doc__,
1100"B.find(sub [,start [,end]]) -> int\n\
1101\n\
1102Return the lowest index in B where subsection sub is found,\n\
1103such that sub is contained within s[start,end]. Optional\n\
1104arguments start and end are interpreted as in slice notation.\n\
1105\n\
1106Return -1 on failure.");
1107
1108static PyObject *
1109bytes_find(PyBytesObject *self, PyObject *args)
1110{
1111 Py_ssize_t result = bytes_find_internal(self, args, +1);
1112 if (result == -2)
1113 return NULL;
1114 return PyInt_FromSsize_t(result);
1115}
1116
1117PyDoc_STRVAR(count__doc__,
1118"B.count(sub[, start[, end]]) -> int\n\
1119\n\
1120Return the number of non-overlapping occurrences of subsection sub in\n\
1121bytes B[start:end]. Optional arguments start and end are interpreted\n\
1122as in slice notation.");
1123
1124static PyObject *
1125bytes_count(PyBytesObject *self, PyObject *args)
1126{
1127 PyObject *sub_obj;
1128 const char *str = PyBytes_AS_STRING(self), *sub;
1129 Py_ssize_t sub_len;
1130 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1131
1132 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1133 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1134 return NULL;
1135
1136 if (PyBytes_Check(sub_obj)) {
1137 sub = PyBytes_AS_STRING(sub_obj);
1138 sub_len = PyBytes_GET_SIZE(sub_obj);
1139 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001140 /* XXX --> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001141 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1142 return NULL;
1143
Martin v. Löwis5b222132007-06-10 09:51:05 +00001144 _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Neal Norwitz6968b052007-02-27 19:02:19 +00001145
1146 return PyInt_FromSsize_t(
1147 stringlib_count(str + start, end - start, sub, sub_len)
1148 );
1149}
1150
1151
1152PyDoc_STRVAR(index__doc__,
1153"B.index(sub [,start [,end]]) -> int\n\
1154\n\
1155Like B.find() but raise ValueError when the subsection is not found.");
1156
1157static PyObject *
1158bytes_index(PyBytesObject *self, PyObject *args)
1159{
1160 Py_ssize_t result = bytes_find_internal(self, args, +1);
1161 if (result == -2)
1162 return NULL;
1163 if (result == -1) {
1164 PyErr_SetString(PyExc_ValueError,
1165 "subsection not found");
1166 return NULL;
1167 }
1168 return PyInt_FromSsize_t(result);
1169}
1170
1171
1172PyDoc_STRVAR(rfind__doc__,
1173"B.rfind(sub [,start [,end]]) -> int\n\
1174\n\
1175Return the highest index in B where subsection sub is found,\n\
1176such that sub is contained within s[start,end]. Optional\n\
1177arguments start and end are interpreted as in slice notation.\n\
1178\n\
1179Return -1 on failure.");
1180
1181static PyObject *
1182bytes_rfind(PyBytesObject *self, PyObject *args)
1183{
1184 Py_ssize_t result = bytes_find_internal(self, args, -1);
1185 if (result == -2)
1186 return NULL;
1187 return PyInt_FromSsize_t(result);
1188}
1189
1190
1191PyDoc_STRVAR(rindex__doc__,
1192"B.rindex(sub [,start [,end]]) -> int\n\
1193\n\
1194Like B.rfind() but raise ValueError when the subsection is not found.");
1195
1196static PyObject *
1197bytes_rindex(PyBytesObject *self, PyObject *args)
1198{
1199 Py_ssize_t result = bytes_find_internal(self, args, -1);
1200 if (result == -2)
1201 return NULL;
1202 if (result == -1) {
1203 PyErr_SetString(PyExc_ValueError,
1204 "subsection not found");
1205 return NULL;
1206 }
1207 return PyInt_FromSsize_t(result);
1208}
1209
1210
1211/* Matches the end (direction >= 0) or start (direction < 0) of self
1212 * against substr, using the start and end arguments. Returns
1213 * -1 on error, 0 if not found and 1 if found.
1214 */
1215Py_LOCAL(int)
1216_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1217 Py_ssize_t end, int direction)
1218{
1219 Py_ssize_t len = PyBytes_GET_SIZE(self);
1220 Py_ssize_t slen;
1221 const char* sub;
1222 const char* str;
1223
1224 if (PyBytes_Check(substr)) {
1225 sub = PyBytes_AS_STRING(substr);
1226 slen = PyBytes_GET_SIZE(substr);
1227 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001228 /* XXX --> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001229 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
1230 return -1;
1231 str = PyBytes_AS_STRING(self);
1232
1233 _adjust_indices(&start, &end, len);
1234
1235 if (direction < 0) {
1236 /* startswith */
1237 if (start+slen > len)
1238 return 0;
1239 } else {
1240 /* endswith */
1241 if (end-start < slen || start > len)
1242 return 0;
1243
1244 if (end-slen > start)
1245 start = end - slen;
1246 }
1247 if (end-start >= slen)
1248 return ! memcmp(str+start, sub, slen);
1249 return 0;
1250}
1251
1252
1253PyDoc_STRVAR(startswith__doc__,
1254"B.startswith(prefix[, start[, end]]) -> bool\n\
1255\n\
1256Return True if B starts with the specified prefix, False otherwise.\n\
1257With optional start, test B beginning at that position.\n\
1258With optional end, stop comparing B at that position.\n\
1259prefix can also be a tuple of strings to try.");
1260
1261static PyObject *
1262bytes_startswith(PyBytesObject *self, PyObject *args)
1263{
1264 Py_ssize_t start = 0;
1265 Py_ssize_t end = PY_SSIZE_T_MAX;
1266 PyObject *subobj;
1267 int result;
1268
1269 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1270 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1271 return NULL;
1272 if (PyTuple_Check(subobj)) {
1273 Py_ssize_t i;
1274 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1275 result = _bytes_tailmatch(self,
1276 PyTuple_GET_ITEM(subobj, i),
1277 start, end, -1);
1278 if (result == -1)
1279 return NULL;
1280 else if (result) {
1281 Py_RETURN_TRUE;
1282 }
1283 }
1284 Py_RETURN_FALSE;
1285 }
1286 result = _bytes_tailmatch(self, subobj, start, end, -1);
1287 if (result == -1)
1288 return NULL;
1289 else
1290 return PyBool_FromLong(result);
1291}
1292
1293PyDoc_STRVAR(endswith__doc__,
1294"B.endswith(suffix[, start[, end]]) -> bool\n\
1295\n\
1296Return True if B ends with the specified suffix, False otherwise.\n\
1297With optional start, test B beginning at that position.\n\
1298With optional end, stop comparing B at that position.\n\
1299suffix can also be a tuple of strings to try.");
1300
1301static PyObject *
1302bytes_endswith(PyBytesObject *self, PyObject *args)
1303{
1304 Py_ssize_t start = 0;
1305 Py_ssize_t end = PY_SSIZE_T_MAX;
1306 PyObject *subobj;
1307 int result;
1308
1309 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1310 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1311 return NULL;
1312 if (PyTuple_Check(subobj)) {
1313 Py_ssize_t i;
1314 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1315 result = _bytes_tailmatch(self,
1316 PyTuple_GET_ITEM(subobj, i),
1317 start, end, +1);
1318 if (result == -1)
1319 return NULL;
1320 else if (result) {
1321 Py_RETURN_TRUE;
1322 }
1323 }
1324 Py_RETURN_FALSE;
1325 }
1326 result = _bytes_tailmatch(self, subobj, start, end, +1);
1327 if (result == -1)
1328 return NULL;
1329 else
1330 return PyBool_FromLong(result);
1331}
1332
1333
1334
1335PyDoc_STRVAR(translate__doc__,
1336"B.translate(table [,deletechars]) -> bytes\n\
1337\n\
1338Return a copy of the bytes B, where all characters occurring\n\
1339in the optional argument deletechars are removed, and the\n\
1340remaining characters have been mapped through the given\n\
1341translation table, which must be a bytes of length 256.");
1342
1343static PyObject *
1344bytes_translate(PyBytesObject *self, PyObject *args)
1345{
1346 register char *input, *output;
1347 register const char *table;
1348 register Py_ssize_t i, c, changed = 0;
1349 PyObject *input_obj = (PyObject*)self;
1350 const char *table1, *output_start, *del_table=NULL;
1351 Py_ssize_t inlen, tablen, dellen = 0;
1352 PyObject *result;
1353 int trans_table[256];
1354 PyObject *tableobj, *delobj = NULL;
1355
1356 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1357 &tableobj, &delobj))
1358 return NULL;
1359
1360 if (PyBytes_Check(tableobj)) {
1361 table1 = PyBytes_AS_STRING(tableobj);
1362 tablen = PyBytes_GET_SIZE(tableobj);
1363 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001364 /* XXX -> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001365 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1366 return NULL;
1367
1368 if (tablen != 256) {
1369 PyErr_SetString(PyExc_ValueError,
1370 "translation table must be 256 characters long");
1371 return NULL;
1372 }
1373
1374 if (delobj != NULL) {
1375 if (PyBytes_Check(delobj)) {
1376 del_table = PyBytes_AS_STRING(delobj);
1377 dellen = PyBytes_GET_SIZE(delobj);
1378 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001379 /* XXX -> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001380 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1381 return NULL;
1382 }
1383 else {
1384 del_table = NULL;
1385 dellen = 0;
1386 }
1387
1388 table = table1;
1389 inlen = PyBytes_GET_SIZE(input_obj);
1390 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1391 if (result == NULL)
1392 return NULL;
1393 output_start = output = PyBytes_AsString(result);
1394 input = PyBytes_AS_STRING(input_obj);
1395
1396 if (dellen == 0) {
1397 /* If no deletions are required, use faster code */
1398 for (i = inlen; --i >= 0; ) {
1399 c = Py_CHARMASK(*input++);
1400 if (Py_CHARMASK((*output++ = table[c])) != c)
1401 changed = 1;
1402 }
1403 if (changed || !PyBytes_CheckExact(input_obj))
1404 return result;
1405 Py_DECREF(result);
1406 Py_INCREF(input_obj);
1407 return input_obj;
1408 }
1409
1410 for (i = 0; i < 256; i++)
1411 trans_table[i] = Py_CHARMASK(table[i]);
1412
1413 for (i = 0; i < dellen; i++)
1414 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1415
1416 for (i = inlen; --i >= 0; ) {
1417 c = Py_CHARMASK(*input++);
1418 if (trans_table[c] != -1)
1419 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1420 continue;
1421 changed = 1;
1422 }
1423 if (!changed && PyBytes_CheckExact(input_obj)) {
1424 Py_DECREF(result);
1425 Py_INCREF(input_obj);
1426 return input_obj;
1427 }
1428 /* Fix the size of the resulting string */
1429 if (inlen > 0)
1430 PyBytes_Resize(result, output - output_start);
1431 return result;
1432}
1433
1434
/* Search directions for findstring()/countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c in the first target_len
   bytes of target, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True if the length bytes of target starting at offset equal pattern.
   Compares the first and last byte before calling memcmp() on the rest.
   Arguments are evaluated more than once.
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1448
1449
1450/* Bytes ops must return a string. */
1451/* If the object is subclass of bytes, create a copy */
1452Py_LOCAL(PyBytesObject *)
1453return_self(PyBytesObject *self)
1454{
1455 if (PyBytes_CheckExact(self)) {
1456 Py_INCREF(self);
1457 return (PyBytesObject *)self;
1458 }
1459 return (PyBytesObject *)PyBytes_FromStringAndSize(
1460 PyBytes_AS_STRING(self),
1461 PyBytes_GET_SIZE(self));
1462}
1463
1464Py_LOCAL_INLINE(Py_ssize_t)
1465countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1466{
1467 Py_ssize_t count=0;
1468 const char *start=target;
1469 const char *end=target+target_len;
1470
1471 while ( (start=findchar(start, end-start, c)) != NULL ) {
1472 count++;
1473 if (count >= maxcount)
1474 break;
1475 start += 1;
1476 }
1477 return count;
1478}
1479
1480Py_LOCAL(Py_ssize_t)
1481findstring(const char *target, Py_ssize_t target_len,
1482 const char *pattern, Py_ssize_t pattern_len,
1483 Py_ssize_t start,
1484 Py_ssize_t end,
1485 int direction)
1486{
1487 if (start < 0) {
1488 start += target_len;
1489 if (start < 0)
1490 start = 0;
1491 }
1492 if (end > target_len) {
1493 end = target_len;
1494 } else if (end < 0) {
1495 end += target_len;
1496 if (end < 0)
1497 end = 0;
1498 }
1499
1500 /* zero-length substrings always match at the first attempt */
1501 if (pattern_len == 0)
1502 return (direction > 0) ? start : end;
1503
1504 end -= pattern_len;
1505
1506 if (direction < 0) {
1507 for (; end >= start; end--)
1508 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1509 return end;
1510 } else {
1511 for (; start <= end; start++)
1512 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1513 return start;
1514 }
1515 return -1;
1516}
1517
1518Py_LOCAL_INLINE(Py_ssize_t)
1519countstring(const char *target, Py_ssize_t target_len,
1520 const char *pattern, Py_ssize_t pattern_len,
1521 Py_ssize_t start,
1522 Py_ssize_t end,
1523 int direction, Py_ssize_t maxcount)
1524{
1525 Py_ssize_t count=0;
1526
1527 if (start < 0) {
1528 start += target_len;
1529 if (start < 0)
1530 start = 0;
1531 }
1532 if (end > target_len) {
1533 end = target_len;
1534 } else if (end < 0) {
1535 end += target_len;
1536 if (end < 0)
1537 end = 0;
1538 }
1539
1540 /* zero-length substrings match everywhere */
1541 if (pattern_len == 0 || maxcount == 0) {
1542 if (target_len+1 < maxcount)
1543 return target_len+1;
1544 return maxcount;
1545 }
1546
1547 end -= pattern_len;
1548 if (direction < 0) {
1549 for (; (end >= start); end--)
1550 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1551 count++;
1552 if (--maxcount <= 0) break;
1553 end -= pattern_len-1;
1554 }
1555 } else {
1556 for (; (start <= end); start++)
1557 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1558 count++;
1559 if (--maxcount <= 0)
1560 break;
1561 start += pattern_len-1;
1562 }
1563 }
1564 return count;
1565}
1566
1567
1568/* Algorithms for different cases of string replacement */
1569
1570/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1571Py_LOCAL(PyBytesObject *)
1572replace_interleave(PyBytesObject *self,
1573 const char *to_s, Py_ssize_t to_len,
1574 Py_ssize_t maxcount)
1575{
1576 char *self_s, *result_s;
1577 Py_ssize_t self_len, result_len;
1578 Py_ssize_t count, i, product;
1579 PyBytesObject *result;
1580
1581 self_len = PyBytes_GET_SIZE(self);
1582
1583 /* 1 at the end plus 1 after every character */
1584 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001585 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001586 count = maxcount;
1587
1588 /* Check for overflow */
1589 /* result_len = count * to_len + self_len; */
1590 product = count * to_len;
1591 if (product / to_len != count) {
1592 PyErr_SetString(PyExc_OverflowError,
1593 "replace string is too long");
1594 return NULL;
1595 }
1596 result_len = product + self_len;
1597 if (result_len < 0) {
1598 PyErr_SetString(PyExc_OverflowError,
1599 "replace string is too long");
1600 return NULL;
1601 }
1602
1603 if (! (result = (PyBytesObject *)
1604 PyBytes_FromStringAndSize(NULL, result_len)) )
1605 return NULL;
1606
1607 self_s = PyBytes_AS_STRING(self);
1608 result_s = PyBytes_AS_STRING(result);
1609
1610 /* TODO: special case single character, which doesn't need memcpy */
1611
1612 /* Lay the first one down (guaranteed this will occur) */
1613 Py_MEMCPY(result_s, to_s, to_len);
1614 result_s += to_len;
1615 count -= 1;
1616
1617 for (i=0; i<count; i++) {
1618 *result_s++ = *self_s++;
1619 Py_MEMCPY(result_s, to_s, to_len);
1620 result_s += to_len;
1621 }
1622
1623 /* Copy the rest of the original string */
1624 Py_MEMCPY(result_s, self_s, self_len-i);
1625
1626 return result;
1627}
1628
1629/* Special case for deleting a single character */
1630/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1631Py_LOCAL(PyBytesObject *)
1632replace_delete_single_character(PyBytesObject *self,
1633 char from_c, Py_ssize_t maxcount)
1634{
1635 char *self_s, *result_s;
1636 char *start, *next, *end;
1637 Py_ssize_t self_len, result_len;
1638 Py_ssize_t count;
1639 PyBytesObject *result;
1640
1641 self_len = PyBytes_GET_SIZE(self);
1642 self_s = PyBytes_AS_STRING(self);
1643
1644 count = countchar(self_s, self_len, from_c, maxcount);
1645 if (count == 0) {
1646 return return_self(self);
1647 }
1648
1649 result_len = self_len - count; /* from_len == 1 */
1650 assert(result_len>=0);
1651
1652 if ( (result = (PyBytesObject *)
1653 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1654 return NULL;
1655 result_s = PyBytes_AS_STRING(result);
1656
1657 start = self_s;
1658 end = self_s + self_len;
1659 while (count-- > 0) {
1660 next = findchar(start, end-start, from_c);
1661 if (next == NULL)
1662 break;
1663 Py_MEMCPY(result_s, start, next-start);
1664 result_s += (next-start);
1665 start = next+1;
1666 }
1667 Py_MEMCPY(result_s, start, end-start);
1668
1669 return result;
1670}
1671
1672/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1673
1674Py_LOCAL(PyBytesObject *)
1675replace_delete_substring(PyBytesObject *self,
1676 const char *from_s, Py_ssize_t from_len,
1677 Py_ssize_t maxcount)
1678{
1679 char *self_s, *result_s;
1680 char *start, *next, *end;
1681 Py_ssize_t self_len, result_len;
1682 Py_ssize_t count, offset;
1683 PyBytesObject *result;
1684
1685 self_len = PyBytes_GET_SIZE(self);
1686 self_s = PyBytes_AS_STRING(self);
1687
1688 count = countstring(self_s, self_len,
1689 from_s, from_len,
1690 0, self_len, 1,
1691 maxcount);
1692
1693 if (count == 0) {
1694 /* no matches */
1695 return return_self(self);
1696 }
1697
1698 result_len = self_len - (count * from_len);
1699 assert (result_len>=0);
1700
1701 if ( (result = (PyBytesObject *)
1702 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1703 return NULL;
1704
1705 result_s = PyBytes_AS_STRING(result);
1706
1707 start = self_s;
1708 end = self_s + self_len;
1709 while (count-- > 0) {
1710 offset = findstring(start, end-start,
1711 from_s, from_len,
1712 0, end-start, FORWARD);
1713 if (offset == -1)
1714 break;
1715 next = start + offset;
1716
1717 Py_MEMCPY(result_s, start, next-start);
1718
1719 result_s += (next-start);
1720 start = next+from_len;
1721 }
1722 Py_MEMCPY(result_s, start, end-start);
1723 return result;
1724}
1725
1726/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1727Py_LOCAL(PyBytesObject *)
1728replace_single_character_in_place(PyBytesObject *self,
1729 char from_c, char to_c,
1730 Py_ssize_t maxcount)
1731{
1732 char *self_s, *result_s, *start, *end, *next;
1733 Py_ssize_t self_len;
1734 PyBytesObject *result;
1735
1736 /* The result string will be the same size */
1737 self_s = PyBytes_AS_STRING(self);
1738 self_len = PyBytes_GET_SIZE(self);
1739
1740 next = findchar(self_s, self_len, from_c);
1741
1742 if (next == NULL) {
1743 /* No matches; return the original bytes */
1744 return return_self(self);
1745 }
1746
1747 /* Need to make a new bytes */
1748 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1749 if (result == NULL)
1750 return NULL;
1751 result_s = PyBytes_AS_STRING(result);
1752 Py_MEMCPY(result_s, self_s, self_len);
1753
1754 /* change everything in-place, starting with this one */
1755 start = result_s + (next-self_s);
1756 *start = to_c;
1757 start++;
1758 end = result_s + self_len;
1759
1760 while (--maxcount > 0) {
1761 next = findchar(start, end-start, from_c);
1762 if (next == NULL)
1763 break;
1764 *next = to_c;
1765 start = next+1;
1766 }
1767
1768 return result;
1769}
1770
1771/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1772Py_LOCAL(PyBytesObject *)
1773replace_substring_in_place(PyBytesObject *self,
1774 const char *from_s, Py_ssize_t from_len,
1775 const char *to_s, Py_ssize_t to_len,
1776 Py_ssize_t maxcount)
1777{
1778 char *result_s, *start, *end;
1779 char *self_s;
1780 Py_ssize_t self_len, offset;
1781 PyBytesObject *result;
1782
1783 /* The result bytes will be the same size */
1784
1785 self_s = PyBytes_AS_STRING(self);
1786 self_len = PyBytes_GET_SIZE(self);
1787
1788 offset = findstring(self_s, self_len,
1789 from_s, from_len,
1790 0, self_len, FORWARD);
1791 if (offset == -1) {
1792 /* No matches; return the original bytes */
1793 return return_self(self);
1794 }
1795
1796 /* Need to make a new bytes */
1797 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1798 if (result == NULL)
1799 return NULL;
1800 result_s = PyBytes_AS_STRING(result);
1801 Py_MEMCPY(result_s, self_s, self_len);
1802
1803 /* change everything in-place, starting with this one */
1804 start = result_s + offset;
1805 Py_MEMCPY(start, to_s, from_len);
1806 start += from_len;
1807 end = result_s + self_len;
1808
1809 while ( --maxcount > 0) {
1810 offset = findstring(start, end-start,
1811 from_s, from_len,
1812 0, end-start, FORWARD);
1813 if (offset==-1)
1814 break;
1815 Py_MEMCPY(start+offset, to_s, from_len);
1816 start += offset+from_len;
1817 }
1818
1819 return result;
1820}
1821
1822/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1823Py_LOCAL(PyBytesObject *)
1824replace_single_character(PyBytesObject *self,
1825 char from_c,
1826 const char *to_s, Py_ssize_t to_len,
1827 Py_ssize_t maxcount)
1828{
1829 char *self_s, *result_s;
1830 char *start, *next, *end;
1831 Py_ssize_t self_len, result_len;
1832 Py_ssize_t count, product;
1833 PyBytesObject *result;
1834
1835 self_s = PyBytes_AS_STRING(self);
1836 self_len = PyBytes_GET_SIZE(self);
1837
1838 count = countchar(self_s, self_len, from_c, maxcount);
1839 if (count == 0) {
1840 /* no matches, return unchanged */
1841 return return_self(self);
1842 }
1843
1844 /* use the difference between current and new, hence the "-1" */
1845 /* result_len = self_len + count * (to_len-1) */
1846 product = count * (to_len-1);
1847 if (product / (to_len-1) != count) {
1848 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1849 return NULL;
1850 }
1851 result_len = self_len + product;
1852 if (result_len < 0) {
1853 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1854 return NULL;
1855 }
1856
1857 if ( (result = (PyBytesObject *)
1858 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1859 return NULL;
1860 result_s = PyBytes_AS_STRING(result);
1861
1862 start = self_s;
1863 end = self_s + self_len;
1864 while (count-- > 0) {
1865 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001866 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001867 break;
1868
1869 if (next == start) {
1870 /* replace with the 'to' */
1871 Py_MEMCPY(result_s, to_s, to_len);
1872 result_s += to_len;
1873 start += 1;
1874 } else {
1875 /* copy the unchanged old then the 'to' */
1876 Py_MEMCPY(result_s, start, next-start);
1877 result_s += (next-start);
1878 Py_MEMCPY(result_s, to_s, to_len);
1879 result_s += to_len;
1880 start = next+1;
1881 }
1882 }
1883 /* Copy the remainder of the remaining bytes */
1884 Py_MEMCPY(result_s, start, end-start);
1885
1886 return result;
1887}
1888
1889/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1890Py_LOCAL(PyBytesObject *)
1891replace_substring(PyBytesObject *self,
1892 const char *from_s, Py_ssize_t from_len,
1893 const char *to_s, Py_ssize_t to_len,
1894 Py_ssize_t maxcount)
1895{
1896 char *self_s, *result_s;
1897 char *start, *next, *end;
1898 Py_ssize_t self_len, result_len;
1899 Py_ssize_t count, offset, product;
1900 PyBytesObject *result;
1901
1902 self_s = PyBytes_AS_STRING(self);
1903 self_len = PyBytes_GET_SIZE(self);
1904
1905 count = countstring(self_s, self_len,
1906 from_s, from_len,
1907 0, self_len, FORWARD, maxcount);
1908 if (count == 0) {
1909 /* no matches, return unchanged */
1910 return return_self(self);
1911 }
1912
1913 /* Check for overflow */
1914 /* result_len = self_len + count * (to_len-from_len) */
1915 product = count * (to_len-from_len);
1916 if (product / (to_len-from_len) != count) {
1917 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1918 return NULL;
1919 }
1920 result_len = self_len + product;
1921 if (result_len < 0) {
1922 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1923 return NULL;
1924 }
1925
1926 if ( (result = (PyBytesObject *)
1927 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1928 return NULL;
1929 result_s = PyBytes_AS_STRING(result);
1930
1931 start = self_s;
1932 end = self_s + self_len;
1933 while (count-- > 0) {
1934 offset = findstring(start, end-start,
1935 from_s, from_len,
1936 0, end-start, FORWARD);
1937 if (offset == -1)
1938 break;
1939 next = start+offset;
1940 if (next == start) {
1941 /* replace with the 'to' */
1942 Py_MEMCPY(result_s, to_s, to_len);
1943 result_s += to_len;
1944 start += from_len;
1945 } else {
1946 /* copy the unchanged old then the 'to' */
1947 Py_MEMCPY(result_s, start, next-start);
1948 result_s += (next-start);
1949 Py_MEMCPY(result_s, to_s, to_len);
1950 result_s += to_len;
1951 start = next+from_len;
1952 }
1953 }
1954 /* Copy the remainder of the remaining bytes */
1955 Py_MEMCPY(result_s, start, end-start);
1956
1957 return result;
1958}
1959
1960
1961Py_LOCAL(PyBytesObject *)
1962replace(PyBytesObject *self,
1963 const char *from_s, Py_ssize_t from_len,
1964 const char *to_s, Py_ssize_t to_len,
1965 Py_ssize_t maxcount)
1966{
1967 if (maxcount < 0) {
1968 maxcount = PY_SSIZE_T_MAX;
1969 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1970 /* nothing to do; return the original bytes */
1971 return return_self(self);
1972 }
1973
1974 if (maxcount == 0 ||
1975 (from_len == 0 && to_len == 0)) {
1976 /* nothing to do; return the original bytes */
1977 return return_self(self);
1978 }
1979
1980 /* Handle zero-length special cases */
1981
1982 if (from_len == 0) {
1983 /* insert the 'to' bytes everywhere. */
1984 /* >>> "Python".replace("", ".") */
1985 /* '.P.y.t.h.o.n.' */
1986 return replace_interleave(self, to_s, to_len, maxcount);
1987 }
1988
1989 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1990 /* point for an empty self bytes to generate a non-empty bytes */
1991 /* Special case so the remaining code always gets a non-empty bytes */
1992 if (PyBytes_GET_SIZE(self) == 0) {
1993 return return_self(self);
1994 }
1995
1996 if (to_len == 0) {
1997 /* delete all occurances of 'from' bytes */
1998 if (from_len == 1) {
1999 return replace_delete_single_character(
2000 self, from_s[0], maxcount);
2001 } else {
2002 return replace_delete_substring(self, from_s, from_len, maxcount);
2003 }
2004 }
2005
2006 /* Handle special case where both bytes have the same length */
2007
2008 if (from_len == to_len) {
2009 if (from_len == 1) {
2010 return replace_single_character_in_place(
2011 self,
2012 from_s[0],
2013 to_s[0],
2014 maxcount);
2015 } else {
2016 return replace_substring_in_place(
2017 self, from_s, from_len, to_s, to_len, maxcount);
2018 }
2019 }
2020
2021 /* Otherwise use the more generic algorithms */
2022 if (from_len == 1) {
2023 return replace_single_character(self, from_s[0],
2024 to_s, to_len, maxcount);
2025 } else {
2026 /* len('from')>=2, len('to')>=1 */
2027 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2028 }
2029}
2030
2031PyDoc_STRVAR(replace__doc__,
2032"B.replace (old, new[, count]) -> bytes\n\
2033\n\
2034Return a copy of bytes B with all occurrences of subsection\n\
2035old replaced by new. If the optional argument count is\n\
2036given, only the first count occurrences are replaced.");
2037
2038static PyObject *
2039bytes_replace(PyBytesObject *self, PyObject *args)
2040{
2041 Py_ssize_t count = -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002042 PyObject *from, *to, *res;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002043 PyBuffer vfrom, vto;
Neal Norwitz6968b052007-02-27 19:02:19 +00002044
2045 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2046 return NULL;
2047
Guido van Rossuma74184e2007-08-29 04:05:57 +00002048 if (_getbuffer(from, &vfrom) < 0)
2049 return NULL;
2050 if (_getbuffer(to, &vto) < 0) {
2051 PyObject_ReleaseBuffer(from, &vfrom);
2052 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002053 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002054
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002055 res = (PyObject *)replace((PyBytesObject *) self,
Guido van Rossuma74184e2007-08-29 04:05:57 +00002056 vfrom.buf, vfrom.len,
2057 vto.buf, vto.len, count);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002058
Guido van Rossuma74184e2007-08-29 04:05:57 +00002059 PyObject_ReleaseBuffer(from, &vfrom);
2060 PyObject_ReleaseBuffer(to, &vto);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002061 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00002062}
2063
2064
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesised so that an expression argument (e.g. a
   conditional) expands with the intended precedence. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* The statement macros below expect the enclosing function to provide the
   locals `str` and `list` (and `count` for SPLIT_ADD / FIX_PREALLOC_SIZE)
   and an `onError` label to jump to on failure. */

/* Build a bytes object from data[left:right] and append it to `list`. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Build a bytes object from data[left:right] and store it as item `count`:
   directly into the slot while count < MAX_PREALLOC, by appending
   afterwards.  Increments `count`. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Neal Norwitz6968b052007-02-27 19:02:19 +00002109
2110
2111Py_LOCAL_INLINE(PyObject *)
2112split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2113{
2114 register Py_ssize_t i, j, count=0;
2115 PyObject *str;
2116 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2117
2118 if (list == NULL)
2119 return NULL;
2120
2121 i = j = 0;
2122 while ((j < len) && (maxcount-- > 0)) {
2123 for(; j<len; j++) {
2124 /* I found that using memchr makes no difference */
2125 if (s[j] == ch) {
2126 SPLIT_ADD(s, i, j);
2127 i = j = j + 1;
2128 break;
2129 }
2130 }
2131 }
2132 if (i <= len) {
2133 SPLIT_ADD(s, i, len);
2134 }
2135 FIX_PREALLOC_SIZE(list);
2136 return list;
2137
2138 onError:
2139 Py_DECREF(list);
2140 return NULL;
2141}
2142
2143PyDoc_STRVAR(split__doc__,
2144"B.split(sep [,maxsplit]) -> list of bytes\n\
2145\n\
2146Return a list of the bytes in the string B, using sep as the\n\
2147delimiter. If maxsplit is given, at most maxsplit\n\
2148splits are done.");
2149
2150static PyObject *
2151bytes_split(PyBytesObject *self, PyObject *args)
2152{
2153 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2154 Py_ssize_t maxsplit = -1, count=0;
2155 const char *s = PyBytes_AS_STRING(self), *sub;
2156 PyObject *list, *str, *subobj;
2157#ifdef USE_FAST
2158 Py_ssize_t pos;
2159#endif
2160
2161 if (!PyArg_ParseTuple(args, "O|n:split", &subobj, &maxsplit))
2162 return NULL;
2163 if (maxsplit < 0)
2164 maxsplit = PY_SSIZE_T_MAX;
2165 if (PyBytes_Check(subobj)) {
2166 sub = PyBytes_AS_STRING(subobj);
2167 n = PyBytes_GET_SIZE(subobj);
2168 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002169 /* XXX -> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00002170 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2171 return NULL;
2172
2173 if (n == 0) {
2174 PyErr_SetString(PyExc_ValueError, "empty separator");
2175 return NULL;
2176 }
2177 else if (n == 1)
2178 return split_char(s, len, sub[0], maxsplit);
2179
2180 list = PyList_New(PREALLOC_SIZE(maxsplit));
2181 if (list == NULL)
2182 return NULL;
2183
2184#ifdef USE_FAST
2185 i = j = 0;
2186 while (maxsplit-- > 0) {
2187 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2188 if (pos < 0)
2189 break;
2190 j = i+pos;
2191 SPLIT_ADD(s, i, j);
2192 i = j + n;
2193 }
2194#else
2195 i = j = 0;
2196 while ((j+n <= len) && (maxsplit-- > 0)) {
2197 for (; j+n <= len; j++) {
2198 if (Py_STRING_MATCH(s, j, sub, n)) {
2199 SPLIT_ADD(s, i, j);
2200 i = j = j + n;
2201 break;
2202 }
2203 }
2204 }
2205#endif
2206 SPLIT_ADD(s, i, len);
2207 FIX_PREALLOC_SIZE(list);
2208 return list;
2209
2210 onError:
2211 Py_DECREF(list);
2212 return NULL;
2213}
2214
2215PyDoc_STRVAR(partition__doc__,
2216"B.partition(sep) -> (head, sep, tail)\n\
2217\n\
2218Searches for the separator sep in B, and returns the part before it,\n\
2219the separator itself, and the part after it. If the separator is not\n\
2220found, returns B and two empty bytes.");
2221
2222static PyObject *
2223bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2224{
2225 PyObject *bytesep, *result;
2226
2227 bytesep = PyBytes_FromObject(sep_obj);
2228 if (! bytesep)
2229 return NULL;
2230
2231 result = stringlib_partition(
2232 (PyObject*) self,
2233 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002234 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002235 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2236 );
2237
2238 Py_DECREF(bytesep);
2239 return result;
2240}
2241
2242PyDoc_STRVAR(rpartition__doc__,
2243"B.rpartition(sep) -> (tail, sep, head)\n\
2244\n\
2245Searches for the separator sep in B, starting at the end of B, and returns\n\
2246the part before it, the separator itself, and the part after it. If the\n\
2247separator is not found, returns two empty bytes and B.");
2248
2249static PyObject *
2250bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2251{
2252 PyObject *bytesep, *result;
2253
2254 bytesep = PyBytes_FromObject(sep_obj);
2255 if (! bytesep)
2256 return NULL;
2257
2258 result = stringlib_rpartition(
2259 (PyObject*) self,
2260 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002261 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002262 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2263 );
2264
2265 Py_DECREF(bytesep);
2266 return result;
2267}
2268
2269Py_LOCAL_INLINE(PyObject *)
2270rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2271{
2272 register Py_ssize_t i, j, count=0;
2273 PyObject *str;
2274 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2275
2276 if (list == NULL)
2277 return NULL;
2278
2279 i = j = len - 1;
2280 while ((i >= 0) && (maxcount-- > 0)) {
2281 for (; i >= 0; i--) {
2282 if (s[i] == ch) {
2283 SPLIT_ADD(s, i + 1, j + 1);
2284 j = i = i - 1;
2285 break;
2286 }
2287 }
2288 }
2289 if (j >= -1) {
2290 SPLIT_ADD(s, 0, j + 1);
2291 }
2292 FIX_PREALLOC_SIZE(list);
2293 if (PyList_Reverse(list) < 0)
2294 goto onError;
2295
2296 return list;
2297
2298 onError:
2299 Py_DECREF(list);
2300 return NULL;
2301}
2302
2303PyDoc_STRVAR(rsplit__doc__,
2304"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2305\n\
2306Return a list of the sections in the byte B, using sep as the\n\
2307delimiter, starting at the end of the bytes and working\n\
2308to the front. If maxsplit is given, at most maxsplit splits are\n\
2309done.");
2310
2311static PyObject *
2312bytes_rsplit(PyBytesObject *self, PyObject *args)
2313{
2314 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2315 Py_ssize_t maxsplit = -1, count=0;
2316 const char *s = PyBytes_AS_STRING(self), *sub;
2317 PyObject *list, *str, *subobj;
2318
2319 if (!PyArg_ParseTuple(args, "O|n:rsplit", &subobj, &maxsplit))
2320 return NULL;
2321 if (maxsplit < 0)
2322 maxsplit = PY_SSIZE_T_MAX;
2323 if (PyBytes_Check(subobj)) {
2324 sub = PyBytes_AS_STRING(subobj);
2325 n = PyBytes_GET_SIZE(subobj);
2326 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002327 /* XXX -> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00002328 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2329 return NULL;
2330
2331 if (n == 0) {
2332 PyErr_SetString(PyExc_ValueError, "empty separator");
2333 return NULL;
2334 }
2335 else if (n == 1)
2336 return rsplit_char(s, len, sub[0], maxsplit);
2337
2338 list = PyList_New(PREALLOC_SIZE(maxsplit));
2339 if (list == NULL)
2340 return NULL;
2341
2342 j = len;
2343 i = j - n;
2344
2345 while ( (i >= 0) && (maxsplit-- > 0) ) {
2346 for (; i>=0; i--) {
2347 if (Py_STRING_MATCH(s, i, sub, n)) {
2348 SPLIT_ADD(s, i + n, j);
2349 j = i;
2350 i -= n;
2351 break;
2352 }
2353 }
2354 }
2355 SPLIT_ADD(s, 0, j);
2356 FIX_PREALLOC_SIZE(list);
2357 if (PyList_Reverse(list) < 0)
2358 goto onError;
2359 return list;
2360
2361onError:
2362 Py_DECREF(list);
2363 return NULL;
2364}
2365
2366PyDoc_STRVAR(extend__doc__,
2367"B.extend(iterable int) -> None\n\
2368\n\
2369Append all the elements from the iterator or sequence to the\n\
2370end of the bytes.");
2371static PyObject *
2372bytes_extend(PyBytesObject *self, PyObject *arg)
2373{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002374 if (bytes_setslice(self, Py_Size(self), Py_Size(self), arg) == -1)
Neal Norwitz6968b052007-02-27 19:02:19 +00002375 return NULL;
2376 Py_RETURN_NONE;
2377}
2378
2379
2380PyDoc_STRVAR(reverse__doc__,
2381"B.reverse() -> None\n\
2382\n\
2383Reverse the order of the values in bytes in place.");
2384static PyObject *
2385bytes_reverse(PyBytesObject *self, PyObject *unused)
2386{
2387 char swap, *head, *tail;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002388 Py_ssize_t i, j, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002389
2390 j = n / 2;
2391 head = self->ob_bytes;
2392 tail = head + n - 1;
2393 for (i = 0; i < j; i++) {
2394 swap = *head;
2395 *head++ = *tail;
2396 *tail-- = swap;
2397 }
2398
2399 Py_RETURN_NONE;
2400}
2401
2402PyDoc_STRVAR(insert__doc__,
2403"B.insert(index, int) -> None\n\
2404\n\
2405Insert a single item into the bytes before the given index.");
2406static PyObject *
2407bytes_insert(PyBytesObject *self, PyObject *args)
2408{
2409 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002410 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002411
2412 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2413 return NULL;
2414
2415 if (n == PY_SSIZE_T_MAX) {
2416 PyErr_SetString(PyExc_OverflowError,
2417 "cannot add more objects to bytes");
2418 return NULL;
2419 }
2420 if (value < 0 || value >= 256) {
2421 PyErr_SetString(PyExc_ValueError,
2422 "byte must be in range(0, 256)");
2423 return NULL;
2424 }
2425 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2426 return NULL;
2427
2428 if (where < 0) {
2429 where += n;
2430 if (where < 0)
2431 where = 0;
2432 }
2433 if (where > n)
2434 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002435 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002436 self->ob_bytes[where] = value;
2437
2438 Py_RETURN_NONE;
2439}
2440
2441PyDoc_STRVAR(append__doc__,
2442"B.append(int) -> None\n\
2443\n\
2444Append a single item to the end of the bytes.");
2445static PyObject *
2446bytes_append(PyBytesObject *self, PyObject *arg)
2447{
2448 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002449 Py_ssize_t n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002450
2451 if (! _getbytevalue(arg, &value))
2452 return NULL;
2453 if (n == PY_SSIZE_T_MAX) {
2454 PyErr_SetString(PyExc_OverflowError,
2455 "cannot add more objects to bytes");
2456 return NULL;
2457 }
2458 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2459 return NULL;
2460
2461 self->ob_bytes[n] = value;
2462
2463 Py_RETURN_NONE;
2464}
2465
2466PyDoc_STRVAR(pop__doc__,
2467"B.pop([index]) -> int\n\
2468\n\
2469Remove and return a single item from the bytes. If no index\n\
2470argument is give, will pop the last value.");
2471static PyObject *
2472bytes_pop(PyBytesObject *self, PyObject *args)
2473{
2474 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002475 Py_ssize_t where = -1, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002476
2477 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2478 return NULL;
2479
2480 if (n == 0) {
2481 PyErr_SetString(PyExc_OverflowError,
2482 "cannot pop an empty bytes");
2483 return NULL;
2484 }
2485 if (where < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002486 where += Py_Size(self);
2487 if (where < 0 || where >= Py_Size(self)) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002488 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2489 return NULL;
2490 }
2491
2492 value = self->ob_bytes[where];
2493 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2494 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2495 return NULL;
2496
2497 return PyInt_FromLong(value);
2498}
2499
2500PyDoc_STRVAR(remove__doc__,
2501"B.remove(int) -> None\n\
2502\n\
2503Remove the first occurance of a value in bytes");
2504static PyObject *
2505bytes_remove(PyBytesObject *self, PyObject *arg)
2506{
2507 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002508 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002509
2510 if (! _getbytevalue(arg, &value))
2511 return NULL;
2512
2513 for (where = 0; where < n; where++) {
2514 if (self->ob_bytes[where] == value)
2515 break;
2516 }
2517 if (where == n) {
2518 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2519 return NULL;
2520 }
2521
2522 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2523 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2524 return NULL;
2525
2526 Py_RETURN_NONE;
2527}
2528
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002529/* XXX These two helpers could be optimized if argsize == 1 */
2530
Neal Norwitz2bad9702007-08-27 06:19:22 +00002531static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002532lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2533 void *argptr, Py_ssize_t argsize)
2534{
2535 Py_ssize_t i = 0;
2536 while (i < mysize && memchr(argptr, myptr[i], argsize))
2537 i++;
2538 return i;
2539}
2540
Neal Norwitz2bad9702007-08-27 06:19:22 +00002541static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002542rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2543 void *argptr, Py_ssize_t argsize)
2544{
2545 Py_ssize_t i = mysize - 1;
2546 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2547 i--;
2548 return i + 1;
2549}
2550
2551PyDoc_STRVAR(strip__doc__,
2552"B.strip(bytes) -> bytes\n\
2553\n\
2554Strip leading and trailing bytes contained in the argument.");
2555static PyObject *
2556bytes_strip(PyBytesObject *self, PyObject *arg)
2557{
2558 Py_ssize_t left, right, mysize, argsize;
2559 void *myptr, *argptr;
2560 if (arg == NULL || !PyBytes_Check(arg)) {
2561 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2562 return NULL;
2563 }
2564 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002565 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002566 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002567 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002568 left = lstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumeb29e9a2007-08-08 21:55:33 +00002569 if (left == mysize)
2570 right = left;
2571 else
2572 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002573 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2574}
2575
2576PyDoc_STRVAR(lstrip__doc__,
2577"B.lstrip(bytes) -> bytes\n\
2578\n\
2579Strip leading bytes contained in the argument.");
2580static PyObject *
2581bytes_lstrip(PyBytesObject *self, PyObject *arg)
2582{
2583 Py_ssize_t left, right, mysize, argsize;
2584 void *myptr, *argptr;
2585 if (arg == NULL || !PyBytes_Check(arg)) {
2586 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2587 return NULL;
2588 }
2589 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002590 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002591 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002592 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002593 left = lstrip_helper(myptr, mysize, argptr, argsize);
2594 right = mysize;
2595 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2596}
2597
2598PyDoc_STRVAR(rstrip__doc__,
2599"B.rstrip(bytes) -> bytes\n\
2600\n\
2601Strip trailing bytes contained in the argument.");
2602static PyObject *
2603bytes_rstrip(PyBytesObject *self, PyObject *arg)
2604{
2605 Py_ssize_t left, right, mysize, argsize;
2606 void *myptr, *argptr;
2607 if (arg == NULL || !PyBytes_Check(arg)) {
2608 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2609 return NULL;
2610 }
2611 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002612 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002613 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002614 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002615 left = 0;
2616 right = rstrip_helper(myptr, mysize, argptr, argsize);
2617 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2618}
Neal Norwitz6968b052007-02-27 19:02:19 +00002619
Guido van Rossumd624f182006-04-24 13:47:05 +00002620PyDoc_STRVAR(decode_doc,
2621"B.decode([encoding[,errors]]) -> unicode obect.\n\
2622\n\
2623Decodes B using the codec registered for encoding. encoding defaults\n\
2624to the default encoding. errors may be given to set a different error\n\
2625handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2626a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2627as well as any other name registerd with codecs.register_error that is\n\
2628able to handle UnicodeDecodeErrors.");
2629
2630static PyObject *
2631bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002632{
Guido van Rossumd624f182006-04-24 13:47:05 +00002633 const char *encoding = NULL;
2634 const char *errors = NULL;
2635
2636 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2637 return NULL;
2638 if (encoding == NULL)
2639 encoding = PyUnicode_GetDefaultEncoding();
2640 return PyCodec_Decode(self, encoding, errors);
2641}
2642
Guido van Rossuma0867f72006-05-05 04:34:18 +00002643PyDoc_STRVAR(alloc_doc,
2644"B.__alloc__() -> int\n\
2645\n\
2646Returns the number of bytes actually allocated.");
2647
2648static PyObject *
2649bytes_alloc(PyBytesObject *self)
2650{
2651 return PyInt_FromSsize_t(self->ob_alloc);
2652}
2653
Guido van Rossum20188312006-05-05 15:15:40 +00002654PyDoc_STRVAR(join_doc,
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002655"B.join(iterable_of_bytes) -> bytes\n\
Guido van Rossum20188312006-05-05 15:15:40 +00002656\n\
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002657Concatenates any number of bytes objects, with B in between each pair.\n\
2658Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Guido van Rossum20188312006-05-05 15:15:40 +00002659
2660static PyObject *
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002661bytes_join(PyBytesObject *self, PyObject *it)
Guido van Rossum20188312006-05-05 15:15:40 +00002662{
2663 PyObject *seq;
Martin v. Löwis5d7428b2007-07-21 18:47:48 +00002664 Py_ssize_t mysize = Py_Size(self);
Guido van Rossum20188312006-05-05 15:15:40 +00002665 Py_ssize_t i;
2666 Py_ssize_t n;
2667 PyObject **items;
2668 Py_ssize_t totalsize = 0;
2669 PyObject *result;
2670 char *dest;
2671
2672 seq = PySequence_Fast(it, "can only join an iterable");
2673 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002674 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002675 n = PySequence_Fast_GET_SIZE(seq);
2676 items = PySequence_Fast_ITEMS(seq);
2677
2678 /* Compute the total size, and check that they are all bytes */
2679 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002680 PyObject *obj = items[i];
2681 if (!PyBytes_Check(obj)) {
2682 PyErr_Format(PyExc_TypeError,
2683 "can only join an iterable of bytes "
2684 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002685 /* XXX %ld isn't right on Win64 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002686 (long)i, Py_Type(obj)->tp_name);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002687 goto error;
2688 }
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002689 if (i > 0)
2690 totalsize += mysize;
Georg Brandlb3f568f2007-02-27 08:49:18 +00002691 totalsize += PyBytes_GET_SIZE(obj);
2692 if (totalsize < 0) {
2693 PyErr_NoMemory();
2694 goto error;
2695 }
Guido van Rossum20188312006-05-05 15:15:40 +00002696 }
2697
2698 /* Allocate the result, and copy the bytes */
2699 result = PyBytes_FromStringAndSize(NULL, totalsize);
2700 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002701 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002702 dest = PyBytes_AS_STRING(result);
2703 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002704 PyObject *obj = items[i];
2705 Py_ssize_t size = PyBytes_GET_SIZE(obj);
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002706 if (i > 0) {
2707 memcpy(dest, self->ob_bytes, mysize);
2708 dest += mysize;
2709 }
Georg Brandlb3f568f2007-02-27 08:49:18 +00002710 memcpy(dest, PyBytes_AS_STRING(obj), size);
2711 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002712 }
2713
2714 /* Done */
2715 Py_DECREF(seq);
2716 return result;
2717
2718 /* Error handling */
2719 error:
2720 Py_DECREF(seq);
2721 return NULL;
2722}
2723
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002724PyDoc_STRVAR(fromhex_doc,
2725"bytes.fromhex(string) -> bytes\n\
2726\n\
2727Create a bytes object from a string of hexadecimal numbers.\n\
2728Spaces between two numbers are accepted. Example:\n\
2729bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2730
/* Return the value (0..15) of the hexadecimal digit c, accepting both upper
 * and lower case letters, or -1 if c is not a hexadecimal digit.
 */
static int
hex_digit_to_int(int c)
{
    int lowered;

    if (isdigit(c))
        return c - '0';

    /* Fold upper case letters so a single range test covers both cases. */
    lowered = isupper(c) ? tolower(c) : c;
    return (lowered >= 'a' && lowered <= 'f') ? lowered - 'a' + 10 : -1;
}
2744
2745static PyObject *
2746bytes_fromhex(PyObject *cls, PyObject *args)
2747{
2748 PyObject *newbytes;
2749 char *hex, *buf;
2750 Py_ssize_t len, byteslen, i, j;
2751 int top, bot;
2752
2753 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
2754 return NULL;
2755
2756 byteslen = len / 2; /* max length if there are no spaces */
2757
2758 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
2759 if (!newbytes)
2760 return NULL;
2761 buf = PyBytes_AS_STRING(newbytes);
2762
Guido van Rossum4355a472007-05-04 05:00:04 +00002763 for (i = j = 0; i < len; i += 2) {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002764 /* skip over spaces in the input */
2765 while (Py_CHARMASK(hex[i]) == ' ')
2766 i++;
2767 if (i >= len)
2768 break;
2769 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2770 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2771 if (top == -1 || bot == -1) {
2772 PyErr_Format(PyExc_ValueError,
2773 "non-hexadecimal number string '%c%c' found in "
2774 "fromhex() arg at position %zd",
2775 hex[i], hex[i+1], i);
2776 goto error;
2777 }
2778 buf[j++] = (top << 4) + bot;
2779 }
2780 if (PyBytes_Resize(newbytes, j) < 0)
2781 goto error;
2782 return newbytes;
2783
2784 error:
2785 Py_DECREF(newbytes);
2786 return NULL;
2787}
2788
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002789PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2790
2791static PyObject *
2792bytes_reduce(PyBytesObject *self)
2793{
Martin v. Löwis9c121062007-08-05 20:26:11 +00002794 PyObject *latin1;
2795 if (self->ob_bytes)
Guido van Rossuma74184e2007-08-29 04:05:57 +00002796 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
2797 Py_Size(self), NULL);
Martin v. Löwis9c121062007-08-05 20:26:11 +00002798 else
Guido van Rossuma74184e2007-08-29 04:05:57 +00002799 latin1 = PyUnicode_FromString("");
Martin v. Löwis9c121062007-08-05 20:26:11 +00002800 return Py_BuildValue("(O(Ns))", Py_Type(self), latin1, "latin-1");
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002801}
2802
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002803static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002804 (lenfunc)bytes_length, /* sq_length */
2805 (binaryfunc)bytes_concat, /* sq_concat */
2806 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2807 (ssizeargfunc)bytes_getitem, /* sq_item */
2808 0, /* sq_slice */
2809 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2810 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002811 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002812 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2813 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002814};
2815
2816static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002817 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00002818 (binaryfunc)bytes_subscript,
2819 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002820};
2821
2822static PyBufferProcs bytes_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002823 (getbufferproc)bytes_getbuffer,
2824 (releasebufferproc)bytes_releasebuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002825};
2826
2827static PyMethodDef
2828bytes_methods[] = {
Neal Norwitz6968b052007-02-27 19:02:19 +00002829 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2830 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2831 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2832 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2833 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2834 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
2835 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2836 startswith__doc__},
2837 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2838 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
2839 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2840 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
2841 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2842 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2843 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
2844 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
2845 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
2846 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
2847 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
2848 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002849 {"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__},
2850 {"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__},
2851 {"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__},
Guido van Rossumd624f182006-04-24 13:47:05 +00002852 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002853 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002854 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2855 fromhex_doc},
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002856 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002857 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002858 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002859};
2860
2861PyDoc_STRVAR(bytes_doc,
2862"bytes([iterable]) -> new array of bytes.\n\
2863\n\
2864If an argument is given it must be an iterable yielding ints in range(256).");
2865
2866PyTypeObject PyBytes_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002867 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002868 "bytes",
2869 sizeof(PyBytesObject),
2870 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00002871 (destructor)bytes_dealloc, /* tp_dealloc */
2872 0, /* tp_print */
2873 0, /* tp_getattr */
2874 0, /* tp_setattr */
2875 0, /* tp_compare */
2876 (reprfunc)bytes_repr, /* tp_repr */
2877 0, /* tp_as_number */
2878 &bytes_as_sequence, /* tp_as_sequence */
2879 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002880 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00002881 0, /* tp_call */
2882 (reprfunc)bytes_str, /* tp_str */
2883 PyObject_GenericGetAttr, /* tp_getattro */
2884 0, /* tp_setattro */
2885 &bytes_as_buffer, /* tp_as_buffer */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002886 /* bytes is 'final' or 'sealed' */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002887 Py_TPFLAGS_DEFAULT, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00002888 bytes_doc, /* tp_doc */
2889 0, /* tp_traverse */
2890 0, /* tp_clear */
2891 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2892 0, /* tp_weaklistoffset */
2893 0, /* tp_iter */
2894 0, /* tp_iternext */
2895 bytes_methods, /* tp_methods */
2896 0, /* tp_members */
2897 0, /* tp_getset */
2898 0, /* tp_base */
2899 0, /* tp_dict */
2900 0, /* tp_descr_get */
2901 0, /* tp_descr_set */
2902 0, /* tp_dictoffset */
2903 (initproc)bytes_init, /* tp_init */
2904 PyType_GenericAlloc, /* tp_alloc */
2905 PyType_GenericNew, /* tp_new */
2906 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002907};