blob: b267cac11185cf8c0cc3e49ac8b7004e23d4a57f [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
3/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz6968b052007-02-27 19:02:19 +00009/* The nullbytes are used by the stringlib during partition.
10 * If partition is removed from bytes, nullbytes and its helper
11 * Init/Fini should also be removed.
12 */
13static PyBytesObject *nullbytes = NULL;
14
15void
16PyBytes_Fini(void)
17{
18 Py_CLEAR(nullbytes);
19}
20
21int
22PyBytes_Init(void)
23{
24 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
25 if (nullbytes == NULL)
26 return 0;
27 nullbytes->ob_bytes = NULL;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000028 Py_Size(nullbytes) = nullbytes->ob_alloc = 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000029 nullbytes->ob_exports = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +000030 return 1;
31}
32
33/* end nullbytes support */
34
Guido van Rossumad7d8d12007-04-13 01:39:34 +000035/* Helpers */
36
37static int
38_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000039{
40 PyObject *intarg = PyNumber_Int(arg);
41 if (! intarg)
42 return 0;
43 *value = PyInt_AsLong(intarg);
44 Py_DECREF(intarg);
45 if (*value < 0 || *value >= 256) {
46 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
47 return 0;
48 }
49 return 1;
50}
51
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000052static int
53bytes_getbuffer(PyBytesObject *obj, PyBuffer *view, int flags)
Guido van Rossum75d38e92007-08-24 17:33:11 +000054{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000055 int ret;
56 void *ptr;
57 if (view == NULL) {
58 obj->ob_exports++;
59 return 0;
60 }
Guido van Rossum75d38e92007-08-24 17:33:11 +000061 if (obj->ob_bytes == NULL)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000062 ptr = "";
63 else
64 ptr = obj->ob_bytes;
65 ret = PyBuffer_FillInfo(view, ptr, Py_Size(obj), 0, flags);
66 if (ret >= 0) {
67 obj->ob_exports++;
68 }
69 return ret;
70}
71
72static void
73bytes_releasebuffer(PyBytesObject *obj, PyBuffer *view)
74{
75 obj->ob_exports--;
76}
77
Neal Norwitz2bad9702007-08-27 06:19:22 +000078static Py_ssize_t
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000079_getbuffer(PyObject *obj, PyBuffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000080{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000081 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000082
83 if (buffer == NULL ||
84 PyUnicode_Check(obj) ||
Guido van Rossuma74184e2007-08-29 04:05:57 +000085 buffer->bf_getbuffer == NULL)
86 {
87 PyErr_Format(PyExc_TypeError,
88 "Type %.100s doesn't support the buffer API",
89 Py_Type(obj)->tp_name);
90 return -1;
91 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000092
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000093 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
94 return -1;
95 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000096}
97
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000098/* Direct API functions */
99
100PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000101PyBytes_FromObject(PyObject *input)
102{
103 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
104 input, NULL);
105}
106
107PyObject *
108PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000109{
110 PyBytesObject *new;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000111 int alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000112
Guido van Rossumd624f182006-04-24 13:47:05 +0000113 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000114
115 new = PyObject_New(PyBytesObject, &PyBytes_Type);
116 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000117 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000118
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000119 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000120 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000121 alloc = 0;
122 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000123 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000124 alloc = size + 1;
125 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +0000126 if (new->ob_bytes == NULL) {
127 Py_DECREF(new);
128 return NULL;
129 }
130 if (bytes != NULL)
131 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000132 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133 }
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000134 Py_Size(new) = size;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000135 new->ob_alloc = alloc;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000136 new->ob_exports = 0;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000137
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000138 return (PyObject *)new;
139}
140
141Py_ssize_t
142PyBytes_Size(PyObject *self)
143{
144 assert(self != NULL);
145 assert(PyBytes_Check(self));
146
Guido van Rossum20188312006-05-05 15:15:40 +0000147 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000148}
149
150char *
151PyBytes_AsString(PyObject *self)
152{
153 assert(self != NULL);
154 assert(PyBytes_Check(self));
155
Guido van Rossum20188312006-05-05 15:15:40 +0000156 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000157}
158
159int
160PyBytes_Resize(PyObject *self, Py_ssize_t size)
161{
162 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000163 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000164
165 assert(self != NULL);
166 assert(PyBytes_Check(self));
167 assert(size >= 0);
168
Guido van Rossuma0867f72006-05-05 04:34:18 +0000169 if (size < alloc / 2) {
170 /* Major downsize; resize down to exact size */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000171 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000172 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000173 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000174 /* Within allocated size; quick exit */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000175 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000176 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000177 return 0;
178 }
179 else if (size <= alloc * 1.125) {
180 /* Moderate upsize; overallocate similar to list_resize() */
181 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
182 }
183 else {
184 /* Major upsize; resize up to exact size */
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000185 alloc = size + 1;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000186 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000187
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000188 if (((PyBytesObject *)self)->ob_exports > 0) {
189 /*
Guido van Rossuma74184e2007-08-29 04:05:57 +0000190 fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
191 ((PyBytesObject *)self)->ob_bytes);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000192 */
193 PyErr_SetString(PyExc_BufferError,
Guido van Rossuma74184e2007-08-29 04:05:57 +0000194 "Existing exports of data: object cannot be re-sized");
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000195 return -1;
196 }
197
Guido van Rossuma0867f72006-05-05 04:34:18 +0000198 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000200 PyErr_NoMemory();
201 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000202 }
203
Guido van Rossumd624f182006-04-24 13:47:05 +0000204 ((PyBytesObject *)self)->ob_bytes = sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000205 Py_Size(self) = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000206 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000207 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
208
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000209 return 0;
210}
211
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000212PyObject *
213PyBytes_Concat(PyObject *a, PyObject *b)
214{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000215 Py_ssize_t size;
216 PyBuffer va, vb;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000217 PyBytesObject *result;
218
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000219 va.len = -1;
220 vb.len = -1;
221 if (_getbuffer(a, &va) < 0 ||
222 _getbuffer(b, &vb) < 0) {
Guido van Rossum75d38e92007-08-24 17:33:11 +0000223 if (va.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000224 PyObject_ReleaseBuffer(a, &va);
225 if (vb.len != -1)
226 PyObject_ReleaseBuffer(b, &vb);
227 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
228 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
229 return NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000230 }
231
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000232 size = va.len + vb.len;
233 if (size < 0) {
234 PyObject_ReleaseBuffer(a, &va);
235 PyObject_ReleaseBuffer(b, &vb);
236 return PyErr_NoMemory();
237 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000238
239 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
240 if (result != NULL) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000241 memcpy(result->ob_bytes, va.buf, va.len);
242 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000243 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000244
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000245 PyObject_ReleaseBuffer(a, &va);
246 PyObject_ReleaseBuffer(b, &vb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000247 return (PyObject *)result;
248}
249
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000250/* Functions stuffed into the type object */
251
252static Py_ssize_t
253bytes_length(PyBytesObject *self)
254{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000255 return Py_Size(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256}
257
258static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000259bytes_concat(PyBytesObject *self, PyObject *other)
260{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000261 return PyBytes_Concat((PyObject *)self, other);
Guido van Rossumd624f182006-04-24 13:47:05 +0000262}
263
264static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000265bytes_iconcat(PyBytesObject *self, PyObject *other)
266{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000267 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000268 Py_ssize_t size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000269 PyBuffer vo;
Guido van Rossum13e57212006-04-27 22:54:26 +0000270
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000271 if (_getbuffer(other, &vo) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000272 PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
273 Py_Type(self)->tp_name);
274 return NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000275 }
276
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000277 mysize = Py_Size(self);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000278 size = mysize + vo.len;
279 if (size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000280 PyObject_ReleaseBuffer(other, &vo);
281 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000282 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000283 if (size < self->ob_alloc) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000284 Py_Size(self) = size;
285 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000286 }
287 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000288 PyObject_ReleaseBuffer(other, &vo);
289 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000290 }
291 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
292 PyObject_ReleaseBuffer(other, &vo);
Guido van Rossum13e57212006-04-27 22:54:26 +0000293 Py_INCREF(self);
294 return (PyObject *)self;
295}
296
297static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000298bytes_repeat(PyBytesObject *self, Py_ssize_t count)
299{
300 PyBytesObject *result;
301 Py_ssize_t mysize;
302 Py_ssize_t size;
303
304 if (count < 0)
305 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000306 mysize = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000307 size = mysize * count;
308 if (count != 0 && size / count != mysize)
309 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000310 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000311 if (result != NULL && size != 0) {
312 if (mysize == 1)
313 memset(result->ob_bytes, self->ob_bytes[0], size);
314 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000315 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000316 for (i = 0; i < count; i++)
317 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
318 }
319 }
320 return (PyObject *)result;
321}
322
323static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000324bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
325{
326 Py_ssize_t mysize;
327 Py_ssize_t size;
328
329 if (count < 0)
330 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000331 mysize = Py_Size(self);
Guido van Rossum13e57212006-04-27 22:54:26 +0000332 size = mysize * count;
333 if (count != 0 && size / count != mysize)
334 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000335 if (size < self->ob_alloc) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000336 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000337 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000338 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000339 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000340 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000341
Guido van Rossum13e57212006-04-27 22:54:26 +0000342 if (mysize == 1)
343 memset(self->ob_bytes, self->ob_bytes[0], size);
344 else {
345 Py_ssize_t i;
346 for (i = 1; i < count; i++)
347 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
348 }
349
350 Py_INCREF(self);
351 return (PyObject *)self;
352}
353
354static int
355bytes_substring(PyBytesObject *self, PyBytesObject *other)
356{
357 Py_ssize_t i;
358
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000359 if (Py_Size(other) == 1) {
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000360 return memchr(self->ob_bytes, other->ob_bytes[0],
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000361 Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000362 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000363 if (Py_Size(other) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000364 return 1; /* Edge case */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000365 for (i = 0; i + Py_Size(other) <= Py_Size(self); i++) {
Guido van Rossum13e57212006-04-27 22:54:26 +0000366 /* XXX Yeah, yeah, lots of optimizations possible... */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000367 if (memcmp(self->ob_bytes + i, other->ob_bytes, Py_Size(other)) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000368 return 1;
369 }
370 return 0;
371}
372
373static int
374bytes_contains(PyBytesObject *self, PyObject *value)
375{
376 Py_ssize_t ival;
377
378 if (PyBytes_Check(value))
379 return bytes_substring(self, (PyBytesObject *)value);
380
Thomas Woutersd204a712006-08-22 13:41:17 +0000381 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000382 if (ival == -1 && PyErr_Occurred())
383 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000384 if (ival < 0 || ival >= 256) {
385 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
386 return -1;
387 }
388
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000389 return memchr(self->ob_bytes, ival, Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000390}
391
392static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000393bytes_getitem(PyBytesObject *self, Py_ssize_t i)
394{
395 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000396 i += Py_Size(self);
397 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000398 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
399 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000400 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000401 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
402}
403
404static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000405bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000406{
Thomas Wouters376446d2006-12-19 08:30:14 +0000407 if (PyIndex_Check(item)) {
408 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000409
Thomas Wouters376446d2006-12-19 08:30:14 +0000410 if (i == -1 && PyErr_Occurred())
411 return NULL;
412
413 if (i < 0)
414 i += PyBytes_GET_SIZE(self);
415
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000416 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000417 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
418 return NULL;
419 }
420 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
421 }
422 else if (PySlice_Check(item)) {
423 Py_ssize_t start, stop, step, slicelength, cur, i;
424 if (PySlice_GetIndicesEx((PySliceObject *)item,
425 PyBytes_GET_SIZE(self),
426 &start, &stop, &step, &slicelength) < 0) {
427 return NULL;
428 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000429
Thomas Wouters376446d2006-12-19 08:30:14 +0000430 if (slicelength <= 0)
431 return PyBytes_FromStringAndSize("", 0);
432 else if (step == 1) {
433 return PyBytes_FromStringAndSize(self->ob_bytes + start,
434 slicelength);
435 }
436 else {
437 char *source_buf = PyBytes_AS_STRING(self);
438 char *result_buf = (char *)PyMem_Malloc(slicelength);
439 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000440
Thomas Wouters376446d2006-12-19 08:30:14 +0000441 if (result_buf == NULL)
442 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000443
Thomas Wouters376446d2006-12-19 08:30:14 +0000444 for (cur = start, i = 0; i < slicelength;
445 cur += step, i++) {
446 result_buf[i] = source_buf[cur];
447 }
448 result = PyBytes_FromStringAndSize(result_buf, slicelength);
449 PyMem_Free(result_buf);
450 return result;
451 }
452 }
453 else {
454 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
455 return NULL;
456 }
457}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000458
Guido van Rossumd624f182006-04-24 13:47:05 +0000459static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000460bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000461 PyObject *values)
462{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000463 Py_ssize_t avail, needed;
464 void *bytes;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000465 PyBuffer vbytes;
466 int res = 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000467
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000468 vbytes.len = -1;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000469 if (values == (PyObject *)self) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000470 /* Make a copy and call this function recursively */
Guido van Rossumd624f182006-04-24 13:47:05 +0000471 int err;
472 values = PyBytes_FromObject(values);
473 if (values == NULL)
474 return -1;
475 err = bytes_setslice(self, lo, hi, values);
476 Py_DECREF(values);
477 return err;
478 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000479 if (values == NULL) {
480 /* del b[lo:hi] */
481 bytes = NULL;
482 needed = 0;
483 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000484 else {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000485 if (_getbuffer(values, &vbytes) < 0) {
486 PyErr_Format(PyExc_TypeError,
487 "can't set bytes slice from %.100s",
488 Py_Type(values)->tp_name);
489 return -1;
490 }
491 needed = vbytes.len;
492 bytes = vbytes.buf;
Guido van Rossumd624f182006-04-24 13:47:05 +0000493 }
494
495 if (lo < 0)
496 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000497 if (hi < lo)
498 hi = lo;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000499 if (hi > Py_Size(self))
500 hi = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000501
502 avail = hi - lo;
503 if (avail < 0)
504 lo = hi = avail = 0;
505
506 if (avail != needed) {
507 if (avail > needed) {
508 /*
509 0 lo hi old_size
510 | |<----avail----->|<-----tomove------>|
511 | |<-needed->|<-----tomove------>|
512 0 lo new_hi new_size
513 */
514 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000515 Py_Size(self) - hi);
Guido van Rossumd624f182006-04-24 13:47:05 +0000516 }
Guido van Rossuma74184e2007-08-29 04:05:57 +0000517 /* XXX(nnorwitz): need to verify this can't overflow! */
Thomas Wouters376446d2006-12-19 08:30:14 +0000518 if (PyBytes_Resize((PyObject *)self,
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000519 Py_Size(self) + needed - avail) < 0) {
520 res = -1;
521 goto finish;
522 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000523 if (avail < needed) {
524 /*
525 0 lo hi old_size
526 | |<-avail->|<-----tomove------>|
527 | |<----needed---->|<-----tomove------>|
528 0 lo new_hi new_size
529 */
530 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000531 Py_Size(self) - lo - needed);
Guido van Rossumd624f182006-04-24 13:47:05 +0000532 }
533 }
534
535 if (needed > 0)
536 memcpy(self->ob_bytes + lo, bytes, needed);
537
Guido van Rossum75d38e92007-08-24 17:33:11 +0000538
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000539 finish:
Guido van Rossum75d38e92007-08-24 17:33:11 +0000540 if (vbytes.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000541 PyObject_ReleaseBuffer(values, &vbytes);
542 return res;
Guido van Rossumd624f182006-04-24 13:47:05 +0000543}
544
545static int
546bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
547{
548 Py_ssize_t ival;
549
550 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000551 i += Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000552
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000553 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000554 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
555 return -1;
556 }
557
558 if (value == NULL)
559 return bytes_setslice(self, i, i+1, NULL);
560
Thomas Woutersd204a712006-08-22 13:41:17 +0000561 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000562 if (ival == -1 && PyErr_Occurred())
563 return -1;
564
565 if (ival < 0 || ival >= 256) {
566 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
567 return -1;
568 }
569
570 self->ob_bytes[i] = ival;
571 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000572}
573
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000574static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000575bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
576{
577 Py_ssize_t start, stop, step, slicelen, needed;
578 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000579
Thomas Wouters376446d2006-12-19 08:30:14 +0000580 if (PyIndex_Check(item)) {
581 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
582
583 if (i == -1 && PyErr_Occurred())
584 return -1;
585
586 if (i < 0)
587 i += PyBytes_GET_SIZE(self);
588
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000589 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000590 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
591 return -1;
592 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000593
Thomas Wouters376446d2006-12-19 08:30:14 +0000594 if (values == NULL) {
595 /* Fall through to slice assignment */
596 start = i;
597 stop = i + 1;
598 step = 1;
599 slicelen = 1;
600 }
601 else {
602 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
603 if (ival == -1 && PyErr_Occurred())
604 return -1;
605 if (ival < 0 || ival >= 256) {
606 PyErr_SetString(PyExc_ValueError,
607 "byte must be in range(0, 256)");
608 return -1;
609 }
610 self->ob_bytes[i] = (char)ival;
611 return 0;
612 }
613 }
614 else if (PySlice_Check(item)) {
615 if (PySlice_GetIndicesEx((PySliceObject *)item,
616 PyBytes_GET_SIZE(self),
617 &start, &stop, &step, &slicelen) < 0) {
618 return -1;
619 }
620 }
621 else {
622 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
623 return -1;
624 }
625
626 if (values == NULL) {
627 bytes = NULL;
628 needed = 0;
629 }
630 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
631 /* Make a copy an call this function recursively */
632 int err;
633 values = PyBytes_FromObject(values);
634 if (values == NULL)
635 return -1;
636 err = bytes_ass_subscript(self, item, values);
637 Py_DECREF(values);
638 return err;
639 }
640 else {
641 assert(PyBytes_Check(values));
642 bytes = ((PyBytesObject *)values)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000643 needed = Py_Size(values);
Thomas Wouters376446d2006-12-19 08:30:14 +0000644 }
645 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
646 if ((step < 0 && start < stop) ||
647 (step > 0 && start > stop))
648 stop = start;
649 if (step == 1) {
650 if (slicelen != needed) {
651 if (slicelen > needed) {
652 /*
653 0 start stop old_size
654 | |<---slicelen--->|<-----tomove------>|
655 | |<-needed->|<-----tomove------>|
656 0 lo new_hi new_size
657 */
658 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000659 Py_Size(self) - stop);
Thomas Wouters376446d2006-12-19 08:30:14 +0000660 }
661 if (PyBytes_Resize((PyObject *)self,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000662 Py_Size(self) + needed - slicelen) < 0)
Thomas Wouters376446d2006-12-19 08:30:14 +0000663 return -1;
664 if (slicelen < needed) {
665 /*
666 0 lo hi old_size
667 | |<-avail->|<-----tomove------>|
668 | |<----needed---->|<-----tomove------>|
669 0 lo new_hi new_size
670 */
671 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000672 Py_Size(self) - start - needed);
Thomas Wouters376446d2006-12-19 08:30:14 +0000673 }
674 }
675
676 if (needed > 0)
677 memcpy(self->ob_bytes + start, bytes, needed);
678
679 return 0;
680 }
681 else {
682 if (needed == 0) {
683 /* Delete slice */
684 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000685
Thomas Wouters376446d2006-12-19 08:30:14 +0000686 if (step < 0) {
687 stop = start + 1;
688 start = stop + step * (slicelen - 1) - 1;
689 step = -step;
690 }
691 for (cur = start, i = 0;
692 i < slicelen; cur += step, i++) {
693 Py_ssize_t lim = step - 1;
694
695 if (cur + step >= PyBytes_GET_SIZE(self))
696 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000697
Thomas Wouters376446d2006-12-19 08:30:14 +0000698 memmove(self->ob_bytes + cur - i,
699 self->ob_bytes + cur + 1, lim);
700 }
701 /* Move the tail of the bytes, in one chunk */
702 cur = start + slicelen*step;
703 if (cur < PyBytes_GET_SIZE(self)) {
704 memmove(self->ob_bytes + cur - slicelen,
705 self->ob_bytes + cur,
706 PyBytes_GET_SIZE(self) - cur);
707 }
708 if (PyBytes_Resize((PyObject *)self,
709 PyBytes_GET_SIZE(self) - slicelen) < 0)
710 return -1;
711
712 return 0;
713 }
714 else {
715 /* Assign slice */
716 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000717
Thomas Wouters376446d2006-12-19 08:30:14 +0000718 if (needed != slicelen) {
719 PyErr_Format(PyExc_ValueError,
720 "attempt to assign bytes of size %zd "
721 "to extended slice of size %zd",
722 needed, slicelen);
723 return -1;
724 }
725 for (cur = start, i = 0; i < slicelen; cur += step, i++)
726 self->ob_bytes[cur] = bytes[i];
727 return 0;
728 }
729 }
730}
731
732static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000733bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
734{
Guido van Rossumd624f182006-04-24 13:47:05 +0000735 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000736 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000737 const char *encoding = NULL;
738 const char *errors = NULL;
739 Py_ssize_t count;
740 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000741 PyObject *(*iternext)(PyObject *);
742
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000743 if (Py_Size(self) != 0) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000744 /* Empty previous contents (yes, do this first of all!) */
745 if (PyBytes_Resize((PyObject *)self, 0) < 0)
746 return -1;
747 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000748
Guido van Rossumd624f182006-04-24 13:47:05 +0000749 /* Parse arguments */
750 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
751 &arg, &encoding, &errors))
752 return -1;
753
754 /* Make a quick exit if no first argument */
755 if (arg == NULL) {
756 if (encoding != NULL || errors != NULL) {
757 PyErr_SetString(PyExc_TypeError,
758 "encoding or errors without sequence argument");
759 return -1;
760 }
761 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000762 }
763
Guido van Rossumd624f182006-04-24 13:47:05 +0000764 if (PyUnicode_Check(arg)) {
765 /* Encode via the codec registry */
Guido van Rossum4355a472007-05-04 05:00:04 +0000766 PyObject *encoded, *new;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000767 if (encoding == NULL) {
768 PyErr_SetString(PyExc_TypeError,
769 "string argument without an encoding");
770 return -1;
771 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000772 encoded = PyCodec_Encode(arg, encoding, errors);
773 if (encoded == NULL)
774 return -1;
Guido van Rossum4355a472007-05-04 05:00:04 +0000775 if (!PyBytes_Check(encoded) && !PyString_Check(encoded)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000776 PyErr_Format(PyExc_TypeError,
Guido van Rossum4355a472007-05-04 05:00:04 +0000777 "encoder did not return a str8 or bytes object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000778 Py_Type(encoded)->tp_name);
Guido van Rossumd624f182006-04-24 13:47:05 +0000779 Py_DECREF(encoded);
780 return -1;
781 }
Guido van Rossuma74184e2007-08-29 04:05:57 +0000782 new = bytes_iconcat(self, encoded);
783 Py_DECREF(encoded);
784 if (new == NULL)
785 return -1;
786 Py_DECREF(new);
787 return 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000788 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000789
Guido van Rossumd624f182006-04-24 13:47:05 +0000790 /* If it's not unicode, there can't be encoding or errors */
791 if (encoding != NULL || errors != NULL) {
792 PyErr_SetString(PyExc_TypeError,
793 "encoding or errors without a string argument");
794 return -1;
795 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000796
Guido van Rossumd624f182006-04-24 13:47:05 +0000797 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000798 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000799 if (count == -1 && PyErr_Occurred())
800 PyErr_Clear();
801 else {
802 if (count < 0) {
803 PyErr_SetString(PyExc_ValueError, "negative count");
804 return -1;
805 }
806 if (count > 0) {
807 if (PyBytes_Resize((PyObject *)self, count))
808 return -1;
809 memset(self->ob_bytes, 0, count);
810 }
811 return 0;
812 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000813
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000814 /* Use the modern buffer interface */
815 if (PyObject_CheckBuffer(arg)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000816 Py_ssize_t size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000817 PyBuffer view;
818 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000819 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000820 size = view.len;
821 if (PyBytes_Resize((PyObject *)self, size) < 0) goto fail;
822 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
823 goto fail;
824 PyObject_ReleaseBuffer(arg, &view);
Guido van Rossumd624f182006-04-24 13:47:05 +0000825 return 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000826 fail:
827 PyObject_ReleaseBuffer(arg, &view);
828 return -1;
Guido van Rossumd624f182006-04-24 13:47:05 +0000829 }
830
831 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000832
833 /* Get the iterator */
834 it = PyObject_GetIter(arg);
835 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000836 return -1;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000837 iternext = *Py_Type(it)->tp_iternext;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000838
839 /* Run the iterator to exhaustion */
840 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000841 PyObject *item;
842 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000843
Guido van Rossumd624f182006-04-24 13:47:05 +0000844 /* Get the next item */
845 item = iternext(it);
846 if (item == NULL) {
847 if (PyErr_Occurred()) {
848 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
849 goto error;
850 PyErr_Clear();
851 }
852 break;
853 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000854
Guido van Rossumd624f182006-04-24 13:47:05 +0000855 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000856 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000857 Py_DECREF(item);
858 if (value == -1 && PyErr_Occurred())
859 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000860
Guido van Rossumd624f182006-04-24 13:47:05 +0000861 /* Range check */
862 if (value < 0 || value >= 256) {
863 PyErr_SetString(PyExc_ValueError,
864 "bytes must be in range(0, 256)");
865 goto error;
866 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000867
Guido van Rossumd624f182006-04-24 13:47:05 +0000868 /* Append the byte */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000869 if (Py_Size(self) < self->ob_alloc)
870 Py_Size(self)++;
871 else if (PyBytes_Resize((PyObject *)self, Py_Size(self)+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000872 goto error;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000873 self->ob_bytes[Py_Size(self)-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000874 }
875
876 /* Clean up and return success */
877 Py_DECREF(it);
878 return 0;
879
880 error:
881 /* Error handling when it != NULL */
882 Py_DECREF(it);
883 return -1;
884}
885
Georg Brandlee91be42007-02-24 19:41:35 +0000886/* Mostly copied from string_repr, but without the
887 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000888static PyObject *
889bytes_repr(PyBytesObject *self)
890{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000891 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000892 size_t newsize = 3 + 4 * Py_Size(self);
Georg Brandlee91be42007-02-24 19:41:35 +0000893 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000894 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(self)) {
Georg Brandlee91be42007-02-24 19:41:35 +0000895 PyErr_SetString(PyExc_OverflowError,
896 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000897 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000898 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000899 v = PyUnicode_FromUnicode(NULL, newsize);
Georg Brandlee91be42007-02-24 19:41:35 +0000900 if (v == NULL) {
901 return NULL;
902 }
903 else {
904 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000905 register Py_UNICODE c;
906 register Py_UNICODE *p;
Georg Brandlee91be42007-02-24 19:41:35 +0000907 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000908
Walter Dörwald1ab83302007-05-18 17:15:44 +0000909 p = PyUnicode_AS_UNICODE(v);
Georg Brandlee91be42007-02-24 19:41:35 +0000910 *p++ = 'b';
911 *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000912 for (i = 0; i < Py_Size(self); i++) {
Georg Brandlee91be42007-02-24 19:41:35 +0000913 /* There's at least enough room for a hex escape
914 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000915 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Georg Brandlee91be42007-02-24 19:41:35 +0000916 c = self->ob_bytes[i];
917 if (c == quote || c == '\\')
918 *p++ = '\\', *p++ = c;
919 else if (c == '\t')
920 *p++ = '\\', *p++ = 't';
921 else if (c == '\n')
922 *p++ = '\\', *p++ = 'n';
923 else if (c == '\r')
924 *p++ = '\\', *p++ = 'r';
925 else if (c == 0)
Guido van Rossum57b93ad2007-05-08 19:09:34 +0000926 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
Georg Brandlee91be42007-02-24 19:41:35 +0000927 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000928 *p++ = '\\';
929 *p++ = 'x';
930 *p++ = hexdigits[(c & 0xf0) >> 4];
931 *p++ = hexdigits[c & 0xf];
Georg Brandlee91be42007-02-24 19:41:35 +0000932 }
933 else
934 *p++ = c;
935 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000936 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Georg Brandlee91be42007-02-24 19:41:35 +0000937 *p++ = quote;
938 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000939 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
940 Py_DECREF(v);
941 return NULL;
942 }
Georg Brandlee91be42007-02-24 19:41:35 +0000943 return v;
944 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000945}
946
947static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000948bytes_str(PyBytesObject *self)
949{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000950 return PyString_FromStringAndSize(self->ob_bytes, Py_Size(self));
Guido van Rossumd624f182006-04-24 13:47:05 +0000951}
952
953static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000954bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000955{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000956 Py_ssize_t self_size, other_size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000957 PyBuffer self_bytes, other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000958 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000959 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000960 int cmp;
961
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000962 /* Bytes can be compared to anything that supports the (binary) buffer
963 API. Except Unicode. */
Guido van Rossumebea9be2007-04-09 00:49:13 +0000964
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000965 self_size = _getbuffer(self, &self_bytes);
966 if (self_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000967 PyErr_Clear();
Guido van Rossumebea9be2007-04-09 00:49:13 +0000968 Py_INCREF(Py_NotImplemented);
969 return Py_NotImplemented;
970 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000971
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000972 other_size = _getbuffer(other, &other_bytes);
973 if (other_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000974 PyErr_Clear();
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000975 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossumd624f182006-04-24 13:47:05 +0000976 Py_INCREF(Py_NotImplemented);
977 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000978 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000979
980 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000981 /* Shortcut: if the lengths differ, the objects differ */
982 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000983 }
984 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000985 minsize = self_size;
986 if (other_size < minsize)
987 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000988
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000989 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000990 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000991
Guido van Rossumd624f182006-04-24 13:47:05 +0000992 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000993 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000994 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000995 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000996 cmp = 1;
997 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000998
Guido van Rossumd624f182006-04-24 13:47:05 +0000999 switch (op) {
1000 case Py_LT: cmp = cmp < 0; break;
1001 case Py_LE: cmp = cmp <= 0; break;
1002 case Py_EQ: cmp = cmp == 0; break;
1003 case Py_NE: cmp = cmp != 0; break;
1004 case Py_GT: cmp = cmp > 0; break;
1005 case Py_GE: cmp = cmp >= 0; break;
1006 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001007 }
1008
1009 res = cmp ? Py_True : Py_False;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001010 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossum75d38e92007-08-24 17:33:11 +00001011 PyObject_ReleaseBuffer(other, &other_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001012 Py_INCREF(res);
1013 return res;
1014}
1015
1016static void
1017bytes_dealloc(PyBytesObject *self)
1018{
Guido van Rossumd624f182006-04-24 13:47:05 +00001019 if (self->ob_bytes != 0) {
1020 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001021 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001022 Py_Type(self)->tp_free((PyObject *)self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001023}
1024
Neal Norwitz6968b052007-02-27 19:02:19 +00001025
1026/* -------------------------------------------------------------------- */
1027/* Methods */
1028
1029#define STRINGLIB_CHAR char
1030#define STRINGLIB_CMP memcmp
1031#define STRINGLIB_LEN PyBytes_GET_SIZE
1032#define STRINGLIB_NEW PyBytes_FromStringAndSize
1033#define STRINGLIB_EMPTY nullbytes
1034
1035#include "stringlib/fastsearch.h"
1036#include "stringlib/count.h"
1037#include "stringlib/find.h"
1038#include "stringlib/partition.h"
1039
1040
1041/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1042were copied from the old char* style string object. */
1043
1044Py_LOCAL_INLINE(void)
1045_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1046{
1047 if (*end > len)
1048 *end = len;
1049 else if (*end < 0)
1050 *end += len;
1051 if (*end < 0)
1052 *end = 0;
1053 if (*start < 0)
1054 *start += len;
1055 if (*start < 0)
1056 *start = 0;
1057}
1058
1059
1060Py_LOCAL_INLINE(Py_ssize_t)
1061bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1062{
1063 PyObject *subobj;
1064 const char *sub;
1065 Py_ssize_t sub_len;
1066 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1067
1068 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1069 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1070 return -2;
1071 if (PyBytes_Check(subobj)) {
1072 sub = PyBytes_AS_STRING(subobj);
1073 sub_len = PyBytes_GET_SIZE(subobj);
1074 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001075 /* XXX --> use the modern buffer interface */
Guido van Rossuma74184e2007-08-29 04:05:57 +00001076 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) {
Neal Norwitz6968b052007-02-27 19:02:19 +00001077 /* XXX - the "expected a character buffer object" is pretty
1078 confusing for a non-expert. remap to something else ? */
1079 return -2;
Guido van Rossuma74184e2007-08-29 04:05:57 +00001080 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001081
1082 if (dir > 0)
1083 return stringlib_find_slice(
1084 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1085 sub, sub_len, start, end);
1086 else
1087 return stringlib_rfind_slice(
1088 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1089 sub, sub_len, start, end);
1090}
1091
1092
1093PyDoc_STRVAR(find__doc__,
1094"B.find(sub [,start [,end]]) -> int\n\
1095\n\
1096Return the lowest index in B where subsection sub is found,\n\
1097such that sub is contained within s[start,end]. Optional\n\
1098arguments start and end are interpreted as in slice notation.\n\
1099\n\
1100Return -1 on failure.");
1101
1102static PyObject *
1103bytes_find(PyBytesObject *self, PyObject *args)
1104{
1105 Py_ssize_t result = bytes_find_internal(self, args, +1);
1106 if (result == -2)
1107 return NULL;
1108 return PyInt_FromSsize_t(result);
1109}
1110
1111PyDoc_STRVAR(count__doc__,
1112"B.count(sub[, start[, end]]) -> int\n\
1113\n\
1114Return the number of non-overlapping occurrences of subsection sub in\n\
1115bytes B[start:end]. Optional arguments start and end are interpreted\n\
1116as in slice notation.");
1117
1118static PyObject *
1119bytes_count(PyBytesObject *self, PyObject *args)
1120{
1121 PyObject *sub_obj;
1122 const char *str = PyBytes_AS_STRING(self), *sub;
1123 Py_ssize_t sub_len;
1124 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1125
1126 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1127 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1128 return NULL;
1129
1130 if (PyBytes_Check(sub_obj)) {
1131 sub = PyBytes_AS_STRING(sub_obj);
1132 sub_len = PyBytes_GET_SIZE(sub_obj);
1133 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001134 /* XXX --> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001135 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1136 return NULL;
1137
Martin v. Löwis5b222132007-06-10 09:51:05 +00001138 _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Neal Norwitz6968b052007-02-27 19:02:19 +00001139
1140 return PyInt_FromSsize_t(
1141 stringlib_count(str + start, end - start, sub, sub_len)
1142 );
1143}
1144
1145
1146PyDoc_STRVAR(index__doc__,
1147"B.index(sub [,start [,end]]) -> int\n\
1148\n\
1149Like B.find() but raise ValueError when the subsection is not found.");
1150
1151static PyObject *
1152bytes_index(PyBytesObject *self, PyObject *args)
1153{
1154 Py_ssize_t result = bytes_find_internal(self, args, +1);
1155 if (result == -2)
1156 return NULL;
1157 if (result == -1) {
1158 PyErr_SetString(PyExc_ValueError,
1159 "subsection not found");
1160 return NULL;
1161 }
1162 return PyInt_FromSsize_t(result);
1163}
1164
1165
1166PyDoc_STRVAR(rfind__doc__,
1167"B.rfind(sub [,start [,end]]) -> int\n\
1168\n\
1169Return the highest index in B where subsection sub is found,\n\
1170such that sub is contained within s[start,end]. Optional\n\
1171arguments start and end are interpreted as in slice notation.\n\
1172\n\
1173Return -1 on failure.");
1174
1175static PyObject *
1176bytes_rfind(PyBytesObject *self, PyObject *args)
1177{
1178 Py_ssize_t result = bytes_find_internal(self, args, -1);
1179 if (result == -2)
1180 return NULL;
1181 return PyInt_FromSsize_t(result);
1182}
1183
1184
1185PyDoc_STRVAR(rindex__doc__,
1186"B.rindex(sub [,start [,end]]) -> int\n\
1187\n\
1188Like B.rfind() but raise ValueError when the subsection is not found.");
1189
1190static PyObject *
1191bytes_rindex(PyBytesObject *self, PyObject *args)
1192{
1193 Py_ssize_t result = bytes_find_internal(self, args, -1);
1194 if (result == -2)
1195 return NULL;
1196 if (result == -1) {
1197 PyErr_SetString(PyExc_ValueError,
1198 "subsection not found");
1199 return NULL;
1200 }
1201 return PyInt_FromSsize_t(result);
1202}
1203
1204
1205/* Matches the end (direction >= 0) or start (direction < 0) of self
1206 * against substr, using the start and end arguments. Returns
1207 * -1 on error, 0 if not found and 1 if found.
1208 */
1209Py_LOCAL(int)
1210_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1211 Py_ssize_t end, int direction)
1212{
1213 Py_ssize_t len = PyBytes_GET_SIZE(self);
1214 Py_ssize_t slen;
1215 const char* sub;
1216 const char* str;
1217
1218 if (PyBytes_Check(substr)) {
1219 sub = PyBytes_AS_STRING(substr);
1220 slen = PyBytes_GET_SIZE(substr);
1221 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001222 /* XXX --> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001223 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
1224 return -1;
1225 str = PyBytes_AS_STRING(self);
1226
1227 _adjust_indices(&start, &end, len);
1228
1229 if (direction < 0) {
1230 /* startswith */
1231 if (start+slen > len)
1232 return 0;
1233 } else {
1234 /* endswith */
1235 if (end-start < slen || start > len)
1236 return 0;
1237
1238 if (end-slen > start)
1239 start = end - slen;
1240 }
1241 if (end-start >= slen)
1242 return ! memcmp(str+start, sub, slen);
1243 return 0;
1244}
1245
1246
1247PyDoc_STRVAR(startswith__doc__,
1248"B.startswith(prefix[, start[, end]]) -> bool\n\
1249\n\
1250Return True if B starts with the specified prefix, False otherwise.\n\
1251With optional start, test B beginning at that position.\n\
1252With optional end, stop comparing B at that position.\n\
1253prefix can also be a tuple of strings to try.");
1254
1255static PyObject *
1256bytes_startswith(PyBytesObject *self, PyObject *args)
1257{
1258 Py_ssize_t start = 0;
1259 Py_ssize_t end = PY_SSIZE_T_MAX;
1260 PyObject *subobj;
1261 int result;
1262
1263 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1264 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1265 return NULL;
1266 if (PyTuple_Check(subobj)) {
1267 Py_ssize_t i;
1268 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1269 result = _bytes_tailmatch(self,
1270 PyTuple_GET_ITEM(subobj, i),
1271 start, end, -1);
1272 if (result == -1)
1273 return NULL;
1274 else if (result) {
1275 Py_RETURN_TRUE;
1276 }
1277 }
1278 Py_RETURN_FALSE;
1279 }
1280 result = _bytes_tailmatch(self, subobj, start, end, -1);
1281 if (result == -1)
1282 return NULL;
1283 else
1284 return PyBool_FromLong(result);
1285}
1286
1287PyDoc_STRVAR(endswith__doc__,
1288"B.endswith(suffix[, start[, end]]) -> bool\n\
1289\n\
1290Return True if B ends with the specified suffix, False otherwise.\n\
1291With optional start, test B beginning at that position.\n\
1292With optional end, stop comparing B at that position.\n\
1293suffix can also be a tuple of strings to try.");
1294
1295static PyObject *
1296bytes_endswith(PyBytesObject *self, PyObject *args)
1297{
1298 Py_ssize_t start = 0;
1299 Py_ssize_t end = PY_SSIZE_T_MAX;
1300 PyObject *subobj;
1301 int result;
1302
1303 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1304 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1305 return NULL;
1306 if (PyTuple_Check(subobj)) {
1307 Py_ssize_t i;
1308 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1309 result = _bytes_tailmatch(self,
1310 PyTuple_GET_ITEM(subobj, i),
1311 start, end, +1);
1312 if (result == -1)
1313 return NULL;
1314 else if (result) {
1315 Py_RETURN_TRUE;
1316 }
1317 }
1318 Py_RETURN_FALSE;
1319 }
1320 result = _bytes_tailmatch(self, subobj, start, end, +1);
1321 if (result == -1)
1322 return NULL;
1323 else
1324 return PyBool_FromLong(result);
1325}
1326
1327
1328
1329PyDoc_STRVAR(translate__doc__,
1330"B.translate(table [,deletechars]) -> bytes\n\
1331\n\
1332Return a copy of the bytes B, where all characters occurring\n\
1333in the optional argument deletechars are removed, and the\n\
1334remaining characters have been mapped through the given\n\
1335translation table, which must be a bytes of length 256.");
1336
1337static PyObject *
1338bytes_translate(PyBytesObject *self, PyObject *args)
1339{
1340 register char *input, *output;
1341 register const char *table;
1342 register Py_ssize_t i, c, changed = 0;
1343 PyObject *input_obj = (PyObject*)self;
1344 const char *table1, *output_start, *del_table=NULL;
1345 Py_ssize_t inlen, tablen, dellen = 0;
1346 PyObject *result;
1347 int trans_table[256];
1348 PyObject *tableobj, *delobj = NULL;
1349
1350 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1351 &tableobj, &delobj))
1352 return NULL;
1353
1354 if (PyBytes_Check(tableobj)) {
1355 table1 = PyBytes_AS_STRING(tableobj);
1356 tablen = PyBytes_GET_SIZE(tableobj);
1357 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001358 /* XXX -> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001359 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1360 return NULL;
1361
1362 if (tablen != 256) {
1363 PyErr_SetString(PyExc_ValueError,
1364 "translation table must be 256 characters long");
1365 return NULL;
1366 }
1367
1368 if (delobj != NULL) {
1369 if (PyBytes_Check(delobj)) {
1370 del_table = PyBytes_AS_STRING(delobj);
1371 dellen = PyBytes_GET_SIZE(delobj);
1372 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001373 /* XXX -> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001374 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1375 return NULL;
1376 }
1377 else {
1378 del_table = NULL;
1379 dellen = 0;
1380 }
1381
1382 table = table1;
1383 inlen = PyBytes_GET_SIZE(input_obj);
1384 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1385 if (result == NULL)
1386 return NULL;
1387 output_start = output = PyBytes_AsString(result);
1388 input = PyBytes_AS_STRING(input_obj);
1389
1390 if (dellen == 0) {
1391 /* If no deletions are required, use faster code */
1392 for (i = inlen; --i >= 0; ) {
1393 c = Py_CHARMASK(*input++);
1394 if (Py_CHARMASK((*output++ = table[c])) != c)
1395 changed = 1;
1396 }
1397 if (changed || !PyBytes_CheckExact(input_obj))
1398 return result;
1399 Py_DECREF(result);
1400 Py_INCREF(input_obj);
1401 return input_obj;
1402 }
1403
1404 for (i = 0; i < 256; i++)
1405 trans_table[i] = Py_CHARMASK(table[i]);
1406
1407 for (i = 0; i < dellen; i++)
1408 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1409
1410 for (i = inlen; --i >= 0; ) {
1411 c = Py_CHARMASK(*input++);
1412 if (trans_table[c] != -1)
1413 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1414 continue;
1415 changed = 1;
1416 }
1417 if (!changed && PyBytes_CheckExact(input_obj)) {
1418 Py_DECREF(result);
1419 Py_INCREF(input_obj);
1420 return input_obj;
1421 }
1422 /* Fix the size of the resulting string */
1423 if (inlen > 0)
1424 PyBytes_Resize(result, output - output_start);
1425 return result;
1426}
1427
1428
/* Search directions accepted by findstring()/countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c in the first target_len
   bytes of target, or NULL if there is none (thin memchr wrapper). */
#define findchar(target, target_len, c)                                 \
  ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when the `length` bytes at target+offset equal pattern.
   The first and last bytes are compared before the memcmp.
   Don't call if length < 2: the memcmp length would be negative.
   Arguments may be evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
  ((target)[(offset)] == (pattern)[0] &&                                \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&            \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1442
1443
1444/* Bytes ops must return a string. */
1445/* If the object is subclass of bytes, create a copy */
1446Py_LOCAL(PyBytesObject *)
1447return_self(PyBytesObject *self)
1448{
1449 if (PyBytes_CheckExact(self)) {
1450 Py_INCREF(self);
1451 return (PyBytesObject *)self;
1452 }
1453 return (PyBytesObject *)PyBytes_FromStringAndSize(
1454 PyBytes_AS_STRING(self),
1455 PyBytes_GET_SIZE(self));
1456}
1457
1458Py_LOCAL_INLINE(Py_ssize_t)
1459countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1460{
1461 Py_ssize_t count=0;
1462 const char *start=target;
1463 const char *end=target+target_len;
1464
1465 while ( (start=findchar(start, end-start, c)) != NULL ) {
1466 count++;
1467 if (count >= maxcount)
1468 break;
1469 start += 1;
1470 }
1471 return count;
1472}
1473
1474Py_LOCAL(Py_ssize_t)
1475findstring(const char *target, Py_ssize_t target_len,
1476 const char *pattern, Py_ssize_t pattern_len,
1477 Py_ssize_t start,
1478 Py_ssize_t end,
1479 int direction)
1480{
1481 if (start < 0) {
1482 start += target_len;
1483 if (start < 0)
1484 start = 0;
1485 }
1486 if (end > target_len) {
1487 end = target_len;
1488 } else if (end < 0) {
1489 end += target_len;
1490 if (end < 0)
1491 end = 0;
1492 }
1493
1494 /* zero-length substrings always match at the first attempt */
1495 if (pattern_len == 0)
1496 return (direction > 0) ? start : end;
1497
1498 end -= pattern_len;
1499
1500 if (direction < 0) {
1501 for (; end >= start; end--)
1502 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1503 return end;
1504 } else {
1505 for (; start <= end; start++)
1506 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1507 return start;
1508 }
1509 return -1;
1510}
1511
1512Py_LOCAL_INLINE(Py_ssize_t)
1513countstring(const char *target, Py_ssize_t target_len,
1514 const char *pattern, Py_ssize_t pattern_len,
1515 Py_ssize_t start,
1516 Py_ssize_t end,
1517 int direction, Py_ssize_t maxcount)
1518{
1519 Py_ssize_t count=0;
1520
1521 if (start < 0) {
1522 start += target_len;
1523 if (start < 0)
1524 start = 0;
1525 }
1526 if (end > target_len) {
1527 end = target_len;
1528 } else if (end < 0) {
1529 end += target_len;
1530 if (end < 0)
1531 end = 0;
1532 }
1533
1534 /* zero-length substrings match everywhere */
1535 if (pattern_len == 0 || maxcount == 0) {
1536 if (target_len+1 < maxcount)
1537 return target_len+1;
1538 return maxcount;
1539 }
1540
1541 end -= pattern_len;
1542 if (direction < 0) {
1543 for (; (end >= start); end--)
1544 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1545 count++;
1546 if (--maxcount <= 0) break;
1547 end -= pattern_len-1;
1548 }
1549 } else {
1550 for (; (start <= end); start++)
1551 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1552 count++;
1553 if (--maxcount <= 0)
1554 break;
1555 start += pattern_len-1;
1556 }
1557 }
1558 return count;
1559}
1560
1561
1562/* Algorithms for different cases of string replacement */
1563
1564/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1565Py_LOCAL(PyBytesObject *)
1566replace_interleave(PyBytesObject *self,
1567 const char *to_s, Py_ssize_t to_len,
1568 Py_ssize_t maxcount)
1569{
1570 char *self_s, *result_s;
1571 Py_ssize_t self_len, result_len;
1572 Py_ssize_t count, i, product;
1573 PyBytesObject *result;
1574
1575 self_len = PyBytes_GET_SIZE(self);
1576
1577 /* 1 at the end plus 1 after every character */
1578 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001579 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001580 count = maxcount;
1581
1582 /* Check for overflow */
1583 /* result_len = count * to_len + self_len; */
1584 product = count * to_len;
1585 if (product / to_len != count) {
1586 PyErr_SetString(PyExc_OverflowError,
1587 "replace string is too long");
1588 return NULL;
1589 }
1590 result_len = product + self_len;
1591 if (result_len < 0) {
1592 PyErr_SetString(PyExc_OverflowError,
1593 "replace string is too long");
1594 return NULL;
1595 }
1596
1597 if (! (result = (PyBytesObject *)
1598 PyBytes_FromStringAndSize(NULL, result_len)) )
1599 return NULL;
1600
1601 self_s = PyBytes_AS_STRING(self);
1602 result_s = PyBytes_AS_STRING(result);
1603
1604 /* TODO: special case single character, which doesn't need memcpy */
1605
1606 /* Lay the first one down (guaranteed this will occur) */
1607 Py_MEMCPY(result_s, to_s, to_len);
1608 result_s += to_len;
1609 count -= 1;
1610
1611 for (i=0; i<count; i++) {
1612 *result_s++ = *self_s++;
1613 Py_MEMCPY(result_s, to_s, to_len);
1614 result_s += to_len;
1615 }
1616
1617 /* Copy the rest of the original string */
1618 Py_MEMCPY(result_s, self_s, self_len-i);
1619
1620 return result;
1621}
1622
1623/* Special case for deleting a single character */
1624/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1625Py_LOCAL(PyBytesObject *)
1626replace_delete_single_character(PyBytesObject *self,
1627 char from_c, Py_ssize_t maxcount)
1628{
1629 char *self_s, *result_s;
1630 char *start, *next, *end;
1631 Py_ssize_t self_len, result_len;
1632 Py_ssize_t count;
1633 PyBytesObject *result;
1634
1635 self_len = PyBytes_GET_SIZE(self);
1636 self_s = PyBytes_AS_STRING(self);
1637
1638 count = countchar(self_s, self_len, from_c, maxcount);
1639 if (count == 0) {
1640 return return_self(self);
1641 }
1642
1643 result_len = self_len - count; /* from_len == 1 */
1644 assert(result_len>=0);
1645
1646 if ( (result = (PyBytesObject *)
1647 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1648 return NULL;
1649 result_s = PyBytes_AS_STRING(result);
1650
1651 start = self_s;
1652 end = self_s + self_len;
1653 while (count-- > 0) {
1654 next = findchar(start, end-start, from_c);
1655 if (next == NULL)
1656 break;
1657 Py_MEMCPY(result_s, start, next-start);
1658 result_s += (next-start);
1659 start = next+1;
1660 }
1661 Py_MEMCPY(result_s, start, end-start);
1662
1663 return result;
1664}
1665
1666/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1667
1668Py_LOCAL(PyBytesObject *)
1669replace_delete_substring(PyBytesObject *self,
1670 const char *from_s, Py_ssize_t from_len,
1671 Py_ssize_t maxcount)
1672{
1673 char *self_s, *result_s;
1674 char *start, *next, *end;
1675 Py_ssize_t self_len, result_len;
1676 Py_ssize_t count, offset;
1677 PyBytesObject *result;
1678
1679 self_len = PyBytes_GET_SIZE(self);
1680 self_s = PyBytes_AS_STRING(self);
1681
1682 count = countstring(self_s, self_len,
1683 from_s, from_len,
1684 0, self_len, 1,
1685 maxcount);
1686
1687 if (count == 0) {
1688 /* no matches */
1689 return return_self(self);
1690 }
1691
1692 result_len = self_len - (count * from_len);
1693 assert (result_len>=0);
1694
1695 if ( (result = (PyBytesObject *)
1696 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1697 return NULL;
1698
1699 result_s = PyBytes_AS_STRING(result);
1700
1701 start = self_s;
1702 end = self_s + self_len;
1703 while (count-- > 0) {
1704 offset = findstring(start, end-start,
1705 from_s, from_len,
1706 0, end-start, FORWARD);
1707 if (offset == -1)
1708 break;
1709 next = start + offset;
1710
1711 Py_MEMCPY(result_s, start, next-start);
1712
1713 result_s += (next-start);
1714 start = next+from_len;
1715 }
1716 Py_MEMCPY(result_s, start, end-start);
1717 return result;
1718}
1719
1720/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1721Py_LOCAL(PyBytesObject *)
1722replace_single_character_in_place(PyBytesObject *self,
1723 char from_c, char to_c,
1724 Py_ssize_t maxcount)
1725{
1726 char *self_s, *result_s, *start, *end, *next;
1727 Py_ssize_t self_len;
1728 PyBytesObject *result;
1729
1730 /* The result string will be the same size */
1731 self_s = PyBytes_AS_STRING(self);
1732 self_len = PyBytes_GET_SIZE(self);
1733
1734 next = findchar(self_s, self_len, from_c);
1735
1736 if (next == NULL) {
1737 /* No matches; return the original bytes */
1738 return return_self(self);
1739 }
1740
1741 /* Need to make a new bytes */
1742 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1743 if (result == NULL)
1744 return NULL;
1745 result_s = PyBytes_AS_STRING(result);
1746 Py_MEMCPY(result_s, self_s, self_len);
1747
1748 /* change everything in-place, starting with this one */
1749 start = result_s + (next-self_s);
1750 *start = to_c;
1751 start++;
1752 end = result_s + self_len;
1753
1754 while (--maxcount > 0) {
1755 next = findchar(start, end-start, from_c);
1756 if (next == NULL)
1757 break;
1758 *next = to_c;
1759 start = next+1;
1760 }
1761
1762 return result;
1763}
1764
1765/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1766Py_LOCAL(PyBytesObject *)
1767replace_substring_in_place(PyBytesObject *self,
1768 const char *from_s, Py_ssize_t from_len,
1769 const char *to_s, Py_ssize_t to_len,
1770 Py_ssize_t maxcount)
1771{
1772 char *result_s, *start, *end;
1773 char *self_s;
1774 Py_ssize_t self_len, offset;
1775 PyBytesObject *result;
1776
1777 /* The result bytes will be the same size */
1778
1779 self_s = PyBytes_AS_STRING(self);
1780 self_len = PyBytes_GET_SIZE(self);
1781
1782 offset = findstring(self_s, self_len,
1783 from_s, from_len,
1784 0, self_len, FORWARD);
1785 if (offset == -1) {
1786 /* No matches; return the original bytes */
1787 return return_self(self);
1788 }
1789
1790 /* Need to make a new bytes */
1791 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1792 if (result == NULL)
1793 return NULL;
1794 result_s = PyBytes_AS_STRING(result);
1795 Py_MEMCPY(result_s, self_s, self_len);
1796
1797 /* change everything in-place, starting with this one */
1798 start = result_s + offset;
1799 Py_MEMCPY(start, to_s, from_len);
1800 start += from_len;
1801 end = result_s + self_len;
1802
1803 while ( --maxcount > 0) {
1804 offset = findstring(start, end-start,
1805 from_s, from_len,
1806 0, end-start, FORWARD);
1807 if (offset==-1)
1808 break;
1809 Py_MEMCPY(start+offset, to_s, from_len);
1810 start += offset+from_len;
1811 }
1812
1813 return result;
1814}
1815
1816/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1817Py_LOCAL(PyBytesObject *)
1818replace_single_character(PyBytesObject *self,
1819 char from_c,
1820 const char *to_s, Py_ssize_t to_len,
1821 Py_ssize_t maxcount)
1822{
1823 char *self_s, *result_s;
1824 char *start, *next, *end;
1825 Py_ssize_t self_len, result_len;
1826 Py_ssize_t count, product;
1827 PyBytesObject *result;
1828
1829 self_s = PyBytes_AS_STRING(self);
1830 self_len = PyBytes_GET_SIZE(self);
1831
1832 count = countchar(self_s, self_len, from_c, maxcount);
1833 if (count == 0) {
1834 /* no matches, return unchanged */
1835 return return_self(self);
1836 }
1837
1838 /* use the difference between current and new, hence the "-1" */
1839 /* result_len = self_len + count * (to_len-1) */
1840 product = count * (to_len-1);
1841 if (product / (to_len-1) != count) {
1842 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1843 return NULL;
1844 }
1845 result_len = self_len + product;
1846 if (result_len < 0) {
1847 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1848 return NULL;
1849 }
1850
1851 if ( (result = (PyBytesObject *)
1852 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1853 return NULL;
1854 result_s = PyBytes_AS_STRING(result);
1855
1856 start = self_s;
1857 end = self_s + self_len;
1858 while (count-- > 0) {
1859 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001860 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001861 break;
1862
1863 if (next == start) {
1864 /* replace with the 'to' */
1865 Py_MEMCPY(result_s, to_s, to_len);
1866 result_s += to_len;
1867 start += 1;
1868 } else {
1869 /* copy the unchanged old then the 'to' */
1870 Py_MEMCPY(result_s, start, next-start);
1871 result_s += (next-start);
1872 Py_MEMCPY(result_s, to_s, to_len);
1873 result_s += to_len;
1874 start = next+1;
1875 }
1876 }
1877 /* Copy the remainder of the remaining bytes */
1878 Py_MEMCPY(result_s, start, end-start);
1879
1880 return result;
1881}
1882
1883/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1884Py_LOCAL(PyBytesObject *)
1885replace_substring(PyBytesObject *self,
1886 const char *from_s, Py_ssize_t from_len,
1887 const char *to_s, Py_ssize_t to_len,
1888 Py_ssize_t maxcount)
1889{
1890 char *self_s, *result_s;
1891 char *start, *next, *end;
1892 Py_ssize_t self_len, result_len;
1893 Py_ssize_t count, offset, product;
1894 PyBytesObject *result;
1895
1896 self_s = PyBytes_AS_STRING(self);
1897 self_len = PyBytes_GET_SIZE(self);
1898
1899 count = countstring(self_s, self_len,
1900 from_s, from_len,
1901 0, self_len, FORWARD, maxcount);
1902 if (count == 0) {
1903 /* no matches, return unchanged */
1904 return return_self(self);
1905 }
1906
1907 /* Check for overflow */
1908 /* result_len = self_len + count * (to_len-from_len) */
1909 product = count * (to_len-from_len);
1910 if (product / (to_len-from_len) != count) {
1911 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1912 return NULL;
1913 }
1914 result_len = self_len + product;
1915 if (result_len < 0) {
1916 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1917 return NULL;
1918 }
1919
1920 if ( (result = (PyBytesObject *)
1921 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1922 return NULL;
1923 result_s = PyBytes_AS_STRING(result);
1924
1925 start = self_s;
1926 end = self_s + self_len;
1927 while (count-- > 0) {
1928 offset = findstring(start, end-start,
1929 from_s, from_len,
1930 0, end-start, FORWARD);
1931 if (offset == -1)
1932 break;
1933 next = start+offset;
1934 if (next == start) {
1935 /* replace with the 'to' */
1936 Py_MEMCPY(result_s, to_s, to_len);
1937 result_s += to_len;
1938 start += from_len;
1939 } else {
1940 /* copy the unchanged old then the 'to' */
1941 Py_MEMCPY(result_s, start, next-start);
1942 result_s += (next-start);
1943 Py_MEMCPY(result_s, to_s, to_len);
1944 result_s += to_len;
1945 start = next+from_len;
1946 }
1947 }
1948 /* Copy the remainder of the remaining bytes */
1949 Py_MEMCPY(result_s, start, end-start);
1950
1951 return result;
1952}
1953
1954
1955Py_LOCAL(PyBytesObject *)
1956replace(PyBytesObject *self,
1957 const char *from_s, Py_ssize_t from_len,
1958 const char *to_s, Py_ssize_t to_len,
1959 Py_ssize_t maxcount)
1960{
1961 if (maxcount < 0) {
1962 maxcount = PY_SSIZE_T_MAX;
1963 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1964 /* nothing to do; return the original bytes */
1965 return return_self(self);
1966 }
1967
1968 if (maxcount == 0 ||
1969 (from_len == 0 && to_len == 0)) {
1970 /* nothing to do; return the original bytes */
1971 return return_self(self);
1972 }
1973
1974 /* Handle zero-length special cases */
1975
1976 if (from_len == 0) {
1977 /* insert the 'to' bytes everywhere. */
1978 /* >>> "Python".replace("", ".") */
1979 /* '.P.y.t.h.o.n.' */
1980 return replace_interleave(self, to_s, to_len, maxcount);
1981 }
1982
1983 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1984 /* point for an empty self bytes to generate a non-empty bytes */
1985 /* Special case so the remaining code always gets a non-empty bytes */
1986 if (PyBytes_GET_SIZE(self) == 0) {
1987 return return_self(self);
1988 }
1989
1990 if (to_len == 0) {
1991 /* delete all occurances of 'from' bytes */
1992 if (from_len == 1) {
1993 return replace_delete_single_character(
1994 self, from_s[0], maxcount);
1995 } else {
1996 return replace_delete_substring(self, from_s, from_len, maxcount);
1997 }
1998 }
1999
2000 /* Handle special case where both bytes have the same length */
2001
2002 if (from_len == to_len) {
2003 if (from_len == 1) {
2004 return replace_single_character_in_place(
2005 self,
2006 from_s[0],
2007 to_s[0],
2008 maxcount);
2009 } else {
2010 return replace_substring_in_place(
2011 self, from_s, from_len, to_s, to_len, maxcount);
2012 }
2013 }
2014
2015 /* Otherwise use the more generic algorithms */
2016 if (from_len == 1) {
2017 return replace_single_character(self, from_s[0],
2018 to_s, to_len, maxcount);
2019 } else {
2020 /* len('from')>=2, len('to')>=1 */
2021 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2022 }
2023}
2024
2025PyDoc_STRVAR(replace__doc__,
2026"B.replace (old, new[, count]) -> bytes\n\
2027\n\
2028Return a copy of bytes B with all occurrences of subsection\n\
2029old replaced by new. If the optional argument count is\n\
2030given, only the first count occurrences are replaced.");
2031
2032static PyObject *
2033bytes_replace(PyBytesObject *self, PyObject *args)
2034{
2035 Py_ssize_t count = -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002036 PyObject *from, *to, *res;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002037 PyBuffer vfrom, vto;
Neal Norwitz6968b052007-02-27 19:02:19 +00002038
2039 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2040 return NULL;
2041
Guido van Rossuma74184e2007-08-29 04:05:57 +00002042 if (_getbuffer(from, &vfrom) < 0)
2043 return NULL;
2044 if (_getbuffer(to, &vto) < 0) {
2045 PyObject_ReleaseBuffer(from, &vfrom);
2046 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002047 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002048
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002049 res = (PyObject *)replace((PyBytesObject *) self,
Guido van Rossuma74184e2007-08-29 04:05:57 +00002050 vfrom.buf, vfrom.len,
2051 vto.buf, vto.len, count);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002052
Guido van Rossuma74184e2007-08-29 04:05:57 +00002053 PyObject_ReleaseBuffer(from, &vfrom);
2054 PyObject_ReleaseBuffer(to, &vto);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002055 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00002056}
2057
2058
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Initial list size for at most `maxsplit` splits (5 splits gives 6
   elements), capped at MAX_PREALLOC.  The argument is parenthesized so
   that arbitrary expressions may be passed; it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] as a new bytes object to `list`.
   Requires locals `str` and `list` and a label `onError` in the caller.
   Braced so that it behaves as a single statement. */
#define SPLIT_APPEND(data, left, right) {                       \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str); }

/* Store data[left:right] as element number `count` of `list`: directly
   into a preallocated slot while count < MAX_PREALLOC, by appending
   afterwards.  Increments `count`.
   Requires locals `str`, `list`, `count` and a label `onError`. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Neal Norwitz6968b052007-02-27 19:02:19 +00002103
2104
2105Py_LOCAL_INLINE(PyObject *)
2106split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2107{
2108 register Py_ssize_t i, j, count=0;
2109 PyObject *str;
2110 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2111
2112 if (list == NULL)
2113 return NULL;
2114
2115 i = j = 0;
2116 while ((j < len) && (maxcount-- > 0)) {
2117 for(; j<len; j++) {
2118 /* I found that using memchr makes no difference */
2119 if (s[j] == ch) {
2120 SPLIT_ADD(s, i, j);
2121 i = j = j + 1;
2122 break;
2123 }
2124 }
2125 }
2126 if (i <= len) {
2127 SPLIT_ADD(s, i, len);
2128 }
2129 FIX_PREALLOC_SIZE(list);
2130 return list;
2131
2132 onError:
2133 Py_DECREF(list);
2134 return NULL;
2135}
2136
2137PyDoc_STRVAR(split__doc__,
2138"B.split(sep [,maxsplit]) -> list of bytes\n\
2139\n\
2140Return a list of the bytes in the string B, using sep as the\n\
2141delimiter. If maxsplit is given, at most maxsplit\n\
2142splits are done.");
2143
2144static PyObject *
2145bytes_split(PyBytesObject *self, PyObject *args)
2146{
2147 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2148 Py_ssize_t maxsplit = -1, count=0;
2149 const char *s = PyBytes_AS_STRING(self), *sub;
2150 PyObject *list, *str, *subobj;
2151#ifdef USE_FAST
2152 Py_ssize_t pos;
2153#endif
2154
2155 if (!PyArg_ParseTuple(args, "O|n:split", &subobj, &maxsplit))
2156 return NULL;
2157 if (maxsplit < 0)
2158 maxsplit = PY_SSIZE_T_MAX;
2159 if (PyBytes_Check(subobj)) {
2160 sub = PyBytes_AS_STRING(subobj);
2161 n = PyBytes_GET_SIZE(subobj);
2162 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002163 /* XXX -> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00002164 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2165 return NULL;
2166
2167 if (n == 0) {
2168 PyErr_SetString(PyExc_ValueError, "empty separator");
2169 return NULL;
2170 }
2171 else if (n == 1)
2172 return split_char(s, len, sub[0], maxsplit);
2173
2174 list = PyList_New(PREALLOC_SIZE(maxsplit));
2175 if (list == NULL)
2176 return NULL;
2177
2178#ifdef USE_FAST
2179 i = j = 0;
2180 while (maxsplit-- > 0) {
2181 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2182 if (pos < 0)
2183 break;
2184 j = i+pos;
2185 SPLIT_ADD(s, i, j);
2186 i = j + n;
2187 }
2188#else
2189 i = j = 0;
2190 while ((j+n <= len) && (maxsplit-- > 0)) {
2191 for (; j+n <= len; j++) {
2192 if (Py_STRING_MATCH(s, j, sub, n)) {
2193 SPLIT_ADD(s, i, j);
2194 i = j = j + n;
2195 break;
2196 }
2197 }
2198 }
2199#endif
2200 SPLIT_ADD(s, i, len);
2201 FIX_PREALLOC_SIZE(list);
2202 return list;
2203
2204 onError:
2205 Py_DECREF(list);
2206 return NULL;
2207}
2208
2209PyDoc_STRVAR(partition__doc__,
2210"B.partition(sep) -> (head, sep, tail)\n\
2211\n\
2212Searches for the separator sep in B, and returns the part before it,\n\
2213the separator itself, and the part after it. If the separator is not\n\
2214found, returns B and two empty bytes.");
2215
2216static PyObject *
2217bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2218{
2219 PyObject *bytesep, *result;
2220
2221 bytesep = PyBytes_FromObject(sep_obj);
2222 if (! bytesep)
2223 return NULL;
2224
2225 result = stringlib_partition(
2226 (PyObject*) self,
2227 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002228 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002229 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2230 );
2231
2232 Py_DECREF(bytesep);
2233 return result;
2234}
2235
2236PyDoc_STRVAR(rpartition__doc__,
2237"B.rpartition(sep) -> (tail, sep, head)\n\
2238\n\
2239Searches for the separator sep in B, starting at the end of B, and returns\n\
2240the part before it, the separator itself, and the part after it. If the\n\
2241separator is not found, returns two empty bytes and B.");
2242
2243static PyObject *
2244bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2245{
2246 PyObject *bytesep, *result;
2247
2248 bytesep = PyBytes_FromObject(sep_obj);
2249 if (! bytesep)
2250 return NULL;
2251
2252 result = stringlib_rpartition(
2253 (PyObject*) self,
2254 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002255 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002256 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2257 );
2258
2259 Py_DECREF(bytesep);
2260 return result;
2261}
2262
2263Py_LOCAL_INLINE(PyObject *)
2264rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2265{
2266 register Py_ssize_t i, j, count=0;
2267 PyObject *str;
2268 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2269
2270 if (list == NULL)
2271 return NULL;
2272
2273 i = j = len - 1;
2274 while ((i >= 0) && (maxcount-- > 0)) {
2275 for (; i >= 0; i--) {
2276 if (s[i] == ch) {
2277 SPLIT_ADD(s, i + 1, j + 1);
2278 j = i = i - 1;
2279 break;
2280 }
2281 }
2282 }
2283 if (j >= -1) {
2284 SPLIT_ADD(s, 0, j + 1);
2285 }
2286 FIX_PREALLOC_SIZE(list);
2287 if (PyList_Reverse(list) < 0)
2288 goto onError;
2289
2290 return list;
2291
2292 onError:
2293 Py_DECREF(list);
2294 return NULL;
2295}
2296
2297PyDoc_STRVAR(rsplit__doc__,
2298"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2299\n\
2300Return a list of the sections in the byte B, using sep as the\n\
2301delimiter, starting at the end of the bytes and working\n\
2302to the front. If maxsplit is given, at most maxsplit splits are\n\
2303done.");
2304
2305static PyObject *
2306bytes_rsplit(PyBytesObject *self, PyObject *args)
2307{
2308 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2309 Py_ssize_t maxsplit = -1, count=0;
2310 const char *s = PyBytes_AS_STRING(self), *sub;
2311 PyObject *list, *str, *subobj;
2312
2313 if (!PyArg_ParseTuple(args, "O|n:rsplit", &subobj, &maxsplit))
2314 return NULL;
2315 if (maxsplit < 0)
2316 maxsplit = PY_SSIZE_T_MAX;
2317 if (PyBytes_Check(subobj)) {
2318 sub = PyBytes_AS_STRING(subobj);
2319 n = PyBytes_GET_SIZE(subobj);
2320 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002321 /* XXX -> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00002322 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2323 return NULL;
2324
2325 if (n == 0) {
2326 PyErr_SetString(PyExc_ValueError, "empty separator");
2327 return NULL;
2328 }
2329 else if (n == 1)
2330 return rsplit_char(s, len, sub[0], maxsplit);
2331
2332 list = PyList_New(PREALLOC_SIZE(maxsplit));
2333 if (list == NULL)
2334 return NULL;
2335
2336 j = len;
2337 i = j - n;
2338
2339 while ( (i >= 0) && (maxsplit-- > 0) ) {
2340 for (; i>=0; i--) {
2341 if (Py_STRING_MATCH(s, i, sub, n)) {
2342 SPLIT_ADD(s, i + n, j);
2343 j = i;
2344 i -= n;
2345 break;
2346 }
2347 }
2348 }
2349 SPLIT_ADD(s, 0, j);
2350 FIX_PREALLOC_SIZE(list);
2351 if (PyList_Reverse(list) < 0)
2352 goto onError;
2353 return list;
2354
2355onError:
2356 Py_DECREF(list);
2357 return NULL;
2358}
2359
2360PyDoc_STRVAR(extend__doc__,
2361"B.extend(iterable int) -> None\n\
2362\n\
2363Append all the elements from the iterator or sequence to the\n\
2364end of the bytes.");
2365static PyObject *
2366bytes_extend(PyBytesObject *self, PyObject *arg)
2367{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002368 if (bytes_setslice(self, Py_Size(self), Py_Size(self), arg) == -1)
Neal Norwitz6968b052007-02-27 19:02:19 +00002369 return NULL;
2370 Py_RETURN_NONE;
2371}
2372
2373
2374PyDoc_STRVAR(reverse__doc__,
2375"B.reverse() -> None\n\
2376\n\
2377Reverse the order of the values in bytes in place.");
2378static PyObject *
2379bytes_reverse(PyBytesObject *self, PyObject *unused)
2380{
2381 char swap, *head, *tail;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002382 Py_ssize_t i, j, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002383
2384 j = n / 2;
2385 head = self->ob_bytes;
2386 tail = head + n - 1;
2387 for (i = 0; i < j; i++) {
2388 swap = *head;
2389 *head++ = *tail;
2390 *tail-- = swap;
2391 }
2392
2393 Py_RETURN_NONE;
2394}
2395
2396PyDoc_STRVAR(insert__doc__,
2397"B.insert(index, int) -> None\n\
2398\n\
2399Insert a single item into the bytes before the given index.");
2400static PyObject *
2401bytes_insert(PyBytesObject *self, PyObject *args)
2402{
2403 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002404 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002405
2406 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2407 return NULL;
2408
2409 if (n == PY_SSIZE_T_MAX) {
2410 PyErr_SetString(PyExc_OverflowError,
2411 "cannot add more objects to bytes");
2412 return NULL;
2413 }
2414 if (value < 0 || value >= 256) {
2415 PyErr_SetString(PyExc_ValueError,
2416 "byte must be in range(0, 256)");
2417 return NULL;
2418 }
2419 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2420 return NULL;
2421
2422 if (where < 0) {
2423 where += n;
2424 if (where < 0)
2425 where = 0;
2426 }
2427 if (where > n)
2428 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002429 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002430 self->ob_bytes[where] = value;
2431
2432 Py_RETURN_NONE;
2433}
2434
2435PyDoc_STRVAR(append__doc__,
2436"B.append(int) -> None\n\
2437\n\
2438Append a single item to the end of the bytes.");
2439static PyObject *
2440bytes_append(PyBytesObject *self, PyObject *arg)
2441{
2442 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002443 Py_ssize_t n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002444
2445 if (! _getbytevalue(arg, &value))
2446 return NULL;
2447 if (n == PY_SSIZE_T_MAX) {
2448 PyErr_SetString(PyExc_OverflowError,
2449 "cannot add more objects to bytes");
2450 return NULL;
2451 }
2452 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2453 return NULL;
2454
2455 self->ob_bytes[n] = value;
2456
2457 Py_RETURN_NONE;
2458}
2459
2460PyDoc_STRVAR(pop__doc__,
2461"B.pop([index]) -> int\n\
2462\n\
2463Remove and return a single item from the bytes. If no index\n\
2464argument is give, will pop the last value.");
2465static PyObject *
2466bytes_pop(PyBytesObject *self, PyObject *args)
2467{
2468 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002469 Py_ssize_t where = -1, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002470
2471 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2472 return NULL;
2473
2474 if (n == 0) {
2475 PyErr_SetString(PyExc_OverflowError,
2476 "cannot pop an empty bytes");
2477 return NULL;
2478 }
2479 if (where < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002480 where += Py_Size(self);
2481 if (where < 0 || where >= Py_Size(self)) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002482 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2483 return NULL;
2484 }
2485
2486 value = self->ob_bytes[where];
2487 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2488 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2489 return NULL;
2490
2491 return PyInt_FromLong(value);
2492}
2493
2494PyDoc_STRVAR(remove__doc__,
2495"B.remove(int) -> None\n\
2496\n\
2497Remove the first occurance of a value in bytes");
2498static PyObject *
2499bytes_remove(PyBytesObject *self, PyObject *arg)
2500{
2501 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002502 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002503
2504 if (! _getbytevalue(arg, &value))
2505 return NULL;
2506
2507 for (where = 0; where < n; where++) {
2508 if (self->ob_bytes[where] == value)
2509 break;
2510 }
2511 if (where == n) {
2512 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2513 return NULL;
2514 }
2515
2516 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2517 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2518 return NULL;
2519
2520 Py_RETURN_NONE;
2521}
2522
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002523/* XXX These two helpers could be optimized if argsize == 1 */
2524
Neal Norwitz2bad9702007-08-27 06:19:22 +00002525static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002526lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2527 void *argptr, Py_ssize_t argsize)
2528{
2529 Py_ssize_t i = 0;
2530 while (i < mysize && memchr(argptr, myptr[i], argsize))
2531 i++;
2532 return i;
2533}
2534
Neal Norwitz2bad9702007-08-27 06:19:22 +00002535static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002536rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2537 void *argptr, Py_ssize_t argsize)
2538{
2539 Py_ssize_t i = mysize - 1;
2540 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2541 i--;
2542 return i + 1;
2543}
2544
2545PyDoc_STRVAR(strip__doc__,
2546"B.strip(bytes) -> bytes\n\
2547\n\
2548Strip leading and trailing bytes contained in the argument.");
2549static PyObject *
2550bytes_strip(PyBytesObject *self, PyObject *arg)
2551{
2552 Py_ssize_t left, right, mysize, argsize;
2553 void *myptr, *argptr;
2554 if (arg == NULL || !PyBytes_Check(arg)) {
2555 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2556 return NULL;
2557 }
2558 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002559 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002560 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002561 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002562 left = lstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumeb29e9a2007-08-08 21:55:33 +00002563 if (left == mysize)
2564 right = left;
2565 else
2566 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002567 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2568}
2569
2570PyDoc_STRVAR(lstrip__doc__,
2571"B.lstrip(bytes) -> bytes\n\
2572\n\
2573Strip leading bytes contained in the argument.");
2574static PyObject *
2575bytes_lstrip(PyBytesObject *self, PyObject *arg)
2576{
2577 Py_ssize_t left, right, mysize, argsize;
2578 void *myptr, *argptr;
2579 if (arg == NULL || !PyBytes_Check(arg)) {
2580 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2581 return NULL;
2582 }
2583 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002584 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002585 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002586 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002587 left = lstrip_helper(myptr, mysize, argptr, argsize);
2588 right = mysize;
2589 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2590}
2591
2592PyDoc_STRVAR(rstrip__doc__,
2593"B.rstrip(bytes) -> bytes\n\
2594\n\
2595Strip trailing bytes contained in the argument.");
2596static PyObject *
2597bytes_rstrip(PyBytesObject *self, PyObject *arg)
2598{
2599 Py_ssize_t left, right, mysize, argsize;
2600 void *myptr, *argptr;
2601 if (arg == NULL || !PyBytes_Check(arg)) {
2602 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2603 return NULL;
2604 }
2605 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002606 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002607 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002608 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002609 left = 0;
2610 right = rstrip_helper(myptr, mysize, argptr, argsize);
2611 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2612}
Neal Norwitz6968b052007-02-27 19:02:19 +00002613
Guido van Rossumd624f182006-04-24 13:47:05 +00002614PyDoc_STRVAR(decode_doc,
2615"B.decode([encoding[,errors]]) -> unicode obect.\n\
2616\n\
2617Decodes B using the codec registered for encoding. encoding defaults\n\
2618to the default encoding. errors may be given to set a different error\n\
2619handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2620a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2621as well as any other name registerd with codecs.register_error that is\n\
2622able to handle UnicodeDecodeErrors.");
2623
2624static PyObject *
2625bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002626{
Guido van Rossumd624f182006-04-24 13:47:05 +00002627 const char *encoding = NULL;
2628 const char *errors = NULL;
2629
2630 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2631 return NULL;
2632 if (encoding == NULL)
2633 encoding = PyUnicode_GetDefaultEncoding();
2634 return PyCodec_Decode(self, encoding, errors);
2635}
2636
Guido van Rossuma0867f72006-05-05 04:34:18 +00002637PyDoc_STRVAR(alloc_doc,
2638"B.__alloc__() -> int\n\
2639\n\
2640Returns the number of bytes actually allocated.");
2641
2642static PyObject *
2643bytes_alloc(PyBytesObject *self)
2644{
2645 return PyInt_FromSsize_t(self->ob_alloc);
2646}
2647
Guido van Rossum20188312006-05-05 15:15:40 +00002648PyDoc_STRVAR(join_doc,
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002649"B.join(iterable_of_bytes) -> bytes\n\
Guido van Rossum20188312006-05-05 15:15:40 +00002650\n\
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002651Concatenates any number of bytes objects, with B in between each pair.\n\
2652Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Guido van Rossum20188312006-05-05 15:15:40 +00002653
2654static PyObject *
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002655bytes_join(PyBytesObject *self, PyObject *it)
Guido van Rossum20188312006-05-05 15:15:40 +00002656{
2657 PyObject *seq;
Martin v. Löwis5d7428b2007-07-21 18:47:48 +00002658 Py_ssize_t mysize = Py_Size(self);
Guido van Rossum20188312006-05-05 15:15:40 +00002659 Py_ssize_t i;
2660 Py_ssize_t n;
2661 PyObject **items;
2662 Py_ssize_t totalsize = 0;
2663 PyObject *result;
2664 char *dest;
2665
2666 seq = PySequence_Fast(it, "can only join an iterable");
2667 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002668 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002669 n = PySequence_Fast_GET_SIZE(seq);
2670 items = PySequence_Fast_ITEMS(seq);
2671
2672 /* Compute the total size, and check that they are all bytes */
2673 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002674 PyObject *obj = items[i];
2675 if (!PyBytes_Check(obj)) {
2676 PyErr_Format(PyExc_TypeError,
2677 "can only join an iterable of bytes "
2678 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002679 /* XXX %ld isn't right on Win64 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002680 (long)i, Py_Type(obj)->tp_name);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002681 goto error;
2682 }
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002683 if (i > 0)
2684 totalsize += mysize;
Georg Brandlb3f568f2007-02-27 08:49:18 +00002685 totalsize += PyBytes_GET_SIZE(obj);
2686 if (totalsize < 0) {
2687 PyErr_NoMemory();
2688 goto error;
2689 }
Guido van Rossum20188312006-05-05 15:15:40 +00002690 }
2691
2692 /* Allocate the result, and copy the bytes */
2693 result = PyBytes_FromStringAndSize(NULL, totalsize);
2694 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002695 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002696 dest = PyBytes_AS_STRING(result);
2697 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002698 PyObject *obj = items[i];
2699 Py_ssize_t size = PyBytes_GET_SIZE(obj);
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002700 if (i > 0) {
2701 memcpy(dest, self->ob_bytes, mysize);
2702 dest += mysize;
2703 }
Georg Brandlb3f568f2007-02-27 08:49:18 +00002704 memcpy(dest, PyBytes_AS_STRING(obj), size);
2705 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002706 }
2707
2708 /* Done */
2709 Py_DECREF(seq);
2710 return result;
2711
2712 /* Error handling */
2713 error:
2714 Py_DECREF(seq);
2715 return NULL;
2716}
2717
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002718PyDoc_STRVAR(fromhex_doc,
2719"bytes.fromhex(string) -> bytes\n\
2720\n\
2721Create a bytes object from a string of hexadecimal numbers.\n\
2722Spaces between two numbers are accepted. Example:\n\
2723bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2724
/* Converts one hexadecimal digit character (0-9, a-f, A-F) to its value
 * 0..15.  Returns -1 for any other character.
 */
static int
hex_digit_to_int(int c)
{
    if (isdigit(c))
        return c - '0';

    /* Fold upper case so that a single range test covers both cases. */
    if (isupper(c))
        c = tolower(c);

    return (c >= 'a' && c <= 'f') ? c - 'a' + 10 : -1;
}
2738
2739static PyObject *
2740bytes_fromhex(PyObject *cls, PyObject *args)
2741{
2742 PyObject *newbytes;
2743 char *hex, *buf;
2744 Py_ssize_t len, byteslen, i, j;
2745 int top, bot;
2746
2747 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
2748 return NULL;
2749
2750 byteslen = len / 2; /* max length if there are no spaces */
2751
2752 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
2753 if (!newbytes)
2754 return NULL;
2755 buf = PyBytes_AS_STRING(newbytes);
2756
Guido van Rossum4355a472007-05-04 05:00:04 +00002757 for (i = j = 0; i < len; i += 2) {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002758 /* skip over spaces in the input */
2759 while (Py_CHARMASK(hex[i]) == ' ')
2760 i++;
2761 if (i >= len)
2762 break;
2763 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2764 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2765 if (top == -1 || bot == -1) {
2766 PyErr_Format(PyExc_ValueError,
2767 "non-hexadecimal number string '%c%c' found in "
2768 "fromhex() arg at position %zd",
2769 hex[i], hex[i+1], i);
2770 goto error;
2771 }
2772 buf[j++] = (top << 4) + bot;
2773 }
2774 if (PyBytes_Resize(newbytes, j) < 0)
2775 goto error;
2776 return newbytes;
2777
2778 error:
2779 Py_DECREF(newbytes);
2780 return NULL;
2781}
2782
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002783PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2784
2785static PyObject *
2786bytes_reduce(PyBytesObject *self)
2787{
Martin v. Löwis9c121062007-08-05 20:26:11 +00002788 PyObject *latin1;
2789 if (self->ob_bytes)
Guido van Rossuma74184e2007-08-29 04:05:57 +00002790 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
2791 Py_Size(self), NULL);
Martin v. Löwis9c121062007-08-05 20:26:11 +00002792 else
Guido van Rossuma74184e2007-08-29 04:05:57 +00002793 latin1 = PyUnicode_FromString("");
Martin v. Löwis9c121062007-08-05 20:26:11 +00002794 return Py_BuildValue("(O(Ns))", Py_Type(self), latin1, "latin-1");
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002795}
2796
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002797static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002798 (lenfunc)bytes_length, /* sq_length */
2799 (binaryfunc)bytes_concat, /* sq_concat */
2800 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2801 (ssizeargfunc)bytes_getitem, /* sq_item */
2802 0, /* sq_slice */
2803 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2804 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002805 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002806 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2807 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002808};
2809
2810static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002811 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00002812 (binaryfunc)bytes_subscript,
2813 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002814};
2815
2816static PyBufferProcs bytes_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002817 (getbufferproc)bytes_getbuffer,
2818 (releasebufferproc)bytes_releasebuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002819};
2820
2821static PyMethodDef
2822bytes_methods[] = {
Neal Norwitz6968b052007-02-27 19:02:19 +00002823 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2824 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2825 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2826 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2827 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2828 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
2829 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2830 startswith__doc__},
2831 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2832 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
2833 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2834 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
2835 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2836 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2837 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
2838 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
2839 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
2840 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
2841 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
2842 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002843 {"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__},
2844 {"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__},
2845 {"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__},
Guido van Rossumd624f182006-04-24 13:47:05 +00002846 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002847 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002848 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2849 fromhex_doc},
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002850 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002851 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002852 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002853};
2854
2855PyDoc_STRVAR(bytes_doc,
2856"bytes([iterable]) -> new array of bytes.\n\
2857\n\
2858If an argument is given it must be an iterable yielding ints in range(256).");
2859
2860PyTypeObject PyBytes_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002861 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002862 "bytes",
2863 sizeof(PyBytesObject),
2864 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00002865 (destructor)bytes_dealloc, /* tp_dealloc */
2866 0, /* tp_print */
2867 0, /* tp_getattr */
2868 0, /* tp_setattr */
2869 0, /* tp_compare */
2870 (reprfunc)bytes_repr, /* tp_repr */
2871 0, /* tp_as_number */
2872 &bytes_as_sequence, /* tp_as_sequence */
2873 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002874 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00002875 0, /* tp_call */
2876 (reprfunc)bytes_str, /* tp_str */
2877 PyObject_GenericGetAttr, /* tp_getattro */
2878 0, /* tp_setattro */
2879 &bytes_as_buffer, /* tp_as_buffer */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002880 /* bytes is 'final' or 'sealed' */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002881 Py_TPFLAGS_DEFAULT, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00002882 bytes_doc, /* tp_doc */
2883 0, /* tp_traverse */
2884 0, /* tp_clear */
2885 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2886 0, /* tp_weaklistoffset */
2887 0, /* tp_iter */
2888 0, /* tp_iternext */
2889 bytes_methods, /* tp_methods */
2890 0, /* tp_members */
2891 0, /* tp_getset */
2892 0, /* tp_base */
2893 0, /* tp_dict */
2894 0, /* tp_descr_get */
2895 0, /* tp_descr_set */
2896 0, /* tp_dictoffset */
2897 (initproc)bytes_init, /* tp_init */
2898 PyType_GenericAlloc, /* tp_alloc */
2899 PyType_GenericNew, /* tp_new */
2900 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002901};