blob: b28cacf09f17cca7ba122e24d6a8c227c0bfad44 [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
4#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00005#include "structmember.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00006#include "bytes_methods.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00007
Neal Norwitz6968b052007-02-27 19:02:19 +00008static PyBytesObject *nullbytes = NULL;
9
10void
11PyBytes_Fini(void)
12{
13 Py_CLEAR(nullbytes);
14}
15
16int
17PyBytes_Init(void)
18{
19 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000023 Py_Size(nullbytes) = nullbytes->ob_alloc = 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000024 nullbytes->ob_exports = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +000025 return 1;
26}
27
28/* end nullbytes support */
29
Guido van Rossumad7d8d12007-04-13 01:39:34 +000030/* Helpers */
31
32static int
33_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000034{
Gregory P. Smith60d241f2007-10-16 06:31:30 +000035 long face_value;
36
37 if (PyInt_Check(arg)) {
38 face_value = PyInt_AsLong(arg);
39 if (face_value < 0 || face_value >= 256) {
40 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
41 return 0;
42 }
43 } else {
44 PyErr_Format(PyExc_TypeError, "an integer is required");
Neal Norwitz6968b052007-02-27 19:02:19 +000045 return 0;
46 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +000047
48 *value = face_value;
Neal Norwitz6968b052007-02-27 19:02:19 +000049 return 1;
50}
51
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000052static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000053bytes_getbuffer(PyBytesObject *obj, Py_buffer *view, int flags)
Guido van Rossum75d38e92007-08-24 17:33:11 +000054{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000055 int ret;
56 void *ptr;
57 if (view == NULL) {
58 obj->ob_exports++;
59 return 0;
60 }
Guido van Rossum75d38e92007-08-24 17:33:11 +000061 if (obj->ob_bytes == NULL)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000062 ptr = "";
63 else
64 ptr = obj->ob_bytes;
65 ret = PyBuffer_FillInfo(view, ptr, Py_Size(obj), 0, flags);
66 if (ret >= 0) {
67 obj->ob_exports++;
68 }
69 return ret;
70}
71
72static void
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000073bytes_releasebuffer(PyBytesObject *obj, Py_buffer *view)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000074{
75 obj->ob_exports--;
76}
77
Neal Norwitz2bad9702007-08-27 06:19:22 +000078static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000079_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000080{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000081 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000082
Gregory P. Smith60d241f2007-10-16 06:31:30 +000083 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000084 {
85 PyErr_Format(PyExc_TypeError,
86 "Type %.100s doesn't support the buffer API",
87 Py_Type(obj)->tp_name);
88 return -1;
89 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000090
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000091 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
92 return -1;
93 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000094}
95
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000096/* Direct API functions */
97
98PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +000099PyBytes_FromObject(PyObject *input)
100{
101 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
102 input, NULL);
103}
104
105PyObject *
106PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000107{
108 PyBytesObject *new;
Neal Norwitz61ec0d32007-10-26 06:44:10 +0000109 Py_ssize_t alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000110
Guido van Rossumd624f182006-04-24 13:47:05 +0000111 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000112
113 new = PyObject_New(PyBytesObject, &PyBytes_Type);
114 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000115 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000116
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000117 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000118 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000119 alloc = 0;
120 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000121 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000122 alloc = size + 1;
123 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +0000124 if (new->ob_bytes == NULL) {
125 Py_DECREF(new);
Neal Norwitz16596dd2007-08-30 05:44:54 +0000126 return PyErr_NoMemory();
Guido van Rossumd624f182006-04-24 13:47:05 +0000127 }
128 if (bytes != NULL)
129 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000130 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000131 }
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000132 Py_Size(new) = size;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000133 new->ob_alloc = alloc;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000134 new->ob_exports = 0;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000135
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000136 return (PyObject *)new;
137}
138
139Py_ssize_t
140PyBytes_Size(PyObject *self)
141{
142 assert(self != NULL);
143 assert(PyBytes_Check(self));
144
Guido van Rossum20188312006-05-05 15:15:40 +0000145 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000146}
147
148char *
149PyBytes_AsString(PyObject *self)
150{
151 assert(self != NULL);
152 assert(PyBytes_Check(self));
153
Guido van Rossum20188312006-05-05 15:15:40 +0000154 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000155}
156
157int
158PyBytes_Resize(PyObject *self, Py_ssize_t size)
159{
160 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000161 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000162
163 assert(self != NULL);
164 assert(PyBytes_Check(self));
165 assert(size >= 0);
166
Guido van Rossuma0867f72006-05-05 04:34:18 +0000167 if (size < alloc / 2) {
168 /* Major downsize; resize down to exact size */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000169 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000170 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000171 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000172 /* Within allocated size; quick exit */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000173 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000174 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000175 return 0;
176 }
177 else if (size <= alloc * 1.125) {
178 /* Moderate upsize; overallocate similar to list_resize() */
179 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
180 }
181 else {
182 /* Major upsize; resize up to exact size */
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000183 alloc = size + 1;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000184 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000185
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000186 if (((PyBytesObject *)self)->ob_exports > 0) {
187 /*
Guido van Rossuma74184e2007-08-29 04:05:57 +0000188 fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
189 ((PyBytesObject *)self)->ob_bytes);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000190 */
191 PyErr_SetString(PyExc_BufferError,
Guido van Rossuma74184e2007-08-29 04:05:57 +0000192 "Existing exports of data: object cannot be re-sized");
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000193 return -1;
194 }
195
Guido van Rossuma0867f72006-05-05 04:34:18 +0000196 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000197 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000198 PyErr_NoMemory();
199 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000200 }
201
Guido van Rossumd624f182006-04-24 13:47:05 +0000202 ((PyBytesObject *)self)->ob_bytes = sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000203 Py_Size(self) = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000204 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000205 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
206
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000207 return 0;
208}
209
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000210PyObject *
211PyBytes_Concat(PyObject *a, PyObject *b)
212{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000213 Py_ssize_t size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000214 Py_buffer va, vb;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000215 PyBytesObject *result = NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000216
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000217 va.len = -1;
218 vb.len = -1;
219 if (_getbuffer(a, &va) < 0 ||
220 _getbuffer(b, &vb) < 0) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000221 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
222 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000223 goto done;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000224 }
225
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000226 size = va.len + vb.len;
227 if (size < 0) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000228 return PyErr_NoMemory();
Guido van Rossum98297ee2007-11-06 21:34:58 +0000229 goto done;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000230 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000231
232 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
233 if (result != NULL) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000234 memcpy(result->ob_bytes, va.buf, va.len);
235 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000236 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000237
Guido van Rossum98297ee2007-11-06 21:34:58 +0000238 done:
239 if (va.len != -1)
240 PyObject_ReleaseBuffer(a, &va);
241 if (vb.len != -1)
242 PyObject_ReleaseBuffer(b, &vb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000243 return (PyObject *)result;
244}
245
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000246/* Functions stuffed into the type object */
247
248static Py_ssize_t
249bytes_length(PyBytesObject *self)
250{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000251 return Py_Size(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000252}
253
254static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000255bytes_iconcat(PyBytesObject *self, PyObject *other)
256{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000257 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000258 Py_ssize_t size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000259 Py_buffer vo;
Guido van Rossum13e57212006-04-27 22:54:26 +0000260
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000261 if (_getbuffer(other, &vo) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000262 PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
263 Py_Type(self)->tp_name);
264 return NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000265 }
266
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000267 mysize = Py_Size(self);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000268 size = mysize + vo.len;
269 if (size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000270 PyObject_ReleaseBuffer(other, &vo);
271 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000272 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000273 if (size < self->ob_alloc) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000274 Py_Size(self) = size;
275 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000276 }
277 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000278 PyObject_ReleaseBuffer(other, &vo);
279 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000280 }
281 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
282 PyObject_ReleaseBuffer(other, &vo);
Guido van Rossum13e57212006-04-27 22:54:26 +0000283 Py_INCREF(self);
284 return (PyObject *)self;
285}
286
287static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000288bytes_repeat(PyBytesObject *self, Py_ssize_t count)
289{
290 PyBytesObject *result;
291 Py_ssize_t mysize;
292 Py_ssize_t size;
293
294 if (count < 0)
295 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000296 mysize = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000297 size = mysize * count;
298 if (count != 0 && size / count != mysize)
299 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000300 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000301 if (result != NULL && size != 0) {
302 if (mysize == 1)
303 memset(result->ob_bytes, self->ob_bytes[0], size);
304 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000305 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000306 for (i = 0; i < count; i++)
307 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
308 }
309 }
310 return (PyObject *)result;
311}
312
313static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000314bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
315{
316 Py_ssize_t mysize;
317 Py_ssize_t size;
318
319 if (count < 0)
320 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000321 mysize = Py_Size(self);
Guido van Rossum13e57212006-04-27 22:54:26 +0000322 size = mysize * count;
323 if (count != 0 && size / count != mysize)
324 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000325 if (size < self->ob_alloc) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000326 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000327 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000328 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000329 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000330 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000331
Guido van Rossum13e57212006-04-27 22:54:26 +0000332 if (mysize == 1)
333 memset(self->ob_bytes, self->ob_bytes[0], size);
334 else {
335 Py_ssize_t i;
336 for (i = 1; i < count; i++)
337 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
338 }
339
340 Py_INCREF(self);
341 return (PyObject *)self;
342}
343
Guido van Rossum13e57212006-04-27 22:54:26 +0000344static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000345bytes_getitem(PyBytesObject *self, Py_ssize_t i)
346{
347 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000348 i += Py_Size(self);
349 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000350 PyErr_SetString(PyExc_IndexError, "buffer index out of range");
Guido van Rossumd624f182006-04-24 13:47:05 +0000351 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000352 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000353 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
354}
355
356static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000357bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000358{
Thomas Wouters376446d2006-12-19 08:30:14 +0000359 if (PyIndex_Check(item)) {
360 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000361
Thomas Wouters376446d2006-12-19 08:30:14 +0000362 if (i == -1 && PyErr_Occurred())
363 return NULL;
364
365 if (i < 0)
366 i += PyBytes_GET_SIZE(self);
367
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000368 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000369 PyErr_SetString(PyExc_IndexError, "buffer index out of range");
Thomas Wouters376446d2006-12-19 08:30:14 +0000370 return NULL;
371 }
372 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
373 }
374 else if (PySlice_Check(item)) {
375 Py_ssize_t start, stop, step, slicelength, cur, i;
376 if (PySlice_GetIndicesEx((PySliceObject *)item,
377 PyBytes_GET_SIZE(self),
378 &start, &stop, &step, &slicelength) < 0) {
379 return NULL;
380 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000381
Thomas Wouters376446d2006-12-19 08:30:14 +0000382 if (slicelength <= 0)
383 return PyBytes_FromStringAndSize("", 0);
384 else if (step == 1) {
385 return PyBytes_FromStringAndSize(self->ob_bytes + start,
386 slicelength);
387 }
388 else {
389 char *source_buf = PyBytes_AS_STRING(self);
390 char *result_buf = (char *)PyMem_Malloc(slicelength);
391 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000392
Thomas Wouters376446d2006-12-19 08:30:14 +0000393 if (result_buf == NULL)
394 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000395
Thomas Wouters376446d2006-12-19 08:30:14 +0000396 for (cur = start, i = 0; i < slicelength;
397 cur += step, i++) {
398 result_buf[i] = source_buf[cur];
399 }
400 result = PyBytes_FromStringAndSize(result_buf, slicelength);
401 PyMem_Free(result_buf);
402 return result;
403 }
404 }
405 else {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000406 PyErr_SetString(PyExc_TypeError, "buffer indices must be integers");
Thomas Wouters376446d2006-12-19 08:30:14 +0000407 return NULL;
408 }
409}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000410
Guido van Rossumd624f182006-04-24 13:47:05 +0000411static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000412bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000413 PyObject *values)
414{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000415 Py_ssize_t avail, needed;
416 void *bytes;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000417 Py_buffer vbytes;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000418 int res = 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000419
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000420 vbytes.len = -1;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000421 if (values == (PyObject *)self) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000422 /* Make a copy and call this function recursively */
Guido van Rossumd624f182006-04-24 13:47:05 +0000423 int err;
424 values = PyBytes_FromObject(values);
425 if (values == NULL)
426 return -1;
427 err = bytes_setslice(self, lo, hi, values);
428 Py_DECREF(values);
429 return err;
430 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000431 if (values == NULL) {
432 /* del b[lo:hi] */
433 bytes = NULL;
434 needed = 0;
435 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000436 else {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000437 if (_getbuffer(values, &vbytes) < 0) {
438 PyErr_Format(PyExc_TypeError,
439 "can't set bytes slice from %.100s",
440 Py_Type(values)->tp_name);
441 return -1;
442 }
443 needed = vbytes.len;
444 bytes = vbytes.buf;
Guido van Rossumd624f182006-04-24 13:47:05 +0000445 }
446
447 if (lo < 0)
448 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000449 if (hi < lo)
450 hi = lo;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000451 if (hi > Py_Size(self))
452 hi = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000453
454 avail = hi - lo;
455 if (avail < 0)
456 lo = hi = avail = 0;
457
458 if (avail != needed) {
459 if (avail > needed) {
460 /*
461 0 lo hi old_size
462 | |<----avail----->|<-----tomove------>|
463 | |<-needed->|<-----tomove------>|
464 0 lo new_hi new_size
465 */
466 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000467 Py_Size(self) - hi);
Guido van Rossumd624f182006-04-24 13:47:05 +0000468 }
Guido van Rossuma74184e2007-08-29 04:05:57 +0000469 /* XXX(nnorwitz): need to verify this can't overflow! */
Thomas Wouters376446d2006-12-19 08:30:14 +0000470 if (PyBytes_Resize((PyObject *)self,
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000471 Py_Size(self) + needed - avail) < 0) {
472 res = -1;
473 goto finish;
474 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000475 if (avail < needed) {
476 /*
477 0 lo hi old_size
478 | |<-avail->|<-----tomove------>|
479 | |<----needed---->|<-----tomove------>|
480 0 lo new_hi new_size
481 */
482 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000483 Py_Size(self) - lo - needed);
Guido van Rossumd624f182006-04-24 13:47:05 +0000484 }
485 }
486
487 if (needed > 0)
488 memcpy(self->ob_bytes + lo, bytes, needed);
489
Guido van Rossum75d38e92007-08-24 17:33:11 +0000490
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000491 finish:
Guido van Rossum75d38e92007-08-24 17:33:11 +0000492 if (vbytes.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000493 PyObject_ReleaseBuffer(values, &vbytes);
494 return res;
Guido van Rossumd624f182006-04-24 13:47:05 +0000495}
496
497static int
498bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
499{
500 Py_ssize_t ival;
501
502 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000503 i += Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000504
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000505 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000506 PyErr_SetString(PyExc_IndexError, "buffer index out of range");
Guido van Rossumd624f182006-04-24 13:47:05 +0000507 return -1;
508 }
509
510 if (value == NULL)
511 return bytes_setslice(self, i, i+1, NULL);
512
Thomas Woutersd204a712006-08-22 13:41:17 +0000513 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000514 if (ival == -1 && PyErr_Occurred())
515 return -1;
516
517 if (ival < 0 || ival >= 256) {
518 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
519 return -1;
520 }
521
522 self->ob_bytes[i] = ival;
523 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000524}
525
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000526static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000527bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
528{
529 Py_ssize_t start, stop, step, slicelen, needed;
530 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000531
Thomas Wouters376446d2006-12-19 08:30:14 +0000532 if (PyIndex_Check(item)) {
533 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
534
535 if (i == -1 && PyErr_Occurred())
536 return -1;
537
538 if (i < 0)
539 i += PyBytes_GET_SIZE(self);
540
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000541 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000542 PyErr_SetString(PyExc_IndexError, "buffer index out of range");
Thomas Wouters376446d2006-12-19 08:30:14 +0000543 return -1;
544 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000545
Thomas Wouters376446d2006-12-19 08:30:14 +0000546 if (values == NULL) {
547 /* Fall through to slice assignment */
548 start = i;
549 stop = i + 1;
550 step = 1;
551 slicelen = 1;
552 }
553 else {
554 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
555 if (ival == -1 && PyErr_Occurred())
556 return -1;
557 if (ival < 0 || ival >= 256) {
558 PyErr_SetString(PyExc_ValueError,
559 "byte must be in range(0, 256)");
560 return -1;
561 }
562 self->ob_bytes[i] = (char)ival;
563 return 0;
564 }
565 }
566 else if (PySlice_Check(item)) {
567 if (PySlice_GetIndicesEx((PySliceObject *)item,
568 PyBytes_GET_SIZE(self),
569 &start, &stop, &step, &slicelen) < 0) {
570 return -1;
571 }
572 }
573 else {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000574 PyErr_SetString(PyExc_TypeError, "buffer indices must be integer");
Thomas Wouters376446d2006-12-19 08:30:14 +0000575 return -1;
576 }
577
578 if (values == NULL) {
579 bytes = NULL;
580 needed = 0;
581 }
582 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
583 /* Make a copy an call this function recursively */
584 int err;
585 values = PyBytes_FromObject(values);
586 if (values == NULL)
587 return -1;
588 err = bytes_ass_subscript(self, item, values);
589 Py_DECREF(values);
590 return err;
591 }
592 else {
593 assert(PyBytes_Check(values));
594 bytes = ((PyBytesObject *)values)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000595 needed = Py_Size(values);
Thomas Wouters376446d2006-12-19 08:30:14 +0000596 }
597 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
598 if ((step < 0 && start < stop) ||
599 (step > 0 && start > stop))
600 stop = start;
601 if (step == 1) {
602 if (slicelen != needed) {
603 if (slicelen > needed) {
604 /*
605 0 start stop old_size
606 | |<---slicelen--->|<-----tomove------>|
607 | |<-needed->|<-----tomove------>|
608 0 lo new_hi new_size
609 */
610 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000611 Py_Size(self) - stop);
Thomas Wouters376446d2006-12-19 08:30:14 +0000612 }
613 if (PyBytes_Resize((PyObject *)self,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000614 Py_Size(self) + needed - slicelen) < 0)
Thomas Wouters376446d2006-12-19 08:30:14 +0000615 return -1;
616 if (slicelen < needed) {
617 /*
618 0 lo hi old_size
619 | |<-avail->|<-----tomove------>|
620 | |<----needed---->|<-----tomove------>|
621 0 lo new_hi new_size
622 */
623 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000624 Py_Size(self) - start - needed);
Thomas Wouters376446d2006-12-19 08:30:14 +0000625 }
626 }
627
628 if (needed > 0)
629 memcpy(self->ob_bytes + start, bytes, needed);
630
631 return 0;
632 }
633 else {
634 if (needed == 0) {
635 /* Delete slice */
636 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000637
Thomas Wouters376446d2006-12-19 08:30:14 +0000638 if (step < 0) {
639 stop = start + 1;
640 start = stop + step * (slicelen - 1) - 1;
641 step = -step;
642 }
643 for (cur = start, i = 0;
644 i < slicelen; cur += step, i++) {
645 Py_ssize_t lim = step - 1;
646
647 if (cur + step >= PyBytes_GET_SIZE(self))
648 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000649
Thomas Wouters376446d2006-12-19 08:30:14 +0000650 memmove(self->ob_bytes + cur - i,
651 self->ob_bytes + cur + 1, lim);
652 }
653 /* Move the tail of the bytes, in one chunk */
654 cur = start + slicelen*step;
655 if (cur < PyBytes_GET_SIZE(self)) {
656 memmove(self->ob_bytes + cur - slicelen,
657 self->ob_bytes + cur,
658 PyBytes_GET_SIZE(self) - cur);
659 }
660 if (PyBytes_Resize((PyObject *)self,
661 PyBytes_GET_SIZE(self) - slicelen) < 0)
662 return -1;
663
664 return 0;
665 }
666 else {
667 /* Assign slice */
668 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000669
Thomas Wouters376446d2006-12-19 08:30:14 +0000670 if (needed != slicelen) {
671 PyErr_Format(PyExc_ValueError,
672 "attempt to assign bytes of size %zd "
673 "to extended slice of size %zd",
674 needed, slicelen);
675 return -1;
676 }
677 for (cur = start, i = 0; i < slicelen; cur += step, i++)
678 self->ob_bytes[cur] = bytes[i];
679 return 0;
680 }
681 }
682}
683
684static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000685bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
686{
Guido van Rossumd624f182006-04-24 13:47:05 +0000687 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000688 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000689 const char *encoding = NULL;
690 const char *errors = NULL;
691 Py_ssize_t count;
692 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000693 PyObject *(*iternext)(PyObject *);
694
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000695 if (Py_Size(self) != 0) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000696 /* Empty previous contents (yes, do this first of all!) */
697 if (PyBytes_Resize((PyObject *)self, 0) < 0)
698 return -1;
699 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000700
Guido van Rossumd624f182006-04-24 13:47:05 +0000701 /* Parse arguments */
702 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
703 &arg, &encoding, &errors))
704 return -1;
705
706 /* Make a quick exit if no first argument */
707 if (arg == NULL) {
708 if (encoding != NULL || errors != NULL) {
709 PyErr_SetString(PyExc_TypeError,
710 "encoding or errors without sequence argument");
711 return -1;
712 }
713 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000714 }
715
Guido van Rossumd624f182006-04-24 13:47:05 +0000716 if (PyUnicode_Check(arg)) {
717 /* Encode via the codec registry */
Guido van Rossum4355a472007-05-04 05:00:04 +0000718 PyObject *encoded, *new;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000719 if (encoding == NULL) {
720 PyErr_SetString(PyExc_TypeError,
721 "string argument without an encoding");
722 return -1;
723 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000724 encoded = PyCodec_Encode(arg, encoding, errors);
725 if (encoded == NULL)
726 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000727 assert(PyString_Check(encoded));
Guido van Rossuma74184e2007-08-29 04:05:57 +0000728 new = bytes_iconcat(self, encoded);
729 Py_DECREF(encoded);
730 if (new == NULL)
731 return -1;
732 Py_DECREF(new);
733 return 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000734 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000735
Guido van Rossumd624f182006-04-24 13:47:05 +0000736 /* If it's not unicode, there can't be encoding or errors */
737 if (encoding != NULL || errors != NULL) {
738 PyErr_SetString(PyExc_TypeError,
739 "encoding or errors without a string argument");
740 return -1;
741 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000742
Guido van Rossumd624f182006-04-24 13:47:05 +0000743 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000744 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000745 if (count == -1 && PyErr_Occurred())
746 PyErr_Clear();
747 else {
748 if (count < 0) {
749 PyErr_SetString(PyExc_ValueError, "negative count");
750 return -1;
751 }
752 if (count > 0) {
753 if (PyBytes_Resize((PyObject *)self, count))
754 return -1;
755 memset(self->ob_bytes, 0, count);
756 }
757 return 0;
758 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000759
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000760 /* Use the modern buffer interface */
761 if (PyObject_CheckBuffer(arg)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000762 Py_ssize_t size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000763 Py_buffer view;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000764 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000765 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000766 size = view.len;
767 if (PyBytes_Resize((PyObject *)self, size) < 0) goto fail;
768 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
769 goto fail;
770 PyObject_ReleaseBuffer(arg, &view);
Guido van Rossumd624f182006-04-24 13:47:05 +0000771 return 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000772 fail:
773 PyObject_ReleaseBuffer(arg, &view);
774 return -1;
Guido van Rossumd624f182006-04-24 13:47:05 +0000775 }
776
777 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000778
779 /* Get the iterator */
780 it = PyObject_GetIter(arg);
781 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000782 return -1;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000783 iternext = *Py_Type(it)->tp_iternext;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000784
785 /* Run the iterator to exhaustion */
786 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000787 PyObject *item;
788 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000789
Guido van Rossumd624f182006-04-24 13:47:05 +0000790 /* Get the next item */
791 item = iternext(it);
792 if (item == NULL) {
793 if (PyErr_Occurred()) {
794 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
795 goto error;
796 PyErr_Clear();
797 }
798 break;
799 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000800
Guido van Rossumd624f182006-04-24 13:47:05 +0000801 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000802 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000803 Py_DECREF(item);
804 if (value == -1 && PyErr_Occurred())
805 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000806
Guido van Rossumd624f182006-04-24 13:47:05 +0000807 /* Range check */
808 if (value < 0 || value >= 256) {
809 PyErr_SetString(PyExc_ValueError,
810 "bytes must be in range(0, 256)");
811 goto error;
812 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000813
Guido van Rossumd624f182006-04-24 13:47:05 +0000814 /* Append the byte */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000815 if (Py_Size(self) < self->ob_alloc)
816 Py_Size(self)++;
817 else if (PyBytes_Resize((PyObject *)self, Py_Size(self)+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000818 goto error;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000819 self->ob_bytes[Py_Size(self)-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000820 }
821
822 /* Clean up and return success */
823 Py_DECREF(it);
824 return 0;
825
826 error:
827 /* Error handling when it != NULL */
828 Py_DECREF(it);
829 return -1;
830}
831
Georg Brandlee91be42007-02-24 19:41:35 +0000832/* Mostly copied from string_repr, but without the
833 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000834static PyObject *
835bytes_repr(PyBytesObject *self)
836{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000837 static const char *hexdigits = "0123456789abcdef";
Guido van Rossum98297ee2007-11-06 21:34:58 +0000838 const char *quote_prefix = "buffer(b";
839 const char *quote_postfix = ")";
840 Py_ssize_t length = Py_Size(self);
841 /* 9 prefix + 2 postfix */
842 size_t newsize = 11 + 4 * length;
Georg Brandlee91be42007-02-24 19:41:35 +0000843 PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000844 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 2 != length) {
Georg Brandlee91be42007-02-24 19:41:35 +0000845 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000846 "buffer object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000847 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000848 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000849 v = PyUnicode_FromUnicode(NULL, newsize);
Georg Brandlee91be42007-02-24 19:41:35 +0000850 if (v == NULL) {
851 return NULL;
852 }
853 else {
854 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000855 register Py_UNICODE c;
856 register Py_UNICODE *p;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000857 int quote;
858
859 /* Figure out which quote to use; single is preferred */
860 quote = '\'';
861 {
862 char *test, *start;
863 start = PyBytes_AS_STRING(self);
864 for (test = start; test < start+length; ++test) {
865 if (*test == '"') {
866 quote = '\''; /* back to single */
867 goto decided;
868 }
869 else if (*test == '\'')
870 quote = '"';
871 }
872 decided:
873 ;
874 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000875
Walter Dörwald1ab83302007-05-18 17:15:44 +0000876 p = PyUnicode_AS_UNICODE(v);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000877 while (*quote_prefix)
878 *p++ = *quote_prefix++;
Georg Brandlee91be42007-02-24 19:41:35 +0000879 *p++ = quote;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000880
881 for (i = 0; i < length; i++) {
Georg Brandlee91be42007-02-24 19:41:35 +0000882 /* There's at least enough room for a hex escape
883 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000884 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Georg Brandlee91be42007-02-24 19:41:35 +0000885 c = self->ob_bytes[i];
Guido van Rossum98297ee2007-11-06 21:34:58 +0000886 if (c == '\'' || c == '\\')
Georg Brandlee91be42007-02-24 19:41:35 +0000887 *p++ = '\\', *p++ = c;
888 else if (c == '\t')
889 *p++ = '\\', *p++ = 't';
890 else if (c == '\n')
891 *p++ = '\\', *p++ = 'n';
892 else if (c == '\r')
893 *p++ = '\\', *p++ = 'r';
894 else if (c == 0)
Guido van Rossum57b93ad2007-05-08 19:09:34 +0000895 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
Georg Brandlee91be42007-02-24 19:41:35 +0000896 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000897 *p++ = '\\';
898 *p++ = 'x';
899 *p++ = hexdigits[(c & 0xf0) >> 4];
900 *p++ = hexdigits[c & 0xf];
Georg Brandlee91be42007-02-24 19:41:35 +0000901 }
902 else
903 *p++ = c;
904 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000905 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Georg Brandlee91be42007-02-24 19:41:35 +0000906 *p++ = quote;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000907 while (*quote_postfix) {
908 *p++ = *quote_postfix++;
909 }
Georg Brandlee91be42007-02-24 19:41:35 +0000910 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000911 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
912 Py_DECREF(v);
913 return NULL;
914 }
Georg Brandlee91be42007-02-24 19:41:35 +0000915 return v;
916 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000917}
918
919static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000920bytes_str(PyObject *op)
Guido van Rossumd624f182006-04-24 13:47:05 +0000921{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000922 if (Py_BytesWarningFlag) {
923 if (PyErr_WarnEx(PyExc_BytesWarning,
924 "str() on a buffer instance", 1))
925 return NULL;
926 }
927 return bytes_repr((PyBytesObject*)op);
Guido van Rossumd624f182006-04-24 13:47:05 +0000928}
929
930static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000931bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000932{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000933 Py_ssize_t self_size, other_size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000934 Py_buffer self_bytes, other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000935 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000936 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000937 int cmp;
938
Jeremy Hylton18c3ff82007-08-29 18:47:16 +0000939 /* Bytes can be compared to anything that supports the (binary)
940 buffer API. Except that a comparison with Unicode is always an
941 error, even if the comparison is for equality. */
942 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
943 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000944 if (Py_BytesWarningFlag && op == Py_EQ) {
945 if (PyErr_WarnEx(PyExc_BytesWarning,
946 "Comparsion between buffer and string", 1))
947 return NULL;
948 }
949
Guido van Rossum1e35e762007-10-09 17:21:10 +0000950 Py_INCREF(Py_NotImplemented);
951 return Py_NotImplemented;
Jeremy Hylton18c3ff82007-08-29 18:47:16 +0000952 }
Guido van Rossumebea9be2007-04-09 00:49:13 +0000953
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000954 self_size = _getbuffer(self, &self_bytes);
955 if (self_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000956 PyErr_Clear();
Guido van Rossumebea9be2007-04-09 00:49:13 +0000957 Py_INCREF(Py_NotImplemented);
958 return Py_NotImplemented;
959 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000960
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000961 other_size = _getbuffer(other, &other_bytes);
962 if (other_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000963 PyErr_Clear();
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000964 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossumd624f182006-04-24 13:47:05 +0000965 Py_INCREF(Py_NotImplemented);
966 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000967 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000968
969 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000970 /* Shortcut: if the lengths differ, the objects differ */
971 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000972 }
973 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000974 minsize = self_size;
975 if (other_size < minsize)
976 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000977
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000978 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000979 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000980
Guido van Rossumd624f182006-04-24 13:47:05 +0000981 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000982 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000983 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000984 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000985 cmp = 1;
986 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000987
Guido van Rossumd624f182006-04-24 13:47:05 +0000988 switch (op) {
989 case Py_LT: cmp = cmp < 0; break;
990 case Py_LE: cmp = cmp <= 0; break;
991 case Py_EQ: cmp = cmp == 0; break;
992 case Py_NE: cmp = cmp != 0; break;
993 case Py_GT: cmp = cmp > 0; break;
994 case Py_GE: cmp = cmp >= 0; break;
995 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000996 }
997
998 res = cmp ? Py_True : Py_False;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000999 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossum75d38e92007-08-24 17:33:11 +00001000 PyObject_ReleaseBuffer(other, &other_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001001 Py_INCREF(res);
1002 return res;
1003}
1004
1005static void
1006bytes_dealloc(PyBytesObject *self)
1007{
Guido van Rossumd624f182006-04-24 13:47:05 +00001008 if (self->ob_bytes != 0) {
1009 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001010 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001011 Py_Type(self)->tp_free((PyObject *)self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001012}
1013
Neal Norwitz6968b052007-02-27 19:02:19 +00001014
1015/* -------------------------------------------------------------------- */
1016/* Methods */
1017
1018#define STRINGLIB_CHAR char
1019#define STRINGLIB_CMP memcmp
1020#define STRINGLIB_LEN PyBytes_GET_SIZE
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001021#define STRINGLIB_STR PyBytes_AS_STRING
Neal Norwitz6968b052007-02-27 19:02:19 +00001022#define STRINGLIB_NEW PyBytes_FromStringAndSize
1023#define STRINGLIB_EMPTY nullbytes
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001024#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
1025#define STRINGLIB_MUTABLE 1
Neal Norwitz6968b052007-02-27 19:02:19 +00001026
1027#include "stringlib/fastsearch.h"
1028#include "stringlib/count.h"
1029#include "stringlib/find.h"
1030#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001031#include "stringlib/ctype.h"
1032#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001033
1034
1035/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1036were copied from the old char* style string object. */
1037
1038Py_LOCAL_INLINE(void)
1039_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1040{
1041 if (*end > len)
1042 *end = len;
1043 else if (*end < 0)
1044 *end += len;
1045 if (*end < 0)
1046 *end = 0;
1047 if (*start < 0)
1048 *start += len;
1049 if (*start < 0)
1050 *start = 0;
1051}
1052
1053
1054Py_LOCAL_INLINE(Py_ssize_t)
1055bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1056{
1057 PyObject *subobj;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00001058 Py_buffer subbuf;
Neal Norwitz6968b052007-02-27 19:02:19 +00001059 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Guido van Rossum06b8b022007-08-31 13:48:41 +00001060 Py_ssize_t res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001061
1062 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1063 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1064 return -2;
Guido van Rossum06b8b022007-08-31 13:48:41 +00001065 if (_getbuffer(subobj, &subbuf) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001066 return -2;
Neal Norwitz6968b052007-02-27 19:02:19 +00001067 if (dir > 0)
Guido van Rossum06b8b022007-08-31 13:48:41 +00001068 res = stringlib_find_slice(
Neal Norwitz6968b052007-02-27 19:02:19 +00001069 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossum06b8b022007-08-31 13:48:41 +00001070 subbuf.buf, subbuf.len, start, end);
Neal Norwitz6968b052007-02-27 19:02:19 +00001071 else
Guido van Rossum06b8b022007-08-31 13:48:41 +00001072 res = stringlib_rfind_slice(
Neal Norwitz6968b052007-02-27 19:02:19 +00001073 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossum06b8b022007-08-31 13:48:41 +00001074 subbuf.buf, subbuf.len, start, end);
1075 PyObject_ReleaseBuffer(subobj, &subbuf);
1076 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001077}
1078
Neal Norwitz6968b052007-02-27 19:02:19 +00001079PyDoc_STRVAR(find__doc__,
1080"B.find(sub [,start [,end]]) -> int\n\
1081\n\
1082Return the lowest index in B where subsection sub is found,\n\
1083such that sub is contained within s[start,end]. Optional\n\
1084arguments start and end are interpreted as in slice notation.\n\
1085\n\
1086Return -1 on failure.");
1087
1088static PyObject *
1089bytes_find(PyBytesObject *self, PyObject *args)
1090{
1091 Py_ssize_t result = bytes_find_internal(self, args, +1);
1092 if (result == -2)
1093 return NULL;
1094 return PyInt_FromSsize_t(result);
1095}
1096
1097PyDoc_STRVAR(count__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001098"B.count(sub [,start [,end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001099\n\
1100Return the number of non-overlapping occurrences of subsection sub in\n\
1101bytes B[start:end]. Optional arguments start and end are interpreted\n\
1102as in slice notation.");
1103
1104static PyObject *
1105bytes_count(PyBytesObject *self, PyObject *args)
1106{
1107 PyObject *sub_obj;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001108 const char *str = PyBytes_AS_STRING(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00001109 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001110 Py_buffer vsub;
1111 PyObject *count_obj;
Neal Norwitz6968b052007-02-27 19:02:19 +00001112
1113 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1114 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1115 return NULL;
1116
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001117 if (_getbuffer(sub_obj, &vsub) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001118 return NULL;
1119
Martin v. Löwis5b222132007-06-10 09:51:05 +00001120 _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Neal Norwitz6968b052007-02-27 19:02:19 +00001121
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001122 count_obj = PyInt_FromSsize_t(
1123 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
Neal Norwitz6968b052007-02-27 19:02:19 +00001124 );
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001125 PyObject_ReleaseBuffer(sub_obj, &vsub);
1126 return count_obj;
Neal Norwitz6968b052007-02-27 19:02:19 +00001127}
1128
1129
1130PyDoc_STRVAR(index__doc__,
1131"B.index(sub [,start [,end]]) -> int\n\
1132\n\
1133Like B.find() but raise ValueError when the subsection is not found.");
1134
1135static PyObject *
1136bytes_index(PyBytesObject *self, PyObject *args)
1137{
1138 Py_ssize_t result = bytes_find_internal(self, args, +1);
1139 if (result == -2)
1140 return NULL;
1141 if (result == -1) {
1142 PyErr_SetString(PyExc_ValueError,
1143 "subsection not found");
1144 return NULL;
1145 }
1146 return PyInt_FromSsize_t(result);
1147}
1148
1149
1150PyDoc_STRVAR(rfind__doc__,
1151"B.rfind(sub [,start [,end]]) -> int\n\
1152\n\
1153Return the highest index in B where subsection sub is found,\n\
1154such that sub is contained within s[start,end]. Optional\n\
1155arguments start and end are interpreted as in slice notation.\n\
1156\n\
1157Return -1 on failure.");
1158
1159static PyObject *
1160bytes_rfind(PyBytesObject *self, PyObject *args)
1161{
1162 Py_ssize_t result = bytes_find_internal(self, args, -1);
1163 if (result == -2)
1164 return NULL;
1165 return PyInt_FromSsize_t(result);
1166}
1167
1168
1169PyDoc_STRVAR(rindex__doc__,
1170"B.rindex(sub [,start [,end]]) -> int\n\
1171\n\
1172Like B.rfind() but raise ValueError when the subsection is not found.");
1173
1174static PyObject *
1175bytes_rindex(PyBytesObject *self, PyObject *args)
1176{
1177 Py_ssize_t result = bytes_find_internal(self, args, -1);
1178 if (result == -2)
1179 return NULL;
1180 if (result == -1) {
1181 PyErr_SetString(PyExc_ValueError,
1182 "subsection not found");
1183 return NULL;
1184 }
1185 return PyInt_FromSsize_t(result);
1186}
1187
1188
Guido van Rossum98297ee2007-11-06 21:34:58 +00001189static int
1190bytes_contains(PyObject *self, PyObject *arg)
1191{
1192 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1193 if (ival == -1 && PyErr_Occurred()) {
1194 Py_buffer varg;
1195 int pos;
1196 PyErr_Clear();
1197 if (_getbuffer(arg, &varg) < 0)
1198 return -1;
1199 pos = stringlib_find(PyBytes_AS_STRING(self), Py_Size(self),
1200 varg.buf, varg.len, 0);
1201 PyObject_ReleaseBuffer(arg, &varg);
1202 return pos >= 0;
1203 }
1204 if (ival < 0 || ival >= 256) {
1205 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1206 return -1;
1207 }
1208
1209 return memchr(PyBytes_AS_STRING(self), ival, Py_Size(self)) != NULL;
1210}
1211
1212
Neal Norwitz6968b052007-02-27 19:02:19 +00001213/* Matches the end (direction >= 0) or start (direction < 0) of self
1214 * against substr, using the start and end arguments. Returns
1215 * -1 on error, 0 if not found and 1 if found.
1216 */
1217Py_LOCAL(int)
1218_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1219 Py_ssize_t end, int direction)
1220{
1221 Py_ssize_t len = PyBytes_GET_SIZE(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00001222 const char* str;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001223 Py_buffer vsubstr;
Guido van Rossum40d20bc2007-10-22 00:09:51 +00001224 int rv = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001225
Neal Norwitz6968b052007-02-27 19:02:19 +00001226 str = PyBytes_AS_STRING(self);
1227
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001228 if (_getbuffer(substr, &vsubstr) < 0)
1229 return -1;
1230
Neal Norwitz6968b052007-02-27 19:02:19 +00001231 _adjust_indices(&start, &end, len);
1232
1233 if (direction < 0) {
1234 /* startswith */
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001235 if (start+vsubstr.len > len) {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001236 goto done;
1237 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001238 } else {
1239 /* endswith */
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001240 if (end-start < vsubstr.len || start > len) {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001241 goto done;
1242 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001243
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001244 if (end-vsubstr.len > start)
1245 start = end - vsubstr.len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001246 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001247 if (end-start >= vsubstr.len)
1248 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1249
1250done:
1251 PyObject_ReleaseBuffer(substr, &vsubstr);
1252 return rv;
Neal Norwitz6968b052007-02-27 19:02:19 +00001253}
1254
1255
1256PyDoc_STRVAR(startswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001257"B.startswith(prefix [,start [,end]]) -> bool\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001258\n\
1259Return True if B starts with the specified prefix, False otherwise.\n\
1260With optional start, test B beginning at that position.\n\
1261With optional end, stop comparing B at that position.\n\
1262prefix can also be a tuple of strings to try.");
1263
1264static PyObject *
1265bytes_startswith(PyBytesObject *self, PyObject *args)
1266{
1267 Py_ssize_t start = 0;
1268 Py_ssize_t end = PY_SSIZE_T_MAX;
1269 PyObject *subobj;
1270 int result;
1271
1272 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1273 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1274 return NULL;
1275 if (PyTuple_Check(subobj)) {
1276 Py_ssize_t i;
1277 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1278 result = _bytes_tailmatch(self,
1279 PyTuple_GET_ITEM(subobj, i),
1280 start, end, -1);
1281 if (result == -1)
1282 return NULL;
1283 else if (result) {
1284 Py_RETURN_TRUE;
1285 }
1286 }
1287 Py_RETURN_FALSE;
1288 }
1289 result = _bytes_tailmatch(self, subobj, start, end, -1);
1290 if (result == -1)
1291 return NULL;
1292 else
1293 return PyBool_FromLong(result);
1294}
1295
1296PyDoc_STRVAR(endswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001297"B.endswith(suffix [,start [,end]]) -> bool\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001298\n\
1299Return True if B ends with the specified suffix, False otherwise.\n\
1300With optional start, test B beginning at that position.\n\
1301With optional end, stop comparing B at that position.\n\
1302suffix can also be a tuple of strings to try.");
1303
1304static PyObject *
1305bytes_endswith(PyBytesObject *self, PyObject *args)
1306{
1307 Py_ssize_t start = 0;
1308 Py_ssize_t end = PY_SSIZE_T_MAX;
1309 PyObject *subobj;
1310 int result;
1311
1312 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1313 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1314 return NULL;
1315 if (PyTuple_Check(subobj)) {
1316 Py_ssize_t i;
1317 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1318 result = _bytes_tailmatch(self,
1319 PyTuple_GET_ITEM(subobj, i),
1320 start, end, +1);
1321 if (result == -1)
1322 return NULL;
1323 else if (result) {
1324 Py_RETURN_TRUE;
1325 }
1326 }
1327 Py_RETURN_FALSE;
1328 }
1329 result = _bytes_tailmatch(self, subobj, start, end, +1);
1330 if (result == -1)
1331 return NULL;
1332 else
1333 return PyBool_FromLong(result);
1334}
1335
1336
Neal Norwitz6968b052007-02-27 19:02:19 +00001337PyDoc_STRVAR(translate__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001338"B.translate(table[, deletechars]) -> buffer\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001339\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001340Return a copy of B, where all characters occurring in the\n\
1341optional argument deletechars are removed, and the remaining\n\
1342characters have been mapped through the given translation\n\
1343table, which must be a bytes object of length 256.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001344
1345static PyObject *
1346bytes_translate(PyBytesObject *self, PyObject *args)
1347{
1348 register char *input, *output;
1349 register const char *table;
1350 register Py_ssize_t i, c, changed = 0;
1351 PyObject *input_obj = (PyObject*)self;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001352 const char *output_start;
1353 Py_ssize_t inlen;
Neal Norwitz6968b052007-02-27 19:02:19 +00001354 PyObject *result;
1355 int trans_table[256];
1356 PyObject *tableobj, *delobj = NULL;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001357 Py_buffer vtable, vdel;
Neal Norwitz6968b052007-02-27 19:02:19 +00001358
1359 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1360 &tableobj, &delobj))
1361 return NULL;
1362
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001363 if (_getbuffer(tableobj, &vtable) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001364 return NULL;
1365
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001366 if (vtable.len != 256) {
Neal Norwitz6968b052007-02-27 19:02:19 +00001367 PyErr_SetString(PyExc_ValueError,
1368 "translation table must be 256 characters long");
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001369 result = NULL;
1370 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001371 }
1372
1373 if (delobj != NULL) {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001374 if (_getbuffer(delobj, &vdel) < 0) {
1375 result = NULL;
1376 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001377 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001378 }
1379 else {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001380 vdel.buf = NULL;
1381 vdel.len = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001382 }
1383
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001384 table = (const char *)vtable.buf;
Neal Norwitz6968b052007-02-27 19:02:19 +00001385 inlen = PyBytes_GET_SIZE(input_obj);
1386 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1387 if (result == NULL)
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001388 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001389 output_start = output = PyBytes_AsString(result);
1390 input = PyBytes_AS_STRING(input_obj);
1391
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001392 if (vdel.len == 0) {
Neal Norwitz6968b052007-02-27 19:02:19 +00001393 /* If no deletions are required, use faster code */
1394 for (i = inlen; --i >= 0; ) {
1395 c = Py_CHARMASK(*input++);
1396 if (Py_CHARMASK((*output++ = table[c])) != c)
1397 changed = 1;
1398 }
1399 if (changed || !PyBytes_CheckExact(input_obj))
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001400 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001401 Py_DECREF(result);
1402 Py_INCREF(input_obj);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001403 result = input_obj;
1404 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001405 }
1406
1407 for (i = 0; i < 256; i++)
1408 trans_table[i] = Py_CHARMASK(table[i]);
1409
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001410 for (i = 0; i < vdel.len; i++)
1411 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001412
1413 for (i = inlen; --i >= 0; ) {
1414 c = Py_CHARMASK(*input++);
1415 if (trans_table[c] != -1)
1416 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1417 continue;
1418 changed = 1;
1419 }
1420 if (!changed && PyBytes_CheckExact(input_obj)) {
1421 Py_DECREF(result);
1422 Py_INCREF(input_obj);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001423 result = input_obj;
1424 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001425 }
1426 /* Fix the size of the resulting string */
1427 if (inlen > 0)
1428 PyBytes_Resize(result, output - output_start);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001429
1430done:
1431 PyObject_ReleaseBuffer(tableobj, &vtable);
1432 if (delobj != NULL)
1433 PyObject_ReleaseBuffer(delobj, &vdel);
Neal Norwitz6968b052007-02-27 19:02:19 +00001434 return result;
1435}
1436
1437
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Evaluates to a char * to the first byte equal to c within the
   target_len bytes starting at target, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when the length bytes of pattern occur in target at offset.
   The first and last bytes are compared before the memcmp() of the
   bytes in between.  Arguments are parenthesized so that expressions
   may be passed, but each one is evaluated more than once.
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)        \
  ((target)[(offset)] == (pattern)[0] &&                        \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&    \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1451
1452
1453/* Bytes ops must return a string. */
1454/* If the object is subclass of bytes, create a copy */
1455Py_LOCAL(PyBytesObject *)
1456return_self(PyBytesObject *self)
1457{
1458 if (PyBytes_CheckExact(self)) {
1459 Py_INCREF(self);
1460 return (PyBytesObject *)self;
1461 }
1462 return (PyBytesObject *)PyBytes_FromStringAndSize(
1463 PyBytes_AS_STRING(self),
1464 PyBytes_GET_SIZE(self));
1465}
1466
1467Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitz61ec0d32007-10-26 06:44:10 +00001468countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Neal Norwitz6968b052007-02-27 19:02:19 +00001469{
1470 Py_ssize_t count=0;
1471 const char *start=target;
1472 const char *end=target+target_len;
1473
1474 while ( (start=findchar(start, end-start, c)) != NULL ) {
1475 count++;
1476 if (count >= maxcount)
1477 break;
1478 start += 1;
1479 }
1480 return count;
1481}
1482
1483Py_LOCAL(Py_ssize_t)
1484findstring(const char *target, Py_ssize_t target_len,
1485 const char *pattern, Py_ssize_t pattern_len,
1486 Py_ssize_t start,
1487 Py_ssize_t end,
1488 int direction)
1489{
1490 if (start < 0) {
1491 start += target_len;
1492 if (start < 0)
1493 start = 0;
1494 }
1495 if (end > target_len) {
1496 end = target_len;
1497 } else if (end < 0) {
1498 end += target_len;
1499 if (end < 0)
1500 end = 0;
1501 }
1502
1503 /* zero-length substrings always match at the first attempt */
1504 if (pattern_len == 0)
1505 return (direction > 0) ? start : end;
1506
1507 end -= pattern_len;
1508
1509 if (direction < 0) {
1510 for (; end >= start; end--)
1511 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1512 return end;
1513 } else {
1514 for (; start <= end; start++)
1515 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1516 return start;
1517 }
1518 return -1;
1519}
1520
1521Py_LOCAL_INLINE(Py_ssize_t)
1522countstring(const char *target, Py_ssize_t target_len,
1523 const char *pattern, Py_ssize_t pattern_len,
1524 Py_ssize_t start,
1525 Py_ssize_t end,
1526 int direction, Py_ssize_t maxcount)
1527{
1528 Py_ssize_t count=0;
1529
1530 if (start < 0) {
1531 start += target_len;
1532 if (start < 0)
1533 start = 0;
1534 }
1535 if (end > target_len) {
1536 end = target_len;
1537 } else if (end < 0) {
1538 end += target_len;
1539 if (end < 0)
1540 end = 0;
1541 }
1542
1543 /* zero-length substrings match everywhere */
1544 if (pattern_len == 0 || maxcount == 0) {
1545 if (target_len+1 < maxcount)
1546 return target_len+1;
1547 return maxcount;
1548 }
1549
1550 end -= pattern_len;
1551 if (direction < 0) {
1552 for (; (end >= start); end--)
1553 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1554 count++;
1555 if (--maxcount <= 0) break;
1556 end -= pattern_len-1;
1557 }
1558 } else {
1559 for (; (start <= end); start++)
1560 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1561 count++;
1562 if (--maxcount <= 0)
1563 break;
1564 start += pattern_len-1;
1565 }
1566 }
1567 return count;
1568}
1569
1570
1571/* Algorithms for different cases of string replacement */
1572
1573/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1574Py_LOCAL(PyBytesObject *)
1575replace_interleave(PyBytesObject *self,
1576 const char *to_s, Py_ssize_t to_len,
1577 Py_ssize_t maxcount)
1578{
1579 char *self_s, *result_s;
1580 Py_ssize_t self_len, result_len;
1581 Py_ssize_t count, i, product;
1582 PyBytesObject *result;
1583
1584 self_len = PyBytes_GET_SIZE(self);
1585
1586 /* 1 at the end plus 1 after every character */
1587 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001588 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001589 count = maxcount;
1590
1591 /* Check for overflow */
1592 /* result_len = count * to_len + self_len; */
1593 product = count * to_len;
1594 if (product / to_len != count) {
1595 PyErr_SetString(PyExc_OverflowError,
1596 "replace string is too long");
1597 return NULL;
1598 }
1599 result_len = product + self_len;
1600 if (result_len < 0) {
1601 PyErr_SetString(PyExc_OverflowError,
1602 "replace string is too long");
1603 return NULL;
1604 }
1605
1606 if (! (result = (PyBytesObject *)
1607 PyBytes_FromStringAndSize(NULL, result_len)) )
1608 return NULL;
1609
1610 self_s = PyBytes_AS_STRING(self);
1611 result_s = PyBytes_AS_STRING(result);
1612
1613 /* TODO: special case single character, which doesn't need memcpy */
1614
1615 /* Lay the first one down (guaranteed this will occur) */
1616 Py_MEMCPY(result_s, to_s, to_len);
1617 result_s += to_len;
1618 count -= 1;
1619
1620 for (i=0; i<count; i++) {
1621 *result_s++ = *self_s++;
1622 Py_MEMCPY(result_s, to_s, to_len);
1623 result_s += to_len;
1624 }
1625
1626 /* Copy the rest of the original string */
1627 Py_MEMCPY(result_s, self_s, self_len-i);
1628
1629 return result;
1630}
1631
1632/* Special case for deleting a single character */
1633/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1634Py_LOCAL(PyBytesObject *)
1635replace_delete_single_character(PyBytesObject *self,
1636 char from_c, Py_ssize_t maxcount)
1637{
1638 char *self_s, *result_s;
1639 char *start, *next, *end;
1640 Py_ssize_t self_len, result_len;
1641 Py_ssize_t count;
1642 PyBytesObject *result;
1643
1644 self_len = PyBytes_GET_SIZE(self);
1645 self_s = PyBytes_AS_STRING(self);
1646
1647 count = countchar(self_s, self_len, from_c, maxcount);
1648 if (count == 0) {
1649 return return_self(self);
1650 }
1651
1652 result_len = self_len - count; /* from_len == 1 */
1653 assert(result_len>=0);
1654
1655 if ( (result = (PyBytesObject *)
1656 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1657 return NULL;
1658 result_s = PyBytes_AS_STRING(result);
1659
1660 start = self_s;
1661 end = self_s + self_len;
1662 while (count-- > 0) {
1663 next = findchar(start, end-start, from_c);
1664 if (next == NULL)
1665 break;
1666 Py_MEMCPY(result_s, start, next-start);
1667 result_s += (next-start);
1668 start = next+1;
1669 }
1670 Py_MEMCPY(result_s, start, end-start);
1671
1672 return result;
1673}
1674
1675/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1676
1677Py_LOCAL(PyBytesObject *)
1678replace_delete_substring(PyBytesObject *self,
1679 const char *from_s, Py_ssize_t from_len,
1680 Py_ssize_t maxcount)
1681{
1682 char *self_s, *result_s;
1683 char *start, *next, *end;
1684 Py_ssize_t self_len, result_len;
1685 Py_ssize_t count, offset;
1686 PyBytesObject *result;
1687
1688 self_len = PyBytes_GET_SIZE(self);
1689 self_s = PyBytes_AS_STRING(self);
1690
1691 count = countstring(self_s, self_len,
1692 from_s, from_len,
1693 0, self_len, 1,
1694 maxcount);
1695
1696 if (count == 0) {
1697 /* no matches */
1698 return return_self(self);
1699 }
1700
1701 result_len = self_len - (count * from_len);
1702 assert (result_len>=0);
1703
1704 if ( (result = (PyBytesObject *)
1705 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1706 return NULL;
1707
1708 result_s = PyBytes_AS_STRING(result);
1709
1710 start = self_s;
1711 end = self_s + self_len;
1712 while (count-- > 0) {
1713 offset = findstring(start, end-start,
1714 from_s, from_len,
1715 0, end-start, FORWARD);
1716 if (offset == -1)
1717 break;
1718 next = start + offset;
1719
1720 Py_MEMCPY(result_s, start, next-start);
1721
1722 result_s += (next-start);
1723 start = next+from_len;
1724 }
1725 Py_MEMCPY(result_s, start, end-start);
1726 return result;
1727}
1728
1729/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1730Py_LOCAL(PyBytesObject *)
1731replace_single_character_in_place(PyBytesObject *self,
1732 char from_c, char to_c,
1733 Py_ssize_t maxcount)
1734{
1735 char *self_s, *result_s, *start, *end, *next;
1736 Py_ssize_t self_len;
1737 PyBytesObject *result;
1738
1739 /* The result string will be the same size */
1740 self_s = PyBytes_AS_STRING(self);
1741 self_len = PyBytes_GET_SIZE(self);
1742
1743 next = findchar(self_s, self_len, from_c);
1744
1745 if (next == NULL) {
1746 /* No matches; return the original bytes */
1747 return return_self(self);
1748 }
1749
1750 /* Need to make a new bytes */
1751 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1752 if (result == NULL)
1753 return NULL;
1754 result_s = PyBytes_AS_STRING(result);
1755 Py_MEMCPY(result_s, self_s, self_len);
1756
1757 /* change everything in-place, starting with this one */
1758 start = result_s + (next-self_s);
1759 *start = to_c;
1760 start++;
1761 end = result_s + self_len;
1762
1763 while (--maxcount > 0) {
1764 next = findchar(start, end-start, from_c);
1765 if (next == NULL)
1766 break;
1767 *next = to_c;
1768 start = next+1;
1769 }
1770
1771 return result;
1772}
1773
1774/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1775Py_LOCAL(PyBytesObject *)
1776replace_substring_in_place(PyBytesObject *self,
1777 const char *from_s, Py_ssize_t from_len,
1778 const char *to_s, Py_ssize_t to_len,
1779 Py_ssize_t maxcount)
1780{
1781 char *result_s, *start, *end;
1782 char *self_s;
1783 Py_ssize_t self_len, offset;
1784 PyBytesObject *result;
1785
1786 /* The result bytes will be the same size */
1787
1788 self_s = PyBytes_AS_STRING(self);
1789 self_len = PyBytes_GET_SIZE(self);
1790
1791 offset = findstring(self_s, self_len,
1792 from_s, from_len,
1793 0, self_len, FORWARD);
1794 if (offset == -1) {
1795 /* No matches; return the original bytes */
1796 return return_self(self);
1797 }
1798
1799 /* Need to make a new bytes */
1800 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1801 if (result == NULL)
1802 return NULL;
1803 result_s = PyBytes_AS_STRING(result);
1804 Py_MEMCPY(result_s, self_s, self_len);
1805
1806 /* change everything in-place, starting with this one */
1807 start = result_s + offset;
1808 Py_MEMCPY(start, to_s, from_len);
1809 start += from_len;
1810 end = result_s + self_len;
1811
1812 while ( --maxcount > 0) {
1813 offset = findstring(start, end-start,
1814 from_s, from_len,
1815 0, end-start, FORWARD);
1816 if (offset==-1)
1817 break;
1818 Py_MEMCPY(start+offset, to_s, from_len);
1819 start += offset+from_len;
1820 }
1821
1822 return result;
1823}
1824
1825/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1826Py_LOCAL(PyBytesObject *)
1827replace_single_character(PyBytesObject *self,
1828 char from_c,
1829 const char *to_s, Py_ssize_t to_len,
1830 Py_ssize_t maxcount)
1831{
1832 char *self_s, *result_s;
1833 char *start, *next, *end;
1834 Py_ssize_t self_len, result_len;
1835 Py_ssize_t count, product;
1836 PyBytesObject *result;
1837
1838 self_s = PyBytes_AS_STRING(self);
1839 self_len = PyBytes_GET_SIZE(self);
1840
1841 count = countchar(self_s, self_len, from_c, maxcount);
1842 if (count == 0) {
1843 /* no matches, return unchanged */
1844 return return_self(self);
1845 }
1846
1847 /* use the difference between current and new, hence the "-1" */
1848 /* result_len = self_len + count * (to_len-1) */
1849 product = count * (to_len-1);
1850 if (product / (to_len-1) != count) {
1851 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1852 return NULL;
1853 }
1854 result_len = self_len + product;
1855 if (result_len < 0) {
1856 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1857 return NULL;
1858 }
1859
1860 if ( (result = (PyBytesObject *)
1861 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1862 return NULL;
1863 result_s = PyBytes_AS_STRING(result);
1864
1865 start = self_s;
1866 end = self_s + self_len;
1867 while (count-- > 0) {
1868 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001869 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001870 break;
1871
1872 if (next == start) {
1873 /* replace with the 'to' */
1874 Py_MEMCPY(result_s, to_s, to_len);
1875 result_s += to_len;
1876 start += 1;
1877 } else {
1878 /* copy the unchanged old then the 'to' */
1879 Py_MEMCPY(result_s, start, next-start);
1880 result_s += (next-start);
1881 Py_MEMCPY(result_s, to_s, to_len);
1882 result_s += to_len;
1883 start = next+1;
1884 }
1885 }
1886 /* Copy the remainder of the remaining bytes */
1887 Py_MEMCPY(result_s, start, end-start);
1888
1889 return result;
1890}
1891
1892/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1893Py_LOCAL(PyBytesObject *)
1894replace_substring(PyBytesObject *self,
1895 const char *from_s, Py_ssize_t from_len,
1896 const char *to_s, Py_ssize_t to_len,
1897 Py_ssize_t maxcount)
1898{
1899 char *self_s, *result_s;
1900 char *start, *next, *end;
1901 Py_ssize_t self_len, result_len;
1902 Py_ssize_t count, offset, product;
1903 PyBytesObject *result;
1904
1905 self_s = PyBytes_AS_STRING(self);
1906 self_len = PyBytes_GET_SIZE(self);
1907
1908 count = countstring(self_s, self_len,
1909 from_s, from_len,
1910 0, self_len, FORWARD, maxcount);
1911 if (count == 0) {
1912 /* no matches, return unchanged */
1913 return return_self(self);
1914 }
1915
1916 /* Check for overflow */
1917 /* result_len = self_len + count * (to_len-from_len) */
1918 product = count * (to_len-from_len);
1919 if (product / (to_len-from_len) != count) {
1920 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1921 return NULL;
1922 }
1923 result_len = self_len + product;
1924 if (result_len < 0) {
1925 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1926 return NULL;
1927 }
1928
1929 if ( (result = (PyBytesObject *)
1930 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1931 return NULL;
1932 result_s = PyBytes_AS_STRING(result);
1933
1934 start = self_s;
1935 end = self_s + self_len;
1936 while (count-- > 0) {
1937 offset = findstring(start, end-start,
1938 from_s, from_len,
1939 0, end-start, FORWARD);
1940 if (offset == -1)
1941 break;
1942 next = start+offset;
1943 if (next == start) {
1944 /* replace with the 'to' */
1945 Py_MEMCPY(result_s, to_s, to_len);
1946 result_s += to_len;
1947 start += from_len;
1948 } else {
1949 /* copy the unchanged old then the 'to' */
1950 Py_MEMCPY(result_s, start, next-start);
1951 result_s += (next-start);
1952 Py_MEMCPY(result_s, to_s, to_len);
1953 result_s += to_len;
1954 start = next+from_len;
1955 }
1956 }
1957 /* Copy the remainder of the remaining bytes */
1958 Py_MEMCPY(result_s, start, end-start);
1959
1960 return result;
1961}
1962
1963
1964Py_LOCAL(PyBytesObject *)
1965replace(PyBytesObject *self,
1966 const char *from_s, Py_ssize_t from_len,
1967 const char *to_s, Py_ssize_t to_len,
1968 Py_ssize_t maxcount)
1969{
1970 if (maxcount < 0) {
1971 maxcount = PY_SSIZE_T_MAX;
1972 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1973 /* nothing to do; return the original bytes */
1974 return return_self(self);
1975 }
1976
1977 if (maxcount == 0 ||
1978 (from_len == 0 && to_len == 0)) {
1979 /* nothing to do; return the original bytes */
1980 return return_self(self);
1981 }
1982
1983 /* Handle zero-length special cases */
1984
1985 if (from_len == 0) {
1986 /* insert the 'to' bytes everywhere. */
1987 /* >>> "Python".replace("", ".") */
1988 /* '.P.y.t.h.o.n.' */
1989 return replace_interleave(self, to_s, to_len, maxcount);
1990 }
1991
1992 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1993 /* point for an empty self bytes to generate a non-empty bytes */
1994 /* Special case so the remaining code always gets a non-empty bytes */
1995 if (PyBytes_GET_SIZE(self) == 0) {
1996 return return_self(self);
1997 }
1998
1999 if (to_len == 0) {
2000 /* delete all occurances of 'from' bytes */
2001 if (from_len == 1) {
2002 return replace_delete_single_character(
2003 self, from_s[0], maxcount);
2004 } else {
2005 return replace_delete_substring(self, from_s, from_len, maxcount);
2006 }
2007 }
2008
2009 /* Handle special case where both bytes have the same length */
2010
2011 if (from_len == to_len) {
2012 if (from_len == 1) {
2013 return replace_single_character_in_place(
2014 self,
2015 from_s[0],
2016 to_s[0],
2017 maxcount);
2018 } else {
2019 return replace_substring_in_place(
2020 self, from_s, from_len, to_s, to_len, maxcount);
2021 }
2022 }
2023
2024 /* Otherwise use the more generic algorithms */
2025 if (from_len == 1) {
2026 return replace_single_character(self, from_s[0],
2027 to_s, to_len, maxcount);
2028 } else {
2029 /* len('from')>=2, len('to')>=1 */
2030 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2031 }
2032}
2033
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002034
Neal Norwitz6968b052007-02-27 19:02:19 +00002035PyDoc_STRVAR(replace__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002036"B.replace(old, new[, count]) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002037\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002038Return a copy of B with all occurrences of subsection\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002039old replaced by new. If the optional argument count is\n\
2040given, only the first count occurrences are replaced.");
2041
2042static PyObject *
2043bytes_replace(PyBytesObject *self, PyObject *args)
2044{
2045 Py_ssize_t count = -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002046 PyObject *from, *to, *res;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002047 Py_buffer vfrom, vto;
Neal Norwitz6968b052007-02-27 19:02:19 +00002048
2049 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2050 return NULL;
2051
Guido van Rossuma74184e2007-08-29 04:05:57 +00002052 if (_getbuffer(from, &vfrom) < 0)
2053 return NULL;
2054 if (_getbuffer(to, &vto) < 0) {
2055 PyObject_ReleaseBuffer(from, &vfrom);
2056 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002057 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002058
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002059 res = (PyObject *)replace((PyBytesObject *) self,
Guido van Rossuma74184e2007-08-29 04:05:57 +00002060 vfrom.buf, vfrom.len,
2061 vto.buf, vto.len, count);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002062
Guido van Rossuma74184e2007-08-29 04:05:57 +00002063 PyObject_ReleaseBuffer(from, &vfrom);
2064 PyObject_ReleaseBuffer(to, &vto);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002065 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00002066}
2067
2068
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for at most `maxsplit` splits:
   maxsplit+1 elements (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The argument is parenthesized so that any expression may be passed; it
   is evaluated more than once, so it must be free of side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append the slice data[left:right] to `list` as a new bytes object.
   Uses the caller's locals `str` and `list` and jumps to the caller's
   `onError` label on failure.  Wrapped in braces (like SPLIT_ADD) so the
   whole expansion stays a single statement under an unbraced if/for. */
#define SPLIT_APPEND(data, left, right) {                       \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str); }

/* Store the slice data[left:right] as element number `count` of `list`
   and increment `count`.  The first MAX_PREALLOC elements are written
   straight into the preallocated slots; later ones are appended.  Uses
   the caller's locals `str`, `list` and `count` and jumps to the caller's
   `onError` label on failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Neal Norwitz6968b052007-02-27 19:02:19 +00002113
2114
2115Py_LOCAL_INLINE(PyObject *)
2116split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2117{
Guido van Rossum8f950672007-09-10 16:53:45 +00002118 register Py_ssize_t i, j, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002119 PyObject *str;
2120 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2121
2122 if (list == NULL)
2123 return NULL;
2124
2125 i = j = 0;
2126 while ((j < len) && (maxcount-- > 0)) {
Guido van Rossum8f950672007-09-10 16:53:45 +00002127 for(; j < len; j++) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002128 /* I found that using memchr makes no difference */
2129 if (s[j] == ch) {
2130 SPLIT_ADD(s, i, j);
2131 i = j = j + 1;
2132 break;
2133 }
2134 }
2135 }
2136 if (i <= len) {
2137 SPLIT_ADD(s, i, len);
2138 }
2139 FIX_PREALLOC_SIZE(list);
2140 return list;
2141
2142 onError:
2143 Py_DECREF(list);
2144 return NULL;
2145}
2146
Guido van Rossum8f950672007-09-10 16:53:45 +00002147
2148Py_LOCAL_INLINE(PyObject *)
2149split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2150{
2151 register Py_ssize_t i, j, count = 0;
2152 PyObject *str;
2153 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2154
2155 if (list == NULL)
2156 return NULL;
2157
2158 for (i = j = 0; i < len; ) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002159 /* find a token */
2160 while (i < len && ISSPACE(s[i]))
2161 i++;
2162 j = i;
2163 while (i < len && !ISSPACE(s[i]))
2164 i++;
2165 if (j < i) {
2166 if (maxcount-- <= 0)
2167 break;
2168 SPLIT_ADD(s, j, i);
2169 while (i < len && ISSPACE(s[i]))
2170 i++;
2171 j = i;
2172 }
Guido van Rossum8f950672007-09-10 16:53:45 +00002173 }
2174 if (j < len) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002175 SPLIT_ADD(s, j, len);
Guido van Rossum8f950672007-09-10 16:53:45 +00002176 }
2177 FIX_PREALLOC_SIZE(list);
2178 return list;
2179
2180 onError:
2181 Py_DECREF(list);
2182 return NULL;
2183}
2184
Neal Norwitz6968b052007-02-27 19:02:19 +00002185PyDoc_STRVAR(split__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002186"B.split([sep[, maxsplit]]) -> list of buffer\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002187\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002188Return a list of the sections in B, using sep as the delimiter.\n\
2189If sep is not given, B is split on ASCII whitespace characters\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002190(space, tab, return, newline, formfeed, vertical tab).\n\
2191If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002192
2193static PyObject *
2194bytes_split(PyBytesObject *self, PyObject *args)
2195{
2196 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
Guido van Rossum8f950672007-09-10 16:53:45 +00002197 Py_ssize_t maxsplit = -1, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002198 const char *s = PyBytes_AS_STRING(self), *sub;
Guido van Rossum8f950672007-09-10 16:53:45 +00002199 PyObject *list, *str, *subobj = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002200 Py_buffer vsub;
Neal Norwitz6968b052007-02-27 19:02:19 +00002201#ifdef USE_FAST
2202 Py_ssize_t pos;
2203#endif
2204
Guido van Rossum8f950672007-09-10 16:53:45 +00002205 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Neal Norwitz6968b052007-02-27 19:02:19 +00002206 return NULL;
2207 if (maxsplit < 0)
2208 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum8f950672007-09-10 16:53:45 +00002209
2210 if (subobj == Py_None)
2211 return split_whitespace(s, len, maxsplit);
2212
2213 if (_getbuffer(subobj, &vsub) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00002214 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002215 sub = vsub.buf;
2216 n = vsub.len;
Neal Norwitz6968b052007-02-27 19:02:19 +00002217
2218 if (n == 0) {
2219 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum8f950672007-09-10 16:53:45 +00002220 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002221 return NULL;
2222 }
Guido van Rossum8f950672007-09-10 16:53:45 +00002223 if (n == 1)
Neal Norwitz6968b052007-02-27 19:02:19 +00002224 return split_char(s, len, sub[0], maxsplit);
2225
2226 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum8f950672007-09-10 16:53:45 +00002227 if (list == NULL) {
2228 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002229 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002230 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002231
2232#ifdef USE_FAST
2233 i = j = 0;
2234 while (maxsplit-- > 0) {
2235 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2236 if (pos < 0)
2237 break;
2238 j = i+pos;
2239 SPLIT_ADD(s, i, j);
2240 i = j + n;
2241 }
2242#else
2243 i = j = 0;
2244 while ((j+n <= len) && (maxsplit-- > 0)) {
2245 for (; j+n <= len; j++) {
2246 if (Py_STRING_MATCH(s, j, sub, n)) {
2247 SPLIT_ADD(s, i, j);
2248 i = j = j + n;
2249 break;
2250 }
2251 }
2252 }
2253#endif
2254 SPLIT_ADD(s, i, len);
2255 FIX_PREALLOC_SIZE(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002256 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002257 return list;
2258
2259 onError:
2260 Py_DECREF(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002261 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002262 return NULL;
2263}
2264
Guido van Rossum98297ee2007-11-06 21:34:58 +00002265/* stringlib's partition shares nullbytes in some cases.
2266 undo this, we don't want the nullbytes to be shared. */
2267static PyObject *
2268make_nullbytes_unique(PyObject *result)
2269{
2270 if (result != NULL) {
2271 int i;
2272 assert(PyTuple_Check(result));
2273 assert(PyTuple_GET_SIZE(result) == 3);
2274 for (i = 0; i < 3; i++) {
2275 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2276 PyObject *new = PyBytes_FromStringAndSize(NULL, 0);
2277 if (new == NULL) {
2278 Py_DECREF(result);
2279 result = NULL;
2280 break;
2281 }
2282 Py_DECREF(nullbytes);
2283 PyTuple_SET_ITEM(result, i, new);
2284 }
2285 }
2286 }
2287 return result;
2288}
2289
Neal Norwitz6968b052007-02-27 19:02:19 +00002290PyDoc_STRVAR(partition__doc__,
2291"B.partition(sep) -> (head, sep, tail)\n\
2292\n\
2293Searches for the separator sep in B, and returns the part before it,\n\
2294the separator itself, and the part after it. If the separator is not\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002295found, returns B and two empty buffer.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002296
2297static PyObject *
2298bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2299{
2300 PyObject *bytesep, *result;
2301
2302 bytesep = PyBytes_FromObject(sep_obj);
2303 if (! bytesep)
2304 return NULL;
2305
2306 result = stringlib_partition(
2307 (PyObject*) self,
2308 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002309 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002310 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2311 );
2312
2313 Py_DECREF(bytesep);
Guido van Rossum98297ee2007-11-06 21:34:58 +00002314 return make_nullbytes_unique(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002315}
2316
2317PyDoc_STRVAR(rpartition__doc__,
2318"B.rpartition(sep) -> (tail, sep, head)\n\
2319\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002320Searches for the separator sep in B, starting at the end of B,\n\
2321and returns the part before it, the separator itself, and the\n\
2322part after it. If the separator is not found, returns two empty\n\
2323buffer objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002324
2325static PyObject *
2326bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2327{
2328 PyObject *bytesep, *result;
2329
2330 bytesep = PyBytes_FromObject(sep_obj);
2331 if (! bytesep)
2332 return NULL;
2333
2334 result = stringlib_rpartition(
2335 (PyObject*) self,
2336 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002337 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002338 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2339 );
2340
2341 Py_DECREF(bytesep);
Guido van Rossum98297ee2007-11-06 21:34:58 +00002342 return make_nullbytes_unique(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002343}
2344
2345Py_LOCAL_INLINE(PyObject *)
2346rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2347{
2348 register Py_ssize_t i, j, count=0;
2349 PyObject *str;
2350 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2351
2352 if (list == NULL)
2353 return NULL;
2354
2355 i = j = len - 1;
2356 while ((i >= 0) && (maxcount-- > 0)) {
2357 for (; i >= 0; i--) {
2358 if (s[i] == ch) {
2359 SPLIT_ADD(s, i + 1, j + 1);
2360 j = i = i - 1;
2361 break;
2362 }
2363 }
2364 }
2365 if (j >= -1) {
2366 SPLIT_ADD(s, 0, j + 1);
2367 }
2368 FIX_PREALLOC_SIZE(list);
2369 if (PyList_Reverse(list) < 0)
2370 goto onError;
2371
2372 return list;
2373
2374 onError:
2375 Py_DECREF(list);
2376 return NULL;
2377}
2378
Guido van Rossum8f950672007-09-10 16:53:45 +00002379Py_LOCAL_INLINE(PyObject *)
2380rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2381{
2382 register Py_ssize_t i, j, count = 0;
2383 PyObject *str;
2384 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2385
2386 if (list == NULL)
2387 return NULL;
2388
2389 for (i = j = len - 1; i >= 0; ) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002390 /* find a token */
2391 while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
2392 i--;
2393 j = i;
2394 while (i >= 0 && !Py_UNICODE_ISSPACE(s[i]))
2395 i--;
2396 if (j > i) {
2397 if (maxcount-- <= 0)
2398 break;
2399 SPLIT_ADD(s, i + 1, j + 1);
2400 while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
2401 i--;
2402 j = i;
2403 }
Guido van Rossum8f950672007-09-10 16:53:45 +00002404 }
2405 if (j >= 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002406 SPLIT_ADD(s, 0, j + 1);
Guido van Rossum8f950672007-09-10 16:53:45 +00002407 }
2408 FIX_PREALLOC_SIZE(list);
2409 if (PyList_Reverse(list) < 0)
2410 goto onError;
2411
2412 return list;
2413
2414 onError:
2415 Py_DECREF(list);
2416 return NULL;
2417}
2418
Neal Norwitz6968b052007-02-27 19:02:19 +00002419PyDoc_STRVAR(rsplit__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002420"B.rsplit(sep[, maxsplit]) -> list of buffer\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002421\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002422Return a list of the sections in B, using sep as the delimiter,\n\
2423starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002424If sep is not given, B is split on ASCII whitespace characters\n\
2425(space, tab, return, newline, formfeed, vertical tab).\n\
2426If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002427
2428static PyObject *
2429bytes_rsplit(PyBytesObject *self, PyObject *args)
2430{
2431 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
Guido van Rossum8f950672007-09-10 16:53:45 +00002432 Py_ssize_t maxsplit = -1, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002433 const char *s = PyBytes_AS_STRING(self), *sub;
Guido van Rossum8f950672007-09-10 16:53:45 +00002434 PyObject *list, *str, *subobj = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002435 Py_buffer vsub;
Neal Norwitz6968b052007-02-27 19:02:19 +00002436
Guido van Rossum8f950672007-09-10 16:53:45 +00002437 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Neal Norwitz6968b052007-02-27 19:02:19 +00002438 return NULL;
2439 if (maxsplit < 0)
2440 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum8f950672007-09-10 16:53:45 +00002441
2442 if (subobj == Py_None)
2443 return rsplit_whitespace(s, len, maxsplit);
2444
2445 if (_getbuffer(subobj, &vsub) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00002446 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002447 sub = vsub.buf;
2448 n = vsub.len;
Neal Norwitz6968b052007-02-27 19:02:19 +00002449
2450 if (n == 0) {
2451 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum8f950672007-09-10 16:53:45 +00002452 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002453 return NULL;
2454 }
2455 else if (n == 1)
2456 return rsplit_char(s, len, sub[0], maxsplit);
2457
2458 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum8f950672007-09-10 16:53:45 +00002459 if (list == NULL) {
2460 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002461 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002462 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002463
2464 j = len;
2465 i = j - n;
2466
2467 while ( (i >= 0) && (maxsplit-- > 0) ) {
2468 for (; i>=0; i--) {
2469 if (Py_STRING_MATCH(s, i, sub, n)) {
2470 SPLIT_ADD(s, i + n, j);
2471 j = i;
2472 i -= n;
2473 break;
2474 }
2475 }
2476 }
2477 SPLIT_ADD(s, 0, j);
2478 FIX_PREALLOC_SIZE(list);
2479 if (PyList_Reverse(list) < 0)
2480 goto onError;
Guido van Rossum8f950672007-09-10 16:53:45 +00002481 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002482 return list;
2483
2484onError:
2485 Py_DECREF(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002486 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002487 return NULL;
2488}
2489
2490PyDoc_STRVAR(extend__doc__,
2491"B.extend(iterable int) -> None\n\
2492\n\
2493Append all the elements from the iterator or sequence to the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002494end of B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002495static PyObject *
2496bytes_extend(PyBytesObject *self, PyObject *arg)
2497{
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002498 /* XXX(gps): The docstring says any iterable int will do but the
2499 * bytes_setslice code only accepts something supporting PEP 3118.
2500 * A list or tuple of 0 <= int <= 255 is supposed to work. */
2501 /* bug being tracked on: http://bugs.python.org/issue1283 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002502 if (bytes_setslice(self, Py_Size(self), Py_Size(self), arg) == -1)
Neal Norwitz6968b052007-02-27 19:02:19 +00002503 return NULL;
2504 Py_RETURN_NONE;
2505}
2506
2507
2508PyDoc_STRVAR(reverse__doc__,
2509"B.reverse() -> None\n\
2510\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002511Reverse the order of the values in B in place.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002512static PyObject *
2513bytes_reverse(PyBytesObject *self, PyObject *unused)
2514{
2515 char swap, *head, *tail;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002516 Py_ssize_t i, j, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002517
2518 j = n / 2;
2519 head = self->ob_bytes;
2520 tail = head + n - 1;
2521 for (i = 0; i < j; i++) {
2522 swap = *head;
2523 *head++ = *tail;
2524 *tail-- = swap;
2525 }
2526
2527 Py_RETURN_NONE;
2528}
2529
2530PyDoc_STRVAR(insert__doc__,
2531"B.insert(index, int) -> None\n\
2532\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002533Insert a single item into the buffer before the given index.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002534static PyObject *
2535bytes_insert(PyBytesObject *self, PyObject *args)
2536{
2537 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002538 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002539
2540 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2541 return NULL;
2542
2543 if (n == PY_SSIZE_T_MAX) {
2544 PyErr_SetString(PyExc_OverflowError,
2545 "cannot add more objects to bytes");
2546 return NULL;
2547 }
2548 if (value < 0 || value >= 256) {
2549 PyErr_SetString(PyExc_ValueError,
2550 "byte must be in range(0, 256)");
2551 return NULL;
2552 }
2553 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2554 return NULL;
2555
2556 if (where < 0) {
2557 where += n;
2558 if (where < 0)
2559 where = 0;
2560 }
2561 if (where > n)
2562 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002563 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002564 self->ob_bytes[where] = value;
2565
2566 Py_RETURN_NONE;
2567}
2568
2569PyDoc_STRVAR(append__doc__,
2570"B.append(int) -> None\n\
2571\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002572Append a single item to the end of B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002573static PyObject *
2574bytes_append(PyBytesObject *self, PyObject *arg)
2575{
2576 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002577 Py_ssize_t n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002578
2579 if (! _getbytevalue(arg, &value))
2580 return NULL;
2581 if (n == PY_SSIZE_T_MAX) {
2582 PyErr_SetString(PyExc_OverflowError,
2583 "cannot add more objects to bytes");
2584 return NULL;
2585 }
2586 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2587 return NULL;
2588
2589 self->ob_bytes[n] = value;
2590
2591 Py_RETURN_NONE;
2592}
2593
2594PyDoc_STRVAR(pop__doc__,
2595"B.pop([index]) -> int\n\
2596\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002597Remove and return a single item from B. If no index\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002598argument is give, will pop the last value.");
2599static PyObject *
2600bytes_pop(PyBytesObject *self, PyObject *args)
2601{
2602 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002603 Py_ssize_t where = -1, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002604
2605 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2606 return NULL;
2607
2608 if (n == 0) {
2609 PyErr_SetString(PyExc_OverflowError,
2610 "cannot pop an empty bytes");
2611 return NULL;
2612 }
2613 if (where < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002614 where += Py_Size(self);
2615 if (where < 0 || where >= Py_Size(self)) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002616 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2617 return NULL;
2618 }
2619
2620 value = self->ob_bytes[where];
2621 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2622 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2623 return NULL;
2624
2625 return PyInt_FromLong(value);
2626}
2627
2628PyDoc_STRVAR(remove__doc__,
2629"B.remove(int) -> None\n\
2630\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002631Remove the first occurance of a value in B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002632static PyObject *
2633bytes_remove(PyBytesObject *self, PyObject *arg)
2634{
2635 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002636 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002637
2638 if (! _getbytevalue(arg, &value))
2639 return NULL;
2640
2641 for (where = 0; where < n; where++) {
2642 if (self->ob_bytes[where] == value)
2643 break;
2644 }
2645 if (where == n) {
2646 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2647 return NULL;
2648 }
2649
2650 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2651 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2652 return NULL;
2653
2654 Py_RETURN_NONE;
2655}
2656
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002657/* XXX These two helpers could be optimized if argsize == 1 */
2658
Neal Norwitz2bad9702007-08-27 06:19:22 +00002659static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002660lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2661 void *argptr, Py_ssize_t argsize)
2662{
2663 Py_ssize_t i = 0;
2664 while (i < mysize && memchr(argptr, myptr[i], argsize))
2665 i++;
2666 return i;
2667}
2668
Neal Norwitz2bad9702007-08-27 06:19:22 +00002669static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002670rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2671 void *argptr, Py_ssize_t argsize)
2672{
2673 Py_ssize_t i = mysize - 1;
2674 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2675 i--;
2676 return i + 1;
2677}
2678
2679PyDoc_STRVAR(strip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002680"B.strip([bytes]) -> buffer\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002681\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002682Strip leading and trailing bytes contained in the argument.\n\
2683If the argument is omitted, strip ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002684static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002685bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002686{
2687 Py_ssize_t left, right, mysize, argsize;
2688 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002689 PyObject *arg = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002690 Py_buffer varg;
Guido van Rossum8f950672007-09-10 16:53:45 +00002691 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002692 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002693 if (arg == Py_None) {
2694 argptr = "\t\n\r\f\v ";
2695 argsize = 6;
2696 }
2697 else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002698 if (_getbuffer(arg, &varg) < 0)
2699 return NULL;
2700 argptr = varg.buf;
2701 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002702 }
2703 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002704 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002705 left = lstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumeb29e9a2007-08-08 21:55:33 +00002706 if (left == mysize)
2707 right = left;
2708 else
2709 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossum8f950672007-09-10 16:53:45 +00002710 if (arg != Py_None)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002711 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002712 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2713}
2714
2715PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002716"B.lstrip([bytes]) -> buffer\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002717\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002718Strip leading bytes contained in the argument.\n\
2719If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002720static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002721bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002722{
2723 Py_ssize_t left, right, mysize, argsize;
2724 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002725 PyObject *arg = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002726 Py_buffer varg;
Guido van Rossum8f950672007-09-10 16:53:45 +00002727 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002728 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002729 if (arg == Py_None) {
2730 argptr = "\t\n\r\f\v ";
2731 argsize = 6;
2732 }
2733 else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002734 if (_getbuffer(arg, &varg) < 0)
2735 return NULL;
2736 argptr = varg.buf;
2737 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002738 }
2739 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002740 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002741 left = lstrip_helper(myptr, mysize, argptr, argsize);
2742 right = mysize;
Guido van Rossum8f950672007-09-10 16:53:45 +00002743 if (arg != Py_None)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002744 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002745 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2746}
2747
2748PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002749"B.rstrip([bytes]) -> buffer\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002750\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002751Strip trailing bytes contained in the argument.\n\
2752If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002753static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002754bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002755{
2756 Py_ssize_t left, right, mysize, argsize;
2757 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002758 PyObject *arg = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002759 Py_buffer varg;
Guido van Rossum8f950672007-09-10 16:53:45 +00002760 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002761 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002762 if (arg == Py_None) {
2763 argptr = "\t\n\r\f\v ";
2764 argsize = 6;
2765 }
2766 else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002767 if (_getbuffer(arg, &varg) < 0)
2768 return NULL;
2769 argptr = varg.buf;
2770 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002771 }
2772 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002773 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002774 left = 0;
2775 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossum8f950672007-09-10 16:53:45 +00002776 if (arg != Py_None)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002777 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002778 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2779}
Neal Norwitz6968b052007-02-27 19:02:19 +00002780
Guido van Rossumd624f182006-04-24 13:47:05 +00002781PyDoc_STRVAR(decode_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002782"B.decode([encoding[, errors]]) -> unicode object.\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002783\n\
2784Decodes B using the codec registered for encoding. encoding defaults\n\
2785to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002786handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2787a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002788as well as any other name registerd with codecs.register_error that is\n\
2789able to handle UnicodeDecodeErrors.");
2790
2791static PyObject *
2792bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002793{
Guido van Rossumd624f182006-04-24 13:47:05 +00002794 const char *encoding = NULL;
2795 const char *errors = NULL;
2796
2797 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2798 return NULL;
2799 if (encoding == NULL)
2800 encoding = PyUnicode_GetDefaultEncoding();
2801 return PyCodec_Decode(self, encoding, errors);
2802}
2803
Guido van Rossuma0867f72006-05-05 04:34:18 +00002804PyDoc_STRVAR(alloc_doc,
2805"B.__alloc__() -> int\n\
2806\n\
2807Returns the number of bytes actually allocated.");
2808
2809static PyObject *
2810bytes_alloc(PyBytesObject *self)
2811{
2812 return PyInt_FromSsize_t(self->ob_alloc);
2813}
2814
Guido van Rossum20188312006-05-05 15:15:40 +00002815PyDoc_STRVAR(join_doc,
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002816"B.join(iterable_of_bytes) -> bytes\n\
Guido van Rossum20188312006-05-05 15:15:40 +00002817\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002818Concatenates any number of buffer objects, with B in between each pair.");
Guido van Rossum20188312006-05-05 15:15:40 +00002819
2820static PyObject *
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002821bytes_join(PyBytesObject *self, PyObject *it)
Guido van Rossum20188312006-05-05 15:15:40 +00002822{
2823 PyObject *seq;
Martin v. Löwis5d7428b2007-07-21 18:47:48 +00002824 Py_ssize_t mysize = Py_Size(self);
Guido van Rossum20188312006-05-05 15:15:40 +00002825 Py_ssize_t i;
2826 Py_ssize_t n;
2827 PyObject **items;
2828 Py_ssize_t totalsize = 0;
2829 PyObject *result;
2830 char *dest;
2831
2832 seq = PySequence_Fast(it, "can only join an iterable");
2833 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002834 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002835 n = PySequence_Fast_GET_SIZE(seq);
2836 items = PySequence_Fast_ITEMS(seq);
2837
2838 /* Compute the total size, and check that they are all bytes */
Guido van Rossum98297ee2007-11-06 21:34:58 +00002839 /* XXX Shouldn't we use _getbuffer() on these items instead? */
Guido van Rossum20188312006-05-05 15:15:40 +00002840 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002841 PyObject *obj = items[i];
Guido van Rossum98297ee2007-11-06 21:34:58 +00002842 if (!PyBytes_Check(obj) && !PyString_Check(obj)) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002843 PyErr_Format(PyExc_TypeError,
2844 "can only join an iterable of bytes "
2845 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002846 /* XXX %ld isn't right on Win64 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002847 (long)i, Py_Type(obj)->tp_name);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002848 goto error;
2849 }
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002850 if (i > 0)
2851 totalsize += mysize;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002852 totalsize += Py_Size(obj);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002853 if (totalsize < 0) {
2854 PyErr_NoMemory();
2855 goto error;
2856 }
Guido van Rossum20188312006-05-05 15:15:40 +00002857 }
2858
2859 /* Allocate the result, and copy the bytes */
2860 result = PyBytes_FromStringAndSize(NULL, totalsize);
2861 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002862 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002863 dest = PyBytes_AS_STRING(result);
2864 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002865 PyObject *obj = items[i];
Guido van Rossum98297ee2007-11-06 21:34:58 +00002866 Py_ssize_t size = Py_Size(obj);
2867 char *buf;
2868 if (PyBytes_Check(obj))
2869 buf = PyBytes_AS_STRING(obj);
2870 else
2871 buf = PyString_AS_STRING(obj);
2872 if (i) {
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002873 memcpy(dest, self->ob_bytes, mysize);
2874 dest += mysize;
2875 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00002876 memcpy(dest, buf, size);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002877 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002878 }
2879
2880 /* Done */
2881 Py_DECREF(seq);
2882 return result;
2883
2884 /* Error handling */
2885 error:
2886 Py_DECREF(seq);
2887 return NULL;
2888}
2889
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002890PyDoc_STRVAR(fromhex_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002891"buffer.fromhex(string) -> buffer\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002892\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002893Create a buffer object from a string of hexadecimal numbers.\n\
2894Spaces between two numbers are accepted.\n\
2895Example: buffer.fromhex('B9 01EF') -> buffer(b'\\xb9\\x01\\xef').");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002896
2897static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002898hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002899{
Guido van Rossumae404e22007-10-26 21:46:44 +00002900 if (c >= 128)
2901 return -1;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002902 if (ISDIGIT(c))
Georg Brandlb3f568f2007-02-27 08:49:18 +00002903 return c - '0';
2904 else {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002905 if (ISUPPER(c))
2906 c = TOLOWER(c);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002907 if (c >= 'a' && c <= 'f')
2908 return c - 'a' + 10;
2909 }
2910 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002911}
2912
2913static PyObject *
2914bytes_fromhex(PyObject *cls, PyObject *args)
2915{
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002916 PyObject *newbytes, *hexobj;
2917 char *buf;
Guido van Rossumae404e22007-10-26 21:46:44 +00002918 Py_UNICODE *hex;
2919 Py_ssize_t hexlen, byteslen, i, j;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002920 int top, bot;
2921
Guido van Rossumae404e22007-10-26 21:46:44 +00002922 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002923 return NULL;
Guido van Rossumae404e22007-10-26 21:46:44 +00002924 assert(PyUnicode_Check(hexobj));
2925 hexlen = PyUnicode_GET_SIZE(hexobj);
2926 hex = PyUnicode_AS_UNICODE(hexobj);
2927 byteslen = hexlen/2; /* This overestimates if there are spaces */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002928 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
Guido van Rossumae404e22007-10-26 21:46:44 +00002929 if (!newbytes)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002930 return NULL;
2931 buf = PyBytes_AS_STRING(newbytes);
Guido van Rossumae404e22007-10-26 21:46:44 +00002932 for (i = j = 0; i < hexlen; i += 2) {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002933 /* skip over spaces in the input */
Guido van Rossumae404e22007-10-26 21:46:44 +00002934 while (hex[i] == ' ')
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002935 i++;
Guido van Rossumae404e22007-10-26 21:46:44 +00002936 if (i >= hexlen)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002937 break;
Guido van Rossumae404e22007-10-26 21:46:44 +00002938 top = hex_digit_to_int(hex[i]);
2939 bot = hex_digit_to_int(hex[i+1]);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002940 if (top == -1 || bot == -1) {
2941 PyErr_Format(PyExc_ValueError,
Guido van Rossumae404e22007-10-26 21:46:44 +00002942 "non-hexadecimal number found in "
2943 "fromhex() arg at position %zd", i);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002944 goto error;
2945 }
2946 buf[j++] = (top << 4) + bot;
2947 }
2948 if (PyBytes_Resize(newbytes, j) < 0)
2949 goto error;
2950 return newbytes;
2951
2952 error:
2953 Py_DECREF(newbytes);
2954 return NULL;
2955}
2956
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002957PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2958
2959static PyObject *
2960bytes_reduce(PyBytesObject *self)
2961{
Guido van Rossuma6c04be2007-11-03 00:24:24 +00002962 PyObject *latin1, *dict;
Martin v. Löwis9c121062007-08-05 20:26:11 +00002963 if (self->ob_bytes)
Guido van Rossuma74184e2007-08-29 04:05:57 +00002964 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
2965 Py_Size(self), NULL);
Martin v. Löwis9c121062007-08-05 20:26:11 +00002966 else
Guido van Rossuma74184e2007-08-29 04:05:57 +00002967 latin1 = PyUnicode_FromString("");
Guido van Rossuma6c04be2007-11-03 00:24:24 +00002968
2969 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
2970 if (dict == NULL) {
2971 PyErr_Clear();
2972 dict = Py_None;
2973 Py_INCREF(dict);
2974 }
2975
2976 return Py_BuildValue("(O(Ns)N)", Py_Type(self), latin1, "latin-1", dict);
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002977}
2978
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002979static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002980 (lenfunc)bytes_length, /* sq_length */
Guido van Rossum98297ee2007-11-06 21:34:58 +00002981 (binaryfunc)PyBytes_Concat, /* sq_concat */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002982 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2983 (ssizeargfunc)bytes_getitem, /* sq_item */
2984 0, /* sq_slice */
2985 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2986 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002987 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002988 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2989 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002990};
2991
2992static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002993 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00002994 (binaryfunc)bytes_subscript,
2995 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002996};
2997
2998static PyBufferProcs bytes_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002999 (getbufferproc)bytes_getbuffer,
3000 (releasebufferproc)bytes_releasebuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003001};
3002
3003static PyMethodDef
3004bytes_methods[] = {
Guido van Rossumae404e22007-10-26 21:46:44 +00003005 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
3006 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
3007 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003008 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3009 _Py_capitalize__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003010 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00003011 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
3012 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
3013 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003014 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3015 expandtabs__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00003016 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
3017 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
3018 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
3019 fromhex_doc},
3020 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3021 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
3022 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3023 _Py_isalnum__doc__},
3024 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3025 _Py_isalpha__doc__},
3026 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3027 _Py_isdigit__doc__},
3028 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3029 _Py_islower__doc__},
3030 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3031 _Py_isspace__doc__},
3032 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3033 _Py_istitle__doc__},
3034 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3035 _Py_isupper__doc__},
3036 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
3037 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3038 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
3039 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
3040 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
3041 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
3042 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
3043 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
3044 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
3045 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3046 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3047 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3048 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
3049 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
3050 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
3051 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003052 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3053 splitlines__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00003054 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
3055 startswith__doc__},
3056 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
3057 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3058 _Py_swapcase__doc__},
3059 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3060 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
3061 translate__doc__},
3062 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3063 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossuma0867f72006-05-05 04:34:18 +00003064 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003065};
3066
3067PyDoc_STRVAR(bytes_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00003068"buffer(iterable_of_ints) -> buffer.\n\
3069buffer(string, encoding[, errors]) -> buffer.\n\
3070buffer(bytes_or_buffer) -> mutable copy of bytes_or_buffer.\n\
3071buffer(memory_view) -> buffer.\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003072\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003073Construct an mutable buffer object from:\n\
3074 - an iterable yielding integers in range(256)\n\
3075 - a text string encoded using the specified encoding\n\
3076 - a bytes or a buffer object\n\
3077 - any object implementing the buffer API.\n\
3078\n\
3079buffer(int) -> buffer.\n\
3080\n\
3081Construct a zero-initialized buffer of the given length.");
3082
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003083
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003084static PyObject *bytes_iter(PyObject *seq);
3085
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003086PyTypeObject PyBytes_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003087 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum98297ee2007-11-06 21:34:58 +00003088 "buffer",
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003089 sizeof(PyBytesObject),
3090 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00003091 (destructor)bytes_dealloc, /* tp_dealloc */
3092 0, /* tp_print */
3093 0, /* tp_getattr */
3094 0, /* tp_setattr */
3095 0, /* tp_compare */
3096 (reprfunc)bytes_repr, /* tp_repr */
3097 0, /* tp_as_number */
3098 &bytes_as_sequence, /* tp_as_sequence */
3099 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00003100 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00003101 0, /* tp_call */
Guido van Rossum98297ee2007-11-06 21:34:58 +00003102 bytes_str, /* tp_str */
Guido van Rossumd624f182006-04-24 13:47:05 +00003103 PyObject_GenericGetAttr, /* tp_getattro */
3104 0, /* tp_setattro */
3105 &bytes_as_buffer, /* tp_as_buffer */
Guido van Rossuma6c04be2007-11-03 00:24:24 +00003106 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00003107 bytes_doc, /* tp_doc */
3108 0, /* tp_traverse */
3109 0, /* tp_clear */
3110 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3111 0, /* tp_weaklistoffset */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003112 bytes_iter, /* tp_iter */
Guido van Rossumd624f182006-04-24 13:47:05 +00003113 0, /* tp_iternext */
3114 bytes_methods, /* tp_methods */
3115 0, /* tp_members */
3116 0, /* tp_getset */
3117 0, /* tp_base */
3118 0, /* tp_dict */
3119 0, /* tp_descr_get */
3120 0, /* tp_descr_set */
3121 0, /* tp_dictoffset */
3122 (initproc)bytes_init, /* tp_init */
3123 PyType_GenericAlloc, /* tp_alloc */
3124 PyType_GenericNew, /* tp_new */
3125 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003126};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003127
3128/*********************** Bytes Iterator ****************************/
3129
3130typedef struct {
3131 PyObject_HEAD
3132 Py_ssize_t it_index;
3133 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3134} bytesiterobject;
3135
3136static void
3137bytesiter_dealloc(bytesiterobject *it)
3138{
3139 _PyObject_GC_UNTRACK(it);
3140 Py_XDECREF(it->it_seq);
3141 PyObject_GC_Del(it);
3142}
3143
3144static int
3145bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
3146{
3147 Py_VISIT(it->it_seq);
3148 return 0;
3149}
3150
3151static PyObject *
3152bytesiter_next(bytesiterobject *it)
3153{
3154 PyBytesObject *seq;
3155 PyObject *item;
3156
3157 assert(it != NULL);
3158 seq = it->it_seq;
3159 if (seq == NULL)
3160 return NULL;
3161 assert(PyBytes_Check(seq));
3162
3163 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3164 item = PyInt_FromLong(
3165 (unsigned char)seq->ob_bytes[it->it_index]);
3166 if (item != NULL)
3167 ++it->it_index;
3168 return item;
3169 }
3170
3171 Py_DECREF(seq);
3172 it->it_seq = NULL;
3173 return NULL;
3174}
3175
3176static PyObject *
3177bytesiter_length_hint(bytesiterobject *it)
3178{
3179 Py_ssize_t len = 0;
3180 if (it->it_seq)
3181 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3182 return PyInt_FromSsize_t(len);
3183}
3184
3185PyDoc_STRVAR(length_hint_doc,
3186 "Private method returning an estimate of len(list(it)).");
3187
3188static PyMethodDef bytesiter_methods[] = {
3189 {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
3190 length_hint_doc},
3191 {NULL, NULL} /* sentinel */
3192};
3193
3194PyTypeObject PyBytesIter_Type = {
3195 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3196 "bytesiterator", /* tp_name */
3197 sizeof(bytesiterobject), /* tp_basicsize */
3198 0, /* tp_itemsize */
3199 /* methods */
3200 (destructor)bytesiter_dealloc, /* tp_dealloc */
3201 0, /* tp_print */
3202 0, /* tp_getattr */
3203 0, /* tp_setattr */
3204 0, /* tp_compare */
3205 0, /* tp_repr */
3206 0, /* tp_as_number */
3207 0, /* tp_as_sequence */
3208 0, /* tp_as_mapping */
3209 0, /* tp_hash */
3210 0, /* tp_call */
3211 0, /* tp_str */
3212 PyObject_GenericGetAttr, /* tp_getattro */
3213 0, /* tp_setattro */
3214 0, /* tp_as_buffer */
3215 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3216 0, /* tp_doc */
3217 (traverseproc)bytesiter_traverse, /* tp_traverse */
3218 0, /* tp_clear */
3219 0, /* tp_richcompare */
3220 0, /* tp_weaklistoffset */
3221 PyObject_SelfIter, /* tp_iter */
3222 (iternextfunc)bytesiter_next, /* tp_iternext */
3223 bytesiter_methods, /* tp_methods */
3224 0,
3225};
3226
3227static PyObject *
3228bytes_iter(PyObject *seq)
3229{
3230 bytesiterobject *it;
3231
3232 if (!PyBytes_Check(seq)) {
3233 PyErr_BadInternalCall();
3234 return NULL;
3235 }
3236 it = PyObject_GC_New(bytesiterobject, &PyBytesIter_Type);
3237 if (it == NULL)
3238 return NULL;
3239 it->it_index = 0;
3240 Py_INCREF(seq);
3241 it->it_seq = (PyBytesObject *)seq;
3242 _PyObject_GC_TRACK(it);
3243 return (PyObject *)it;
3244}