blob: f2befed6bb1c3e47d71c4d120deed9571d10fcb0 [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
3/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
9/* Direct API functions */
10
11PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +000012PyBytes_FromObject(PyObject *input)
13{
14 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
15 input, NULL);
16}
17
18PyObject *
19PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000020{
21 PyBytesObject *new;
22
Guido van Rossumd624f182006-04-24 13:47:05 +000023 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000024
25 new = PyObject_New(PyBytesObject, &PyBytes_Type);
26 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +000027 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000028
Guido van Rossumd624f182006-04-24 13:47:05 +000029 if (size == 0)
30 new->ob_bytes = NULL;
31 else {
32 new->ob_bytes = PyMem_Malloc(size);
33 if (new->ob_bytes == NULL) {
34 Py_DECREF(new);
35 return NULL;
36 }
37 if (bytes != NULL)
38 memcpy(new->ob_bytes, bytes, size);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000039 }
Guido van Rossuma0867f72006-05-05 04:34:18 +000040 new->ob_size = new->ob_alloc = size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000041
42 return (PyObject *)new;
43}
44
45Py_ssize_t
46PyBytes_Size(PyObject *self)
47{
48 assert(self != NULL);
49 assert(PyBytes_Check(self));
50
Guido van Rossum20188312006-05-05 15:15:40 +000051 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000052}
53
54char *
55PyBytes_AsString(PyObject *self)
56{
57 assert(self != NULL);
58 assert(PyBytes_Check(self));
59
Guido van Rossum20188312006-05-05 15:15:40 +000060 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000061}
62
63int
64PyBytes_Resize(PyObject *self, Py_ssize_t size)
65{
66 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +000067 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000068
69 assert(self != NULL);
70 assert(PyBytes_Check(self));
71 assert(size >= 0);
72
Guido van Rossuma0867f72006-05-05 04:34:18 +000073 if (size < alloc / 2) {
74 /* Major downsize; resize down to exact size */
75 alloc = size;
76 }
77 else if (size <= alloc) {
78 /* Within allocated size; quick exit */
79 ((PyBytesObject *)self)->ob_size = size;
80 return 0;
81 }
82 else if (size <= alloc * 1.125) {
83 /* Moderate upsize; overallocate similar to list_resize() */
84 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
85 }
86 else {
87 /* Major upsize; resize up to exact size */
88 alloc = size;
89 }
90
91 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000092 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +000093 PyErr_NoMemory();
94 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000095 }
96
Guido van Rossumd624f182006-04-24 13:47:05 +000097 ((PyBytesObject *)self)->ob_bytes = sval;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000098 ((PyBytesObject *)self)->ob_size = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +000099 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000100
101 return 0;
102}
103
104/* Functions stuffed into the type object */
105
106static Py_ssize_t
107bytes_length(PyBytesObject *self)
108{
109 return self->ob_size;
110}
111
112static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000113bytes_concat(PyBytesObject *self, PyObject *other)
114{
115 PyBytesObject *result;
116 Py_ssize_t mysize;
117 Py_ssize_t size;
118
119 if (!PyBytes_Check(other)) {
120 PyErr_Format(PyExc_TypeError,
121 "can't concat bytes to %.100s", other->ob_type->tp_name);
122 return NULL;
123 }
124
125 mysize = self->ob_size;
126 size = mysize + ((PyBytesObject *)other)->ob_size;
127 if (size < 0)
128 return PyErr_NoMemory();
129 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
130 if (result != NULL) {
131 memcpy(result->ob_bytes, self->ob_bytes, self->ob_size);
132 memcpy(result->ob_bytes + self->ob_size,
133 ((PyBytesObject *)other)->ob_bytes,
134 ((PyBytesObject *)other)->ob_size);
135 }
136 return (PyObject *)result;
137}
138
139static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000140bytes_iconcat(PyBytesObject *self, PyObject *other)
141{
142 Py_ssize_t mysize;
143 Py_ssize_t osize;
144 Py_ssize_t size;
145
146 if (!PyBytes_Check(other)) {
147 PyErr_Format(PyExc_TypeError,
148 "can't concat bytes to %.100s", other->ob_type->tp_name);
149 return NULL;
150 }
151
152 mysize = self->ob_size;
153 osize = ((PyBytesObject *)other)->ob_size;
154 size = mysize + osize;
155 if (size < 0)
156 return PyErr_NoMemory();
Guido van Rossuma0867f72006-05-05 04:34:18 +0000157 if (size <= self->ob_alloc)
158 self->ob_size = size;
159 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000160 return NULL;
161 memcpy(self->ob_bytes + mysize, ((PyBytesObject *)other)->ob_bytes, osize);
162 Py_INCREF(self);
163 return (PyObject *)self;
164}
165
166static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000167bytes_repeat(PyBytesObject *self, Py_ssize_t count)
168{
169 PyBytesObject *result;
170 Py_ssize_t mysize;
171 Py_ssize_t size;
172
173 if (count < 0)
174 count = 0;
175 mysize = self->ob_size;
176 size = mysize * count;
177 if (count != 0 && size / count != mysize)
178 return PyErr_NoMemory();
179 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
180 if (result != NULL && size != 0) {
181 if (mysize == 1)
182 memset(result->ob_bytes, self->ob_bytes[0], size);
183 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000184 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000185 for (i = 0; i < count; i++)
186 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
187 }
188 }
189 return (PyObject *)result;
190}
191
192static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000193bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
194{
195 Py_ssize_t mysize;
196 Py_ssize_t size;
197
198 if (count < 0)
199 count = 0;
200 mysize = self->ob_size;
201 size = mysize * count;
202 if (count != 0 && size / count != mysize)
203 return PyErr_NoMemory();
Guido van Rossuma0867f72006-05-05 04:34:18 +0000204 if (size <= self->ob_alloc)
205 self->ob_size = size;
206 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000207 return NULL;
208
209 if (mysize == 1)
210 memset(self->ob_bytes, self->ob_bytes[0], size);
211 else {
212 Py_ssize_t i;
213 for (i = 1; i < count; i++)
214 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
215 }
216
217 Py_INCREF(self);
218 return (PyObject *)self;
219}
220
221static int
222bytes_substring(PyBytesObject *self, PyBytesObject *other)
223{
224 Py_ssize_t i;
225
226 if (other->ob_size == 1) {
227 return memchr(self->ob_bytes, other->ob_bytes[0],
228 self->ob_size) != NULL;
229 }
230 if (other->ob_size == 0)
231 return 1; /* Edge case */
232 for (i = 0; i + other->ob_size <= self->ob_size; i++) {
233 /* XXX Yeah, yeah, lots of optimizations possible... */
234 if (memcmp(self->ob_bytes + i, other->ob_bytes, other->ob_size) == 0)
235 return 1;
236 }
237 return 0;
238}
239
240static int
241bytes_contains(PyBytesObject *self, PyObject *value)
242{
243 Py_ssize_t ival;
244
245 if (PyBytes_Check(value))
246 return bytes_substring(self, (PyBytesObject *)value);
247
Thomas Woutersd204a712006-08-22 13:41:17 +0000248 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000249 if (ival == -1 && PyErr_Occurred())
250 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000251 if (ival < 0 || ival >= 256) {
252 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
253 return -1;
254 }
255
256 return memchr(self->ob_bytes, ival, self->ob_size) != NULL;
257}
258
259static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000260bytes_getitem(PyBytesObject *self, Py_ssize_t i)
261{
262 if (i < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000263 i += self->ob_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000264 if (i < 0 || i >= self->ob_size) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000265 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
266 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000267 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000268 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
269}
270
271static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000272bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000273{
Thomas Wouters376446d2006-12-19 08:30:14 +0000274 if (PyIndex_Check(item)) {
275 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000276
Thomas Wouters376446d2006-12-19 08:30:14 +0000277 if (i == -1 && PyErr_Occurred())
278 return NULL;
279
280 if (i < 0)
281 i += PyBytes_GET_SIZE(self);
282
283 if (i < 0 || i >= self->ob_size) {
284 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
285 return NULL;
286 }
287 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
288 }
289 else if (PySlice_Check(item)) {
290 Py_ssize_t start, stop, step, slicelength, cur, i;
291 if (PySlice_GetIndicesEx((PySliceObject *)item,
292 PyBytes_GET_SIZE(self),
293 &start, &stop, &step, &slicelength) < 0) {
294 return NULL;
295 }
296
297 if (slicelength <= 0)
298 return PyBytes_FromStringAndSize("", 0);
299 else if (step == 1) {
300 return PyBytes_FromStringAndSize(self->ob_bytes + start,
301 slicelength);
302 }
303 else {
304 char *source_buf = PyBytes_AS_STRING(self);
305 char *result_buf = (char *)PyMem_Malloc(slicelength);
306 PyObject *result;
307
308 if (result_buf == NULL)
309 return PyErr_NoMemory();
310
311 for (cur = start, i = 0; i < slicelength;
312 cur += step, i++) {
313 result_buf[i] = source_buf[cur];
314 }
315 result = PyBytes_FromStringAndSize(result_buf, slicelength);
316 PyMem_Free(result_buf);
317 return result;
318 }
319 }
320 else {
321 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
322 return NULL;
323 }
324}
325
Guido van Rossumd624f182006-04-24 13:47:05 +0000326static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000327bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000328 PyObject *values)
329{
330 int avail;
331 int needed;
332 char *bytes;
333
334 if (values == NULL) {
335 bytes = NULL;
336 needed = 0;
337 }
338 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
339 /* Make a copy an call this function recursively */
340 int err;
341 values = PyBytes_FromObject(values);
342 if (values == NULL)
343 return -1;
344 err = bytes_setslice(self, lo, hi, values);
345 Py_DECREF(values);
346 return err;
347 }
348 else {
349 assert(PyBytes_Check(values));
350 bytes = ((PyBytesObject *)values)->ob_bytes;
351 needed = ((PyBytesObject *)values)->ob_size;
352 }
353
354 if (lo < 0)
355 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000356 if (hi < lo)
357 hi = lo;
Guido van Rossumd624f182006-04-24 13:47:05 +0000358 if (hi > self->ob_size)
359 hi = self->ob_size;
360
361 avail = hi - lo;
362 if (avail < 0)
363 lo = hi = avail = 0;
364
365 if (avail != needed) {
366 if (avail > needed) {
367 /*
368 0 lo hi old_size
369 | |<----avail----->|<-----tomove------>|
370 | |<-needed->|<-----tomove------>|
371 0 lo new_hi new_size
372 */
373 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
374 self->ob_size - hi);
375 }
Thomas Wouters376446d2006-12-19 08:30:14 +0000376 if (PyBytes_Resize((PyObject *)self,
Guido van Rossumd624f182006-04-24 13:47:05 +0000377 self->ob_size + needed - avail) < 0)
378 return -1;
379 if (avail < needed) {
380 /*
381 0 lo hi old_size
382 | |<-avail->|<-----tomove------>|
383 | |<----needed---->|<-----tomove------>|
384 0 lo new_hi new_size
385 */
386 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
387 self->ob_size - lo - needed);
388 }
389 }
390
391 if (needed > 0)
392 memcpy(self->ob_bytes + lo, bytes, needed);
393
394 return 0;
395}
396
397static int
398bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
399{
400 Py_ssize_t ival;
401
402 if (i < 0)
403 i += self->ob_size;
404
405 if (i < 0 || i >= self->ob_size) {
406 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
407 return -1;
408 }
409
410 if (value == NULL)
411 return bytes_setslice(self, i, i+1, NULL);
412
Thomas Woutersd204a712006-08-22 13:41:17 +0000413 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000414 if (ival == -1 && PyErr_Occurred())
415 return -1;
416
417 if (ival < 0 || ival >= 256) {
418 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
419 return -1;
420 }
421
422 self->ob_bytes[i] = ival;
423 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000424}
425
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000426static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000427bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
428{
429 Py_ssize_t start, stop, step, slicelen, needed;
430 char *bytes;
431
432 if (PyIndex_Check(item)) {
433 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
434
435 if (i == -1 && PyErr_Occurred())
436 return -1;
437
438 if (i < 0)
439 i += PyBytes_GET_SIZE(self);
440
441 if (i < 0 || i >= self->ob_size) {
442 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
443 return -1;
444 }
445
446 if (values == NULL) {
447 /* Fall through to slice assignment */
448 start = i;
449 stop = i + 1;
450 step = 1;
451 slicelen = 1;
452 }
453 else {
454 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
455 if (ival == -1 && PyErr_Occurred())
456 return -1;
457 if (ival < 0 || ival >= 256) {
458 PyErr_SetString(PyExc_ValueError,
459 "byte must be in range(0, 256)");
460 return -1;
461 }
462 self->ob_bytes[i] = (char)ival;
463 return 0;
464 }
465 }
466 else if (PySlice_Check(item)) {
467 if (PySlice_GetIndicesEx((PySliceObject *)item,
468 PyBytes_GET_SIZE(self),
469 &start, &stop, &step, &slicelen) < 0) {
470 return -1;
471 }
472 }
473 else {
474 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
475 return -1;
476 }
477
478 if (values == NULL) {
479 bytes = NULL;
480 needed = 0;
481 }
482 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
483 /* Make a copy an call this function recursively */
484 int err;
485 values = PyBytes_FromObject(values);
486 if (values == NULL)
487 return -1;
488 err = bytes_ass_subscript(self, item, values);
489 Py_DECREF(values);
490 return err;
491 }
492 else {
493 assert(PyBytes_Check(values));
494 bytes = ((PyBytesObject *)values)->ob_bytes;
495 needed = ((PyBytesObject *)values)->ob_size;
496 }
497 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
498 if ((step < 0 && start < stop) ||
499 (step > 0 && start > stop))
500 stop = start;
501 if (step == 1) {
502 if (slicelen != needed) {
503 if (slicelen > needed) {
504 /*
505 0 start stop old_size
506 | |<---slicelen--->|<-----tomove------>|
507 | |<-needed->|<-----tomove------>|
508 0 lo new_hi new_size
509 */
510 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
511 self->ob_size - stop);
512 }
513 if (PyBytes_Resize((PyObject *)self,
514 self->ob_size + needed - slicelen) < 0)
515 return -1;
516 if (slicelen < needed) {
517 /*
518 0 lo hi old_size
519 | |<-avail->|<-----tomove------>|
520 | |<----needed---->|<-----tomove------>|
521 0 lo new_hi new_size
522 */
523 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
524 self->ob_size - start - needed);
525 }
526 }
527
528 if (needed > 0)
529 memcpy(self->ob_bytes + start, bytes, needed);
530
531 return 0;
532 }
533 else {
534 if (needed == 0) {
535 /* Delete slice */
536 Py_ssize_t cur, i;
537
538 if (step < 0) {
539 stop = start + 1;
540 start = stop + step * (slicelen - 1) - 1;
541 step = -step;
542 }
543 for (cur = start, i = 0;
544 i < slicelen; cur += step, i++) {
545 Py_ssize_t lim = step - 1;
546
547 if (cur + step >= PyBytes_GET_SIZE(self))
548 lim = PyBytes_GET_SIZE(self) - cur - 1;
549
550 memmove(self->ob_bytes + cur - i,
551 self->ob_bytes + cur + 1, lim);
552 }
553 /* Move the tail of the bytes, in one chunk */
554 cur = start + slicelen*step;
555 if (cur < PyBytes_GET_SIZE(self)) {
556 memmove(self->ob_bytes + cur - slicelen,
557 self->ob_bytes + cur,
558 PyBytes_GET_SIZE(self) - cur);
559 }
560 if (PyBytes_Resize((PyObject *)self,
561 PyBytes_GET_SIZE(self) - slicelen) < 0)
562 return -1;
563
564 return 0;
565 }
566 else {
567 /* Assign slice */
568 Py_ssize_t cur, i;
569
570 if (needed != slicelen) {
571 PyErr_Format(PyExc_ValueError,
572 "attempt to assign bytes of size %zd "
573 "to extended slice of size %zd",
574 needed, slicelen);
575 return -1;
576 }
577 for (cur = start, i = 0; i < slicelen; cur += step, i++)
578 self->ob_bytes[cur] = bytes[i];
579 return 0;
580 }
581 }
582}
583
584static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000585bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
586{
Guido van Rossumd624f182006-04-24 13:47:05 +0000587 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000588 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000589 const char *encoding = NULL;
590 const char *errors = NULL;
591 Py_ssize_t count;
592 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000593 PyObject *(*iternext)(PyObject *);
594
Guido van Rossuma0867f72006-05-05 04:34:18 +0000595 if (self->ob_size != 0) {
596 /* Empty previous contents (yes, do this first of all!) */
597 if (PyBytes_Resize((PyObject *)self, 0) < 0)
598 return -1;
599 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000600
Guido van Rossumd624f182006-04-24 13:47:05 +0000601 /* Parse arguments */
602 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
603 &arg, &encoding, &errors))
604 return -1;
605
606 /* Make a quick exit if no first argument */
607 if (arg == NULL) {
608 if (encoding != NULL || errors != NULL) {
609 PyErr_SetString(PyExc_TypeError,
610 "encoding or errors without sequence argument");
611 return -1;
612 }
613 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000614 }
615
Guido van Rossumd624f182006-04-24 13:47:05 +0000616 if (PyUnicode_Check(arg)) {
617 /* Encode via the codec registry */
618 PyObject *encoded;
619 char *bytes;
620 Py_ssize_t size;
621 if (encoding == NULL)
622 encoding = PyUnicode_GetDefaultEncoding();
623 encoded = PyCodec_Encode(arg, encoding, errors);
624 if (encoded == NULL)
625 return -1;
626 if (!PyString_Check(encoded)) {
627 PyErr_Format(PyExc_TypeError,
628 "encoder did not return a string object (type=%.400s)",
629 encoded->ob_type->tp_name);
630 Py_DECREF(encoded);
631 return -1;
632 }
633 bytes = PyString_AS_STRING(encoded);
634 size = PyString_GET_SIZE(encoded);
Guido van Rossuma0867f72006-05-05 04:34:18 +0000635 if (size <= self->ob_alloc)
636 self->ob_size = size;
637 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000638 Py_DECREF(encoded);
639 return -1;
640 }
641 memcpy(self->ob_bytes, bytes, size);
642 Py_DECREF(encoded);
643 return 0;
644 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000645
Guido van Rossumd624f182006-04-24 13:47:05 +0000646 /* If it's not unicode, there can't be encoding or errors */
647 if (encoding != NULL || errors != NULL) {
648 PyErr_SetString(PyExc_TypeError,
649 "encoding or errors without a string argument");
650 return -1;
651 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000652
Guido van Rossumd624f182006-04-24 13:47:05 +0000653 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000654 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000655 if (count == -1 && PyErr_Occurred())
656 PyErr_Clear();
657 else {
658 if (count < 0) {
659 PyErr_SetString(PyExc_ValueError, "negative count");
660 return -1;
661 }
662 if (count > 0) {
663 if (PyBytes_Resize((PyObject *)self, count))
664 return -1;
665 memset(self->ob_bytes, 0, count);
666 }
667 return 0;
668 }
669
670 if (PyObject_CheckReadBuffer(arg)) {
671 const void *bytes;
672 Py_ssize_t size;
673 if (PyObject_AsReadBuffer(arg, &bytes, &size) < 0)
674 return -1;
675 if (PyBytes_Resize((PyObject *)self, size) < 0)
676 return -1;
677 memcpy(self->ob_bytes, bytes, size);
678 return 0;
679 }
680
681 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000682
683 /* Get the iterator */
684 it = PyObject_GetIter(arg);
685 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000686 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000687 iternext = *it->ob_type->tp_iternext;
688
689 /* Run the iterator to exhaustion */
690 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000691 PyObject *item;
692 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000693
Guido van Rossumd624f182006-04-24 13:47:05 +0000694 /* Get the next item */
695 item = iternext(it);
696 if (item == NULL) {
697 if (PyErr_Occurred()) {
698 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
699 goto error;
700 PyErr_Clear();
701 }
702 break;
703 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000704
Guido van Rossumd624f182006-04-24 13:47:05 +0000705 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000706 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000707 Py_DECREF(item);
708 if (value == -1 && PyErr_Occurred())
709 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000710
Guido van Rossumd624f182006-04-24 13:47:05 +0000711 /* Range check */
712 if (value < 0 || value >= 256) {
713 PyErr_SetString(PyExc_ValueError,
714 "bytes must be in range(0, 256)");
715 goto error;
716 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000717
Guido van Rossumd624f182006-04-24 13:47:05 +0000718 /* Append the byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000719 if (self->ob_size < self->ob_alloc)
720 self->ob_size++;
721 else if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000722 goto error;
723 self->ob_bytes[self->ob_size-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000724 }
725
726 /* Clean up and return success */
727 Py_DECREF(it);
728 return 0;
729
730 error:
731 /* Error handling when it != NULL */
732 Py_DECREF(it);
733 return -1;
734}
735
Georg Brandlee91be42007-02-24 19:41:35 +0000736/* Mostly copied from string_repr, but without the
737 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000738static PyObject *
739bytes_repr(PyBytesObject *self)
740{
Georg Brandlee91be42007-02-24 19:41:35 +0000741 size_t newsize = 3 + 4 * self->ob_size;
742 PyObject *v;
743 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != self->ob_size) {
744 PyErr_SetString(PyExc_OverflowError,
745 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000746 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000747 }
Georg Brandlee91be42007-02-24 19:41:35 +0000748 v = PyString_FromStringAndSize((char *)NULL, newsize);
749 if (v == NULL) {
750 return NULL;
751 }
752 else {
753 register Py_ssize_t i;
754 register char c;
755 register char *p;
756 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000757
Georg Brandlee91be42007-02-24 19:41:35 +0000758 p = PyString_AS_STRING(v);
759 *p++ = 'b';
760 *p++ = quote;
761 for (i = 0; i < self->ob_size; i++) {
762 /* There's at least enough room for a hex escape
763 and a closing quote. */
764 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
765 c = self->ob_bytes[i];
766 if (c == quote || c == '\\')
767 *p++ = '\\', *p++ = c;
768 else if (c == '\t')
769 *p++ = '\\', *p++ = 't';
770 else if (c == '\n')
771 *p++ = '\\', *p++ = 'n';
772 else if (c == '\r')
773 *p++ = '\\', *p++ = 'r';
774 else if (c == 0)
775 *p++ = '\\', *p++ = '0';
776 else if (c < ' ' || c >= 0x7f) {
777 /* For performance, we don't want to call
778 PyOS_snprintf here (extra layers of
779 function call). */
780 sprintf(p, "\\x%02x", c & 0xff);
781 p += 4;
782 }
783 else
784 *p++ = c;
785 }
786 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
787 *p++ = quote;
788 *p = '\0';
789 _PyString_Resize(
790 &v, (p - PyString_AS_STRING(v)));
791 return v;
792 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000793}
794
795static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000796bytes_str(PyBytesObject *self)
797{
798 return PyString_FromStringAndSize(self->ob_bytes, self->ob_size);
799}
800
801static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000802bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op)
803{
804 PyObject *res;
805 int minsize;
806 int cmp;
807
808 if (!PyBytes_Check(self) || !PyBytes_Check(other)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000809 Py_INCREF(Py_NotImplemented);
810 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000811 }
812
813 if (self->ob_size != other->ob_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000814 /* Shortcut: if the lengths differ, the objects differ */
815 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000816 }
817 else {
Guido van Rossumd624f182006-04-24 13:47:05 +0000818 minsize = self->ob_size;
819 if (other->ob_size < minsize)
820 minsize = other->ob_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000821
Guido van Rossumd624f182006-04-24 13:47:05 +0000822 cmp = memcmp(self->ob_bytes, other->ob_bytes, minsize);
823 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000824
Guido van Rossumd624f182006-04-24 13:47:05 +0000825 if (cmp == 0) {
826 if (self->ob_size < other->ob_size)
827 cmp = -1;
828 else if (self->ob_size > other->ob_size)
829 cmp = 1;
830 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000831
Guido van Rossumd624f182006-04-24 13:47:05 +0000832 switch (op) {
833 case Py_LT: cmp = cmp < 0; break;
834 case Py_LE: cmp = cmp <= 0; break;
835 case Py_EQ: cmp = cmp == 0; break;
836 case Py_NE: cmp = cmp != 0; break;
837 case Py_GT: cmp = cmp > 0; break;
838 case Py_GE: cmp = cmp >= 0; break;
839 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000840 }
841
842 res = cmp ? Py_True : Py_False;
843 Py_INCREF(res);
844 return res;
845}
846
847static void
848bytes_dealloc(PyBytesObject *self)
849{
Guido van Rossumd624f182006-04-24 13:47:05 +0000850 if (self->ob_bytes != 0) {
851 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000852 }
853 self->ob_type->tp_free((PyObject *)self);
854}
855
Guido van Rossumd624f182006-04-24 13:47:05 +0000856static Py_ssize_t
857bytes_getbuffer(PyBytesObject *self, Py_ssize_t index, const void **ptr)
858{
859 if (index != 0) {
860 PyErr_SetString(PyExc_SystemError,
861 "accessing non-existent string segment");
862 return -1;
863 }
864 *ptr = (void *)self->ob_bytes;
865 return self->ob_size;
866}
867
868static Py_ssize_t
869bytes_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
870{
871 if (lenp)
872 *lenp = self->ob_size;
873 return 1;
874}
875
876PyDoc_STRVAR(decode_doc,
877"B.decode([encoding[,errors]]) -> unicode obect.\n\
878\n\
879Decodes B using the codec registered for encoding. encoding defaults\n\
880to the default encoding. errors may be given to set a different error\n\
881handling scheme. Default is 'strict' meaning that encoding errors raise\n\
882a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
883as well as any other name registerd with codecs.register_error that is\n\
884able to handle UnicodeDecodeErrors.");
885
886static PyObject *
887bytes_decode(PyObject *self, PyObject *args)
888{
889 const char *encoding = NULL;
890 const char *errors = NULL;
891
892 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
893 return NULL;
894 if (encoding == NULL)
895 encoding = PyUnicode_GetDefaultEncoding();
896 return PyCodec_Decode(self, encoding, errors);
897}
898
Guido van Rossuma0867f72006-05-05 04:34:18 +0000899PyDoc_STRVAR(alloc_doc,
900"B.__alloc__() -> int\n\
901\n\
902Returns the number of bytes actually allocated.");
903
904static PyObject *
905bytes_alloc(PyBytesObject *self)
906{
907 return PyInt_FromSsize_t(self->ob_alloc);
908}
909
Guido van Rossum20188312006-05-05 15:15:40 +0000910PyDoc_STRVAR(join_doc,
911"bytes.join(iterable_of_bytes) -> bytes\n\
912\n\
913Concatenates any number of bytes objects. Example:\n\
914bytes.join([bytes('ab'), bytes('pq'), bytes('rs')]) -> bytes('abpqrs').");
915
916static PyObject *
917bytes_join(PyObject *cls, PyObject *it)
918{
919 PyObject *seq;
920 Py_ssize_t i;
921 Py_ssize_t n;
922 PyObject **items;
923 Py_ssize_t totalsize = 0;
924 PyObject *result;
925 char *dest;
926
927 seq = PySequence_Fast(it, "can only join an iterable");
928 if (seq == NULL)
929 return NULL;
930 n = PySequence_Fast_GET_SIZE(seq);
931 items = PySequence_Fast_ITEMS(seq);
932
933 /* Compute the total size, and check that they are all bytes */
934 for (i = 0; i < n; i++) {
935 PyObject *obj = items[i];
936 if (!PyBytes_Check(obj)) {
937 PyErr_Format(PyExc_TypeError,
938 "can only join an iterable of bytes "
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +0000939 "(item %ld has type '%.100s')",
940 /* XXX %ld isn't right on Win64 */
941 (long)i, obj->ob_type->tp_name);
Guido van Rossum20188312006-05-05 15:15:40 +0000942 goto error;
943 }
944 totalsize += PyBytes_GET_SIZE(obj);
945 if (totalsize < 0) {
946 PyErr_NoMemory();
947 goto error;
948 }
949 }
950
951 /* Allocate the result, and copy the bytes */
952 result = PyBytes_FromStringAndSize(NULL, totalsize);
953 if (result == NULL)
954 goto error;
955 dest = PyBytes_AS_STRING(result);
956 for (i = 0; i < n; i++) {
957 PyObject *obj = items[i];
958 Py_ssize_t size = PyBytes_GET_SIZE(obj);
959 memcpy(dest, PyBytes_AS_STRING(obj), size);
960 dest += size;
961 }
962
963 /* Done */
964 Py_DECREF(seq);
965 return result;
966
967 /* Error handling */
968 error:
969 Py_DECREF(seq);
970 return NULL;
971}
972
/* Docstring for bytes.fromhex(), implemented by bytes_fromhex(). */
PyDoc_STRVAR(fromhex_doc,
"bytes.fromhex(string) -> bytes\n\
\n\
Create a bytes object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted. Example:\n\
bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
979
/* Convert one hexadecimal digit character to its value.

   Accepts '0'-'9', 'a'-'f' and 'A'-'F' and returns 0..15; any other
   value of `c` returns -1.  Plain range checks are used instead of the
   <ctype.h> classifiers, so the result does not depend on the current
   locale and every int (including negative values) is a valid
   argument. */
static int
hex_digit_to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
993
994static PyObject *
995bytes_fromhex(PyObject *cls, PyObject *args)
996{
997 PyObject *newbytes;
998 char *hex, *buf;
999 Py_ssize_t len, byteslen, i, j;
1000 int top, bot;
1001
1002 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
1003 return NULL;
1004
1005 byteslen = len / 2; /* max length if there are no spaces */
1006
1007 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
1008 if (!newbytes)
1009 return NULL;
1010 buf = PyBytes_AS_STRING(newbytes);
1011
1012 for (i = j = 0; ; i += 2) {
1013 /* skip over spaces in the input */
1014 while (Py_CHARMASK(hex[i]) == ' ')
1015 i++;
1016 if (i >= len)
1017 break;
1018 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
1019 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
1020 if (top == -1 || bot == -1) {
1021 PyErr_Format(PyExc_ValueError,
1022 "non-hexadecimal number string '%c%c' found in "
1023 "fromhex() arg at position %zd",
1024 hex[i], hex[i+1], i);
1025 goto error;
1026 }
1027 buf[j++] = (top << 4) + bot;
1028 }
1029 if (PyBytes_Resize(newbytes, j) < 0)
1030 goto error;
1031 return newbytes;
1032
1033 error:
1034 Py_DECREF(newbytes);
1035 return NULL;
1036}
1037
/* Sequence protocol slots for the bytes type (installed as
   tp_as_sequence in PyBytes_Type).  The two slice slots are left empty.
   NOTE(review): slicing is presumably served by the subscript functions
   in bytes_as_mapping instead -- confirm. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001038static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00001039 (lenfunc)bytes_length, /* sq_length */
1040 (binaryfunc)bytes_concat, /* sq_concat */
1041 (ssizeargfunc)bytes_repeat, /* sq_repeat */
1042 (ssizeargfunc)bytes_getitem, /* sq_item */
1043 0, /* sq_slice */
1044 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
1045 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00001046 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00001047 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
1048 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001049};
1050
/* Mapping protocol slots for the bytes type (installed as
   tp_as_mapping in PyBytes_Type).  The length slot reuses the same
   bytes_length function as the sequence table. */
1051static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00001052 (lenfunc)bytes_length, /* mp_length */
Thomas Wouters376446d2006-12-19 08:30:14 +00001053 (binaryfunc)bytes_subscript, /* mp_subscript */
1054 (objobjargproc)bytes_ass_subscript, /* mp_ass_subscript */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001055};
1056
/* Buffer protocol slots for the bytes type (installed as tp_as_buffer
   in PyBytes_Type).  The read, write and character buffer slots all
   point at the same bytes_getbuffer function. */
1057static PyBufferProcs bytes_as_buffer = {
Guido van Rossumd624f182006-04-24 13:47:05 +00001058 (readbufferproc)bytes_getbuffer, /* bf_getreadbuffer */
1059 (writebufferproc)bytes_getbuffer, /* bf_getwritebuffer */
1060 (segcountproc)bytes_getsegcount, /* bf_getsegcount */
1061 /* XXX Bytes are not characters! But we need to implement
1062 bf_getcharbuffer() so we can be used as 't#' argument to codecs. */
1063 (charbufferproc)bytes_getbuffer, /* bf_getcharbuffer */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001064};
1065
/* Method table for the bytes type (installed as tp_methods in
   PyBytes_Type).  "fromhex" and "join" carry METH_CLASS, so they are
   class methods; "decode" and "__alloc__" are ordinary methods. */
1066static PyMethodDef
1067bytes_methods[] = {
Guido van Rossumd624f182006-04-24 13:47:05 +00001068 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00001069 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
Georg Brandl0b9b9e02007-02-27 08:40:54 +00001070 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, fromhex_doc},
Guido van Rossum20188312006-05-05 15:15:40 +00001071 {"join", (PyCFunction)bytes_join, METH_O|METH_CLASS, join_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00001072 {NULL} /* sentinel: marks the end of the table */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001073};
1074
/* Docstring for the bytes type itself; installed as tp_doc in
   PyBytes_Type. */
1075PyDoc_STRVAR(bytes_doc,
1076"bytes([iterable]) -> new array of bytes.\n\
1077\n\
1078If an argument is given it must be an iterable yielding ints in range(256).");
1079
/* Type object for bytes.  It wires together the sequence, mapping and
   buffer slot tables, the method table and the docstring defined
   above.  tp_flags is plain Py_TPFLAGS_DEFAULT (no Py_TPFLAGS_BASETYPE),
   which is what makes the type non-subclassable, as the 'final' note
   below says. */
1080PyTypeObject PyBytes_Type = {
1081 PyObject_HEAD_INIT(&PyType_Type)
1082 0, /* ob_size */
1083 "bytes", /* tp_name */
1084 sizeof(PyBytesObject), /* tp_basicsize */
1085 0, /* tp_itemsize */
Guido van Rossumd624f182006-04-24 13:47:05 +00001086 (destructor)bytes_dealloc, /* tp_dealloc */
1087 0, /* tp_print */
1088 0, /* tp_getattr */
1089 0, /* tp_setattr */
1090 0, /* tp_compare */
1091 (reprfunc)bytes_repr, /* tp_repr */
1092 0, /* tp_as_number */
1093 &bytes_as_sequence, /* tp_as_sequence */
1094 &bytes_as_mapping, /* tp_as_mapping */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00001095 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00001096 0, /* tp_call */
1097 (reprfunc)bytes_str, /* tp_str */
1098 PyObject_GenericGetAttr, /* tp_getattro */
1099 0, /* tp_setattro */
1100 &bytes_as_buffer, /* tp_as_buffer */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00001101 Py_TPFLAGS_DEFAULT, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00001102 /* bytes is 'final' or 'sealed' */
1103 bytes_doc, /* tp_doc */
1104 0, /* tp_traverse */
1105 0, /* tp_clear */
1106 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
1107 0, /* tp_weaklistoffset */
1108 0, /* tp_iter */
1109 0, /* tp_iternext */
1110 bytes_methods, /* tp_methods */
1111 0, /* tp_members */
1112 0, /* tp_getset */
1113 0, /* tp_base */
1114 0, /* tp_dict */
1115 0, /* tp_descr_get */
1116 0, /* tp_descr_set */
1117 0, /* tp_dictoffset */
1118 (initproc)bytes_init, /* tp_init */
1119 PyType_GenericAlloc, /* tp_alloc */
1120 PyType_GenericNew, /* tp_new */
1121 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001122};