Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 1 | /* stringlib: bytes joining implementation */ |
| 2 | |
Serhiy Storchaka | bcde10a | 2016-05-16 09:42:29 +0300 | [diff] [blame] | 3 | #if STRINGLIB_IS_UNICODE |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 4 | #error join.h only compatible with byte-wise strings |
| 5 | #endif |
| 6 | |
| 7 | Py_LOCAL_INLINE(PyObject *) |
| 8 | STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) |
| 9 | { |
| 10 | char *sepstr = STRINGLIB_STR(sep); |
| 11 | const Py_ssize_t seplen = STRINGLIB_LEN(sep); |
| 12 | PyObject *res = NULL; |
| 13 | char *p; |
| 14 | Py_ssize_t seqlen = 0; |
| 15 | Py_ssize_t sz = 0; |
| 16 | Py_ssize_t i, nbufs; |
| 17 | PyObject *seq, *item; |
| 18 | Py_buffer *buffers = NULL; |
| 19 | #define NB_STATIC_BUFFERS 10 |
| 20 | Py_buffer static_buffers[NB_STATIC_BUFFERS]; |
| 21 | |
| 22 | seq = PySequence_Fast(iterable, "can only join an iterable"); |
| 23 | if (seq == NULL) { |
| 24 | return NULL; |
| 25 | } |
| 26 | |
| 27 | seqlen = PySequence_Fast_GET_SIZE(seq); |
| 28 | if (seqlen == 0) { |
| 29 | Py_DECREF(seq); |
| 30 | return STRINGLIB_NEW(NULL, 0); |
| 31 | } |
| 32 | #ifndef STRINGLIB_MUTABLE |
| 33 | if (seqlen == 1) { |
| 34 | item = PySequence_Fast_GET_ITEM(seq, 0); |
| 35 | if (STRINGLIB_CHECK_EXACT(item)) { |
| 36 | Py_INCREF(item); |
| 37 | Py_DECREF(seq); |
| 38 | return item; |
| 39 | } |
| 40 | } |
| 41 | #endif |
| 42 | if (seqlen > NB_STATIC_BUFFERS) { |
| 43 | buffers = PyMem_NEW(Py_buffer, seqlen); |
| 44 | if (buffers == NULL) { |
| 45 | Py_DECREF(seq); |
Christian Heimes | 5f7e8da | 2012-12-02 07:56:42 +0100 | [diff] [blame] | 46 | PyErr_NoMemory(); |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 47 | return NULL; |
| 48 | } |
| 49 | } |
| 50 | else { |
| 51 | buffers = static_buffers; |
| 52 | } |
| 53 | |
| 54 | /* Here is the general case. Do a pre-pass to figure out the total |
| 55 | * amount of space we'll need (sz), and see whether all arguments are |
Serhiy Storchaka | b757c83 | 2014-12-05 22:25:22 +0200 | [diff] [blame] | 56 | * bytes-like. |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 57 | */ |
| 58 | for (i = 0, nbufs = 0; i < seqlen; i++) { |
| 59 | Py_ssize_t itemlen; |
| 60 | item = PySequence_Fast_GET_ITEM(seq, i); |
Serhiy Storchaka | 4fdb684 | 2015-02-03 01:21:08 +0200 | [diff] [blame] | 61 | if (PyBytes_CheckExact(item)) { |
| 62 | /* Fast path. */ |
| 63 | Py_INCREF(item); |
| 64 | buffers[i].obj = item; |
| 65 | buffers[i].buf = PyBytes_AS_STRING(item); |
| 66 | buffers[i].len = PyBytes_GET_SIZE(item); |
| 67 | } |
| 68 | else if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) { |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 69 | PyErr_Format(PyExc_TypeError, |
Serhiy Storchaka | b757c83 | 2014-12-05 22:25:22 +0200 | [diff] [blame] | 70 | "sequence item %zd: expected a bytes-like object, " |
| 71 | "%.80s found", |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 72 | i, Py_TYPE(item)->tp_name); |
| 73 | goto error; |
| 74 | } |
| 75 | nbufs = i + 1; /* for error cleanup */ |
| 76 | itemlen = buffers[i].len; |
| 77 | if (itemlen > PY_SSIZE_T_MAX - sz) { |
| 78 | PyErr_SetString(PyExc_OverflowError, |
| 79 | "join() result is too long"); |
| 80 | goto error; |
| 81 | } |
| 82 | sz += itemlen; |
| 83 | if (i != 0) { |
| 84 | if (seplen > PY_SSIZE_T_MAX - sz) { |
| 85 | PyErr_SetString(PyExc_OverflowError, |
| 86 | "join() result is too long"); |
| 87 | goto error; |
| 88 | } |
| 89 | sz += seplen; |
| 90 | } |
| 91 | if (seqlen != PySequence_Fast_GET_SIZE(seq)) { |
| 92 | PyErr_SetString(PyExc_RuntimeError, |
| 93 | "sequence changed size during iteration"); |
| 94 | goto error; |
| 95 | } |
| 96 | } |
| 97 | |
| 98 | /* Allocate result space. */ |
| 99 | res = STRINGLIB_NEW(NULL, sz); |
| 100 | if (res == NULL) |
| 101 | goto error; |
| 102 | |
| 103 | /* Catenate everything. */ |
| 104 | p = STRINGLIB_STR(res); |
Antoine Pitrou | 6f7b0da | 2012-10-20 23:08:34 +0200 | [diff] [blame] | 105 | if (!seplen) { |
| 106 | /* fast path */ |
| 107 | for (i = 0; i < nbufs; i++) { |
| 108 | Py_ssize_t n = buffers[i].len; |
| 109 | char *q = buffers[i].buf; |
Christian Heimes | f051e43 | 2016-09-13 20:22:02 +0200 | [diff] [blame] | 110 | memcpy(p, q, n); |
Antoine Pitrou | 6f7b0da | 2012-10-20 23:08:34 +0200 | [diff] [blame] | 111 | p += n; |
| 112 | } |
| 113 | goto done; |
| 114 | } |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 115 | for (i = 0; i < nbufs; i++) { |
| 116 | Py_ssize_t n; |
| 117 | char *q; |
| 118 | if (i) { |
Christian Heimes | f051e43 | 2016-09-13 20:22:02 +0200 | [diff] [blame] | 119 | memcpy(p, sepstr, seplen); |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 120 | p += seplen; |
| 121 | } |
| 122 | n = buffers[i].len; |
| 123 | q = buffers[i].buf; |
Christian Heimes | f051e43 | 2016-09-13 20:22:02 +0200 | [diff] [blame] | 124 | memcpy(p, q, n); |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 125 | p += n; |
| 126 | } |
| 127 | goto done; |
| 128 | |
| 129 | error: |
| 130 | res = NULL; |
| 131 | done: |
| 132 | Py_DECREF(seq); |
| 133 | for (i = 0; i < nbufs; i++) |
| 134 | PyBuffer_Release(&buffers[i]); |
| 135 | if (buffers != static_buffers) |
| 136 | PyMem_FREE(buffers); |
| 137 | return res; |
| 138 | } |
| 139 | |
| 140 | #undef NB_STATIC_BUFFERS |