Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 1 | /* stringlib: bytes joining implementation */ |
| 2 | |
Serhiy Storchaka | bcde10a | 2016-05-16 09:42:29 +0300 | [diff] [blame] | 3 | #if STRINGLIB_IS_UNICODE |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 4 | #error join.h only compatible with byte-wise strings |
| 5 | #endif |
| 6 | |
| 7 | Py_LOCAL_INLINE(PyObject *) |
| 8 | STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) |
| 9 | { |
Serhiy Storchaka | 8f87eef | 2020-04-12 14:58:27 +0300 | [diff] [blame] | 10 | const char *sepstr = STRINGLIB_STR(sep); |
| 11 | Py_ssize_t seplen = STRINGLIB_LEN(sep); |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 12 | PyObject *res = NULL; |
| 13 | char *p; |
| 14 | Py_ssize_t seqlen = 0; |
| 15 | Py_ssize_t sz = 0; |
| 16 | Py_ssize_t i, nbufs; |
| 17 | PyObject *seq, *item; |
| 18 | Py_buffer *buffers = NULL; |
| 19 | #define NB_STATIC_BUFFERS 10 |
| 20 | Py_buffer static_buffers[NB_STATIC_BUFFERS]; |
Bruce Merry | d07d9f4 | 2020-01-29 09:09:24 +0200 | [diff] [blame] | 21 | #define GIL_THRESHOLD 1048576 |
| 22 | int drop_gil = 1; |
Inada Naoki | 869c0c9 | 2020-02-03 19:03:34 +0900 | [diff] [blame] | 23 | PyThreadState *save = NULL; |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 24 | |
| 25 | seq = PySequence_Fast(iterable, "can only join an iterable"); |
| 26 | if (seq == NULL) { |
| 27 | return NULL; |
| 28 | } |
| 29 | |
| 30 | seqlen = PySequence_Fast_GET_SIZE(seq); |
| 31 | if (seqlen == 0) { |
| 32 | Py_DECREF(seq); |
| 33 | return STRINGLIB_NEW(NULL, 0); |
| 34 | } |
| 35 | #ifndef STRINGLIB_MUTABLE |
| 36 | if (seqlen == 1) { |
| 37 | item = PySequence_Fast_GET_ITEM(seq, 0); |
| 38 | if (STRINGLIB_CHECK_EXACT(item)) { |
| 39 | Py_INCREF(item); |
| 40 | Py_DECREF(seq); |
| 41 | return item; |
| 42 | } |
| 43 | } |
| 44 | #endif |
| 45 | if (seqlen > NB_STATIC_BUFFERS) { |
| 46 | buffers = PyMem_NEW(Py_buffer, seqlen); |
| 47 | if (buffers == NULL) { |
| 48 | Py_DECREF(seq); |
Christian Heimes | 5f7e8da | 2012-12-02 07:56:42 +0100 | [diff] [blame] | 49 | PyErr_NoMemory(); |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 50 | return NULL; |
| 51 | } |
| 52 | } |
| 53 | else { |
| 54 | buffers = static_buffers; |
| 55 | } |
| 56 | |
| 57 | /* Here is the general case. Do a pre-pass to figure out the total |
| 58 | * amount of space we'll need (sz), and see whether all arguments are |
Serhiy Storchaka | b757c83 | 2014-12-05 22:25:22 +0200 | [diff] [blame] | 59 | * bytes-like. |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 60 | */ |
| 61 | for (i = 0, nbufs = 0; i < seqlen; i++) { |
| 62 | Py_ssize_t itemlen; |
| 63 | item = PySequence_Fast_GET_ITEM(seq, i); |
Serhiy Storchaka | 4fdb684 | 2015-02-03 01:21:08 +0200 | [diff] [blame] | 64 | if (PyBytes_CheckExact(item)) { |
| 65 | /* Fast path. */ |
| 66 | Py_INCREF(item); |
| 67 | buffers[i].obj = item; |
| 68 | buffers[i].buf = PyBytes_AS_STRING(item); |
| 69 | buffers[i].len = PyBytes_GET_SIZE(item); |
| 70 | } |
Bruce Merry | d07d9f4 | 2020-01-29 09:09:24 +0200 | [diff] [blame] | 71 | else { |
| 72 | if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) { |
| 73 | PyErr_Format(PyExc_TypeError, |
| 74 | "sequence item %zd: expected a bytes-like object, " |
| 75 | "%.80s found", |
| 76 | i, Py_TYPE(item)->tp_name); |
| 77 | goto error; |
| 78 | } |
| 79 | /* If the backing objects are mutable, then dropping the GIL |
| 80 | * opens up race conditions where another thread tries to modify |
| 81 | * the object which we hold a buffer on it. Such code has data |
| 82 | * races anyway, but this is a conservative approach that avoids |
| 83 | * changing the behaviour of that data race. |
| 84 | */ |
| 85 | drop_gil = 0; |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 86 | } |
| 87 | nbufs = i + 1; /* for error cleanup */ |
| 88 | itemlen = buffers[i].len; |
| 89 | if (itemlen > PY_SSIZE_T_MAX - sz) { |
| 90 | PyErr_SetString(PyExc_OverflowError, |
| 91 | "join() result is too long"); |
| 92 | goto error; |
| 93 | } |
| 94 | sz += itemlen; |
| 95 | if (i != 0) { |
| 96 | if (seplen > PY_SSIZE_T_MAX - sz) { |
| 97 | PyErr_SetString(PyExc_OverflowError, |
| 98 | "join() result is too long"); |
| 99 | goto error; |
| 100 | } |
| 101 | sz += seplen; |
| 102 | } |
| 103 | if (seqlen != PySequence_Fast_GET_SIZE(seq)) { |
| 104 | PyErr_SetString(PyExc_RuntimeError, |
| 105 | "sequence changed size during iteration"); |
| 106 | goto error; |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | /* Allocate result space. */ |
| 111 | res = STRINGLIB_NEW(NULL, sz); |
| 112 | if (res == NULL) |
| 113 | goto error; |
| 114 | |
| 115 | /* Catenate everything. */ |
| 116 | p = STRINGLIB_STR(res); |
Bruce Merry | d07d9f4 | 2020-01-29 09:09:24 +0200 | [diff] [blame] | 117 | if (sz < GIL_THRESHOLD) { |
| 118 | drop_gil = 0; /* Benefits are likely outweighed by the overheads */ |
| 119 | } |
| 120 | if (drop_gil) { |
| 121 | save = PyEval_SaveThread(); |
| 122 | } |
Antoine Pitrou | 6f7b0da | 2012-10-20 23:08:34 +0200 | [diff] [blame] | 123 | if (!seplen) { |
| 124 | /* fast path */ |
| 125 | for (i = 0; i < nbufs; i++) { |
| 126 | Py_ssize_t n = buffers[i].len; |
| 127 | char *q = buffers[i].buf; |
Christian Heimes | f051e43 | 2016-09-13 20:22:02 +0200 | [diff] [blame] | 128 | memcpy(p, q, n); |
Antoine Pitrou | 6f7b0da | 2012-10-20 23:08:34 +0200 | [diff] [blame] | 129 | p += n; |
| 130 | } |
Antoine Pitrou | 6f7b0da | 2012-10-20 23:08:34 +0200 | [diff] [blame] | 131 | } |
Bruce Merry | d07d9f4 | 2020-01-29 09:09:24 +0200 | [diff] [blame] | 132 | else { |
| 133 | for (i = 0; i < nbufs; i++) { |
| 134 | Py_ssize_t n; |
| 135 | char *q; |
| 136 | if (i) { |
| 137 | memcpy(p, sepstr, seplen); |
| 138 | p += seplen; |
| 139 | } |
| 140 | n = buffers[i].len; |
| 141 | q = buffers[i].buf; |
| 142 | memcpy(p, q, n); |
| 143 | p += n; |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 144 | } |
Bruce Merry | d07d9f4 | 2020-01-29 09:09:24 +0200 | [diff] [blame] | 145 | } |
| 146 | if (drop_gil) { |
| 147 | PyEval_RestoreThread(save); |
Antoine Pitrou | cfc22b4 | 2012-10-16 21:07:23 +0200 | [diff] [blame] | 148 | } |
| 149 | goto done; |
| 150 | |
| 151 | error: |
| 152 | res = NULL; |
| 153 | done: |
| 154 | Py_DECREF(seq); |
| 155 | for (i = 0; i < nbufs; i++) |
| 156 | PyBuffer_Release(&buffers[i]); |
| 157 | if (buffers != static_buffers) |
| 158 | PyMem_FREE(buffers); |
| 159 | return res; |
| 160 | } |
| 161 | |
| 162 | #undef NB_STATIC_BUFFERS |
Bruce Merry | d07d9f4 | 2020-01-29 09:09:24 +0200 | [diff] [blame] | 163 | #undef GIL_THRESHOLD |