/* stringlib: bytes joining implementation */

/* This header copies raw bytes only; refuse to be included from a
 * stringlib instantiation that has STRINGLIB_IS_UNICODE set.
 */
#if STRINGLIB_IS_UNICODE
#error join.h only compatible with byte-wise strings
#endif

7Py_LOCAL_INLINE(PyObject *)
8STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
9{
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +030010 const char *sepstr = STRINGLIB_STR(sep);
11 Py_ssize_t seplen = STRINGLIB_LEN(sep);
Antoine Pitroucfc22b42012-10-16 21:07:23 +020012 PyObject *res = NULL;
13 char *p;
14 Py_ssize_t seqlen = 0;
15 Py_ssize_t sz = 0;
16 Py_ssize_t i, nbufs;
17 PyObject *seq, *item;
18 Py_buffer *buffers = NULL;
19#define NB_STATIC_BUFFERS 10
20 Py_buffer static_buffers[NB_STATIC_BUFFERS];
Bruce Merryd07d9f42020-01-29 09:09:24 +020021#define GIL_THRESHOLD 1048576
22 int drop_gil = 1;
Inada Naoki869c0c92020-02-03 19:03:34 +090023 PyThreadState *save = NULL;
Antoine Pitroucfc22b42012-10-16 21:07:23 +020024
25 seq = PySequence_Fast(iterable, "can only join an iterable");
26 if (seq == NULL) {
27 return NULL;
28 }
29
30 seqlen = PySequence_Fast_GET_SIZE(seq);
31 if (seqlen == 0) {
32 Py_DECREF(seq);
33 return STRINGLIB_NEW(NULL, 0);
34 }
35#ifndef STRINGLIB_MUTABLE
36 if (seqlen == 1) {
37 item = PySequence_Fast_GET_ITEM(seq, 0);
38 if (STRINGLIB_CHECK_EXACT(item)) {
39 Py_INCREF(item);
40 Py_DECREF(seq);
41 return item;
42 }
43 }
44#endif
45 if (seqlen > NB_STATIC_BUFFERS) {
46 buffers = PyMem_NEW(Py_buffer, seqlen);
47 if (buffers == NULL) {
48 Py_DECREF(seq);
Christian Heimes5f7e8da2012-12-02 07:56:42 +010049 PyErr_NoMemory();
Antoine Pitroucfc22b42012-10-16 21:07:23 +020050 return NULL;
51 }
52 }
53 else {
54 buffers = static_buffers;
55 }
56
57 /* Here is the general case. Do a pre-pass to figure out the total
58 * amount of space we'll need (sz), and see whether all arguments are
Serhiy Storchakab757c832014-12-05 22:25:22 +020059 * bytes-like.
Antoine Pitroucfc22b42012-10-16 21:07:23 +020060 */
61 for (i = 0, nbufs = 0; i < seqlen; i++) {
62 Py_ssize_t itemlen;
63 item = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +020064 if (PyBytes_CheckExact(item)) {
65 /* Fast path. */
66 Py_INCREF(item);
67 buffers[i].obj = item;
68 buffers[i].buf = PyBytes_AS_STRING(item);
69 buffers[i].len = PyBytes_GET_SIZE(item);
70 }
Bruce Merryd07d9f42020-01-29 09:09:24 +020071 else {
72 if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
73 PyErr_Format(PyExc_TypeError,
74 "sequence item %zd: expected a bytes-like object, "
75 "%.80s found",
76 i, Py_TYPE(item)->tp_name);
77 goto error;
78 }
79 /* If the backing objects are mutable, then dropping the GIL
80 * opens up race conditions where another thread tries to modify
81 * the object which we hold a buffer on it. Such code has data
82 * races anyway, but this is a conservative approach that avoids
83 * changing the behaviour of that data race.
84 */
85 drop_gil = 0;
Antoine Pitroucfc22b42012-10-16 21:07:23 +020086 }
87 nbufs = i + 1; /* for error cleanup */
88 itemlen = buffers[i].len;
89 if (itemlen > PY_SSIZE_T_MAX - sz) {
90 PyErr_SetString(PyExc_OverflowError,
91 "join() result is too long");
92 goto error;
93 }
94 sz += itemlen;
95 if (i != 0) {
96 if (seplen > PY_SSIZE_T_MAX - sz) {
97 PyErr_SetString(PyExc_OverflowError,
98 "join() result is too long");
99 goto error;
100 }
101 sz += seplen;
102 }
103 if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
104 PyErr_SetString(PyExc_RuntimeError,
105 "sequence changed size during iteration");
106 goto error;
107 }
108 }
109
110 /* Allocate result space. */
111 res = STRINGLIB_NEW(NULL, sz);
112 if (res == NULL)
113 goto error;
114
115 /* Catenate everything. */
116 p = STRINGLIB_STR(res);
Bruce Merryd07d9f42020-01-29 09:09:24 +0200117 if (sz < GIL_THRESHOLD) {
118 drop_gil = 0; /* Benefits are likely outweighed by the overheads */
119 }
120 if (drop_gil) {
121 save = PyEval_SaveThread();
122 }
Antoine Pitrou6f7b0da2012-10-20 23:08:34 +0200123 if (!seplen) {
124 /* fast path */
125 for (i = 0; i < nbufs; i++) {
126 Py_ssize_t n = buffers[i].len;
127 char *q = buffers[i].buf;
Christian Heimesf051e432016-09-13 20:22:02 +0200128 memcpy(p, q, n);
Antoine Pitrou6f7b0da2012-10-20 23:08:34 +0200129 p += n;
130 }
Antoine Pitrou6f7b0da2012-10-20 23:08:34 +0200131 }
Bruce Merryd07d9f42020-01-29 09:09:24 +0200132 else {
133 for (i = 0; i < nbufs; i++) {
134 Py_ssize_t n;
135 char *q;
136 if (i) {
137 memcpy(p, sepstr, seplen);
138 p += seplen;
139 }
140 n = buffers[i].len;
141 q = buffers[i].buf;
142 memcpy(p, q, n);
143 p += n;
Antoine Pitroucfc22b42012-10-16 21:07:23 +0200144 }
Bruce Merryd07d9f42020-01-29 09:09:24 +0200145 }
146 if (drop_gil) {
147 PyEval_RestoreThread(save);
Antoine Pitroucfc22b42012-10-16 21:07:23 +0200148 }
149 goto done;
150
151error:
152 res = NULL;
153done:
154 Py_DECREF(seq);
155 for (i = 0; i < nbufs; i++)
156 PyBuffer_Release(&buffers[i]);
157 if (buffers != static_buffers)
158 PyMem_FREE(buffers);
159 return res;
160}
161
162#undef NB_STATIC_BUFFERS
Bruce Merryd07d9f42020-01-29 09:09:24 +0200163#undef GIL_THRESHOLD