blob: 9506019d5af2b8766b3f0cb052f7526651769319 [file] [log] [blame]
Serhiy Storchakabcde10a2016-05-16 09:42:29 +03001#if STRINGLIB_IS_UNICODE
2# error "transmogrify.h only compatible with byte-wise strings"
3#endif
Gregory P. Smith60d241f2007-10-16 06:31:30 +00004
/* The more complicated methods.  Parts of these should be pulled out into the
   shared code in bytes_methods.c to cut down on duplicate code bloat. */
7
Tal Einatc929df32018-07-06 13:17:38 +03008/*[clinic input]
9class B "PyObject *" "&PyType_Type"
10[clinic start generated code]*/
11/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2935558188d97c76]*/
12
13#include "clinic/transmogrify.h.h"
14
Benjamin Peterson621b4302016-09-09 13:54:34 -070015static inline PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +030016return_self(PyObject *self)
17{
18#if !STRINGLIB_MUTABLE
19 if (STRINGLIB_CHECK_EXACT(self)) {
20 Py_INCREF(self);
21 return self;
22 }
23#endif
24 return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
25}
26
Tal Einatc929df32018-07-06 13:17:38 +030027/*[clinic input]
28B.expandtabs as stringlib_expandtabs
29
30 tabsize: int = 8
31
32Return a copy where all tab characters are expanded using spaces.
33
34If tabsize is not given, a tab size of 8 characters is assumed.
35[clinic start generated code]*/
36
37static PyObject *
38stringlib_expandtabs_impl(PyObject *self, int tabsize)
39/*[clinic end generated code: output=069cb7fae72e4c2b input=3c6d3b12aa3ccbea]*/
Gregory P. Smith60d241f2007-10-16 06:31:30 +000040{
41 const char *e, *p;
42 char *q;
Benjamin Peterson23cf4032014-03-30 19:47:57 -040043 Py_ssize_t i, j;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000044 PyObject *u;
Ezio Melotti6b027722013-04-21 04:07:51 +030045
Gregory P. Smith60d241f2007-10-16 06:31:30 +000046 /* First pass: determine size of output string */
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000047 i = j = 0;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000048 e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
Benjamin Peterson23cf4032014-03-30 19:47:57 -040049 for (p = STRINGLIB_STR(self); p < e; p++) {
Gregory P. Smith60d241f2007-10-16 06:31:30 +000050 if (*p == '\t') {
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000051 if (tabsize > 0) {
Benjamin Peterson23cf4032014-03-30 19:47:57 -040052 Py_ssize_t incr = tabsize - (j % tabsize);
53 if (j > PY_SSIZE_T_MAX - incr)
54 goto overflow;
55 j += incr;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000056 }
57 }
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000058 else {
Benjamin Peterson23cf4032014-03-30 19:47:57 -040059 if (j > PY_SSIZE_T_MAX - 1)
60 goto overflow;
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000061 j++;
62 if (*p == '\n' || *p == '\r') {
Benjamin Peterson23cf4032014-03-30 19:47:57 -040063 if (i > PY_SSIZE_T_MAX - j)
64 goto overflow;
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000065 i += j;
66 j = 0;
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000067 }
68 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +000069 }
Ezio Melotti6b027722013-04-21 04:07:51 +030070
Benjamin Peterson23cf4032014-03-30 19:47:57 -040071 if (i > PY_SSIZE_T_MAX - j)
72 goto overflow;
Benjamin Peterson0ad60982014-03-30 19:52:22 -040073
Gregory P. Smith60d241f2007-10-16 06:31:30 +000074 /* Second pass: create output string and fill it */
75 u = STRINGLIB_NEW(NULL, i + j);
76 if (!u)
77 return NULL;
Ezio Melotti6b027722013-04-21 04:07:51 +030078
Gregory P. Smith60d241f2007-10-16 06:31:30 +000079 j = 0;
80 q = STRINGLIB_STR(u);
Serhiy Storchaka009b8112015-03-18 21:53:15 +020081
Benjamin Peterson23cf4032014-03-30 19:47:57 -040082 for (p = STRINGLIB_STR(self); p < e; p++) {
Gregory P. Smith60d241f2007-10-16 06:31:30 +000083 if (*p == '\t') {
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000084 if (tabsize > 0) {
85 i = tabsize - (j % tabsize);
86 j += i;
87 while (i--)
88 *q++ = ' ';
89 }
90 }
91 else {
Gregory P. Smith60d241f2007-10-16 06:31:30 +000092 j++;
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000093 *q++ = *p;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000094 if (*p == '\n' || *p == '\r')
95 j = 0;
96 }
Benjamin Peterson23cf4032014-03-30 19:47:57 -040097 }
Ezio Melotti6b027722013-04-21 04:07:51 +030098
Gregory P. Smith60d241f2007-10-16 06:31:30 +000099 return u;
Benjamin Peterson23cf4032014-03-30 19:47:57 -0400100 overflow:
101 PyErr_SetString(PyExc_OverflowError, "result too long");
102 return NULL;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000103}
104
Benjamin Peterson621b4302016-09-09 13:54:34 -0700105static inline PyObject *
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000106pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
107{
108 PyObject *u;
109
110 if (left < 0)
111 left = 0;
112 if (right < 0)
113 right = 0;
114
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300115 if (left == 0 && right == 0) {
116 return return_self(self);
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000117 }
118
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300119 u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right);
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000120 if (u) {
121 if (left)
122 memset(STRINGLIB_STR(u), fill, left);
Christian Heimesf051e432016-09-13 20:22:02 +0200123 memcpy(STRINGLIB_STR(u) + left,
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300124 STRINGLIB_STR(self),
125 STRINGLIB_LEN(self));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000126 if (right)
127 memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300128 fill, right);
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000129 }
130
131 return u;
132}
133
Tal Einatc929df32018-07-06 13:17:38 +0300134/*[clinic input]
135B.ljust as stringlib_ljust
136
137 width: Py_ssize_t
138 fillchar: char = b' '
139 /
140
141Return a left-justified string of length width.
142
143Padding is done using the specified fill character.
144[clinic start generated code]*/
145
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000146static PyObject *
Tal Einatc929df32018-07-06 13:17:38 +0300147stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar)
148/*[clinic end generated code: output=c79ca173c5ff8337 input=eff2d014bc7d80df]*/
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000149{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300150 if (STRINGLIB_LEN(self) >= width) {
151 return return_self(self);
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000152 }
153
154 return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
155}
156
157
Tal Einatc929df32018-07-06 13:17:38 +0300158/*[clinic input]
159B.rjust as stringlib_rjust
160
161 width: Py_ssize_t
162 fillchar: char = b' '
163 /
164
165Return a right-justified string of length width.
166
167Padding is done using the specified fill character.
168[clinic start generated code]*/
169
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000170static PyObject *
Tal Einatc929df32018-07-06 13:17:38 +0300171stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar)
172/*[clinic end generated code: output=7df5d728a5439570 input=218b0bd31308955d]*/
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000173{
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300174 if (STRINGLIB_LEN(self) >= width) {
175 return return_self(self);
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000176 }
177
178 return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
179}
180
181
Tal Einatc929df32018-07-06 13:17:38 +0300182/*[clinic input]
183B.center as stringlib_center
184
185 width: Py_ssize_t
186 fillchar: char = b' '
187 /
188
189Return a centered string of length width.
190
191Padding is done using the specified fill character.
192[clinic start generated code]*/
193
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000194static PyObject *
Tal Einatc929df32018-07-06 13:17:38 +0300195stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar)
196/*[clinic end generated code: output=d8da2e055288b4c2 input=3776fd278765d89b]*/
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000197{
198 Py_ssize_t marg, left;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000199
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300200 if (STRINGLIB_LEN(self) >= width) {
201 return return_self(self);
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000202 }
203
204 marg = width - STRINGLIB_LEN(self);
205 left = marg / 2 + (marg & width & 1);
206
207 return pad(self, left, marg - left, fillchar);
208}
209
Tal Einatc929df32018-07-06 13:17:38 +0300210/*[clinic input]
211B.zfill as stringlib_zfill
212
213 width: Py_ssize_t
214 /
215
216Pad a numeric string with zeros on the left, to fill a field of the given width.
217
218The original string is never truncated.
219[clinic start generated code]*/
220
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000221static PyObject *
Tal Einatc929df32018-07-06 13:17:38 +0300222stringlib_zfill_impl(PyObject *self, Py_ssize_t width)
223/*[clinic end generated code: output=0b3c684a7f1b2319 input=2da6d7b8e9bcb19a]*/
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000224{
225 Py_ssize_t fill;
226 PyObject *s;
227 char *p;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000228
229 if (STRINGLIB_LEN(self) >= width) {
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300230 return return_self(self);
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000231 }
232
233 fill = width - STRINGLIB_LEN(self);
234
235 s = pad(self, fill, 0, '0');
236
237 if (s == NULL)
238 return NULL;
239
240 p = STRINGLIB_STR(s);
241 if (p[fill] == '+' || p[fill] == '-') {
242 /* move sign to beginning of string */
243 p[0] = p[fill];
244 p[fill] = '0';
245 }
246
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300247 return s;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000248}
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300249
250
251/* find and count characters and substrings */
252
/* Locate the first byte equal to `c` within the first `target_len` bytes of
   `target`.  Evaluates to a `char *` pointing at the match, or NULL when the
   byte does not occur.  Every parameter is parenthesized in the expansion so
   that any expression can be passed safely. */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
255
256
Benjamin Peterson621b4302016-09-09 13:54:34 -0700257static Py_ssize_t
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300258countchar(const char *target, Py_ssize_t target_len, char c,
259 Py_ssize_t maxcount)
260{
261 Py_ssize_t count = 0;
262 const char *start = target;
263 const char *end = target + target_len;
264
265 while ((start = findchar(start, end - start, c)) != NULL) {
266 count++;
267 if (count >= maxcount)
268 break;
269 start += 1;
270 }
271 return count;
272}
273
274
275/* Algorithms for different cases of string replacement */
276
277/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700278static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300279stringlib_replace_interleave(PyObject *self,
280 const char *to_s, Py_ssize_t to_len,
281 Py_ssize_t maxcount)
282{
283 const char *self_s;
284 char *result_s;
285 Py_ssize_t self_len, result_len;
286 Py_ssize_t count, i;
287 PyObject *result;
288
289 self_len = STRINGLIB_LEN(self);
290
291 /* 1 at the end plus 1 after every character;
292 count = min(maxcount, self_len + 1) */
293 if (maxcount <= self_len) {
294 count = maxcount;
295 }
296 else {
297 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
298 count = self_len + 1;
299 }
300
301 /* Check for overflow */
302 /* result_len = count * to_len + self_len; */
303 assert(count > 0);
304 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
305 PyErr_SetString(PyExc_OverflowError,
Xiang Zhang7a4da322017-01-10 10:56:38 +0800306 "replace bytes is too long");
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300307 return NULL;
308 }
309 result_len = count * to_len + self_len;
310 result = STRINGLIB_NEW(NULL, result_len);
311 if (result == NULL) {
312 return NULL;
313 }
314
315 self_s = STRINGLIB_STR(self);
316 result_s = STRINGLIB_STR(result);
317
318 if (to_len > 1) {
319 /* Lay the first one down (guaranteed this will occur) */
Christian Heimesf051e432016-09-13 20:22:02 +0200320 memcpy(result_s, to_s, to_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300321 result_s += to_len;
322 count -= 1;
323
324 for (i = 0; i < count; i++) {
325 *result_s++ = *self_s++;
Christian Heimesf051e432016-09-13 20:22:02 +0200326 memcpy(result_s, to_s, to_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300327 result_s += to_len;
328 }
329 }
330 else {
331 result_s[0] = to_s[0];
332 result_s += to_len;
333 count -= 1;
334 for (i = 0; i < count; i++) {
335 *result_s++ = *self_s++;
336 result_s[0] = to_s[0];
337 result_s += to_len;
338 }
339 }
340
341 /* Copy the rest of the original string */
Christian Heimesf051e432016-09-13 20:22:02 +0200342 memcpy(result_s, self_s, self_len - i);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300343
344 return result;
345}
346
347/* Special case for deleting a single character */
348/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700349static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300350stringlib_replace_delete_single_character(PyObject *self,
351 char from_c, Py_ssize_t maxcount)
352{
353 const char *self_s, *start, *next, *end;
354 char *result_s;
355 Py_ssize_t self_len, result_len;
356 Py_ssize_t count;
357 PyObject *result;
358
359 self_len = STRINGLIB_LEN(self);
360 self_s = STRINGLIB_STR(self);
361
362 count = countchar(self_s, self_len, from_c, maxcount);
363 if (count == 0) {
364 return return_self(self);
365 }
366
367 result_len = self_len - count; /* from_len == 1 */
368 assert(result_len>=0);
369
370 result = STRINGLIB_NEW(NULL, result_len);
371 if (result == NULL) {
372 return NULL;
373 }
374 result_s = STRINGLIB_STR(result);
375
376 start = self_s;
377 end = self_s + self_len;
378 while (count-- > 0) {
379 next = findchar(start, end - start, from_c);
380 if (next == NULL)
381 break;
Christian Heimesf051e432016-09-13 20:22:02 +0200382 memcpy(result_s, start, next - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300383 result_s += (next - start);
384 start = next + 1;
385 }
Christian Heimesf051e432016-09-13 20:22:02 +0200386 memcpy(result_s, start, end - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300387
388 return result;
389}
390
391/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
392
Benjamin Peterson621b4302016-09-09 13:54:34 -0700393static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300394stringlib_replace_delete_substring(PyObject *self,
395 const char *from_s, Py_ssize_t from_len,
396 Py_ssize_t maxcount)
397{
398 const char *self_s, *start, *next, *end;
399 char *result_s;
400 Py_ssize_t self_len, result_len;
401 Py_ssize_t count, offset;
402 PyObject *result;
403
404 self_len = STRINGLIB_LEN(self);
405 self_s = STRINGLIB_STR(self);
406
407 count = stringlib_count(self_s, self_len,
408 from_s, from_len,
409 maxcount);
410
411 if (count == 0) {
412 /* no matches */
413 return return_self(self);
414 }
415
416 result_len = self_len - (count * from_len);
417 assert (result_len>=0);
418
419 result = STRINGLIB_NEW(NULL, result_len);
420 if (result == NULL) {
421 return NULL;
422 }
423 result_s = STRINGLIB_STR(result);
424
425 start = self_s;
426 end = self_s + self_len;
427 while (count-- > 0) {
428 offset = stringlib_find(start, end - start,
429 from_s, from_len,
430 0);
431 if (offset == -1)
432 break;
433 next = start + offset;
434
Christian Heimesf051e432016-09-13 20:22:02 +0200435 memcpy(result_s, start, next - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300436
437 result_s += (next - start);
438 start = next + from_len;
439 }
Christian Heimesf051e432016-09-13 20:22:02 +0200440 memcpy(result_s, start, end - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300441 return result;
442}
443
444/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700445static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300446stringlib_replace_single_character_in_place(PyObject *self,
447 char from_c, char to_c,
448 Py_ssize_t maxcount)
449{
450 const char *self_s, *end;
451 char *result_s, *start, *next;
452 Py_ssize_t self_len;
453 PyObject *result;
454
455 /* The result string will be the same size */
456 self_s = STRINGLIB_STR(self);
457 self_len = STRINGLIB_LEN(self);
458
459 next = findchar(self_s, self_len, from_c);
460
461 if (next == NULL) {
462 /* No matches; return the original bytes */
463 return return_self(self);
464 }
465
466 /* Need to make a new bytes */
467 result = STRINGLIB_NEW(NULL, self_len);
468 if (result == NULL) {
469 return NULL;
470 }
471 result_s = STRINGLIB_STR(result);
Christian Heimesf051e432016-09-13 20:22:02 +0200472 memcpy(result_s, self_s, self_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300473
474 /* change everything in-place, starting with this one */
475 start = result_s + (next - self_s);
476 *start = to_c;
477 start++;
478 end = result_s + self_len;
479
480 while (--maxcount > 0) {
481 next = findchar(start, end - start, from_c);
482 if (next == NULL)
483 break;
484 *next = to_c;
485 start = next + 1;
486 }
487
488 return result;
489}
490
491/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700492static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300493stringlib_replace_substring_in_place(PyObject *self,
494 const char *from_s, Py_ssize_t from_len,
495 const char *to_s, Py_ssize_t to_len,
496 Py_ssize_t maxcount)
497{
498 const char *self_s, *end;
499 char *result_s, *start;
500 Py_ssize_t self_len, offset;
501 PyObject *result;
502
503 /* The result bytes will be the same size */
504
505 self_s = STRINGLIB_STR(self);
506 self_len = STRINGLIB_LEN(self);
507
508 offset = stringlib_find(self_s, self_len,
509 from_s, from_len,
510 0);
511 if (offset == -1) {
512 /* No matches; return the original bytes */
513 return return_self(self);
514 }
515
516 /* Need to make a new bytes */
517 result = STRINGLIB_NEW(NULL, self_len);
518 if (result == NULL) {
519 return NULL;
520 }
521 result_s = STRINGLIB_STR(result);
Christian Heimesf051e432016-09-13 20:22:02 +0200522 memcpy(result_s, self_s, self_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300523
524 /* change everything in-place, starting with this one */
525 start = result_s + offset;
Christian Heimesf051e432016-09-13 20:22:02 +0200526 memcpy(start, to_s, from_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300527 start += from_len;
528 end = result_s + self_len;
529
530 while ( --maxcount > 0) {
531 offset = stringlib_find(start, end - start,
532 from_s, from_len,
533 0);
534 if (offset == -1)
535 break;
Christian Heimesf051e432016-09-13 20:22:02 +0200536 memcpy(start + offset, to_s, from_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300537 start += offset + from_len;
538 }
539
540 return result;
541}
542
543/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700544static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300545stringlib_replace_single_character(PyObject *self,
546 char from_c,
547 const char *to_s, Py_ssize_t to_len,
548 Py_ssize_t maxcount)
549{
550 const char *self_s, *start, *next, *end;
551 char *result_s;
552 Py_ssize_t self_len, result_len;
553 Py_ssize_t count;
554 PyObject *result;
555
556 self_s = STRINGLIB_STR(self);
557 self_len = STRINGLIB_LEN(self);
558
559 count = countchar(self_s, self_len, from_c, maxcount);
560 if (count == 0) {
561 /* no matches, return unchanged */
562 return return_self(self);
563 }
564
565 /* use the difference between current and new, hence the "-1" */
566 /* result_len = self_len + count * (to_len-1) */
567 assert(count > 0);
568 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
569 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
570 return NULL;
571 }
572 result_len = self_len + count * (to_len - 1);
573
574 result = STRINGLIB_NEW(NULL, result_len);
575 if (result == NULL) {
576 return NULL;
577 }
578 result_s = STRINGLIB_STR(result);
579
580 start = self_s;
581 end = self_s + self_len;
582 while (count-- > 0) {
583 next = findchar(start, end - start, from_c);
584 if (next == NULL)
585 break;
586
587 if (next == start) {
588 /* replace with the 'to' */
Christian Heimesf051e432016-09-13 20:22:02 +0200589 memcpy(result_s, to_s, to_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300590 result_s += to_len;
591 start += 1;
592 } else {
593 /* copy the unchanged old then the 'to' */
Christian Heimesf051e432016-09-13 20:22:02 +0200594 memcpy(result_s, start, next - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300595 result_s += (next - start);
Christian Heimesf051e432016-09-13 20:22:02 +0200596 memcpy(result_s, to_s, to_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300597 result_s += to_len;
598 start = next + 1;
599 }
600 }
601 /* Copy the remainder of the remaining bytes */
Christian Heimesf051e432016-09-13 20:22:02 +0200602 memcpy(result_s, start, end - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300603
604 return result;
605}
606
607/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700608static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300609stringlib_replace_substring(PyObject *self,
610 const char *from_s, Py_ssize_t from_len,
611 const char *to_s, Py_ssize_t to_len,
612 Py_ssize_t maxcount)
613{
614 const char *self_s, *start, *next, *end;
615 char *result_s;
616 Py_ssize_t self_len, result_len;
617 Py_ssize_t count, offset;
618 PyObject *result;
619
620 self_s = STRINGLIB_STR(self);
621 self_len = STRINGLIB_LEN(self);
622
623 count = stringlib_count(self_s, self_len,
624 from_s, from_len,
625 maxcount);
626
627 if (count == 0) {
628 /* no matches, return unchanged */
629 return return_self(self);
630 }
631
632 /* Check for overflow */
633 /* result_len = self_len + count * (to_len-from_len) */
634 assert(count > 0);
635 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
636 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
637 return NULL;
638 }
639 result_len = self_len + count * (to_len - from_len);
640
641 result = STRINGLIB_NEW(NULL, result_len);
642 if (result == NULL) {
643 return NULL;
644 }
645 result_s = STRINGLIB_STR(result);
646
647 start = self_s;
648 end = self_s + self_len;
649 while (count-- > 0) {
650 offset = stringlib_find(start, end - start,
651 from_s, from_len,
652 0);
653 if (offset == -1)
654 break;
655 next = start + offset;
656 if (next == start) {
657 /* replace with the 'to' */
Christian Heimesf051e432016-09-13 20:22:02 +0200658 memcpy(result_s, to_s, to_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300659 result_s += to_len;
660 start += from_len;
661 } else {
662 /* copy the unchanged old then the 'to' */
Christian Heimesf051e432016-09-13 20:22:02 +0200663 memcpy(result_s, start, next - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300664 result_s += (next - start);
Christian Heimesf051e432016-09-13 20:22:02 +0200665 memcpy(result_s, to_s, to_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300666 result_s += to_len;
667 start = next + from_len;
668 }
669 }
670 /* Copy the remainder of the remaining bytes */
Christian Heimesf051e432016-09-13 20:22:02 +0200671 memcpy(result_s, start, end - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300672
673 return result;
674}
675
676
Benjamin Peterson621b4302016-09-09 13:54:34 -0700677static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300678stringlib_replace(PyObject *self,
679 const char *from_s, Py_ssize_t from_len,
680 const char *to_s, Py_ssize_t to_len,
681 Py_ssize_t maxcount)
682{
683 if (maxcount < 0) {
684 maxcount = PY_SSIZE_T_MAX;
685 } else if (maxcount == 0 || STRINGLIB_LEN(self) == 0) {
686 /* nothing to do; return the original bytes */
687 return return_self(self);
688 }
689
690 /* Handle zero-length special cases */
691 if (from_len == 0) {
692 if (to_len == 0) {
693 /* nothing to do; return the original bytes */
694 return return_self(self);
695 }
696 /* insert the 'to' bytes everywhere. */
697 /* >>> b"Python".replace(b"", b".") */
698 /* b'.P.y.t.h.o.n.' */
699 return stringlib_replace_interleave(self, to_s, to_len, maxcount);
700 }
701
702 /* Except for b"".replace(b"", b"A") == b"A" there is no way beyond this */
703 /* point for an empty self bytes to generate a non-empty bytes */
704 /* Special case so the remaining code always gets a non-empty bytes */
705 if (STRINGLIB_LEN(self) == 0) {
706 return return_self(self);
707 }
708
709 if (to_len == 0) {
710 /* delete all occurrences of 'from' bytes */
711 if (from_len == 1) {
712 return stringlib_replace_delete_single_character(
713 self, from_s[0], maxcount);
714 } else {
715 return stringlib_replace_delete_substring(
716 self, from_s, from_len, maxcount);
717 }
718 }
719
720 /* Handle special case where both bytes have the same length */
721
722 if (from_len == to_len) {
723 if (from_len == 1) {
724 return stringlib_replace_single_character_in_place(
725 self, from_s[0], to_s[0], maxcount);
726 } else {
727 return stringlib_replace_substring_in_place(
728 self, from_s, from_len, to_s, to_len, maxcount);
729 }
730 }
731
732 /* Otherwise use the more generic algorithms */
733 if (from_len == 1) {
734 return stringlib_replace_single_character(
735 self, from_s[0], to_s, to_len, maxcount);
736 } else {
737 /* len('from')>=2, len('to')>=1 */
738 return stringlib_replace_substring(
739 self, from_s, from_len, to_s, to_len, maxcount);
740 }
741}
742
743#undef findchar