blob: a314572a72ba49d0c8255eeada553720a605faab [file] [log] [blame]
Serhiy Storchakabcde10a2016-05-16 09:42:29 +03001#if STRINGLIB_IS_UNICODE
2# error "transmogrify.h only compatible with byte-wise strings"
3#endif
Gregory P. Smith60d241f2007-10-16 06:31:30 +00004
/* The more complicated methods.  Parts of these should be pulled out into the
   shared code in bytes_methods.c to cut down on duplicate code bloat. */
7
Benjamin Peterson621b4302016-09-09 13:54:34 -07008static inline PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +03009return_self(PyObject *self)
10{
11#if !STRINGLIB_MUTABLE
12 if (STRINGLIB_CHECK_EXACT(self)) {
13 Py_INCREF(self);
14 return self;
15 }
16#endif
17 return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
18}
19
Gregory P. Smith60d241f2007-10-16 06:31:30 +000020static PyObject*
Ezio Melotti745d54d2013-11-16 19:10:57 +020021stringlib_expandtabs(PyObject *self, PyObject *args, PyObject *kwds)
Gregory P. Smith60d241f2007-10-16 06:31:30 +000022{
23 const char *e, *p;
24 char *q;
Benjamin Peterson23cf4032014-03-30 19:47:57 -040025 Py_ssize_t i, j;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000026 PyObject *u;
Ezio Melotti745d54d2013-11-16 19:10:57 +020027 static char *kwlist[] = {"tabsize", 0};
Gregory P. Smith60d241f2007-10-16 06:31:30 +000028 int tabsize = 8;
Ezio Melotti6b027722013-04-21 04:07:51 +030029
Ezio Melotti745d54d2013-11-16 19:10:57 +020030 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:expandtabs",
31 kwlist, &tabsize))
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000032 return NULL;
Ezio Melotti6b027722013-04-21 04:07:51 +030033
Gregory P. Smith60d241f2007-10-16 06:31:30 +000034 /* First pass: determine size of output string */
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000035 i = j = 0;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000036 e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
Benjamin Peterson23cf4032014-03-30 19:47:57 -040037 for (p = STRINGLIB_STR(self); p < e; p++) {
Gregory P. Smith60d241f2007-10-16 06:31:30 +000038 if (*p == '\t') {
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000039 if (tabsize > 0) {
Benjamin Peterson23cf4032014-03-30 19:47:57 -040040 Py_ssize_t incr = tabsize - (j % tabsize);
41 if (j > PY_SSIZE_T_MAX - incr)
42 goto overflow;
43 j += incr;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000044 }
45 }
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000046 else {
Benjamin Peterson23cf4032014-03-30 19:47:57 -040047 if (j > PY_SSIZE_T_MAX - 1)
48 goto overflow;
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000049 j++;
50 if (*p == '\n' || *p == '\r') {
Benjamin Peterson23cf4032014-03-30 19:47:57 -040051 if (i > PY_SSIZE_T_MAX - j)
52 goto overflow;
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000053 i += j;
54 j = 0;
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000055 }
56 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +000057 }
Ezio Melotti6b027722013-04-21 04:07:51 +030058
Benjamin Peterson23cf4032014-03-30 19:47:57 -040059 if (i > PY_SSIZE_T_MAX - j)
60 goto overflow;
Benjamin Peterson0ad60982014-03-30 19:52:22 -040061
Gregory P. Smith60d241f2007-10-16 06:31:30 +000062 /* Second pass: create output string and fill it */
63 u = STRINGLIB_NEW(NULL, i + j);
64 if (!u)
65 return NULL;
Ezio Melotti6b027722013-04-21 04:07:51 +030066
Gregory P. Smith60d241f2007-10-16 06:31:30 +000067 j = 0;
68 q = STRINGLIB_STR(u);
Serhiy Storchaka009b8112015-03-18 21:53:15 +020069
Benjamin Peterson23cf4032014-03-30 19:47:57 -040070 for (p = STRINGLIB_STR(self); p < e; p++) {
Gregory P. Smith60d241f2007-10-16 06:31:30 +000071 if (*p == '\t') {
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000072 if (tabsize > 0) {
73 i = tabsize - (j % tabsize);
74 j += i;
75 while (i--)
76 *q++ = ' ';
77 }
78 }
79 else {
Gregory P. Smith60d241f2007-10-16 06:31:30 +000080 j++;
Antoine Pitrou8d4e5052009-01-13 22:59:11 +000081 *q++ = *p;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000082 if (*p == '\n' || *p == '\r')
83 j = 0;
84 }
Benjamin Peterson23cf4032014-03-30 19:47:57 -040085 }
Ezio Melotti6b027722013-04-21 04:07:51 +030086
Gregory P. Smith60d241f2007-10-16 06:31:30 +000087 return u;
Benjamin Peterson23cf4032014-03-30 19:47:57 -040088 overflow:
89 PyErr_SetString(PyExc_OverflowError, "result too long");
90 return NULL;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000091}
92
Benjamin Peterson621b4302016-09-09 13:54:34 -070093static inline PyObject *
Gregory P. Smith60d241f2007-10-16 06:31:30 +000094pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
95{
96 PyObject *u;
97
98 if (left < 0)
99 left = 0;
100 if (right < 0)
101 right = 0;
102
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300103 if (left == 0 && right == 0) {
104 return return_self(self);
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000105 }
106
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300107 u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right);
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000108 if (u) {
109 if (left)
110 memset(STRINGLIB_STR(u), fill, left);
Christian Heimesf051e432016-09-13 20:22:02 +0200111 memcpy(STRINGLIB_STR(u) + left,
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300112 STRINGLIB_STR(self),
113 STRINGLIB_LEN(self));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000114 if (right)
115 memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300116 fill, right);
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000117 }
118
119 return u;
120}
121
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000122static PyObject *
123stringlib_ljust(PyObject *self, PyObject *args)
124{
125 Py_ssize_t width;
126 char fillchar = ' ';
127
128 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
129 return NULL;
130
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300131 if (STRINGLIB_LEN(self) >= width) {
132 return return_self(self);
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000133 }
134
135 return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
136}
137
138
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000139static PyObject *
140stringlib_rjust(PyObject *self, PyObject *args)
141{
142 Py_ssize_t width;
143 char fillchar = ' ';
144
145 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
146 return NULL;
147
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300148 if (STRINGLIB_LEN(self) >= width) {
149 return return_self(self);
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000150 }
151
152 return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
153}
154
155
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000156static PyObject *
157stringlib_center(PyObject *self, PyObject *args)
158{
159 Py_ssize_t marg, left;
160 Py_ssize_t width;
161 char fillchar = ' ';
162
163 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
164 return NULL;
165
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300166 if (STRINGLIB_LEN(self) >= width) {
167 return return_self(self);
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000168 }
169
170 marg = width - STRINGLIB_LEN(self);
171 left = marg / 2 + (marg & width & 1);
172
173 return pad(self, left, marg - left, fillchar);
174}
175
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000176static PyObject *
177stringlib_zfill(PyObject *self, PyObject *args)
178{
179 Py_ssize_t fill;
180 PyObject *s;
181 char *p;
182 Py_ssize_t width;
183
184 if (!PyArg_ParseTuple(args, "n:zfill", &width))
185 return NULL;
186
187 if (STRINGLIB_LEN(self) >= width) {
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300188 return return_self(self);
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000189 }
190
191 fill = width - STRINGLIB_LEN(self);
192
193 s = pad(self, fill, 0, '0');
194
195 if (s == NULL)
196 return NULL;
197
198 p = STRINGLIB_STR(s);
199 if (p[fill] == '+' || p[fill] == '-') {
200 /* move sign to beginning of string */
201 p[0] = p[fill];
202 p[fill] = '0';
203 }
204
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300205 return s;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000206}
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300207
208
209/* find and count characters and substrings */
210
/* memchr() wrapper: pointer (as char *) to the first byte equal to `c`
   within the first `target_len` bytes of `target`, or NULL if absent. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
213
214
Benjamin Peterson621b4302016-09-09 13:54:34 -0700215static Py_ssize_t
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300216countchar(const char *target, Py_ssize_t target_len, char c,
217 Py_ssize_t maxcount)
218{
219 Py_ssize_t count = 0;
220 const char *start = target;
221 const char *end = target + target_len;
222
223 while ((start = findchar(start, end - start, c)) != NULL) {
224 count++;
225 if (count >= maxcount)
226 break;
227 start += 1;
228 }
229 return count;
230}
231
232
233/* Algorithms for different cases of string replacement */
234
235/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700236static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300237stringlib_replace_interleave(PyObject *self,
238 const char *to_s, Py_ssize_t to_len,
239 Py_ssize_t maxcount)
240{
241 const char *self_s;
242 char *result_s;
243 Py_ssize_t self_len, result_len;
244 Py_ssize_t count, i;
245 PyObject *result;
246
247 self_len = STRINGLIB_LEN(self);
248
249 /* 1 at the end plus 1 after every character;
250 count = min(maxcount, self_len + 1) */
251 if (maxcount <= self_len) {
252 count = maxcount;
253 }
254 else {
255 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
256 count = self_len + 1;
257 }
258
259 /* Check for overflow */
260 /* result_len = count * to_len + self_len; */
261 assert(count > 0);
262 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
263 PyErr_SetString(PyExc_OverflowError,
264 "replace bytes are too long");
265 return NULL;
266 }
267 result_len = count * to_len + self_len;
268 result = STRINGLIB_NEW(NULL, result_len);
269 if (result == NULL) {
270 return NULL;
271 }
272
273 self_s = STRINGLIB_STR(self);
274 result_s = STRINGLIB_STR(result);
275
276 if (to_len > 1) {
277 /* Lay the first one down (guaranteed this will occur) */
Christian Heimesf051e432016-09-13 20:22:02 +0200278 memcpy(result_s, to_s, to_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300279 result_s += to_len;
280 count -= 1;
281
282 for (i = 0; i < count; i++) {
283 *result_s++ = *self_s++;
Christian Heimesf051e432016-09-13 20:22:02 +0200284 memcpy(result_s, to_s, to_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300285 result_s += to_len;
286 }
287 }
288 else {
289 result_s[0] = to_s[0];
290 result_s += to_len;
291 count -= 1;
292 for (i = 0; i < count; i++) {
293 *result_s++ = *self_s++;
294 result_s[0] = to_s[0];
295 result_s += to_len;
296 }
297 }
298
299 /* Copy the rest of the original string */
Christian Heimesf051e432016-09-13 20:22:02 +0200300 memcpy(result_s, self_s, self_len - i);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300301
302 return result;
303}
304
305/* Special case for deleting a single character */
306/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700307static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300308stringlib_replace_delete_single_character(PyObject *self,
309 char from_c, Py_ssize_t maxcount)
310{
311 const char *self_s, *start, *next, *end;
312 char *result_s;
313 Py_ssize_t self_len, result_len;
314 Py_ssize_t count;
315 PyObject *result;
316
317 self_len = STRINGLIB_LEN(self);
318 self_s = STRINGLIB_STR(self);
319
320 count = countchar(self_s, self_len, from_c, maxcount);
321 if (count == 0) {
322 return return_self(self);
323 }
324
325 result_len = self_len - count; /* from_len == 1 */
326 assert(result_len>=0);
327
328 result = STRINGLIB_NEW(NULL, result_len);
329 if (result == NULL) {
330 return NULL;
331 }
332 result_s = STRINGLIB_STR(result);
333
334 start = self_s;
335 end = self_s + self_len;
336 while (count-- > 0) {
337 next = findchar(start, end - start, from_c);
338 if (next == NULL)
339 break;
Christian Heimesf051e432016-09-13 20:22:02 +0200340 memcpy(result_s, start, next - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300341 result_s += (next - start);
342 start = next + 1;
343 }
Christian Heimesf051e432016-09-13 20:22:02 +0200344 memcpy(result_s, start, end - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300345
346 return result;
347}
348
349/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
350
Benjamin Peterson621b4302016-09-09 13:54:34 -0700351static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300352stringlib_replace_delete_substring(PyObject *self,
353 const char *from_s, Py_ssize_t from_len,
354 Py_ssize_t maxcount)
355{
356 const char *self_s, *start, *next, *end;
357 char *result_s;
358 Py_ssize_t self_len, result_len;
359 Py_ssize_t count, offset;
360 PyObject *result;
361
362 self_len = STRINGLIB_LEN(self);
363 self_s = STRINGLIB_STR(self);
364
365 count = stringlib_count(self_s, self_len,
366 from_s, from_len,
367 maxcount);
368
369 if (count == 0) {
370 /* no matches */
371 return return_self(self);
372 }
373
374 result_len = self_len - (count * from_len);
375 assert (result_len>=0);
376
377 result = STRINGLIB_NEW(NULL, result_len);
378 if (result == NULL) {
379 return NULL;
380 }
381 result_s = STRINGLIB_STR(result);
382
383 start = self_s;
384 end = self_s + self_len;
385 while (count-- > 0) {
386 offset = stringlib_find(start, end - start,
387 from_s, from_len,
388 0);
389 if (offset == -1)
390 break;
391 next = start + offset;
392
Christian Heimesf051e432016-09-13 20:22:02 +0200393 memcpy(result_s, start, next - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300394
395 result_s += (next - start);
396 start = next + from_len;
397 }
Christian Heimesf051e432016-09-13 20:22:02 +0200398 memcpy(result_s, start, end - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300399 return result;
400}
401
402/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700403static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300404stringlib_replace_single_character_in_place(PyObject *self,
405 char from_c, char to_c,
406 Py_ssize_t maxcount)
407{
408 const char *self_s, *end;
409 char *result_s, *start, *next;
410 Py_ssize_t self_len;
411 PyObject *result;
412
413 /* The result string will be the same size */
414 self_s = STRINGLIB_STR(self);
415 self_len = STRINGLIB_LEN(self);
416
417 next = findchar(self_s, self_len, from_c);
418
419 if (next == NULL) {
420 /* No matches; return the original bytes */
421 return return_self(self);
422 }
423
424 /* Need to make a new bytes */
425 result = STRINGLIB_NEW(NULL, self_len);
426 if (result == NULL) {
427 return NULL;
428 }
429 result_s = STRINGLIB_STR(result);
Christian Heimesf051e432016-09-13 20:22:02 +0200430 memcpy(result_s, self_s, self_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300431
432 /* change everything in-place, starting with this one */
433 start = result_s + (next - self_s);
434 *start = to_c;
435 start++;
436 end = result_s + self_len;
437
438 while (--maxcount > 0) {
439 next = findchar(start, end - start, from_c);
440 if (next == NULL)
441 break;
442 *next = to_c;
443 start = next + 1;
444 }
445
446 return result;
447}
448
449/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700450static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300451stringlib_replace_substring_in_place(PyObject *self,
452 const char *from_s, Py_ssize_t from_len,
453 const char *to_s, Py_ssize_t to_len,
454 Py_ssize_t maxcount)
455{
456 const char *self_s, *end;
457 char *result_s, *start;
458 Py_ssize_t self_len, offset;
459 PyObject *result;
460
461 /* The result bytes will be the same size */
462
463 self_s = STRINGLIB_STR(self);
464 self_len = STRINGLIB_LEN(self);
465
466 offset = stringlib_find(self_s, self_len,
467 from_s, from_len,
468 0);
469 if (offset == -1) {
470 /* No matches; return the original bytes */
471 return return_self(self);
472 }
473
474 /* Need to make a new bytes */
475 result = STRINGLIB_NEW(NULL, self_len);
476 if (result == NULL) {
477 return NULL;
478 }
479 result_s = STRINGLIB_STR(result);
Christian Heimesf051e432016-09-13 20:22:02 +0200480 memcpy(result_s, self_s, self_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300481
482 /* change everything in-place, starting with this one */
483 start = result_s + offset;
Christian Heimesf051e432016-09-13 20:22:02 +0200484 memcpy(start, to_s, from_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300485 start += from_len;
486 end = result_s + self_len;
487
488 while ( --maxcount > 0) {
489 offset = stringlib_find(start, end - start,
490 from_s, from_len,
491 0);
492 if (offset == -1)
493 break;
Christian Heimesf051e432016-09-13 20:22:02 +0200494 memcpy(start + offset, to_s, from_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300495 start += offset + from_len;
496 }
497
498 return result;
499}
500
501/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700502static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300503stringlib_replace_single_character(PyObject *self,
504 char from_c,
505 const char *to_s, Py_ssize_t to_len,
506 Py_ssize_t maxcount)
507{
508 const char *self_s, *start, *next, *end;
509 char *result_s;
510 Py_ssize_t self_len, result_len;
511 Py_ssize_t count;
512 PyObject *result;
513
514 self_s = STRINGLIB_STR(self);
515 self_len = STRINGLIB_LEN(self);
516
517 count = countchar(self_s, self_len, from_c, maxcount);
518 if (count == 0) {
519 /* no matches, return unchanged */
520 return return_self(self);
521 }
522
523 /* use the difference between current and new, hence the "-1" */
524 /* result_len = self_len + count * (to_len-1) */
525 assert(count > 0);
526 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
527 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
528 return NULL;
529 }
530 result_len = self_len + count * (to_len - 1);
531
532 result = STRINGLIB_NEW(NULL, result_len);
533 if (result == NULL) {
534 return NULL;
535 }
536 result_s = STRINGLIB_STR(result);
537
538 start = self_s;
539 end = self_s + self_len;
540 while (count-- > 0) {
541 next = findchar(start, end - start, from_c);
542 if (next == NULL)
543 break;
544
545 if (next == start) {
546 /* replace with the 'to' */
Christian Heimesf051e432016-09-13 20:22:02 +0200547 memcpy(result_s, to_s, to_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300548 result_s += to_len;
549 start += 1;
550 } else {
551 /* copy the unchanged old then the 'to' */
Christian Heimesf051e432016-09-13 20:22:02 +0200552 memcpy(result_s, start, next - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300553 result_s += (next - start);
Christian Heimesf051e432016-09-13 20:22:02 +0200554 memcpy(result_s, to_s, to_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300555 result_s += to_len;
556 start = next + 1;
557 }
558 }
559 /* Copy the remainder of the remaining bytes */
Christian Heimesf051e432016-09-13 20:22:02 +0200560 memcpy(result_s, start, end - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300561
562 return result;
563}
564
565/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700566static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300567stringlib_replace_substring(PyObject *self,
568 const char *from_s, Py_ssize_t from_len,
569 const char *to_s, Py_ssize_t to_len,
570 Py_ssize_t maxcount)
571{
572 const char *self_s, *start, *next, *end;
573 char *result_s;
574 Py_ssize_t self_len, result_len;
575 Py_ssize_t count, offset;
576 PyObject *result;
577
578 self_s = STRINGLIB_STR(self);
579 self_len = STRINGLIB_LEN(self);
580
581 count = stringlib_count(self_s, self_len,
582 from_s, from_len,
583 maxcount);
584
585 if (count == 0) {
586 /* no matches, return unchanged */
587 return return_self(self);
588 }
589
590 /* Check for overflow */
591 /* result_len = self_len + count * (to_len-from_len) */
592 assert(count > 0);
593 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
594 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
595 return NULL;
596 }
597 result_len = self_len + count * (to_len - from_len);
598
599 result = STRINGLIB_NEW(NULL, result_len);
600 if (result == NULL) {
601 return NULL;
602 }
603 result_s = STRINGLIB_STR(result);
604
605 start = self_s;
606 end = self_s + self_len;
607 while (count-- > 0) {
608 offset = stringlib_find(start, end - start,
609 from_s, from_len,
610 0);
611 if (offset == -1)
612 break;
613 next = start + offset;
614 if (next == start) {
615 /* replace with the 'to' */
Christian Heimesf051e432016-09-13 20:22:02 +0200616 memcpy(result_s, to_s, to_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300617 result_s += to_len;
618 start += from_len;
619 } else {
620 /* copy the unchanged old then the 'to' */
Christian Heimesf051e432016-09-13 20:22:02 +0200621 memcpy(result_s, start, next - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300622 result_s += (next - start);
Christian Heimesf051e432016-09-13 20:22:02 +0200623 memcpy(result_s, to_s, to_len);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300624 result_s += to_len;
625 start = next + from_len;
626 }
627 }
628 /* Copy the remainder of the remaining bytes */
Christian Heimesf051e432016-09-13 20:22:02 +0200629 memcpy(result_s, start, end - start);
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300630
631 return result;
632}
633
634
Benjamin Peterson621b4302016-09-09 13:54:34 -0700635static PyObject *
Serhiy Storchakafb81d3c2016-05-05 09:26:07 +0300636stringlib_replace(PyObject *self,
637 const char *from_s, Py_ssize_t from_len,
638 const char *to_s, Py_ssize_t to_len,
639 Py_ssize_t maxcount)
640{
641 if (maxcount < 0) {
642 maxcount = PY_SSIZE_T_MAX;
643 } else if (maxcount == 0 || STRINGLIB_LEN(self) == 0) {
644 /* nothing to do; return the original bytes */
645 return return_self(self);
646 }
647
648 /* Handle zero-length special cases */
649 if (from_len == 0) {
650 if (to_len == 0) {
651 /* nothing to do; return the original bytes */
652 return return_self(self);
653 }
654 /* insert the 'to' bytes everywhere. */
655 /* >>> b"Python".replace(b"", b".") */
656 /* b'.P.y.t.h.o.n.' */
657 return stringlib_replace_interleave(self, to_s, to_len, maxcount);
658 }
659
660 /* Except for b"".replace(b"", b"A") == b"A" there is no way beyond this */
661 /* point for an empty self bytes to generate a non-empty bytes */
662 /* Special case so the remaining code always gets a non-empty bytes */
663 if (STRINGLIB_LEN(self) == 0) {
664 return return_self(self);
665 }
666
667 if (to_len == 0) {
668 /* delete all occurrences of 'from' bytes */
669 if (from_len == 1) {
670 return stringlib_replace_delete_single_character(
671 self, from_s[0], maxcount);
672 } else {
673 return stringlib_replace_delete_substring(
674 self, from_s, from_len, maxcount);
675 }
676 }
677
678 /* Handle special case where both bytes have the same length */
679
680 if (from_len == to_len) {
681 if (from_len == 1) {
682 return stringlib_replace_single_character_in_place(
683 self, from_s[0], to_s[0], maxcount);
684 } else {
685 return stringlib_replace_substring_in_place(
686 self, from_s, from_len, to_s, to_len, maxcount);
687 }
688 }
689
690 /* Otherwise use the more generic algorithms */
691 if (from_len == 1) {
692 return stringlib_replace_single_character(
693 self, from_s[0], to_s, to_len, maxcount);
694 } else {
695 /* len('from')>=2, len('to')>=1 */
696 return stringlib_replace_substring(
697 self, from_s, from_len, to_s, to_len, maxcount);
698 }
699}
700
701#undef findchar