blob: 85d9ceea52dd49b5c750dca1a3be27bfc8db9e66 [file] [log] [blame]
Serhiy Storchakaab8bcb32016-07-03 13:26:52 +03001#define PY_SSIZE_T_CLEAN
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002#include "Python.h"
3#include "bytes_methods.h"
4
Gregory P. Smith60d241f2007-10-16 06:31:30 +00005PyDoc_STRVAR_shared(_Py_isspace__doc__,
6"B.isspace() -> bool\n\
7\n\
8Return True if all characters in B are whitespace\n\
9and there is at least one character in B, False otherwise.");
10
11PyObject*
12_Py_bytes_isspace(const char *cptr, Py_ssize_t len)
13{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020014 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020016 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000017
18 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000019 if (len == 1 && Py_ISSPACE(*p))
Gregory P. Smith60d241f2007-10-16 06:31:30 +000020 Py_RETURN_TRUE;
21
22 /* Special case for empty strings */
23 if (len == 0)
24 Py_RETURN_FALSE;
25
26 e = p + len;
27 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000028 if (!Py_ISSPACE(*p))
Gregory P. Smith60d241f2007-10-16 06:31:30 +000029 Py_RETURN_FALSE;
30 }
31 Py_RETURN_TRUE;
32}
33
34
35PyDoc_STRVAR_shared(_Py_isalpha__doc__,
36"B.isalpha() -> bool\n\
37\n\
38Return True if all characters in B are alphabetic\n\
39and there is at least one character in B, False otherwise.");
40
41PyObject*
42_Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
43{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020044 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +000045 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020046 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000047
48 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000049 if (len == 1 && Py_ISALPHA(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000050 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000051
52 /* Special case for empty strings */
53 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000054 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000055
56 e = p + len;
57 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000058 if (!Py_ISALPHA(*p))
59 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000060 }
61 Py_RETURN_TRUE;
62}
63
64
65PyDoc_STRVAR_shared(_Py_isalnum__doc__,
66"B.isalnum() -> bool\n\
67\n\
68Return True if all characters in B are alphanumeric\n\
69and there is at least one character in B, False otherwise.");
70
71PyObject*
72_Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
73{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020074 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +000075 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020076 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000077
78 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000079 if (len == 1 && Py_ISALNUM(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000081
82 /* Special case for empty strings */
83 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000085
86 e = p + len;
87 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000088 if (!Py_ISALNUM(*p))
89 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000090 }
91 Py_RETURN_TRUE;
92}
93
94
95PyDoc_STRVAR_shared(_Py_isdigit__doc__,
96"B.isdigit() -> bool\n\
97\n\
98Return True if all characters in B are digits\n\
99and there is at least one character in B, False otherwise.");
100
101PyObject*
102_Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
103{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200104 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000105 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200106 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000107
108 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +0000109 if (len == 1 && Py_ISDIGIT(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000111
112 /* Special case for empty strings */
113 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000114 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000115
116 e = p + len;
117 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000118 if (!Py_ISDIGIT(*p))
119 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000120 }
121 Py_RETURN_TRUE;
122}
123
124
125PyDoc_STRVAR_shared(_Py_islower__doc__,
126"B.islower() -> bool\n\
127\n\
128Return True if all cased characters in B are lowercase and there is\n\
129at least one cased character in B, False otherwise.");
130
131PyObject*
132_Py_bytes_islower(const char *cptr, Py_ssize_t len)
133{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200134 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000135 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200136 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000137 int cased;
138
139 /* Shortcut for single character strings */
140 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 return PyBool_FromLong(Py_ISLOWER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000142
143 /* Special case for empty strings */
144 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000146
147 e = p + len;
148 cased = 0;
149 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 if (Py_ISUPPER(*p))
151 Py_RETURN_FALSE;
152 else if (!cased && Py_ISLOWER(*p))
153 cased = 1;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000154 }
155 return PyBool_FromLong(cased);
156}
157
158
159PyDoc_STRVAR_shared(_Py_isupper__doc__,
160"B.isupper() -> bool\n\
161\n\
162Return True if all cased characters in B are uppercase and there is\n\
163at least one cased character in B, False otherwise.");
164
165PyObject*
166_Py_bytes_isupper(const char *cptr, Py_ssize_t len)
167{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200168 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000169 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200170 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000171 int cased;
172
173 /* Shortcut for single character strings */
174 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return PyBool_FromLong(Py_ISUPPER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000176
177 /* Special case for empty strings */
178 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000179 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000180
181 e = p + len;
182 cased = 0;
183 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 if (Py_ISLOWER(*p))
185 Py_RETURN_FALSE;
186 else if (!cased && Py_ISUPPER(*p))
187 cased = 1;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000188 }
189 return PyBool_FromLong(cased);
190}
191
192
193PyDoc_STRVAR_shared(_Py_istitle__doc__,
194"B.istitle() -> bool\n\
195\n\
196Return True if B is a titlecased string and there is at least one\n\
197character in B, i.e. uppercase characters may only follow uncased\n\
198characters and lowercase characters only cased ones. Return False\n\
199otherwise.");
200
201PyObject*
202_Py_bytes_istitle(const char *cptr, Py_ssize_t len)
203{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200204 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000205 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200206 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000207 int cased, previous_is_cased;
208
209 /* Shortcut for single character strings */
210 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 return PyBool_FromLong(Py_ISUPPER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000212
213 /* Special case for empty strings */
214 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000215 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000216
217 e = p + len;
218 cased = 0;
219 previous_is_cased = 0;
220 for (; p < e; p++) {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200221 const unsigned char ch = *p;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000222
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000223 if (Py_ISUPPER(ch)) {
224 if (previous_is_cased)
225 Py_RETURN_FALSE;
226 previous_is_cased = 1;
227 cased = 1;
228 }
229 else if (Py_ISLOWER(ch)) {
230 if (!previous_is_cased)
231 Py_RETURN_FALSE;
232 previous_is_cased = 1;
233 cased = 1;
234 }
235 else
236 previous_is_cased = 0;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000237 }
238 return PyBool_FromLong(cased);
239}
240
241
242PyDoc_STRVAR_shared(_Py_lower__doc__,
243"B.lower() -> copy of B\n\
244\n\
245Return a copy of B with all ASCII characters converted to lowercase.");
246
247void
248_Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
249{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000250 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000251
Antoine Pitrou9b491922010-08-15 17:38:46 +0000252 for (i = 0; i < len; i++) {
Antoine Pitrou94f6fa62012-01-08 16:22:46 +0100253 result[i] = Py_TOLOWER((unsigned char) cptr[i]);
Antoine Pitrou9b491922010-08-15 17:38:46 +0000254 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000255}
256
257
258PyDoc_STRVAR_shared(_Py_upper__doc__,
259"B.upper() -> copy of B\n\
260\n\
261Return a copy of B with all ASCII characters converted to uppercase.");
262
263void
264_Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
265{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000266 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000267
Antoine Pitrou9b491922010-08-15 17:38:46 +0000268 for (i = 0; i < len; i++) {
Antoine Pitrou94f6fa62012-01-08 16:22:46 +0100269 result[i] = Py_TOUPPER((unsigned char) cptr[i]);
Antoine Pitrou9b491922010-08-15 17:38:46 +0000270 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000271}
272
273
274PyDoc_STRVAR_shared(_Py_title__doc__,
275"B.title() -> copy of B\n\
276\n\
277Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
278characters, all remaining cased characters have lowercase.");
279
280void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200281_Py_bytes_title(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000282{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000283 Py_ssize_t i;
284 int previous_is_cased = 0;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000285
Antoine Pitrou9b491922010-08-15 17:38:46 +0000286 for (i = 0; i < len; i++) {
287 int c = Py_CHARMASK(*s++);
288 if (Py_ISLOWER(c)) {
289 if (!previous_is_cased)
290 c = Py_TOUPPER(c);
291 previous_is_cased = 1;
292 } else if (Py_ISUPPER(c)) {
293 if (previous_is_cased)
294 c = Py_TOLOWER(c);
295 previous_is_cased = 1;
296 } else
297 previous_is_cased = 0;
298 *result++ = c;
299 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000300}
301
302
303PyDoc_STRVAR_shared(_Py_capitalize__doc__,
304"B.capitalize() -> copy of B\n\
305\n\
Senthil Kumarane51ee8a2010-07-05 12:00:56 +0000306Return a copy of B with only its first character capitalized (ASCII)\n\
307and the rest lower-cased.");
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000308
309void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200310_Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000311{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000312 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000313
Antoine Pitrou9b491922010-08-15 17:38:46 +0000314 if (0 < len) {
315 int c = Py_CHARMASK(*s++);
316 if (Py_ISLOWER(c))
317 *result = Py_TOUPPER(c);
318 else
319 *result = c;
320 result++;
321 }
322 for (i = 1; i < len; i++) {
323 int c = Py_CHARMASK(*s++);
324 if (Py_ISUPPER(c))
325 *result = Py_TOLOWER(c);
326 else
327 *result = c;
328 result++;
329 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000330}
331
332
333PyDoc_STRVAR_shared(_Py_swapcase__doc__,
334"B.swapcase() -> copy of B\n\
335\n\
336Return a copy of B with uppercase ASCII characters converted\n\
337to lowercase ASCII and vice versa.");
338
339void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200340_Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000341{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000342 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000343
Antoine Pitrou9b491922010-08-15 17:38:46 +0000344 for (i = 0; i < len; i++) {
345 int c = Py_CHARMASK(*s++);
346 if (Py_ISLOWER(c)) {
347 *result = Py_TOUPPER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 }
Antoine Pitrou9b491922010-08-15 17:38:46 +0000349 else if (Py_ISUPPER(c)) {
350 *result = Py_TOLOWER(c);
351 }
352 else
353 *result = c;
354 result++;
355 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000356}
357
Georg Brandlabc38772009-04-12 15:51:51 +0000358
359PyDoc_STRVAR_shared(_Py_maketrans__doc__,
360"B.maketrans(frm, to) -> translation table\n\
361\n\
Senthil Kumaran84e3ccc2011-06-27 09:06:45 -0700362Return a translation table (a bytes object of length 256) suitable\n\
363for use in the bytes or bytearray translate method where each byte\n\
364in frm is mapped to the byte at the same position in to.\n\
365The bytes objects frm and to must be of the same length.");
Georg Brandlabc38772009-04-12 15:51:51 +0000366
Georg Brandlabc38772009-04-12 15:51:51 +0000367PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200368_Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
Georg Brandlabc38772009-04-12 15:51:51 +0000369{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +0200370 PyObject *res = NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000371 Py_ssize_t i;
372 char *p;
Georg Brandlabc38772009-04-12 15:51:51 +0000373
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200374 if (frm->len != to->len) {
Antoine Pitrou9b491922010-08-15 17:38:46 +0000375 PyErr_Format(PyExc_ValueError,
376 "maketrans arguments must have same length");
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200377 return NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000378 }
379 res = PyBytes_FromStringAndSize(NULL, 256);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200380 if (!res)
381 return NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000382 p = PyBytes_AS_STRING(res);
383 for (i = 0; i < 256; i++)
Antoine Pitrou47019e52010-08-15 17:41:31 +0000384 p[i] = (char) i;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200385 for (i = 0; i < frm->len; i++) {
386 p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
Antoine Pitrou9b491922010-08-15 17:38:46 +0000387 }
Georg Brandlabc38772009-04-12 15:51:51 +0000388
Antoine Pitrou9b491922010-08-15 17:38:46 +0000389 return res;
Georg Brandlabc38772009-04-12 15:51:51 +0000390}
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300391
392#define FASTSEARCH fastsearch
393#define STRINGLIB(F) stringlib_##F
394#define STRINGLIB_CHAR char
395#define STRINGLIB_SIZEOF_CHAR 1
396
397#include "stringlib/fastsearch.h"
398#include "stringlib/count.h"
399#include "stringlib/find.h"
400
401/*
Oren Milman00425102017-03-13 00:37:05 +0200402Wraps stringlib_parse_args_finds() and additionally checks the first
403argument type.
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300404
Oren Milman00425102017-03-13 00:37:05 +0200405In case the first argument is a bytes-like object, sets it to subobj,
406and doesn't touch the byte parameter.
407In case it is an integer in range(0, 256), writes the integer value
408to byte, and sets subobj to NULL.
409
410The other parameters are similar to those of
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300411stringlib_parse_args_finds().
412*/
413
414Py_LOCAL_INLINE(int)
415parse_args_finds_byte(const char *function_name, PyObject *args,
416 PyObject **subobj, char *byte,
417 Py_ssize_t *start, Py_ssize_t *end)
418{
419 PyObject *tmp_subobj;
420 Py_ssize_t ival;
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300421
422 if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
423 start, end))
424 return 0;
425
Oren Milman00425102017-03-13 00:37:05 +0200426 if (PyObject_CheckBuffer(tmp_subobj)) {
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300427 *subobj = tmp_subobj;
428 return 1;
429 }
430
Oren Milman00425102017-03-13 00:37:05 +0200431 if (!PyIndex_Check(tmp_subobj)) {
432 PyErr_Format(PyExc_TypeError,
433 "argument should be integer or bytes-like object, "
434 "not '%.200s'",
435 Py_TYPE(tmp_subobj)->tp_name);
436 return 0;
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300437 }
438
Oren Milman00425102017-03-13 00:37:05 +0200439 ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
440 if (ival == -1 && PyErr_Occurred()) {
441 return 0;
442 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300443 if (ival < 0 || ival > 255) {
444 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
445 return 0;
446 }
447
448 *subobj = NULL;
449 *byte = (char)ival;
450 return 1;
451}
452
/* Helper macro to fix up start/end slice values against a length:
 *   - end greater than len is clamped to len;
 *   - a negative end or start is offset by len and then clamped to 0.
 * start is NOT clamped to len, so callers must cope with start > end.
 *
 * start and end must be modifiable lvalues.  All three arguments are
 * evaluated more than once, so they must be free of side effects.
 *
 * The body is wrapped in do { } while (0) so that the expansion is exactly
 * one statement and stays correct under an unbraced if/else.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
467
468Py_LOCAL_INLINE(Py_ssize_t)
469find_internal(const char *str, Py_ssize_t len,
470 const char *function_name, PyObject *args, int dir)
471{
472 PyObject *subobj;
473 char byte;
474 Py_buffer subbuf;
475 const char *sub;
476 Py_ssize_t sub_len;
477 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
478 Py_ssize_t res;
479
480 if (!parse_args_finds_byte(function_name, args,
481 &subobj, &byte, &start, &end))
482 return -2;
483
484 if (subobj) {
485 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
486 return -2;
487
488 sub = subbuf.buf;
489 sub_len = subbuf.len;
490 }
491 else {
492 sub = &byte;
493 sub_len = 1;
494 }
495
496 ADJUST_INDICES(start, end, len);
497 if (end - start < sub_len)
498 res = -1;
499 else if (sub_len == 1) {
500 if (dir > 0)
501 res = stringlib_find_char(
502 str + start, end - start,
503 *sub);
504 else
505 res = stringlib_rfind_char(
506 str + start, end - start,
507 *sub);
508 if (res >= 0)
509 res += start;
510 }
511 else {
512 if (dir > 0)
513 res = stringlib_find_slice(
514 str, len,
515 sub, sub_len, start, end);
516 else
517 res = stringlib_rfind_slice(
518 str, len,
519 sub, sub_len, start, end);
520 }
521
522 if (subobj)
523 PyBuffer_Release(&subbuf);
524
525 return res;
526}
527
528PyDoc_STRVAR_shared(_Py_find__doc__,
529"B.find(sub[, start[, end]]) -> int\n\
530\n\
531Return the lowest index in B where subsection sub is found,\n\
532such that sub is contained within B[start,end]. Optional\n\
533arguments start and end are interpreted as in slice notation.\n\
534\n\
535Return -1 on failure.");
536
537PyObject *
538_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
539{
540 Py_ssize_t result = find_internal(str, len, "find", args, +1);
541 if (result == -2)
542 return NULL;
543 return PyLong_FromSsize_t(result);
544}
545
546PyDoc_STRVAR_shared(_Py_index__doc__,
547"B.index(sub[, start[, end]]) -> int\n\
548\n\
549Like B.find() but raise ValueError when the subsection is not found.");
550
551PyObject *
552_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
553{
554 Py_ssize_t result = find_internal(str, len, "index", args, +1);
555 if (result == -2)
556 return NULL;
557 if (result == -1) {
558 PyErr_SetString(PyExc_ValueError,
559 "subsection not found");
560 return NULL;
561 }
562 return PyLong_FromSsize_t(result);
563}
564
565PyDoc_STRVAR_shared(_Py_rfind__doc__,
566"B.rfind(sub[, start[, end]]) -> int\n\
567\n\
568Return the highest index in B where subsection sub is found,\n\
569such that sub is contained within B[start,end]. Optional\n\
570arguments start and end are interpreted as in slice notation.\n\
571\n\
572Return -1 on failure.");
573
574PyObject *
575_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
576{
577 Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
578 if (result == -2)
579 return NULL;
580 return PyLong_FromSsize_t(result);
581}
582
583PyDoc_STRVAR_shared(_Py_rindex__doc__,
584"B.rindex(sub[, start[, end]]) -> int\n\
585\n\
586Like B.rfind() but raise ValueError when the subsection is not found.");
587
588PyObject *
589_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
590{
591 Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
592 if (result == -2)
593 return NULL;
594 if (result == -1) {
595 PyErr_SetString(PyExc_ValueError,
596 "subsection not found");
597 return NULL;
598 }
599 return PyLong_FromSsize_t(result);
600}
601
602PyDoc_STRVAR_shared(_Py_count__doc__,
603"B.count(sub[, start[, end]]) -> int\n\
604\n\
605Return the number of non-overlapping occurrences of subsection sub in\n\
606bytes B[start:end]. Optional arguments start and end are interpreted\n\
607as in slice notation.");
608
609PyObject *
610_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
611{
612 PyObject *sub_obj;
613 const char *sub;
614 Py_ssize_t sub_len;
615 char byte;
616 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
617
618 Py_buffer vsub;
619 PyObject *count_obj;
620
621 if (!parse_args_finds_byte("count", args,
622 &sub_obj, &byte, &start, &end))
623 return NULL;
624
625 if (sub_obj) {
626 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
627 return NULL;
628
629 sub = vsub.buf;
630 sub_len = vsub.len;
631 }
632 else {
633 sub = &byte;
634 sub_len = 1;
635 }
636
637 ADJUST_INDICES(start, end, len);
638
639 count_obj = PyLong_FromSsize_t(
640 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
641 );
642
643 if (sub_obj)
644 PyBuffer_Release(&vsub);
645
646 return count_obj;
647}
648
649int
650_Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
651{
Serhiy Storchakaf9efb8b2016-07-10 12:37:30 +0300652 Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300653 if (ival == -1 && PyErr_Occurred()) {
654 Py_buffer varg;
655 Py_ssize_t pos;
656 PyErr_Clear();
657 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
658 return -1;
659 pos = stringlib_find(str, len,
660 varg.buf, varg.len, 0);
661 PyBuffer_Release(&varg);
662 return pos >= 0;
663 }
664 if (ival < 0 || ival >= 256) {
665 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
666 return -1;
667 }
668
669 return memchr(str, (int) ival, len) != NULL;
670}
671
672
673/* Matches the end (direction >= 0) or start (direction < 0) of the buffer
674 * against substr, using the start and end arguments. Returns
675 * -1 on error, 0 if not found and 1 if found.
676 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700677static int
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300678tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
679 Py_ssize_t start, Py_ssize_t end, int direction)
680{
681 Py_buffer sub_view = {NULL, NULL};
682 const char *sub;
683 Py_ssize_t slen;
684
685 if (PyBytes_Check(substr)) {
686 sub = PyBytes_AS_STRING(substr);
687 slen = PyBytes_GET_SIZE(substr);
688 }
689 else {
690 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
691 return -1;
692 sub = sub_view.buf;
693 slen = sub_view.len;
694 }
695
696 ADJUST_INDICES(start, end, len);
697
698 if (direction < 0) {
699 /* startswith */
700 if (start + slen > len)
701 goto notfound;
702 } else {
703 /* endswith */
704 if (end - start < slen || start > len)
705 goto notfound;
706
707 if (end - slen > start)
708 start = end - slen;
709 }
710 if (end - start < slen)
711 goto notfound;
712 if (memcmp(str + start, sub, slen) != 0)
713 goto notfound;
714
715 PyBuffer_Release(&sub_view);
716 return 1;
717
718notfound:
719 PyBuffer_Release(&sub_view);
720 return 0;
721}
722
Benjamin Peterson621b4302016-09-09 13:54:34 -0700723static PyObject *
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300724_Py_bytes_tailmatch(const char *str, Py_ssize_t len,
725 const char *function_name, PyObject *args,
726 int direction)
727{
728 Py_ssize_t start = 0;
729 Py_ssize_t end = PY_SSIZE_T_MAX;
730 PyObject *subobj;
731 int result;
732
733 if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
734 return NULL;
735 if (PyTuple_Check(subobj)) {
736 Py_ssize_t i;
737 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
738 result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
739 start, end, direction);
740 if (result == -1)
741 return NULL;
742 else if (result) {
743 Py_RETURN_TRUE;
744 }
745 }
746 Py_RETURN_FALSE;
747 }
748 result = tailmatch(str, len, subobj, start, end, direction);
749 if (result == -1) {
750 if (PyErr_ExceptionMatches(PyExc_TypeError))
751 PyErr_Format(PyExc_TypeError,
752 "%s first arg must be bytes or a tuple of bytes, "
753 "not %s",
754 function_name, Py_TYPE(subobj)->tp_name);
755 return NULL;
756 }
757 else
758 return PyBool_FromLong(result);
759}
760
761PyDoc_STRVAR_shared(_Py_startswith__doc__,
762"B.startswith(prefix[, start[, end]]) -> bool\n\
763\n\
764Return True if B starts with the specified prefix, False otherwise.\n\
765With optional start, test B beginning at that position.\n\
766With optional end, stop comparing B at that position.\n\
767prefix can also be a tuple of bytes to try.");
768
769PyObject *
770_Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
771{
772 return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
773}
774
775PyDoc_STRVAR_shared(_Py_endswith__doc__,
776"B.endswith(suffix[, start[, end]]) -> bool\n\
777\n\
778Return True if B ends with the specified suffix, False otherwise.\n\
779With optional start, test B beginning at that position.\n\
780With optional end, stop comparing B at that position.\n\
781suffix can also be a tuple of bytes to try.");
782
783PyObject *
784_Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
785{
786 return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
787}
788
789PyDoc_STRVAR_shared(_Py_expandtabs__doc__,
790"B.expandtabs(tabsize=8) -> copy of B\n\
791\n\
792Return a copy of B where all tab characters are expanded using spaces.\n\
793If tabsize is not given, a tab size of 8 characters is assumed.");
794
795PyDoc_STRVAR_shared(_Py_ljust__doc__,
796"B.ljust(width[, fillchar]) -> copy of B\n"
797"\n"
798"Return B left justified in a string of length width. Padding is\n"
799"done using the specified fill character (default is a space).");
800
801PyDoc_STRVAR_shared(_Py_rjust__doc__,
802"B.rjust(width[, fillchar]) -> copy of B\n"
803"\n"
804"Return B right justified in a string of length width. Padding is\n"
805"done using the specified fill character (default is a space)");
806
807PyDoc_STRVAR_shared(_Py_center__doc__,
808"B.center(width[, fillchar]) -> copy of B\n"
809"\n"
810"Return B centered in a string of length width. Padding is\n"
811"done using the specified fill character (default is a space).");
812
813PyDoc_STRVAR_shared(_Py_zfill__doc__,
814"B.zfill(width) -> copy of B\n"
815"\n"
816"Pad a numeric string B with zeros on the left, to fill a field\n"
817"of the specified width. B is never truncated.");
818