blob: bd79773a54eabff285796caedf79954d400c6239 [file] [log] [blame]
Serhiy Storchakaab8bcb32016-07-03 13:26:52 +03001#define PY_SSIZE_T_CLEAN
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002#include "Python.h"
3#include "bytes_methods.h"
4
Gregory P. Smith60d241f2007-10-16 06:31:30 +00005PyDoc_STRVAR_shared(_Py_isspace__doc__,
6"B.isspace() -> bool\n\
7\n\
8Return True if all characters in B are whitespace\n\
9and there is at least one character in B, False otherwise.");
10
11PyObject*
12_Py_bytes_isspace(const char *cptr, Py_ssize_t len)
13{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020014 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020016 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000017
18 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000019 if (len == 1 && Py_ISSPACE(*p))
Gregory P. Smith60d241f2007-10-16 06:31:30 +000020 Py_RETURN_TRUE;
21
22 /* Special case for empty strings */
23 if (len == 0)
24 Py_RETURN_FALSE;
25
26 e = p + len;
27 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000028 if (!Py_ISSPACE(*p))
Gregory P. Smith60d241f2007-10-16 06:31:30 +000029 Py_RETURN_FALSE;
30 }
31 Py_RETURN_TRUE;
32}
33
34
35PyDoc_STRVAR_shared(_Py_isalpha__doc__,
36"B.isalpha() -> bool\n\
37\n\
38Return True if all characters in B are alphabetic\n\
39and there is at least one character in B, False otherwise.");
40
41PyObject*
42_Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
43{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020044 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +000045 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020046 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000047
48 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000049 if (len == 1 && Py_ISALPHA(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000050 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000051
52 /* Special case for empty strings */
53 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000054 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000055
56 e = p + len;
57 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000058 if (!Py_ISALPHA(*p))
59 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000060 }
61 Py_RETURN_TRUE;
62}
63
64
65PyDoc_STRVAR_shared(_Py_isalnum__doc__,
66"B.isalnum() -> bool\n\
67\n\
68Return True if all characters in B are alphanumeric\n\
69and there is at least one character in B, False otherwise.");
70
71PyObject*
72_Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
73{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020074 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +000075 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020076 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000077
78 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000079 if (len == 1 && Py_ISALNUM(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000081
82 /* Special case for empty strings */
83 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000085
86 e = p + len;
87 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000088 if (!Py_ISALNUM(*p))
89 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000090 }
91 Py_RETURN_TRUE;
92}
93
94
95PyDoc_STRVAR_shared(_Py_isdigit__doc__,
96"B.isdigit() -> bool\n\
97\n\
98Return True if all characters in B are digits\n\
99and there is at least one character in B, False otherwise.");
100
101PyObject*
102_Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
103{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200104 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000105 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200106 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000107
108 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +0000109 if (len == 1 && Py_ISDIGIT(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000111
112 /* Special case for empty strings */
113 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000114 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000115
116 e = p + len;
117 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000118 if (!Py_ISDIGIT(*p))
119 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000120 }
121 Py_RETURN_TRUE;
122}
123
124
125PyDoc_STRVAR_shared(_Py_islower__doc__,
126"B.islower() -> bool\n\
127\n\
128Return True if all cased characters in B are lowercase and there is\n\
129at least one cased character in B, False otherwise.");
130
131PyObject*
132_Py_bytes_islower(const char *cptr, Py_ssize_t len)
133{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200134 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000135 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200136 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000137 int cased;
138
139 /* Shortcut for single character strings */
140 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 return PyBool_FromLong(Py_ISLOWER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000142
143 /* Special case for empty strings */
144 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000146
147 e = p + len;
148 cased = 0;
149 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 if (Py_ISUPPER(*p))
151 Py_RETURN_FALSE;
152 else if (!cased && Py_ISLOWER(*p))
153 cased = 1;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000154 }
155 return PyBool_FromLong(cased);
156}
157
158
159PyDoc_STRVAR_shared(_Py_isupper__doc__,
160"B.isupper() -> bool\n\
161\n\
162Return True if all cased characters in B are uppercase and there is\n\
163at least one cased character in B, False otherwise.");
164
165PyObject*
166_Py_bytes_isupper(const char *cptr, Py_ssize_t len)
167{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200168 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000169 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200170 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000171 int cased;
172
173 /* Shortcut for single character strings */
174 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return PyBool_FromLong(Py_ISUPPER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000176
177 /* Special case for empty strings */
178 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000179 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000180
181 e = p + len;
182 cased = 0;
183 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 if (Py_ISLOWER(*p))
185 Py_RETURN_FALSE;
186 else if (!cased && Py_ISUPPER(*p))
187 cased = 1;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000188 }
189 return PyBool_FromLong(cased);
190}
191
192
193PyDoc_STRVAR_shared(_Py_istitle__doc__,
194"B.istitle() -> bool\n\
195\n\
196Return True if B is a titlecased string and there is at least one\n\
197character in B, i.e. uppercase characters may only follow uncased\n\
198characters and lowercase characters only cased ones. Return False\n\
199otherwise.");
200
201PyObject*
202_Py_bytes_istitle(const char *cptr, Py_ssize_t len)
203{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200204 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000205 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200206 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000207 int cased, previous_is_cased;
208
209 /* Shortcut for single character strings */
210 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 return PyBool_FromLong(Py_ISUPPER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000212
213 /* Special case for empty strings */
214 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000215 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000216
217 e = p + len;
218 cased = 0;
219 previous_is_cased = 0;
220 for (; p < e; p++) {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200221 const unsigned char ch = *p;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000222
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000223 if (Py_ISUPPER(ch)) {
224 if (previous_is_cased)
225 Py_RETURN_FALSE;
226 previous_is_cased = 1;
227 cased = 1;
228 }
229 else if (Py_ISLOWER(ch)) {
230 if (!previous_is_cased)
231 Py_RETURN_FALSE;
232 previous_is_cased = 1;
233 cased = 1;
234 }
235 else
236 previous_is_cased = 0;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000237 }
238 return PyBool_FromLong(cased);
239}
240
241
242PyDoc_STRVAR_shared(_Py_lower__doc__,
243"B.lower() -> copy of B\n\
244\n\
245Return a copy of B with all ASCII characters converted to lowercase.");
246
247void
248_Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
249{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000250 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000251
Antoine Pitrou9b491922010-08-15 17:38:46 +0000252 for (i = 0; i < len; i++) {
Antoine Pitrou94f6fa62012-01-08 16:22:46 +0100253 result[i] = Py_TOLOWER((unsigned char) cptr[i]);
Antoine Pitrou9b491922010-08-15 17:38:46 +0000254 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000255}
256
257
258PyDoc_STRVAR_shared(_Py_upper__doc__,
259"B.upper() -> copy of B\n\
260\n\
261Return a copy of B with all ASCII characters converted to uppercase.");
262
263void
264_Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
265{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000266 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000267
Antoine Pitrou9b491922010-08-15 17:38:46 +0000268 for (i = 0; i < len; i++) {
Antoine Pitrou94f6fa62012-01-08 16:22:46 +0100269 result[i] = Py_TOUPPER((unsigned char) cptr[i]);
Antoine Pitrou9b491922010-08-15 17:38:46 +0000270 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000271}
272
273
274PyDoc_STRVAR_shared(_Py_title__doc__,
275"B.title() -> copy of B\n\
276\n\
277Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
278characters, all remaining cased characters have lowercase.");
279
280void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200281_Py_bytes_title(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000282{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000283 Py_ssize_t i;
284 int previous_is_cased = 0;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000285
Antoine Pitrou9b491922010-08-15 17:38:46 +0000286 for (i = 0; i < len; i++) {
287 int c = Py_CHARMASK(*s++);
288 if (Py_ISLOWER(c)) {
289 if (!previous_is_cased)
290 c = Py_TOUPPER(c);
291 previous_is_cased = 1;
292 } else if (Py_ISUPPER(c)) {
293 if (previous_is_cased)
294 c = Py_TOLOWER(c);
295 previous_is_cased = 1;
296 } else
297 previous_is_cased = 0;
298 *result++ = c;
299 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000300}
301
302
303PyDoc_STRVAR_shared(_Py_capitalize__doc__,
304"B.capitalize() -> copy of B\n\
305\n\
Senthil Kumarane51ee8a2010-07-05 12:00:56 +0000306Return a copy of B with only its first character capitalized (ASCII)\n\
307and the rest lower-cased.");
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000308
309void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200310_Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000311{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000312 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000313
Antoine Pitrou9b491922010-08-15 17:38:46 +0000314 if (0 < len) {
315 int c = Py_CHARMASK(*s++);
316 if (Py_ISLOWER(c))
317 *result = Py_TOUPPER(c);
318 else
319 *result = c;
320 result++;
321 }
322 for (i = 1; i < len; i++) {
323 int c = Py_CHARMASK(*s++);
324 if (Py_ISUPPER(c))
325 *result = Py_TOLOWER(c);
326 else
327 *result = c;
328 result++;
329 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000330}
331
332
333PyDoc_STRVAR_shared(_Py_swapcase__doc__,
334"B.swapcase() -> copy of B\n\
335\n\
336Return a copy of B with uppercase ASCII characters converted\n\
337to lowercase ASCII and vice versa.");
338
339void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200340_Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000341{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000342 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000343
Antoine Pitrou9b491922010-08-15 17:38:46 +0000344 for (i = 0; i < len; i++) {
345 int c = Py_CHARMASK(*s++);
346 if (Py_ISLOWER(c)) {
347 *result = Py_TOUPPER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 }
Antoine Pitrou9b491922010-08-15 17:38:46 +0000349 else if (Py_ISUPPER(c)) {
350 *result = Py_TOLOWER(c);
351 }
352 else
353 *result = c;
354 result++;
355 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000356}
357
Georg Brandlabc38772009-04-12 15:51:51 +0000358
359PyDoc_STRVAR_shared(_Py_maketrans__doc__,
360"B.maketrans(frm, to) -> translation table\n\
361\n\
Senthil Kumaran84e3ccc2011-06-27 09:06:45 -0700362Return a translation table (a bytes object of length 256) suitable\n\
363for use in the bytes or bytearray translate method where each byte\n\
364in frm is mapped to the byte at the same position in to.\n\
365The bytes objects frm and to must be of the same length.");
Georg Brandlabc38772009-04-12 15:51:51 +0000366
Georg Brandlabc38772009-04-12 15:51:51 +0000367PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200368_Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
Georg Brandlabc38772009-04-12 15:51:51 +0000369{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +0200370 PyObject *res = NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000371 Py_ssize_t i;
372 char *p;
Georg Brandlabc38772009-04-12 15:51:51 +0000373
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200374 if (frm->len != to->len) {
Antoine Pitrou9b491922010-08-15 17:38:46 +0000375 PyErr_Format(PyExc_ValueError,
376 "maketrans arguments must have same length");
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200377 return NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000378 }
379 res = PyBytes_FromStringAndSize(NULL, 256);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200380 if (!res)
381 return NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000382 p = PyBytes_AS_STRING(res);
383 for (i = 0; i < 256; i++)
Antoine Pitrou47019e52010-08-15 17:41:31 +0000384 p[i] = (char) i;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200385 for (i = 0; i < frm->len; i++) {
386 p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
Antoine Pitrou9b491922010-08-15 17:38:46 +0000387 }
Georg Brandlabc38772009-04-12 15:51:51 +0000388
Antoine Pitrou9b491922010-08-15 17:38:46 +0000389 return res;
Georg Brandlabc38772009-04-12 15:51:51 +0000390}
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300391
392#define FASTSEARCH fastsearch
393#define STRINGLIB(F) stringlib_##F
394#define STRINGLIB_CHAR char
395#define STRINGLIB_SIZEOF_CHAR 1
396
397#include "stringlib/fastsearch.h"
398#include "stringlib/count.h"
399#include "stringlib/find.h"
400
401/*
Oren Milman00425102017-03-13 00:37:05 +0200402Wraps stringlib_parse_args_finds() and additionally checks the first
403argument type.
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300404
Oren Milman00425102017-03-13 00:37:05 +0200405In case the first argument is a bytes-like object, sets it to subobj,
406and doesn't touch the byte parameter.
407In case it is an integer in range(0, 256), writes the integer value
408to byte, and sets subobj to NULL.
409
410The other parameters are similar to those of
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300411stringlib_parse_args_finds().
412*/
413
414Py_LOCAL_INLINE(int)
415parse_args_finds_byte(const char *function_name, PyObject *args,
416 PyObject **subobj, char *byte,
417 Py_ssize_t *start, Py_ssize_t *end)
418{
419 PyObject *tmp_subobj;
420 Py_ssize_t ival;
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300421
422 if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
423 start, end))
424 return 0;
425
Oren Milman00425102017-03-13 00:37:05 +0200426 if (PyObject_CheckBuffer(tmp_subobj)) {
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300427 *subobj = tmp_subobj;
428 return 1;
429 }
430
Oren Milman00425102017-03-13 00:37:05 +0200431 if (!PyIndex_Check(tmp_subobj)) {
432 PyErr_Format(PyExc_TypeError,
433 "argument should be integer or bytes-like object, "
434 "not '%.200s'",
435 Py_TYPE(tmp_subobj)->tp_name);
436 return 0;
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300437 }
438
Oren Milman00425102017-03-13 00:37:05 +0200439 ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
440 if (ival == -1 && PyErr_Occurred()) {
441 return 0;
442 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300443 if (ival < 0 || ival > 255) {
444 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
445 return 0;
446 }
447
448 *subobj = NULL;
449 *byte = (char)ival;
450 return 1;
451}
452
/* helper macro to fixup start/end slice values
 *
 * start and end must be modifiable lvalues; len is only read. Afterwards
 * end lies in [0, len]; start is non-negative but is deliberately NOT
 * clamped to len, so callers must still cope with start > end.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used safely in an unbraced if/else.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
467
468Py_LOCAL_INLINE(Py_ssize_t)
469find_internal(const char *str, Py_ssize_t len,
470 const char *function_name, PyObject *args, int dir)
471{
472 PyObject *subobj;
473 char byte;
474 Py_buffer subbuf;
475 const char *sub;
476 Py_ssize_t sub_len;
477 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
478 Py_ssize_t res;
479
480 if (!parse_args_finds_byte(function_name, args,
481 &subobj, &byte, &start, &end))
482 return -2;
483
484 if (subobj) {
485 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
486 return -2;
487
488 sub = subbuf.buf;
489 sub_len = subbuf.len;
490 }
491 else {
492 sub = &byte;
493 sub_len = 1;
494 }
495
496 ADJUST_INDICES(start, end, len);
497 if (end - start < sub_len)
498 res = -1;
499 else if (sub_len == 1) {
500 if (dir > 0)
501 res = stringlib_find_char(
502 str + start, end - start,
503 *sub);
504 else
505 res = stringlib_rfind_char(
506 str + start, end - start,
507 *sub);
508 if (res >= 0)
509 res += start;
510 }
511 else {
512 if (dir > 0)
513 res = stringlib_find_slice(
514 str, len,
515 sub, sub_len, start, end);
516 else
517 res = stringlib_rfind_slice(
518 str, len,
519 sub, sub_len, start, end);
520 }
521
522 if (subobj)
523 PyBuffer_Release(&subbuf);
524
525 return res;
526}
527
528PyDoc_STRVAR_shared(_Py_find__doc__,
529"B.find(sub[, start[, end]]) -> int\n\
530\n\
531Return the lowest index in B where subsection sub is found,\n\
532such that sub is contained within B[start,end]. Optional\n\
533arguments start and end are interpreted as in slice notation.\n\
534\n\
535Return -1 on failure.");
536
537PyObject *
538_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
539{
540 Py_ssize_t result = find_internal(str, len, "find", args, +1);
541 if (result == -2)
542 return NULL;
543 return PyLong_FromSsize_t(result);
544}
545
546PyDoc_STRVAR_shared(_Py_index__doc__,
547"B.index(sub[, start[, end]]) -> int\n\
548\n\
Lisa Roach43ba8862017-04-04 22:36:22 -0700549Return the lowest index in B where subsection sub is found,\n\
550such that sub is contained within B[start,end]. Optional\n\
551arguments start and end are interpreted as in slice notation.\n\
552\n\
553Raises ValueError when the subsection is not found.");
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300554
555PyObject *
556_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
557{
558 Py_ssize_t result = find_internal(str, len, "index", args, +1);
559 if (result == -2)
560 return NULL;
561 if (result == -1) {
562 PyErr_SetString(PyExc_ValueError,
563 "subsection not found");
564 return NULL;
565 }
566 return PyLong_FromSsize_t(result);
567}
568
569PyDoc_STRVAR_shared(_Py_rfind__doc__,
570"B.rfind(sub[, start[, end]]) -> int\n\
571\n\
572Return the highest index in B where subsection sub is found,\n\
573such that sub is contained within B[start,end]. Optional\n\
574arguments start and end are interpreted as in slice notation.\n\
575\n\
576Return -1 on failure.");
577
578PyObject *
579_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
580{
581 Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
582 if (result == -2)
583 return NULL;
584 return PyLong_FromSsize_t(result);
585}
586
587PyDoc_STRVAR_shared(_Py_rindex__doc__,
588"B.rindex(sub[, start[, end]]) -> int\n\
589\n\
Lisa Roach43ba8862017-04-04 22:36:22 -0700590Return the highest index in B where subsection sub is found,\n\
591such that sub is contained within B[start,end]. Optional\n\
592arguments start and end are interpreted as in slice notation.\n\
593\n\
594Raise ValueError when the subsection is not found.");
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300595
596PyObject *
597_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
598{
599 Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
600 if (result == -2)
601 return NULL;
602 if (result == -1) {
603 PyErr_SetString(PyExc_ValueError,
604 "subsection not found");
605 return NULL;
606 }
607 return PyLong_FromSsize_t(result);
608}
609
610PyDoc_STRVAR_shared(_Py_count__doc__,
611"B.count(sub[, start[, end]]) -> int\n\
612\n\
613Return the number of non-overlapping occurrences of subsection sub in\n\
614bytes B[start:end]. Optional arguments start and end are interpreted\n\
615as in slice notation.");
616
617PyObject *
618_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
619{
620 PyObject *sub_obj;
621 const char *sub;
622 Py_ssize_t sub_len;
623 char byte;
624 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
625
626 Py_buffer vsub;
627 PyObject *count_obj;
628
629 if (!parse_args_finds_byte("count", args,
630 &sub_obj, &byte, &start, &end))
631 return NULL;
632
633 if (sub_obj) {
634 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
635 return NULL;
636
637 sub = vsub.buf;
638 sub_len = vsub.len;
639 }
640 else {
641 sub = &byte;
642 sub_len = 1;
643 }
644
645 ADJUST_INDICES(start, end, len);
646
647 count_obj = PyLong_FromSsize_t(
648 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
649 );
650
651 if (sub_obj)
652 PyBuffer_Release(&vsub);
653
654 return count_obj;
655}
656
657int
658_Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
659{
Serhiy Storchakaf9efb8b2016-07-10 12:37:30 +0300660 Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300661 if (ival == -1 && PyErr_Occurred()) {
662 Py_buffer varg;
663 Py_ssize_t pos;
664 PyErr_Clear();
665 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
666 return -1;
667 pos = stringlib_find(str, len,
668 varg.buf, varg.len, 0);
669 PyBuffer_Release(&varg);
670 return pos >= 0;
671 }
672 if (ival < 0 || ival >= 256) {
673 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
674 return -1;
675 }
676
677 return memchr(str, (int) ival, len) != NULL;
678}
679
680
681/* Matches the end (direction >= 0) or start (direction < 0) of the buffer
682 * against substr, using the start and end arguments. Returns
683 * -1 on error, 0 if not found and 1 if found.
684 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700685static int
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300686tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
687 Py_ssize_t start, Py_ssize_t end, int direction)
688{
689 Py_buffer sub_view = {NULL, NULL};
690 const char *sub;
691 Py_ssize_t slen;
692
693 if (PyBytes_Check(substr)) {
694 sub = PyBytes_AS_STRING(substr);
695 slen = PyBytes_GET_SIZE(substr);
696 }
697 else {
698 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
699 return -1;
700 sub = sub_view.buf;
701 slen = sub_view.len;
702 }
703
704 ADJUST_INDICES(start, end, len);
705
706 if (direction < 0) {
707 /* startswith */
708 if (start + slen > len)
709 goto notfound;
710 } else {
711 /* endswith */
712 if (end - start < slen || start > len)
713 goto notfound;
714
715 if (end - slen > start)
716 start = end - slen;
717 }
718 if (end - start < slen)
719 goto notfound;
720 if (memcmp(str + start, sub, slen) != 0)
721 goto notfound;
722
723 PyBuffer_Release(&sub_view);
724 return 1;
725
726notfound:
727 PyBuffer_Release(&sub_view);
728 return 0;
729}
730
Benjamin Peterson621b4302016-09-09 13:54:34 -0700731static PyObject *
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300732_Py_bytes_tailmatch(const char *str, Py_ssize_t len,
733 const char *function_name, PyObject *args,
734 int direction)
735{
736 Py_ssize_t start = 0;
737 Py_ssize_t end = PY_SSIZE_T_MAX;
738 PyObject *subobj;
739 int result;
740
741 if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
742 return NULL;
743 if (PyTuple_Check(subobj)) {
744 Py_ssize_t i;
745 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
746 result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
747 start, end, direction);
748 if (result == -1)
749 return NULL;
750 else if (result) {
751 Py_RETURN_TRUE;
752 }
753 }
754 Py_RETURN_FALSE;
755 }
756 result = tailmatch(str, len, subobj, start, end, direction);
757 if (result == -1) {
758 if (PyErr_ExceptionMatches(PyExc_TypeError))
759 PyErr_Format(PyExc_TypeError,
760 "%s first arg must be bytes or a tuple of bytes, "
761 "not %s",
762 function_name, Py_TYPE(subobj)->tp_name);
763 return NULL;
764 }
765 else
766 return PyBool_FromLong(result);
767}
768
769PyDoc_STRVAR_shared(_Py_startswith__doc__,
770"B.startswith(prefix[, start[, end]]) -> bool\n\
771\n\
772Return True if B starts with the specified prefix, False otherwise.\n\
773With optional start, test B beginning at that position.\n\
774With optional end, stop comparing B at that position.\n\
775prefix can also be a tuple of bytes to try.");
776
777PyObject *
778_Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
779{
780 return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
781}
782
783PyDoc_STRVAR_shared(_Py_endswith__doc__,
784"B.endswith(suffix[, start[, end]]) -> bool\n\
785\n\
786Return True if B ends with the specified suffix, False otherwise.\n\
787With optional start, test B beginning at that position.\n\
788With optional end, stop comparing B at that position.\n\
789suffix can also be a tuple of bytes to try.");
790
791PyObject *
792_Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
793{
794 return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
795}
796
797PyDoc_STRVAR_shared(_Py_expandtabs__doc__,
798"B.expandtabs(tabsize=8) -> copy of B\n\
799\n\
800Return a copy of B where all tab characters are expanded using spaces.\n\
801If tabsize is not given, a tab size of 8 characters is assumed.");
802
803PyDoc_STRVAR_shared(_Py_ljust__doc__,
804"B.ljust(width[, fillchar]) -> copy of B\n"
805"\n"
806"Return B left justified in a string of length width. Padding is\n"
807"done using the specified fill character (default is a space).");
808
809PyDoc_STRVAR_shared(_Py_rjust__doc__,
810"B.rjust(width[, fillchar]) -> copy of B\n"
811"\n"
812"Return B right justified in a string of length width. Padding is\n"
813"done using the specified fill character (default is a space)");
814
815PyDoc_STRVAR_shared(_Py_center__doc__,
816"B.center(width[, fillchar]) -> copy of B\n"
817"\n"
818"Return B centered in a string of length width. Padding is\n"
819"done using the specified fill character (default is a space).");
820
821PyDoc_STRVAR_shared(_Py_zfill__doc__,
822"B.zfill(width) -> copy of B\n"
823"\n"
824"Pad a numeric string B with zeros on the left, to fill a field\n"
825"of the specified width. B is never truncated.");