blob: 1512086e6131fc7dfaecbebc0c1a9a36441b9865 [file] [log] [blame]
Serhiy Storchakaab8bcb32016-07-03 13:26:52 +03001#define PY_SSIZE_T_CLEAN
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002#include "Python.h"
Victor Stinnera15e2602020-04-08 02:01:56 +02003#include "pycore_abstract.h" // _PyIndex_Check()
Victor Stinner45876a92020-02-12 22:32:34 +01004#include "pycore_bytes_methods.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00005
Gregory P. Smith60d241f2007-10-16 06:31:30 +00006PyDoc_STRVAR_shared(_Py_isspace__doc__,
7"B.isspace() -> bool\n\
8\n\
9Return True if all characters in B are whitespace\n\
10and there is at least one character in B, False otherwise.");
11
12PyObject*
13_Py_bytes_isspace(const char *cptr, Py_ssize_t len)
14{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020015 const unsigned char *p
Andy Lestere6be9b52020-02-11 20:28:35 -060016 = (const unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020017 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000018
19 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000020 if (len == 1 && Py_ISSPACE(*p))
Gregory P. Smith60d241f2007-10-16 06:31:30 +000021 Py_RETURN_TRUE;
22
23 /* Special case for empty strings */
24 if (len == 0)
25 Py_RETURN_FALSE;
26
27 e = p + len;
28 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000029 if (!Py_ISSPACE(*p))
Gregory P. Smith60d241f2007-10-16 06:31:30 +000030 Py_RETURN_FALSE;
31 }
32 Py_RETURN_TRUE;
33}
34
35
36PyDoc_STRVAR_shared(_Py_isalpha__doc__,
37"B.isalpha() -> bool\n\
38\n\
39Return True if all characters in B are alphabetic\n\
40and there is at least one character in B, False otherwise.");
41
42PyObject*
43_Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
44{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020045 const unsigned char *p
Andy Lestere6be9b52020-02-11 20:28:35 -060046 = (const unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020047 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000048
49 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000050 if (len == 1 && Py_ISALPHA(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000051 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000052
53 /* Special case for empty strings */
54 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000055 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000056
57 e = p + len;
58 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000059 if (!Py_ISALPHA(*p))
60 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000061 }
62 Py_RETURN_TRUE;
63}
64
65
66PyDoc_STRVAR_shared(_Py_isalnum__doc__,
67"B.isalnum() -> bool\n\
68\n\
69Return True if all characters in B are alphanumeric\n\
70and there is at least one character in B, False otherwise.");
71
72PyObject*
73_Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
74{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020075 const unsigned char *p
Andy Lestere6be9b52020-02-11 20:28:35 -060076 = (const unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020077 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000078
79 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000080 if (len == 1 && Py_ISALNUM(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000082
83 /* Special case for empty strings */
84 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000086
87 e = p + len;
88 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 if (!Py_ISALNUM(*p))
90 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000091 }
92 Py_RETURN_TRUE;
93}
94
95
INADA Naokia49ac992018-01-27 14:06:21 +090096PyDoc_STRVAR_shared(_Py_isascii__doc__,
97"B.isascii() -> bool\n\
98\n\
99Return True if B is empty or all characters in B are ASCII,\n\
100False otherwise.");
101
INADA Naokibea57062018-01-28 09:59:12 +0900102// Optimization is copied from ascii_decode in unicodeobject.c
Ma Lina0c603c2020-10-18 22:48:38 +0800103/* Mask to quickly check whether a C 'size_t' contains a
INADA Naokibea57062018-01-28 09:59:12 +0900104 non-ASCII, UTF8-encoded char. */
Ma Lina0c603c2020-10-18 22:48:38 +0800105#if (SIZEOF_SIZE_T == 8)
106# define ASCII_CHAR_MASK 0x8080808080808080ULL
107#elif (SIZEOF_SIZE_T == 4)
108# define ASCII_CHAR_MASK 0x80808080U
INADA Naokibea57062018-01-28 09:59:12 +0900109#else
Ma Lina0c603c2020-10-18 22:48:38 +0800110# error C 'size_t' size should be either 4 or 8!
INADA Naokibea57062018-01-28 09:59:12 +0900111#endif
112
INADA Naokia49ac992018-01-27 14:06:21 +0900113PyObject*
114_Py_bytes_isascii(const char *cptr, Py_ssize_t len)
115{
INADA Naokibea57062018-01-28 09:59:12 +0900116 const char *p = cptr;
117 const char *end = p + len;
Ma Lina0c603c2020-10-18 22:48:38 +0800118 const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
INADA Naokibea57062018-01-28 09:59:12 +0900119
120 while (p < end) {
121 /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
122 for an explanation. */
Ma Lina0c603c2020-10-18 22:48:38 +0800123 if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
INADA Naokibea57062018-01-28 09:59:12 +0900124 /* Help allocation */
125 const char *_p = p;
126 while (_p < aligned_end) {
Ma Lina0c603c2020-10-18 22:48:38 +0800127 size_t value = *(const size_t *) _p;
INADA Naokibea57062018-01-28 09:59:12 +0900128 if (value & ASCII_CHAR_MASK) {
129 Py_RETURN_FALSE;
130 }
Ma Lina0c603c2020-10-18 22:48:38 +0800131 _p += SIZEOF_SIZE_T;
INADA Naokibea57062018-01-28 09:59:12 +0900132 }
133 p = _p;
134 if (_p == end)
135 break;
136 }
137 if ((unsigned char)*p & 0x80) {
INADA Naokia49ac992018-01-27 14:06:21 +0900138 Py_RETURN_FALSE;
139 }
INADA Naokibea57062018-01-28 09:59:12 +0900140 p++;
INADA Naokia49ac992018-01-27 14:06:21 +0900141 }
142 Py_RETURN_TRUE;
143}
144
INADA Naokibea57062018-01-28 09:59:12 +0900145#undef ASCII_CHAR_MASK
146
INADA Naokia49ac992018-01-27 14:06:21 +0900147
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000148PyDoc_STRVAR_shared(_Py_isdigit__doc__,
149"B.isdigit() -> bool\n\
150\n\
151Return True if all characters in B are digits\n\
152and there is at least one character in B, False otherwise.");
153
154PyObject*
155_Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
156{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200157 const unsigned char *p
Andy Lestere6be9b52020-02-11 20:28:35 -0600158 = (const unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200159 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000160
161 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +0000162 if (len == 1 && Py_ISDIGIT(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000164
165 /* Special case for empty strings */
166 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000168
169 e = p + len;
170 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 if (!Py_ISDIGIT(*p))
172 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000173 }
174 Py_RETURN_TRUE;
175}
176
177
178PyDoc_STRVAR_shared(_Py_islower__doc__,
179"B.islower() -> bool\n\
180\n\
181Return True if all cased characters in B are lowercase and there is\n\
182at least one cased character in B, False otherwise.");
183
184PyObject*
185_Py_bytes_islower(const char *cptr, Py_ssize_t len)
186{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200187 const unsigned char *p
Andy Lestere6be9b52020-02-11 20:28:35 -0600188 = (const unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200189 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000190 int cased;
191
192 /* Shortcut for single character strings */
193 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000194 return PyBool_FromLong(Py_ISLOWER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000195
196 /* Special case for empty strings */
197 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000199
200 e = p + len;
201 cased = 0;
202 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 if (Py_ISUPPER(*p))
204 Py_RETURN_FALSE;
205 else if (!cased && Py_ISLOWER(*p))
206 cased = 1;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000207 }
208 return PyBool_FromLong(cased);
209}
210
211
212PyDoc_STRVAR_shared(_Py_isupper__doc__,
213"B.isupper() -> bool\n\
214\n\
215Return True if all cased characters in B are uppercase and there is\n\
216at least one cased character in B, False otherwise.");
217
218PyObject*
219_Py_bytes_isupper(const char *cptr, Py_ssize_t len)
220{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200221 const unsigned char *p
Andy Lestere6be9b52020-02-11 20:28:35 -0600222 = (const unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200223 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000224 int cased;
225
226 /* Shortcut for single character strings */
227 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 return PyBool_FromLong(Py_ISUPPER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000229
230 /* Special case for empty strings */
231 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000232 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000233
234 e = p + len;
235 cased = 0;
236 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 if (Py_ISLOWER(*p))
238 Py_RETURN_FALSE;
239 else if (!cased && Py_ISUPPER(*p))
240 cased = 1;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000241 }
242 return PyBool_FromLong(cased);
243}
244
245
246PyDoc_STRVAR_shared(_Py_istitle__doc__,
247"B.istitle() -> bool\n\
248\n\
249Return True if B is a titlecased string and there is at least one\n\
250character in B, i.e. uppercase characters may only follow uncased\n\
251characters and lowercase characters only cased ones. Return False\n\
252otherwise.");
253
254PyObject*
255_Py_bytes_istitle(const char *cptr, Py_ssize_t len)
256{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200257 const unsigned char *p
Andy Lestere6be9b52020-02-11 20:28:35 -0600258 = (const unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200259 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000260 int cased, previous_is_cased;
261
262 /* Shortcut for single character strings */
263 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 return PyBool_FromLong(Py_ISUPPER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000265
266 /* Special case for empty strings */
267 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000269
270 e = p + len;
271 cased = 0;
272 previous_is_cased = 0;
273 for (; p < e; p++) {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200274 const unsigned char ch = *p;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000275
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000276 if (Py_ISUPPER(ch)) {
277 if (previous_is_cased)
278 Py_RETURN_FALSE;
279 previous_is_cased = 1;
280 cased = 1;
281 }
282 else if (Py_ISLOWER(ch)) {
283 if (!previous_is_cased)
284 Py_RETURN_FALSE;
285 previous_is_cased = 1;
286 cased = 1;
287 }
288 else
289 previous_is_cased = 0;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000290 }
291 return PyBool_FromLong(cased);
292}
293
294
295PyDoc_STRVAR_shared(_Py_lower__doc__,
296"B.lower() -> copy of B\n\
297\n\
298Return a copy of B with all ASCII characters converted to lowercase.");
299
300void
301_Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
302{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000303 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000304
Antoine Pitrou9b491922010-08-15 17:38:46 +0000305 for (i = 0; i < len; i++) {
Antoine Pitrou94f6fa62012-01-08 16:22:46 +0100306 result[i] = Py_TOLOWER((unsigned char) cptr[i]);
Antoine Pitrou9b491922010-08-15 17:38:46 +0000307 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000308}
309
310
311PyDoc_STRVAR_shared(_Py_upper__doc__,
312"B.upper() -> copy of B\n\
313\n\
314Return a copy of B with all ASCII characters converted to uppercase.");
315
316void
317_Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
318{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000319 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000320
Antoine Pitrou9b491922010-08-15 17:38:46 +0000321 for (i = 0; i < len; i++) {
Antoine Pitrou94f6fa62012-01-08 16:22:46 +0100322 result[i] = Py_TOUPPER((unsigned char) cptr[i]);
Antoine Pitrou9b491922010-08-15 17:38:46 +0000323 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000324}
325
326
327PyDoc_STRVAR_shared(_Py_title__doc__,
328"B.title() -> copy of B\n\
329\n\
330Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
331characters, all remaining cased characters have lowercase.");
332
333void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200334_Py_bytes_title(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000335{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000336 Py_ssize_t i;
337 int previous_is_cased = 0;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000338
Antoine Pitrou9b491922010-08-15 17:38:46 +0000339 for (i = 0; i < len; i++) {
340 int c = Py_CHARMASK(*s++);
341 if (Py_ISLOWER(c)) {
342 if (!previous_is_cased)
343 c = Py_TOUPPER(c);
344 previous_is_cased = 1;
345 } else if (Py_ISUPPER(c)) {
346 if (previous_is_cased)
347 c = Py_TOLOWER(c);
348 previous_is_cased = 1;
349 } else
350 previous_is_cased = 0;
351 *result++ = c;
352 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000353}
354
355
356PyDoc_STRVAR_shared(_Py_capitalize__doc__,
357"B.capitalize() -> copy of B\n\
358\n\
Senthil Kumarane51ee8a2010-07-05 12:00:56 +0000359Return a copy of B with only its first character capitalized (ASCII)\n\
360and the rest lower-cased.");
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000361
362void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200363_Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000364{
Sergey Fedoseev593bb302018-09-07 09:54:49 +0500365 if (len > 0) {
366 *result = Py_TOUPPER(*s);
367 _Py_bytes_lower(result + 1, s + 1, len - 1);
Antoine Pitrou9b491922010-08-15 17:38:46 +0000368 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000369}
370
371
372PyDoc_STRVAR_shared(_Py_swapcase__doc__,
373"B.swapcase() -> copy of B\n\
374\n\
375Return a copy of B with uppercase ASCII characters converted\n\
376to lowercase ASCII and vice versa.");
377
378void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200379_Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000380{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000381 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000382
Antoine Pitrou9b491922010-08-15 17:38:46 +0000383 for (i = 0; i < len; i++) {
384 int c = Py_CHARMASK(*s++);
385 if (Py_ISLOWER(c)) {
386 *result = Py_TOUPPER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 }
Antoine Pitrou9b491922010-08-15 17:38:46 +0000388 else if (Py_ISUPPER(c)) {
389 *result = Py_TOLOWER(c);
390 }
391 else
392 *result = c;
393 result++;
394 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000395}
396
Georg Brandlabc38772009-04-12 15:51:51 +0000397
398PyDoc_STRVAR_shared(_Py_maketrans__doc__,
399"B.maketrans(frm, to) -> translation table\n\
400\n\
Senthil Kumaran84e3ccc2011-06-27 09:06:45 -0700401Return a translation table (a bytes object of length 256) suitable\n\
402for use in the bytes or bytearray translate method where each byte\n\
403in frm is mapped to the byte at the same position in to.\n\
404The bytes objects frm and to must be of the same length.");
Georg Brandlabc38772009-04-12 15:51:51 +0000405
Georg Brandlabc38772009-04-12 15:51:51 +0000406PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200407_Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
Georg Brandlabc38772009-04-12 15:51:51 +0000408{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +0200409 PyObject *res = NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000410 Py_ssize_t i;
411 char *p;
Georg Brandlabc38772009-04-12 15:51:51 +0000412
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200413 if (frm->len != to->len) {
Antoine Pitrou9b491922010-08-15 17:38:46 +0000414 PyErr_Format(PyExc_ValueError,
415 "maketrans arguments must have same length");
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200416 return NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000417 }
418 res = PyBytes_FromStringAndSize(NULL, 256);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200419 if (!res)
420 return NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000421 p = PyBytes_AS_STRING(res);
422 for (i = 0; i < 256; i++)
Antoine Pitrou47019e52010-08-15 17:41:31 +0000423 p[i] = (char) i;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200424 for (i = 0; i < frm->len; i++) {
425 p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
Antoine Pitrou9b491922010-08-15 17:38:46 +0000426 }
Georg Brandlabc38772009-04-12 15:51:51 +0000427
Antoine Pitrou9b491922010-08-15 17:38:46 +0000428 return res;
Georg Brandlabc38772009-04-12 15:51:51 +0000429}
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300430
431#define FASTSEARCH fastsearch
432#define STRINGLIB(F) stringlib_##F
433#define STRINGLIB_CHAR char
434#define STRINGLIB_SIZEOF_CHAR 1
435
436#include "stringlib/fastsearch.h"
437#include "stringlib/count.h"
438#include "stringlib/find.h"
439
440/*
Oren Milman00425102017-03-13 00:37:05 +0200441Wraps stringlib_parse_args_finds() and additionally checks the first
442argument type.
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300443
Oren Milman00425102017-03-13 00:37:05 +0200444In case the first argument is a bytes-like object, sets it to subobj,
445and doesn't touch the byte parameter.
446In case it is an integer in range(0, 256), writes the integer value
447to byte, and sets subobj to NULL.
448
449The other parameters are similar to those of
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300450stringlib_parse_args_finds().
451*/
452
453Py_LOCAL_INLINE(int)
454parse_args_finds_byte(const char *function_name, PyObject *args,
455 PyObject **subobj, char *byte,
456 Py_ssize_t *start, Py_ssize_t *end)
457{
458 PyObject *tmp_subobj;
459 Py_ssize_t ival;
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300460
461 if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
462 start, end))
463 return 0;
464
Oren Milman00425102017-03-13 00:37:05 +0200465 if (PyObject_CheckBuffer(tmp_subobj)) {
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300466 *subobj = tmp_subobj;
467 return 1;
468 }
469
Victor Stinnera15e2602020-04-08 02:01:56 +0200470 if (!_PyIndex_Check(tmp_subobj)) {
Oren Milman00425102017-03-13 00:37:05 +0200471 PyErr_Format(PyExc_TypeError,
472 "argument should be integer or bytes-like object, "
473 "not '%.200s'",
474 Py_TYPE(tmp_subobj)->tp_name);
475 return 0;
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300476 }
477
Oren Milman00425102017-03-13 00:37:05 +0200478 ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
479 if (ival == -1 && PyErr_Occurred()) {
480 return 0;
481 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300482 if (ival < 0 || ival > 255) {
483 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
484 return 0;
485 }
486
487 *subobj = NULL;
488 *byte = (char)ival;
489 return 1;
490}
491
/* Helper macro to fix up start/end slice values in place.
 *
 *  - `end` greater than `len` is clamped to `len`;
 *  - a negative `end` or `start` has `len` added to it and, if still
 *    negative, is clamped to 0.
 *
 * `start` and `end` must be modifiable lvalues; every argument is
 * evaluated more than once, so pass plain variables only.
 *
 * The body is wrapped in do { ... } while (0) and every argument is
 * parenthesized so that the macro expands to exactly one statement and
 * can safely be used as the sole body of an unbraced if/else.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
506
507Py_LOCAL_INLINE(Py_ssize_t)
508find_internal(const char *str, Py_ssize_t len,
509 const char *function_name, PyObject *args, int dir)
510{
511 PyObject *subobj;
512 char byte;
513 Py_buffer subbuf;
514 const char *sub;
515 Py_ssize_t sub_len;
516 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
517 Py_ssize_t res;
518
519 if (!parse_args_finds_byte(function_name, args,
520 &subobj, &byte, &start, &end))
521 return -2;
522
523 if (subobj) {
524 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
525 return -2;
526
527 sub = subbuf.buf;
528 sub_len = subbuf.len;
529 }
530 else {
531 sub = &byte;
532 sub_len = 1;
533 }
534
535 ADJUST_INDICES(start, end, len);
536 if (end - start < sub_len)
537 res = -1;
538 else if (sub_len == 1) {
539 if (dir > 0)
540 res = stringlib_find_char(
541 str + start, end - start,
542 *sub);
543 else
544 res = stringlib_rfind_char(
545 str + start, end - start,
546 *sub);
547 if (res >= 0)
548 res += start;
549 }
550 else {
551 if (dir > 0)
552 res = stringlib_find_slice(
553 str, len,
554 sub, sub_len, start, end);
555 else
556 res = stringlib_rfind_slice(
557 str, len,
558 sub, sub_len, start, end);
559 }
560
561 if (subobj)
562 PyBuffer_Release(&subbuf);
563
564 return res;
565}
566
567PyDoc_STRVAR_shared(_Py_find__doc__,
568"B.find(sub[, start[, end]]) -> int\n\
569\n\
570Return the lowest index in B where subsection sub is found,\n\
571such that sub is contained within B[start,end]. Optional\n\
572arguments start and end are interpreted as in slice notation.\n\
573\n\
574Return -1 on failure.");
575
576PyObject *
577_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
578{
579 Py_ssize_t result = find_internal(str, len, "find", args, +1);
580 if (result == -2)
581 return NULL;
582 return PyLong_FromSsize_t(result);
583}
584
585PyDoc_STRVAR_shared(_Py_index__doc__,
586"B.index(sub[, start[, end]]) -> int\n\
587\n\
Lisa Roach43ba8862017-04-04 22:36:22 -0700588Return the lowest index in B where subsection sub is found,\n\
589such that sub is contained within B[start,end]. Optional\n\
590arguments start and end are interpreted as in slice notation.\n\
591\n\
592Raises ValueError when the subsection is not found.");
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300593
594PyObject *
595_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
596{
597 Py_ssize_t result = find_internal(str, len, "index", args, +1);
598 if (result == -2)
599 return NULL;
600 if (result == -1) {
601 PyErr_SetString(PyExc_ValueError,
602 "subsection not found");
603 return NULL;
604 }
605 return PyLong_FromSsize_t(result);
606}
607
608PyDoc_STRVAR_shared(_Py_rfind__doc__,
609"B.rfind(sub[, start[, end]]) -> int\n\
610\n\
611Return the highest index in B where subsection sub is found,\n\
612such that sub is contained within B[start,end]. Optional\n\
613arguments start and end are interpreted as in slice notation.\n\
614\n\
615Return -1 on failure.");
616
617PyObject *
618_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
619{
620 Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
621 if (result == -2)
622 return NULL;
623 return PyLong_FromSsize_t(result);
624}
625
626PyDoc_STRVAR_shared(_Py_rindex__doc__,
627"B.rindex(sub[, start[, end]]) -> int\n\
628\n\
Lisa Roach43ba8862017-04-04 22:36:22 -0700629Return the highest index in B where subsection sub is found,\n\
630such that sub is contained within B[start,end]. Optional\n\
631arguments start and end are interpreted as in slice notation.\n\
632\n\
633Raise ValueError when the subsection is not found.");
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300634
635PyObject *
636_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
637{
638 Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
639 if (result == -2)
640 return NULL;
641 if (result == -1) {
642 PyErr_SetString(PyExc_ValueError,
643 "subsection not found");
644 return NULL;
645 }
646 return PyLong_FromSsize_t(result);
647}
648
649PyDoc_STRVAR_shared(_Py_count__doc__,
650"B.count(sub[, start[, end]]) -> int\n\
651\n\
652Return the number of non-overlapping occurrences of subsection sub in\n\
653bytes B[start:end]. Optional arguments start and end are interpreted\n\
654as in slice notation.");
655
656PyObject *
657_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
658{
659 PyObject *sub_obj;
660 const char *sub;
661 Py_ssize_t sub_len;
662 char byte;
663 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
664
665 Py_buffer vsub;
666 PyObject *count_obj;
667
668 if (!parse_args_finds_byte("count", args,
669 &sub_obj, &byte, &start, &end))
670 return NULL;
671
672 if (sub_obj) {
673 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
674 return NULL;
675
676 sub = vsub.buf;
677 sub_len = vsub.len;
678 }
679 else {
680 sub = &byte;
681 sub_len = 1;
682 }
683
684 ADJUST_INDICES(start, end, len);
685
686 count_obj = PyLong_FromSsize_t(
687 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
688 );
689
690 if (sub_obj)
691 PyBuffer_Release(&vsub);
692
693 return count_obj;
694}
695
696int
697_Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
698{
Serhiy Storchakaf9efb8b2016-07-10 12:37:30 +0300699 Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300700 if (ival == -1 && PyErr_Occurred()) {
701 Py_buffer varg;
702 Py_ssize_t pos;
703 PyErr_Clear();
704 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
705 return -1;
706 pos = stringlib_find(str, len,
707 varg.buf, varg.len, 0);
708 PyBuffer_Release(&varg);
709 return pos >= 0;
710 }
711 if (ival < 0 || ival >= 256) {
712 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
713 return -1;
714 }
715
716 return memchr(str, (int) ival, len) != NULL;
717}
718
719
720/* Matches the end (direction >= 0) or start (direction < 0) of the buffer
721 * against substr, using the start and end arguments. Returns
722 * -1 on error, 0 if not found and 1 if found.
723 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700724static int
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300725tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
726 Py_ssize_t start, Py_ssize_t end, int direction)
727{
728 Py_buffer sub_view = {NULL, NULL};
729 const char *sub;
730 Py_ssize_t slen;
731
732 if (PyBytes_Check(substr)) {
733 sub = PyBytes_AS_STRING(substr);
734 slen = PyBytes_GET_SIZE(substr);
735 }
736 else {
737 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
738 return -1;
739 sub = sub_view.buf;
740 slen = sub_view.len;
741 }
742
743 ADJUST_INDICES(start, end, len);
744
745 if (direction < 0) {
746 /* startswith */
Hai Shi24ddd9c2019-10-06 20:17:18 +0800747 if (start > len - slen)
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300748 goto notfound;
749 } else {
750 /* endswith */
751 if (end - start < slen || start > len)
752 goto notfound;
753
754 if (end - slen > start)
755 start = end - slen;
756 }
757 if (end - start < slen)
758 goto notfound;
759 if (memcmp(str + start, sub, slen) != 0)
760 goto notfound;
761
762 PyBuffer_Release(&sub_view);
763 return 1;
764
765notfound:
766 PyBuffer_Release(&sub_view);
767 return 0;
768}
769
Benjamin Peterson621b4302016-09-09 13:54:34 -0700770static PyObject *
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300771_Py_bytes_tailmatch(const char *str, Py_ssize_t len,
772 const char *function_name, PyObject *args,
773 int direction)
774{
775 Py_ssize_t start = 0;
776 Py_ssize_t end = PY_SSIZE_T_MAX;
777 PyObject *subobj;
778 int result;
779
780 if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
781 return NULL;
782 if (PyTuple_Check(subobj)) {
783 Py_ssize_t i;
784 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
785 result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
786 start, end, direction);
787 if (result == -1)
788 return NULL;
789 else if (result) {
790 Py_RETURN_TRUE;
791 }
792 }
793 Py_RETURN_FALSE;
794 }
795 result = tailmatch(str, len, subobj, start, end, direction);
796 if (result == -1) {
797 if (PyErr_ExceptionMatches(PyExc_TypeError))
798 PyErr_Format(PyExc_TypeError,
799 "%s first arg must be bytes or a tuple of bytes, "
800 "not %s",
801 function_name, Py_TYPE(subobj)->tp_name);
802 return NULL;
803 }
804 else
805 return PyBool_FromLong(result);
806}
807
808PyDoc_STRVAR_shared(_Py_startswith__doc__,
809"B.startswith(prefix[, start[, end]]) -> bool\n\
810\n\
811Return True if B starts with the specified prefix, False otherwise.\n\
812With optional start, test B beginning at that position.\n\
813With optional end, stop comparing B at that position.\n\
814prefix can also be a tuple of bytes to try.");
815
816PyObject *
817_Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
818{
819 return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
820}
821
822PyDoc_STRVAR_shared(_Py_endswith__doc__,
823"B.endswith(suffix[, start[, end]]) -> bool\n\
824\n\
825Return True if B ends with the specified suffix, False otherwise.\n\
826With optional start, test B beginning at that position.\n\
827With optional end, stop comparing B at that position.\n\
828suffix can also be a tuple of bytes to try.");
829
830PyObject *
831_Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
832{
833 return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
834}