blob: db030be4fe7561600e8c968759e073981bc8d776 [file] [log] [blame]
Serhiy Storchakaab8bcb32016-07-03 13:26:52 +03001#define PY_SSIZE_T_CLEAN
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002#include "Python.h"
3#include "bytes_methods.h"
4
Gregory P. Smith60d241f2007-10-16 06:31:30 +00005PyDoc_STRVAR_shared(_Py_isspace__doc__,
6"B.isspace() -> bool\n\
7\n\
8Return True if all characters in B are whitespace\n\
9and there is at least one character in B, False otherwise.");
10
11PyObject*
12_Py_bytes_isspace(const char *cptr, Py_ssize_t len)
13{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020014 const unsigned char *p
Andy Lestere6be9b52020-02-11 20:28:35 -060015 = (const unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020016 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000017
18 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000019 if (len == 1 && Py_ISSPACE(*p))
Gregory P. Smith60d241f2007-10-16 06:31:30 +000020 Py_RETURN_TRUE;
21
22 /* Special case for empty strings */
23 if (len == 0)
24 Py_RETURN_FALSE;
25
26 e = p + len;
27 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000028 if (!Py_ISSPACE(*p))
Gregory P. Smith60d241f2007-10-16 06:31:30 +000029 Py_RETURN_FALSE;
30 }
31 Py_RETURN_TRUE;
32}
33
34
35PyDoc_STRVAR_shared(_Py_isalpha__doc__,
36"B.isalpha() -> bool\n\
37\n\
38Return True if all characters in B are alphabetic\n\
39and there is at least one character in B, False otherwise.");
40
41PyObject*
42_Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
43{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020044 const unsigned char *p
Andy Lestere6be9b52020-02-11 20:28:35 -060045 = (const unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020046 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000047
48 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000049 if (len == 1 && Py_ISALPHA(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000050 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000051
52 /* Special case for empty strings */
53 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000054 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000055
56 e = p + len;
57 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000058 if (!Py_ISALPHA(*p))
59 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000060 }
61 Py_RETURN_TRUE;
62}
63
64
65PyDoc_STRVAR_shared(_Py_isalnum__doc__,
66"B.isalnum() -> bool\n\
67\n\
68Return True if all characters in B are alphanumeric\n\
69and there is at least one character in B, False otherwise.");
70
71PyObject*
72_Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
73{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020074 const unsigned char *p
Andy Lestere6be9b52020-02-11 20:28:35 -060075 = (const unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020076 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000077
78 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000079 if (len == 1 && Py_ISALNUM(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000081
82 /* Special case for empty strings */
83 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000085
86 e = p + len;
87 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000088 if (!Py_ISALNUM(*p))
89 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000090 }
91 Py_RETURN_TRUE;
92}
93
94
INADA Naokia49ac992018-01-27 14:06:21 +090095PyDoc_STRVAR_shared(_Py_isascii__doc__,
96"B.isascii() -> bool\n\
97\n\
98Return True if B is empty or all characters in B are ASCII,\n\
99False otherwise.");
100
INADA Naokibea57062018-01-28 09:59:12 +0900101// Optimization is copied from ascii_decode in unicodeobject.c
102/* Mask to quickly check whether a C 'long' contains a
103 non-ASCII, UTF8-encoded char. */
104#if (SIZEOF_LONG == 8)
105# define ASCII_CHAR_MASK 0x8080808080808080UL
106#elif (SIZEOF_LONG == 4)
107# define ASCII_CHAR_MASK 0x80808080UL
108#else
109# error C 'long' size should be either 4 or 8!
110#endif
111
INADA Naokia49ac992018-01-27 14:06:21 +0900112PyObject*
113_Py_bytes_isascii(const char *cptr, Py_ssize_t len)
114{
INADA Naokibea57062018-01-28 09:59:12 +0900115 const char *p = cptr;
116 const char *end = p + len;
117 const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
118
119 while (p < end) {
120 /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
121 for an explanation. */
122 if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
123 /* Help allocation */
124 const char *_p = p;
125 while (_p < aligned_end) {
Andy Lestere6be9b52020-02-11 20:28:35 -0600126 unsigned long value = *(const unsigned long *) _p;
INADA Naokibea57062018-01-28 09:59:12 +0900127 if (value & ASCII_CHAR_MASK) {
128 Py_RETURN_FALSE;
129 }
130 _p += SIZEOF_LONG;
131 }
132 p = _p;
133 if (_p == end)
134 break;
135 }
136 if ((unsigned char)*p & 0x80) {
INADA Naokia49ac992018-01-27 14:06:21 +0900137 Py_RETURN_FALSE;
138 }
INADA Naokibea57062018-01-28 09:59:12 +0900139 p++;
INADA Naokia49ac992018-01-27 14:06:21 +0900140 }
141 Py_RETURN_TRUE;
142}
143
INADA Naokibea57062018-01-28 09:59:12 +0900144#undef ASCII_CHAR_MASK
145
INADA Naokia49ac992018-01-27 14:06:21 +0900146
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000147PyDoc_STRVAR_shared(_Py_isdigit__doc__,
148"B.isdigit() -> bool\n\
149\n\
150Return True if all characters in B are digits\n\
151and there is at least one character in B, False otherwise.");
152
153PyObject*
154_Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
155{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200156 const unsigned char *p
Andy Lestere6be9b52020-02-11 20:28:35 -0600157 = (const unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200158 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000159
160 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +0000161 if (len == 1 && Py_ISDIGIT(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000163
164 /* Special case for empty strings */
165 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000167
168 e = p + len;
169 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 if (!Py_ISDIGIT(*p))
171 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000172 }
173 Py_RETURN_TRUE;
174}
175
176
177PyDoc_STRVAR_shared(_Py_islower__doc__,
178"B.islower() -> bool\n\
179\n\
180Return True if all cased characters in B are lowercase and there is\n\
181at least one cased character in B, False otherwise.");
182
183PyObject*
184_Py_bytes_islower(const char *cptr, Py_ssize_t len)
185{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200186 const unsigned char *p
Andy Lestere6be9b52020-02-11 20:28:35 -0600187 = (const unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200188 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000189 int cased;
190
191 /* Shortcut for single character strings */
192 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193 return PyBool_FromLong(Py_ISLOWER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000194
195 /* Special case for empty strings */
196 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000197 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000198
199 e = p + len;
200 cased = 0;
201 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000202 if (Py_ISUPPER(*p))
203 Py_RETURN_FALSE;
204 else if (!cased && Py_ISLOWER(*p))
205 cased = 1;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000206 }
207 return PyBool_FromLong(cased);
208}
209
210
211PyDoc_STRVAR_shared(_Py_isupper__doc__,
212"B.isupper() -> bool\n\
213\n\
214Return True if all cased characters in B are uppercase and there is\n\
215at least one cased character in B, False otherwise.");
216
217PyObject*
218_Py_bytes_isupper(const char *cptr, Py_ssize_t len)
219{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200220 const unsigned char *p
Andy Lestere6be9b52020-02-11 20:28:35 -0600221 = (const unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200222 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000223 int cased;
224
225 /* Shortcut for single character strings */
226 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 return PyBool_FromLong(Py_ISUPPER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000228
229 /* Special case for empty strings */
230 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000231 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000232
233 e = p + len;
234 cased = 0;
235 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000236 if (Py_ISLOWER(*p))
237 Py_RETURN_FALSE;
238 else if (!cased && Py_ISUPPER(*p))
239 cased = 1;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000240 }
241 return PyBool_FromLong(cased);
242}
243
244
245PyDoc_STRVAR_shared(_Py_istitle__doc__,
246"B.istitle() -> bool\n\
247\n\
248Return True if B is a titlecased string and there is at least one\n\
249character in B, i.e. uppercase characters may only follow uncased\n\
250characters and lowercase characters only cased ones. Return False\n\
251otherwise.");
252
253PyObject*
254_Py_bytes_istitle(const char *cptr, Py_ssize_t len)
255{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200256 const unsigned char *p
Andy Lestere6be9b52020-02-11 20:28:35 -0600257 = (const unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200258 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000259 int cased, previous_is_cased;
260
261 /* Shortcut for single character strings */
262 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000263 return PyBool_FromLong(Py_ISUPPER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000264
265 /* Special case for empty strings */
266 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000268
269 e = p + len;
270 cased = 0;
271 previous_is_cased = 0;
272 for (; p < e; p++) {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200273 const unsigned char ch = *p;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000274
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 if (Py_ISUPPER(ch)) {
276 if (previous_is_cased)
277 Py_RETURN_FALSE;
278 previous_is_cased = 1;
279 cased = 1;
280 }
281 else if (Py_ISLOWER(ch)) {
282 if (!previous_is_cased)
283 Py_RETURN_FALSE;
284 previous_is_cased = 1;
285 cased = 1;
286 }
287 else
288 previous_is_cased = 0;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000289 }
290 return PyBool_FromLong(cased);
291}
292
293
294PyDoc_STRVAR_shared(_Py_lower__doc__,
295"B.lower() -> copy of B\n\
296\n\
297Return a copy of B with all ASCII characters converted to lowercase.");
298
299void
300_Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
301{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000302 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000303
Antoine Pitrou9b491922010-08-15 17:38:46 +0000304 for (i = 0; i < len; i++) {
Antoine Pitrou94f6fa62012-01-08 16:22:46 +0100305 result[i] = Py_TOLOWER((unsigned char) cptr[i]);
Antoine Pitrou9b491922010-08-15 17:38:46 +0000306 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000307}
308
309
310PyDoc_STRVAR_shared(_Py_upper__doc__,
311"B.upper() -> copy of B\n\
312\n\
313Return a copy of B with all ASCII characters converted to uppercase.");
314
315void
316_Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
317{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000318 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000319
Antoine Pitrou9b491922010-08-15 17:38:46 +0000320 for (i = 0; i < len; i++) {
Antoine Pitrou94f6fa62012-01-08 16:22:46 +0100321 result[i] = Py_TOUPPER((unsigned char) cptr[i]);
Antoine Pitrou9b491922010-08-15 17:38:46 +0000322 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000323}
324
325
326PyDoc_STRVAR_shared(_Py_title__doc__,
327"B.title() -> copy of B\n\
328\n\
329Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
330characters, all remaining cased characters have lowercase.");
331
332void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200333_Py_bytes_title(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000334{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000335 Py_ssize_t i;
336 int previous_is_cased = 0;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000337
Antoine Pitrou9b491922010-08-15 17:38:46 +0000338 for (i = 0; i < len; i++) {
339 int c = Py_CHARMASK(*s++);
340 if (Py_ISLOWER(c)) {
341 if (!previous_is_cased)
342 c = Py_TOUPPER(c);
343 previous_is_cased = 1;
344 } else if (Py_ISUPPER(c)) {
345 if (previous_is_cased)
346 c = Py_TOLOWER(c);
347 previous_is_cased = 1;
348 } else
349 previous_is_cased = 0;
350 *result++ = c;
351 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000352}
353
354
355PyDoc_STRVAR_shared(_Py_capitalize__doc__,
356"B.capitalize() -> copy of B\n\
357\n\
Senthil Kumarane51ee8a2010-07-05 12:00:56 +0000358Return a copy of B with only its first character capitalized (ASCII)\n\
359and the rest lower-cased.");
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000360
361void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200362_Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000363{
Sergey Fedoseev593bb302018-09-07 09:54:49 +0500364 if (len > 0) {
365 *result = Py_TOUPPER(*s);
366 _Py_bytes_lower(result + 1, s + 1, len - 1);
Antoine Pitrou9b491922010-08-15 17:38:46 +0000367 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000368}
369
370
371PyDoc_STRVAR_shared(_Py_swapcase__doc__,
372"B.swapcase() -> copy of B\n\
373\n\
374Return a copy of B with uppercase ASCII characters converted\n\
375to lowercase ASCII and vice versa.");
376
377void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200378_Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000379{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000380 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000381
Antoine Pitrou9b491922010-08-15 17:38:46 +0000382 for (i = 0; i < len; i++) {
383 int c = Py_CHARMASK(*s++);
384 if (Py_ISLOWER(c)) {
385 *result = Py_TOUPPER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000386 }
Antoine Pitrou9b491922010-08-15 17:38:46 +0000387 else if (Py_ISUPPER(c)) {
388 *result = Py_TOLOWER(c);
389 }
390 else
391 *result = c;
392 result++;
393 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000394}
395
Georg Brandlabc38772009-04-12 15:51:51 +0000396
397PyDoc_STRVAR_shared(_Py_maketrans__doc__,
398"B.maketrans(frm, to) -> translation table\n\
399\n\
Senthil Kumaran84e3ccc2011-06-27 09:06:45 -0700400Return a translation table (a bytes object of length 256) suitable\n\
401for use in the bytes or bytearray translate method where each byte\n\
402in frm is mapped to the byte at the same position in to.\n\
403The bytes objects frm and to must be of the same length.");
Georg Brandlabc38772009-04-12 15:51:51 +0000404
Georg Brandlabc38772009-04-12 15:51:51 +0000405PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200406_Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
Georg Brandlabc38772009-04-12 15:51:51 +0000407{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +0200408 PyObject *res = NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000409 Py_ssize_t i;
410 char *p;
Georg Brandlabc38772009-04-12 15:51:51 +0000411
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200412 if (frm->len != to->len) {
Antoine Pitrou9b491922010-08-15 17:38:46 +0000413 PyErr_Format(PyExc_ValueError,
414 "maketrans arguments must have same length");
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200415 return NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000416 }
417 res = PyBytes_FromStringAndSize(NULL, 256);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200418 if (!res)
419 return NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000420 p = PyBytes_AS_STRING(res);
421 for (i = 0; i < 256; i++)
Antoine Pitrou47019e52010-08-15 17:41:31 +0000422 p[i] = (char) i;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200423 for (i = 0; i < frm->len; i++) {
424 p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
Antoine Pitrou9b491922010-08-15 17:38:46 +0000425 }
Georg Brandlabc38772009-04-12 15:51:51 +0000426
Antoine Pitrou9b491922010-08-15 17:38:46 +0000427 return res;
Georg Brandlabc38772009-04-12 15:51:51 +0000428}
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300429
430#define FASTSEARCH fastsearch
431#define STRINGLIB(F) stringlib_##F
432#define STRINGLIB_CHAR char
433#define STRINGLIB_SIZEOF_CHAR 1
434
435#include "stringlib/fastsearch.h"
436#include "stringlib/count.h"
437#include "stringlib/find.h"
438
439/*
Oren Milman00425102017-03-13 00:37:05 +0200440Wraps stringlib_parse_args_finds() and additionally checks the first
441argument type.
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300442
Oren Milman00425102017-03-13 00:37:05 +0200443In case the first argument is a bytes-like object, sets it to subobj,
444and doesn't touch the byte parameter.
445In case it is an integer in range(0, 256), writes the integer value
446to byte, and sets subobj to NULL.
447
448The other parameters are similar to those of
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300449stringlib_parse_args_finds().
450*/
451
452Py_LOCAL_INLINE(int)
453parse_args_finds_byte(const char *function_name, PyObject *args,
454 PyObject **subobj, char *byte,
455 Py_ssize_t *start, Py_ssize_t *end)
456{
457 PyObject *tmp_subobj;
458 Py_ssize_t ival;
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300459
460 if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
461 start, end))
462 return 0;
463
Oren Milman00425102017-03-13 00:37:05 +0200464 if (PyObject_CheckBuffer(tmp_subobj)) {
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300465 *subobj = tmp_subobj;
466 return 1;
467 }
468
Oren Milman00425102017-03-13 00:37:05 +0200469 if (!PyIndex_Check(tmp_subobj)) {
470 PyErr_Format(PyExc_TypeError,
471 "argument should be integer or bytes-like object, "
472 "not '%.200s'",
473 Py_TYPE(tmp_subobj)->tp_name);
474 return 0;
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300475 }
476
Oren Milman00425102017-03-13 00:37:05 +0200477 ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
478 if (ival == -1 && PyErr_Occurred()) {
479 return 0;
480 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300481 if (ival < 0 || ival > 255) {
482 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
483 return 0;
484 }
485
486 *subobj = NULL;
487 *byte = (char)ival;
488 return 1;
489}
490
/* Helper macro to fix up start/end slice values in place.

   - end greater than len is clamped to len;
   - negative end or start has len added to it and, if still negative,
     is clamped to 0.

   start is NOT clamped to len, so callers must still cope with
   start > end.  start and end must be modifiable lvalues; each argument
   is evaluated more than once.

   The body is wrapped in do { } while (0) so that an invocation followed
   by a semicolon is exactly one statement and can be used safely in an
   unbraced if/else. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
505
506Py_LOCAL_INLINE(Py_ssize_t)
507find_internal(const char *str, Py_ssize_t len,
508 const char *function_name, PyObject *args, int dir)
509{
510 PyObject *subobj;
511 char byte;
512 Py_buffer subbuf;
513 const char *sub;
514 Py_ssize_t sub_len;
515 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
516 Py_ssize_t res;
517
518 if (!parse_args_finds_byte(function_name, args,
519 &subobj, &byte, &start, &end))
520 return -2;
521
522 if (subobj) {
523 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
524 return -2;
525
526 sub = subbuf.buf;
527 sub_len = subbuf.len;
528 }
529 else {
530 sub = &byte;
531 sub_len = 1;
532 }
533
534 ADJUST_INDICES(start, end, len);
535 if (end - start < sub_len)
536 res = -1;
537 else if (sub_len == 1) {
538 if (dir > 0)
539 res = stringlib_find_char(
540 str + start, end - start,
541 *sub);
542 else
543 res = stringlib_rfind_char(
544 str + start, end - start,
545 *sub);
546 if (res >= 0)
547 res += start;
548 }
549 else {
550 if (dir > 0)
551 res = stringlib_find_slice(
552 str, len,
553 sub, sub_len, start, end);
554 else
555 res = stringlib_rfind_slice(
556 str, len,
557 sub, sub_len, start, end);
558 }
559
560 if (subobj)
561 PyBuffer_Release(&subbuf);
562
563 return res;
564}
565
566PyDoc_STRVAR_shared(_Py_find__doc__,
567"B.find(sub[, start[, end]]) -> int\n\
568\n\
569Return the lowest index in B where subsection sub is found,\n\
570such that sub is contained within B[start,end]. Optional\n\
571arguments start and end are interpreted as in slice notation.\n\
572\n\
573Return -1 on failure.");
574
575PyObject *
576_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
577{
578 Py_ssize_t result = find_internal(str, len, "find", args, +1);
579 if (result == -2)
580 return NULL;
581 return PyLong_FromSsize_t(result);
582}
583
584PyDoc_STRVAR_shared(_Py_index__doc__,
585"B.index(sub[, start[, end]]) -> int\n\
586\n\
Lisa Roach43ba8862017-04-04 22:36:22 -0700587Return the lowest index in B where subsection sub is found,\n\
588such that sub is contained within B[start,end]. Optional\n\
589arguments start and end are interpreted as in slice notation.\n\
590\n\
591Raises ValueError when the subsection is not found.");
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300592
593PyObject *
594_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
595{
596 Py_ssize_t result = find_internal(str, len, "index", args, +1);
597 if (result == -2)
598 return NULL;
599 if (result == -1) {
600 PyErr_SetString(PyExc_ValueError,
601 "subsection not found");
602 return NULL;
603 }
604 return PyLong_FromSsize_t(result);
605}
606
607PyDoc_STRVAR_shared(_Py_rfind__doc__,
608"B.rfind(sub[, start[, end]]) -> int\n\
609\n\
610Return the highest index in B where subsection sub is found,\n\
611such that sub is contained within B[start,end]. Optional\n\
612arguments start and end are interpreted as in slice notation.\n\
613\n\
614Return -1 on failure.");
615
616PyObject *
617_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
618{
619 Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
620 if (result == -2)
621 return NULL;
622 return PyLong_FromSsize_t(result);
623}
624
625PyDoc_STRVAR_shared(_Py_rindex__doc__,
626"B.rindex(sub[, start[, end]]) -> int\n\
627\n\
Lisa Roach43ba8862017-04-04 22:36:22 -0700628Return the highest index in B where subsection sub is found,\n\
629such that sub is contained within B[start,end]. Optional\n\
630arguments start and end are interpreted as in slice notation.\n\
631\n\
632Raise ValueError when the subsection is not found.");
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300633
634PyObject *
635_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
636{
637 Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
638 if (result == -2)
639 return NULL;
640 if (result == -1) {
641 PyErr_SetString(PyExc_ValueError,
642 "subsection not found");
643 return NULL;
644 }
645 return PyLong_FromSsize_t(result);
646}
647
648PyDoc_STRVAR_shared(_Py_count__doc__,
649"B.count(sub[, start[, end]]) -> int\n\
650\n\
651Return the number of non-overlapping occurrences of subsection sub in\n\
652bytes B[start:end]. Optional arguments start and end are interpreted\n\
653as in slice notation.");
654
655PyObject *
656_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
657{
658 PyObject *sub_obj;
659 const char *sub;
660 Py_ssize_t sub_len;
661 char byte;
662 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
663
664 Py_buffer vsub;
665 PyObject *count_obj;
666
667 if (!parse_args_finds_byte("count", args,
668 &sub_obj, &byte, &start, &end))
669 return NULL;
670
671 if (sub_obj) {
672 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
673 return NULL;
674
675 sub = vsub.buf;
676 sub_len = vsub.len;
677 }
678 else {
679 sub = &byte;
680 sub_len = 1;
681 }
682
683 ADJUST_INDICES(start, end, len);
684
685 count_obj = PyLong_FromSsize_t(
686 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
687 );
688
689 if (sub_obj)
690 PyBuffer_Release(&vsub);
691
692 return count_obj;
693}
694
695int
696_Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
697{
Serhiy Storchakaf9efb8b2016-07-10 12:37:30 +0300698 Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300699 if (ival == -1 && PyErr_Occurred()) {
700 Py_buffer varg;
701 Py_ssize_t pos;
702 PyErr_Clear();
703 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
704 return -1;
705 pos = stringlib_find(str, len,
706 varg.buf, varg.len, 0);
707 PyBuffer_Release(&varg);
708 return pos >= 0;
709 }
710 if (ival < 0 || ival >= 256) {
711 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
712 return -1;
713 }
714
715 return memchr(str, (int) ival, len) != NULL;
716}
717
718
719/* Matches the end (direction >= 0) or start (direction < 0) of the buffer
720 * against substr, using the start and end arguments. Returns
721 * -1 on error, 0 if not found and 1 if found.
722 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700723static int
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300724tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
725 Py_ssize_t start, Py_ssize_t end, int direction)
726{
727 Py_buffer sub_view = {NULL, NULL};
728 const char *sub;
729 Py_ssize_t slen;
730
731 if (PyBytes_Check(substr)) {
732 sub = PyBytes_AS_STRING(substr);
733 slen = PyBytes_GET_SIZE(substr);
734 }
735 else {
736 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
737 return -1;
738 sub = sub_view.buf;
739 slen = sub_view.len;
740 }
741
742 ADJUST_INDICES(start, end, len);
743
744 if (direction < 0) {
745 /* startswith */
Hai Shi24ddd9c2019-10-06 20:17:18 +0800746 if (start > len - slen)
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300747 goto notfound;
748 } else {
749 /* endswith */
750 if (end - start < slen || start > len)
751 goto notfound;
752
753 if (end - slen > start)
754 start = end - slen;
755 }
756 if (end - start < slen)
757 goto notfound;
758 if (memcmp(str + start, sub, slen) != 0)
759 goto notfound;
760
761 PyBuffer_Release(&sub_view);
762 return 1;
763
764notfound:
765 PyBuffer_Release(&sub_view);
766 return 0;
767}
768
Benjamin Peterson621b4302016-09-09 13:54:34 -0700769static PyObject *
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300770_Py_bytes_tailmatch(const char *str, Py_ssize_t len,
771 const char *function_name, PyObject *args,
772 int direction)
773{
774 Py_ssize_t start = 0;
775 Py_ssize_t end = PY_SSIZE_T_MAX;
776 PyObject *subobj;
777 int result;
778
779 if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
780 return NULL;
781 if (PyTuple_Check(subobj)) {
782 Py_ssize_t i;
783 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
784 result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
785 start, end, direction);
786 if (result == -1)
787 return NULL;
788 else if (result) {
789 Py_RETURN_TRUE;
790 }
791 }
792 Py_RETURN_FALSE;
793 }
794 result = tailmatch(str, len, subobj, start, end, direction);
795 if (result == -1) {
796 if (PyErr_ExceptionMatches(PyExc_TypeError))
797 PyErr_Format(PyExc_TypeError,
798 "%s first arg must be bytes or a tuple of bytes, "
799 "not %s",
800 function_name, Py_TYPE(subobj)->tp_name);
801 return NULL;
802 }
803 else
804 return PyBool_FromLong(result);
805}
806
807PyDoc_STRVAR_shared(_Py_startswith__doc__,
808"B.startswith(prefix[, start[, end]]) -> bool\n\
809\n\
810Return True if B starts with the specified prefix, False otherwise.\n\
811With optional start, test B beginning at that position.\n\
812With optional end, stop comparing B at that position.\n\
813prefix can also be a tuple of bytes to try.");
814
815PyObject *
816_Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
817{
818 return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
819}
820
821PyDoc_STRVAR_shared(_Py_endswith__doc__,
822"B.endswith(suffix[, start[, end]]) -> bool\n\
823\n\
824Return True if B ends with the specified suffix, False otherwise.\n\
825With optional start, test B beginning at that position.\n\
826With optional end, stop comparing B at that position.\n\
827suffix can also be a tuple of bytes to try.");
828
829PyObject *
830_Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
831{
832 return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
833}