blob: 07842f7469101365614139706d0bf2b3f35549e9 [file] [log] [blame]
Serhiy Storchakaab8bcb32016-07-03 13:26:52 +03001#define PY_SSIZE_T_CLEAN
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002#include "Python.h"
3#include "bytes_methods.h"
4
Gregory P. Smith60d241f2007-10-16 06:31:30 +00005PyDoc_STRVAR_shared(_Py_isspace__doc__,
6"B.isspace() -> bool\n\
7\n\
8Return True if all characters in B are whitespace\n\
9and there is at least one character in B, False otherwise.");
10
11PyObject*
12_Py_bytes_isspace(const char *cptr, Py_ssize_t len)
13{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020014 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020016 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000017
18 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000019 if (len == 1 && Py_ISSPACE(*p))
Gregory P. Smith60d241f2007-10-16 06:31:30 +000020 Py_RETURN_TRUE;
21
22 /* Special case for empty strings */
23 if (len == 0)
24 Py_RETURN_FALSE;
25
26 e = p + len;
27 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000028 if (!Py_ISSPACE(*p))
Gregory P. Smith60d241f2007-10-16 06:31:30 +000029 Py_RETURN_FALSE;
30 }
31 Py_RETURN_TRUE;
32}
33
34
35PyDoc_STRVAR_shared(_Py_isalpha__doc__,
36"B.isalpha() -> bool\n\
37\n\
38Return True if all characters in B are alphabetic\n\
39and there is at least one character in B, False otherwise.");
40
41PyObject*
42_Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
43{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020044 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +000045 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020046 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000047
48 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000049 if (len == 1 && Py_ISALPHA(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000050 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000051
52 /* Special case for empty strings */
53 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000054 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000055
56 e = p + len;
57 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000058 if (!Py_ISALPHA(*p))
59 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000060 }
61 Py_RETURN_TRUE;
62}
63
64
65PyDoc_STRVAR_shared(_Py_isalnum__doc__,
66"B.isalnum() -> bool\n\
67\n\
68Return True if all characters in B are alphanumeric\n\
69and there is at least one character in B, False otherwise.");
70
71PyObject*
72_Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
73{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020074 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +000075 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020076 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000077
78 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000079 if (len == 1 && Py_ISALNUM(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000081
82 /* Special case for empty strings */
83 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000085
86 e = p + len;
87 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000088 if (!Py_ISALNUM(*p))
89 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000090 }
91 Py_RETURN_TRUE;
92}
93
94
INADA Naokia49ac992018-01-27 14:06:21 +090095PyDoc_STRVAR_shared(_Py_isascii__doc__,
96"B.isascii() -> bool\n\
97\n\
98Return True if B is empty or all characters in B are ASCII,\n\
99False otherwise.");
100
INADA Naokibea57062018-01-28 09:59:12 +0900101// Optimization is copied from ascii_decode in unicodeobject.c
102/* Mask to quickly check whether a C 'long' contains a
103 non-ASCII, UTF8-encoded char. */
104#if (SIZEOF_LONG == 8)
105# define ASCII_CHAR_MASK 0x8080808080808080UL
106#elif (SIZEOF_LONG == 4)
107# define ASCII_CHAR_MASK 0x80808080UL
108#else
109# error C 'long' size should be either 4 or 8!
110#endif
111
INADA Naokia49ac992018-01-27 14:06:21 +0900112PyObject*
113_Py_bytes_isascii(const char *cptr, Py_ssize_t len)
114{
INADA Naokibea57062018-01-28 09:59:12 +0900115 const char *p = cptr;
116 const char *end = p + len;
117 const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
118
119 while (p < end) {
120 /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
121 for an explanation. */
122 if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
123 /* Help allocation */
124 const char *_p = p;
125 while (_p < aligned_end) {
126 unsigned long value = *(unsigned long *) _p;
127 if (value & ASCII_CHAR_MASK) {
128 Py_RETURN_FALSE;
129 }
130 _p += SIZEOF_LONG;
131 }
132 p = _p;
133 if (_p == end)
134 break;
135 }
136 if ((unsigned char)*p & 0x80) {
INADA Naokia49ac992018-01-27 14:06:21 +0900137 Py_RETURN_FALSE;
138 }
INADA Naokibea57062018-01-28 09:59:12 +0900139 p++;
INADA Naokia49ac992018-01-27 14:06:21 +0900140 }
141 Py_RETURN_TRUE;
142}
143
INADA Naokibea57062018-01-28 09:59:12 +0900144#undef ASCII_CHAR_MASK
145
INADA Naokia49ac992018-01-27 14:06:21 +0900146
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000147PyDoc_STRVAR_shared(_Py_isdigit__doc__,
148"B.isdigit() -> bool\n\
149\n\
150Return True if all characters in B are digits\n\
151and there is at least one character in B, False otherwise.");
152
153PyObject*
154_Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
155{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200156 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000157 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200158 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000159
160 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +0000161 if (len == 1 && Py_ISDIGIT(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000163
164 /* Special case for empty strings */
165 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000167
168 e = p + len;
169 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 if (!Py_ISDIGIT(*p))
171 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000172 }
173 Py_RETURN_TRUE;
174}
175
176
177PyDoc_STRVAR_shared(_Py_islower__doc__,
178"B.islower() -> bool\n\
179\n\
180Return True if all cased characters in B are lowercase and there is\n\
181at least one cased character in B, False otherwise.");
182
183PyObject*
184_Py_bytes_islower(const char *cptr, Py_ssize_t len)
185{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200186 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000187 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200188 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000189 int cased;
190
191 /* Shortcut for single character strings */
192 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193 return PyBool_FromLong(Py_ISLOWER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000194
195 /* Special case for empty strings */
196 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000197 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000198
199 e = p + len;
200 cased = 0;
201 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000202 if (Py_ISUPPER(*p))
203 Py_RETURN_FALSE;
204 else if (!cased && Py_ISLOWER(*p))
205 cased = 1;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000206 }
207 return PyBool_FromLong(cased);
208}
209
210
211PyDoc_STRVAR_shared(_Py_isupper__doc__,
212"B.isupper() -> bool\n\
213\n\
214Return True if all cased characters in B are uppercase and there is\n\
215at least one cased character in B, False otherwise.");
216
217PyObject*
218_Py_bytes_isupper(const char *cptr, Py_ssize_t len)
219{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200220 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000221 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200222 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000223 int cased;
224
225 /* Shortcut for single character strings */
226 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 return PyBool_FromLong(Py_ISUPPER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000228
229 /* Special case for empty strings */
230 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000231 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000232
233 e = p + len;
234 cased = 0;
235 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000236 if (Py_ISLOWER(*p))
237 Py_RETURN_FALSE;
238 else if (!cased && Py_ISUPPER(*p))
239 cased = 1;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000240 }
241 return PyBool_FromLong(cased);
242}
243
244
245PyDoc_STRVAR_shared(_Py_istitle__doc__,
246"B.istitle() -> bool\n\
247\n\
248Return True if B is a titlecased string and there is at least one\n\
249character in B, i.e. uppercase characters may only follow uncased\n\
250characters and lowercase characters only cased ones. Return False\n\
251otherwise.");
252
253PyObject*
254_Py_bytes_istitle(const char *cptr, Py_ssize_t len)
255{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200256 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000257 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200258 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000259 int cased, previous_is_cased;
260
261 /* Shortcut for single character strings */
262 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000263 return PyBool_FromLong(Py_ISUPPER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000264
265 /* Special case for empty strings */
266 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000268
269 e = p + len;
270 cased = 0;
271 previous_is_cased = 0;
272 for (; p < e; p++) {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200273 const unsigned char ch = *p;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000274
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 if (Py_ISUPPER(ch)) {
276 if (previous_is_cased)
277 Py_RETURN_FALSE;
278 previous_is_cased = 1;
279 cased = 1;
280 }
281 else if (Py_ISLOWER(ch)) {
282 if (!previous_is_cased)
283 Py_RETURN_FALSE;
284 previous_is_cased = 1;
285 cased = 1;
286 }
287 else
288 previous_is_cased = 0;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000289 }
290 return PyBool_FromLong(cased);
291}
292
293
294PyDoc_STRVAR_shared(_Py_lower__doc__,
295"B.lower() -> copy of B\n\
296\n\
297Return a copy of B with all ASCII characters converted to lowercase.");
298
299void
300_Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
301{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000302 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000303
Antoine Pitrou9b491922010-08-15 17:38:46 +0000304 for (i = 0; i < len; i++) {
Antoine Pitrou94f6fa62012-01-08 16:22:46 +0100305 result[i] = Py_TOLOWER((unsigned char) cptr[i]);
Antoine Pitrou9b491922010-08-15 17:38:46 +0000306 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000307}
308
309
310PyDoc_STRVAR_shared(_Py_upper__doc__,
311"B.upper() -> copy of B\n\
312\n\
313Return a copy of B with all ASCII characters converted to uppercase.");
314
315void
316_Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
317{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000318 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000319
Antoine Pitrou9b491922010-08-15 17:38:46 +0000320 for (i = 0; i < len; i++) {
Antoine Pitrou94f6fa62012-01-08 16:22:46 +0100321 result[i] = Py_TOUPPER((unsigned char) cptr[i]);
Antoine Pitrou9b491922010-08-15 17:38:46 +0000322 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000323}
324
325
326PyDoc_STRVAR_shared(_Py_title__doc__,
327"B.title() -> copy of B\n\
328\n\
329Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
330characters, all remaining cased characters have lowercase.");
331
332void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200333_Py_bytes_title(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000334{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000335 Py_ssize_t i;
336 int previous_is_cased = 0;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000337
Antoine Pitrou9b491922010-08-15 17:38:46 +0000338 for (i = 0; i < len; i++) {
339 int c = Py_CHARMASK(*s++);
340 if (Py_ISLOWER(c)) {
341 if (!previous_is_cased)
342 c = Py_TOUPPER(c);
343 previous_is_cased = 1;
344 } else if (Py_ISUPPER(c)) {
345 if (previous_is_cased)
346 c = Py_TOLOWER(c);
347 previous_is_cased = 1;
348 } else
349 previous_is_cased = 0;
350 *result++ = c;
351 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000352}
353
354
355PyDoc_STRVAR_shared(_Py_capitalize__doc__,
356"B.capitalize() -> copy of B\n\
357\n\
Senthil Kumarane51ee8a2010-07-05 12:00:56 +0000358Return a copy of B with only its first character capitalized (ASCII)\n\
359and the rest lower-cased.");
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000360
361void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200362_Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000363{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000364 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000365
Antoine Pitrou9b491922010-08-15 17:38:46 +0000366 if (0 < len) {
367 int c = Py_CHARMASK(*s++);
368 if (Py_ISLOWER(c))
369 *result = Py_TOUPPER(c);
370 else
371 *result = c;
372 result++;
373 }
374 for (i = 1; i < len; i++) {
375 int c = Py_CHARMASK(*s++);
376 if (Py_ISUPPER(c))
377 *result = Py_TOLOWER(c);
378 else
379 *result = c;
380 result++;
381 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000382}
383
384
385PyDoc_STRVAR_shared(_Py_swapcase__doc__,
386"B.swapcase() -> copy of B\n\
387\n\
388Return a copy of B with uppercase ASCII characters converted\n\
389to lowercase ASCII and vice versa.");
390
391void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200392_Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000393{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000394 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000395
Antoine Pitrou9b491922010-08-15 17:38:46 +0000396 for (i = 0; i < len; i++) {
397 int c = Py_CHARMASK(*s++);
398 if (Py_ISLOWER(c)) {
399 *result = Py_TOUPPER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 }
Antoine Pitrou9b491922010-08-15 17:38:46 +0000401 else if (Py_ISUPPER(c)) {
402 *result = Py_TOLOWER(c);
403 }
404 else
405 *result = c;
406 result++;
407 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000408}
409
Georg Brandlabc38772009-04-12 15:51:51 +0000410
411PyDoc_STRVAR_shared(_Py_maketrans__doc__,
412"B.maketrans(frm, to) -> translation table\n\
413\n\
Senthil Kumaran84e3ccc2011-06-27 09:06:45 -0700414Return a translation table (a bytes object of length 256) suitable\n\
415for use in the bytes or bytearray translate method where each byte\n\
416in frm is mapped to the byte at the same position in to.\n\
417The bytes objects frm and to must be of the same length.");
Georg Brandlabc38772009-04-12 15:51:51 +0000418
Georg Brandlabc38772009-04-12 15:51:51 +0000419PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200420_Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
Georg Brandlabc38772009-04-12 15:51:51 +0000421{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +0200422 PyObject *res = NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000423 Py_ssize_t i;
424 char *p;
Georg Brandlabc38772009-04-12 15:51:51 +0000425
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200426 if (frm->len != to->len) {
Antoine Pitrou9b491922010-08-15 17:38:46 +0000427 PyErr_Format(PyExc_ValueError,
428 "maketrans arguments must have same length");
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200429 return NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000430 }
431 res = PyBytes_FromStringAndSize(NULL, 256);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200432 if (!res)
433 return NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000434 p = PyBytes_AS_STRING(res);
435 for (i = 0; i < 256; i++)
Antoine Pitrou47019e52010-08-15 17:41:31 +0000436 p[i] = (char) i;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200437 for (i = 0; i < frm->len; i++) {
438 p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
Antoine Pitrou9b491922010-08-15 17:38:46 +0000439 }
Georg Brandlabc38772009-04-12 15:51:51 +0000440
Antoine Pitrou9b491922010-08-15 17:38:46 +0000441 return res;
Georg Brandlabc38772009-04-12 15:51:51 +0000442}
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300443
444#define FASTSEARCH fastsearch
445#define STRINGLIB(F) stringlib_##F
446#define STRINGLIB_CHAR char
447#define STRINGLIB_SIZEOF_CHAR 1
448
449#include "stringlib/fastsearch.h"
450#include "stringlib/count.h"
451#include "stringlib/find.h"
452
453/*
Oren Milman00425102017-03-13 00:37:05 +0200454Wraps stringlib_parse_args_finds() and additionally checks the first
455argument type.
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300456
Oren Milman00425102017-03-13 00:37:05 +0200457In case the first argument is a bytes-like object, sets it to subobj,
458and doesn't touch the byte parameter.
459In case it is an integer in range(0, 256), writes the integer value
460to byte, and sets subobj to NULL.
461
462The other parameters are similar to those of
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300463stringlib_parse_args_finds().
464*/
465
466Py_LOCAL_INLINE(int)
467parse_args_finds_byte(const char *function_name, PyObject *args,
468 PyObject **subobj, char *byte,
469 Py_ssize_t *start, Py_ssize_t *end)
470{
471 PyObject *tmp_subobj;
472 Py_ssize_t ival;
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300473
474 if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
475 start, end))
476 return 0;
477
Oren Milman00425102017-03-13 00:37:05 +0200478 if (PyObject_CheckBuffer(tmp_subobj)) {
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300479 *subobj = tmp_subobj;
480 return 1;
481 }
482
Oren Milman00425102017-03-13 00:37:05 +0200483 if (!PyIndex_Check(tmp_subobj)) {
484 PyErr_Format(PyExc_TypeError,
485 "argument should be integer or bytes-like object, "
486 "not '%.200s'",
487 Py_TYPE(tmp_subobj)->tp_name);
488 return 0;
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300489 }
490
Oren Milman00425102017-03-13 00:37:05 +0200491 ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
492 if (ival == -1 && PyErr_Occurred()) {
493 return 0;
494 }
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300495 if (ival < 0 || ival > 255) {
496 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
497 return 0;
498 }
499
500 *subobj = NULL;
501 *byte = (char)ival;
502 return 1;
503}
504
/* Helper macro to fix up start/end slice values against a buffer length.
 *
 * - end is clamped to len when too large; a negative end is taken
 *   relative to len and clamped to 0.
 * - a negative start is taken relative to len and clamped to 0.
 *   A start greater than len is deliberately left alone, so callers
 *   must still compare start against end/len themselves.
 *
 * start and end must be modifiable lvalues.  The body is wrapped in
 * do { ... } while (0) so the macro behaves as a single statement (for
 * example as the unbraced body of an if/else); invoke it with a trailing
 * semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
519
520Py_LOCAL_INLINE(Py_ssize_t)
521find_internal(const char *str, Py_ssize_t len,
522 const char *function_name, PyObject *args, int dir)
523{
524 PyObject *subobj;
525 char byte;
526 Py_buffer subbuf;
527 const char *sub;
528 Py_ssize_t sub_len;
529 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
530 Py_ssize_t res;
531
532 if (!parse_args_finds_byte(function_name, args,
533 &subobj, &byte, &start, &end))
534 return -2;
535
536 if (subobj) {
537 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
538 return -2;
539
540 sub = subbuf.buf;
541 sub_len = subbuf.len;
542 }
543 else {
544 sub = &byte;
545 sub_len = 1;
546 }
547
548 ADJUST_INDICES(start, end, len);
549 if (end - start < sub_len)
550 res = -1;
551 else if (sub_len == 1) {
552 if (dir > 0)
553 res = stringlib_find_char(
554 str + start, end - start,
555 *sub);
556 else
557 res = stringlib_rfind_char(
558 str + start, end - start,
559 *sub);
560 if (res >= 0)
561 res += start;
562 }
563 else {
564 if (dir > 0)
565 res = stringlib_find_slice(
566 str, len,
567 sub, sub_len, start, end);
568 else
569 res = stringlib_rfind_slice(
570 str, len,
571 sub, sub_len, start, end);
572 }
573
574 if (subobj)
575 PyBuffer_Release(&subbuf);
576
577 return res;
578}
579
580PyDoc_STRVAR_shared(_Py_find__doc__,
581"B.find(sub[, start[, end]]) -> int\n\
582\n\
583Return the lowest index in B where subsection sub is found,\n\
584such that sub is contained within B[start,end]. Optional\n\
585arguments start and end are interpreted as in slice notation.\n\
586\n\
587Return -1 on failure.");
588
589PyObject *
590_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
591{
592 Py_ssize_t result = find_internal(str, len, "find", args, +1);
593 if (result == -2)
594 return NULL;
595 return PyLong_FromSsize_t(result);
596}
597
598PyDoc_STRVAR_shared(_Py_index__doc__,
599"B.index(sub[, start[, end]]) -> int\n\
600\n\
Lisa Roach43ba8862017-04-04 22:36:22 -0700601Return the lowest index in B where subsection sub is found,\n\
602such that sub is contained within B[start,end]. Optional\n\
603arguments start and end are interpreted as in slice notation.\n\
604\n\
605Raises ValueError when the subsection is not found.");
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300606
607PyObject *
608_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
609{
610 Py_ssize_t result = find_internal(str, len, "index", args, +1);
611 if (result == -2)
612 return NULL;
613 if (result == -1) {
614 PyErr_SetString(PyExc_ValueError,
615 "subsection not found");
616 return NULL;
617 }
618 return PyLong_FromSsize_t(result);
619}
620
621PyDoc_STRVAR_shared(_Py_rfind__doc__,
622"B.rfind(sub[, start[, end]]) -> int\n\
623\n\
624Return the highest index in B where subsection sub is found,\n\
625such that sub is contained within B[start,end]. Optional\n\
626arguments start and end are interpreted as in slice notation.\n\
627\n\
628Return -1 on failure.");
629
630PyObject *
631_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
632{
633 Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
634 if (result == -2)
635 return NULL;
636 return PyLong_FromSsize_t(result);
637}
638
639PyDoc_STRVAR_shared(_Py_rindex__doc__,
640"B.rindex(sub[, start[, end]]) -> int\n\
641\n\
Lisa Roach43ba8862017-04-04 22:36:22 -0700642Return the highest index in B where subsection sub is found,\n\
643such that sub is contained within B[start,end]. Optional\n\
644arguments start and end are interpreted as in slice notation.\n\
645\n\
646Raise ValueError when the subsection is not found.");
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300647
648PyObject *
649_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
650{
651 Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
652 if (result == -2)
653 return NULL;
654 if (result == -1) {
655 PyErr_SetString(PyExc_ValueError,
656 "subsection not found");
657 return NULL;
658 }
659 return PyLong_FromSsize_t(result);
660}
661
662PyDoc_STRVAR_shared(_Py_count__doc__,
663"B.count(sub[, start[, end]]) -> int\n\
664\n\
665Return the number of non-overlapping occurrences of subsection sub in\n\
666bytes B[start:end]. Optional arguments start and end are interpreted\n\
667as in slice notation.");
668
669PyObject *
670_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
671{
672 PyObject *sub_obj;
673 const char *sub;
674 Py_ssize_t sub_len;
675 char byte;
676 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
677
678 Py_buffer vsub;
679 PyObject *count_obj;
680
681 if (!parse_args_finds_byte("count", args,
682 &sub_obj, &byte, &start, &end))
683 return NULL;
684
685 if (sub_obj) {
686 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
687 return NULL;
688
689 sub = vsub.buf;
690 sub_len = vsub.len;
691 }
692 else {
693 sub = &byte;
694 sub_len = 1;
695 }
696
697 ADJUST_INDICES(start, end, len);
698
699 count_obj = PyLong_FromSsize_t(
700 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
701 );
702
703 if (sub_obj)
704 PyBuffer_Release(&vsub);
705
706 return count_obj;
707}
708
709int
710_Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
711{
Serhiy Storchakaf9efb8b2016-07-10 12:37:30 +0300712 Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300713 if (ival == -1 && PyErr_Occurred()) {
714 Py_buffer varg;
715 Py_ssize_t pos;
716 PyErr_Clear();
717 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
718 return -1;
719 pos = stringlib_find(str, len,
720 varg.buf, varg.len, 0);
721 PyBuffer_Release(&varg);
722 return pos >= 0;
723 }
724 if (ival < 0 || ival >= 256) {
725 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
726 return -1;
727 }
728
729 return memchr(str, (int) ival, len) != NULL;
730}
731
732
733/* Matches the end (direction >= 0) or start (direction < 0) of the buffer
734 * against substr, using the start and end arguments. Returns
735 * -1 on error, 0 if not found and 1 if found.
736 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700737static int
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300738tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
739 Py_ssize_t start, Py_ssize_t end, int direction)
740{
741 Py_buffer sub_view = {NULL, NULL};
742 const char *sub;
743 Py_ssize_t slen;
744
745 if (PyBytes_Check(substr)) {
746 sub = PyBytes_AS_STRING(substr);
747 slen = PyBytes_GET_SIZE(substr);
748 }
749 else {
750 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
751 return -1;
752 sub = sub_view.buf;
753 slen = sub_view.len;
754 }
755
756 ADJUST_INDICES(start, end, len);
757
758 if (direction < 0) {
759 /* startswith */
760 if (start + slen > len)
761 goto notfound;
762 } else {
763 /* endswith */
764 if (end - start < slen || start > len)
765 goto notfound;
766
767 if (end - slen > start)
768 start = end - slen;
769 }
770 if (end - start < slen)
771 goto notfound;
772 if (memcmp(str + start, sub, slen) != 0)
773 goto notfound;
774
775 PyBuffer_Release(&sub_view);
776 return 1;
777
778notfound:
779 PyBuffer_Release(&sub_view);
780 return 0;
781}
782
Benjamin Peterson621b4302016-09-09 13:54:34 -0700783static PyObject *
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300784_Py_bytes_tailmatch(const char *str, Py_ssize_t len,
785 const char *function_name, PyObject *args,
786 int direction)
787{
788 Py_ssize_t start = 0;
789 Py_ssize_t end = PY_SSIZE_T_MAX;
790 PyObject *subobj;
791 int result;
792
793 if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
794 return NULL;
795 if (PyTuple_Check(subobj)) {
796 Py_ssize_t i;
797 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
798 result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
799 start, end, direction);
800 if (result == -1)
801 return NULL;
802 else if (result) {
803 Py_RETURN_TRUE;
804 }
805 }
806 Py_RETURN_FALSE;
807 }
808 result = tailmatch(str, len, subobj, start, end, direction);
809 if (result == -1) {
810 if (PyErr_ExceptionMatches(PyExc_TypeError))
811 PyErr_Format(PyExc_TypeError,
812 "%s first arg must be bytes or a tuple of bytes, "
813 "not %s",
814 function_name, Py_TYPE(subobj)->tp_name);
815 return NULL;
816 }
817 else
818 return PyBool_FromLong(result);
819}
820
821PyDoc_STRVAR_shared(_Py_startswith__doc__,
822"B.startswith(prefix[, start[, end]]) -> bool\n\
823\n\
824Return True if B starts with the specified prefix, False otherwise.\n\
825With optional start, test B beginning at that position.\n\
826With optional end, stop comparing B at that position.\n\
827prefix can also be a tuple of bytes to try.");
828
829PyObject *
830_Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
831{
832 return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
833}
834
835PyDoc_STRVAR_shared(_Py_endswith__doc__,
836"B.endswith(suffix[, start[, end]]) -> bool\n\
837\n\
838Return True if B ends with the specified suffix, False otherwise.\n\
839With optional start, test B beginning at that position.\n\
840With optional end, stop comparing B at that position.\n\
841suffix can also be a tuple of bytes to try.");
842
843PyObject *
844_Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
845{
846 return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
847}
848
849PyDoc_STRVAR_shared(_Py_expandtabs__doc__,
850"B.expandtabs(tabsize=8) -> copy of B\n\
851\n\
852Return a copy of B where all tab characters are expanded using spaces.\n\
853If tabsize is not given, a tab size of 8 characters is assumed.");
854
/* Docstring text for B.ljust().  Only the string is defined here; no
   ljust implementation appears in this part of the file. */
PyDoc_STRVAR_shared(_Py_ljust__doc__,
"B.ljust(width[, fillchar]) -> copy of B\n"
"\n"
"Return B left justified in a string of length width. Padding is\n"
"done using the specified fill character (default is a space).");
860
861PyDoc_STRVAR_shared(_Py_rjust__doc__,
862"B.rjust(width[, fillchar]) -> copy of B\n"
863"\n"
864"Return B right justified in a string of length width. Padding is\n"
865"done using the specified fill character (default is a space)");
866
/* Docstring text for B.center().  Only the string is defined here; no
   center implementation appears in this part of the file. */
PyDoc_STRVAR_shared(_Py_center__doc__,
"B.center(width[, fillchar]) -> copy of B\n"
"\n"
"Return B centered in a string of length width. Padding is\n"
"done using the specified fill character (default is a space).");
872
/* Docstring text for B.zfill().  Only the string is defined here; no
   zfill implementation appears in this part of the file. */
PyDoc_STRVAR_shared(_Py_zfill__doc__,
"B.zfill(width) -> copy of B\n"
"\n"
"Pad a numeric string B with zeros on the left, to fill a field\n"
"of the specified width. B is never truncated.");