blob: d5c4fe6346fc53a984babb3b91870e2a7dbe2d68 [file] [log] [blame]
Serhiy Storchakaab8bcb32016-07-03 13:26:52 +03001#define PY_SSIZE_T_CLEAN
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002#include "Python.h"
3#include "bytes_methods.h"
4
Gregory P. Smith60d241f2007-10-16 06:31:30 +00005PyDoc_STRVAR_shared(_Py_isspace__doc__,
6"B.isspace() -> bool\n\
7\n\
8Return True if all characters in B are whitespace\n\
9and there is at least one character in B, False otherwise.");
10
11PyObject*
12_Py_bytes_isspace(const char *cptr, Py_ssize_t len)
13{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020014 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020016 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000017
18 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000019 if (len == 1 && Py_ISSPACE(*p))
Gregory P. Smith60d241f2007-10-16 06:31:30 +000020 Py_RETURN_TRUE;
21
22 /* Special case for empty strings */
23 if (len == 0)
24 Py_RETURN_FALSE;
25
26 e = p + len;
27 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000028 if (!Py_ISSPACE(*p))
Gregory P. Smith60d241f2007-10-16 06:31:30 +000029 Py_RETURN_FALSE;
30 }
31 Py_RETURN_TRUE;
32}
33
34
35PyDoc_STRVAR_shared(_Py_isalpha__doc__,
36"B.isalpha() -> bool\n\
37\n\
38Return True if all characters in B are alphabetic\n\
39and there is at least one character in B, False otherwise.");
40
41PyObject*
42_Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
43{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020044 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +000045 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020046 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000047
48 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000049 if (len == 1 && Py_ISALPHA(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000050 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000051
52 /* Special case for empty strings */
53 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000054 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000055
56 e = p + len;
57 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000058 if (!Py_ISALPHA(*p))
59 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000060 }
61 Py_RETURN_TRUE;
62}
63
64
65PyDoc_STRVAR_shared(_Py_isalnum__doc__,
66"B.isalnum() -> bool\n\
67\n\
68Return True if all characters in B are alphanumeric\n\
69and there is at least one character in B, False otherwise.");
70
71PyObject*
72_Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
73{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020074 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +000075 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020076 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000077
78 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +000079 if (len == 1 && Py_ISALNUM(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000081
82 /* Special case for empty strings */
83 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000085
86 e = p + len;
87 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000088 if (!Py_ISALNUM(*p))
89 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +000090 }
91 Py_RETURN_TRUE;
92}
93
94
95PyDoc_STRVAR_shared(_Py_isdigit__doc__,
96"B.isdigit() -> bool\n\
97\n\
98Return True if all characters in B are digits\n\
99and there is at least one character in B, False otherwise.");
100
101PyObject*
102_Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
103{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200104 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000105 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200106 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000107
108 /* Shortcut for single character strings */
Eric Smith6dc46f52009-04-27 20:39:49 +0000109 if (len == 1 && Py_ISDIGIT(*p))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 Py_RETURN_TRUE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000111
112 /* Special case for empty strings */
113 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000114 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000115
116 e = p + len;
117 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000118 if (!Py_ISDIGIT(*p))
119 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000120 }
121 Py_RETURN_TRUE;
122}
123
124
125PyDoc_STRVAR_shared(_Py_islower__doc__,
126"B.islower() -> bool\n\
127\n\
128Return True if all cased characters in B are lowercase and there is\n\
129at least one cased character in B, False otherwise.");
130
131PyObject*
132_Py_bytes_islower(const char *cptr, Py_ssize_t len)
133{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200134 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000135 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200136 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000137 int cased;
138
139 /* Shortcut for single character strings */
140 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 return PyBool_FromLong(Py_ISLOWER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000142
143 /* Special case for empty strings */
144 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000146
147 e = p + len;
148 cased = 0;
149 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 if (Py_ISUPPER(*p))
151 Py_RETURN_FALSE;
152 else if (!cased && Py_ISLOWER(*p))
153 cased = 1;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000154 }
155 return PyBool_FromLong(cased);
156}
157
158
159PyDoc_STRVAR_shared(_Py_isupper__doc__,
160"B.isupper() -> bool\n\
161\n\
162Return True if all cased characters in B are uppercase and there is\n\
163at least one cased character in B, False otherwise.");
164
165PyObject*
166_Py_bytes_isupper(const char *cptr, Py_ssize_t len)
167{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200168 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000169 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200170 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000171 int cased;
172
173 /* Shortcut for single character strings */
174 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return PyBool_FromLong(Py_ISUPPER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000176
177 /* Special case for empty strings */
178 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000179 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000180
181 e = p + len;
182 cased = 0;
183 for (; p < e; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 if (Py_ISLOWER(*p))
185 Py_RETURN_FALSE;
186 else if (!cased && Py_ISUPPER(*p))
187 cased = 1;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000188 }
189 return PyBool_FromLong(cased);
190}
191
192
193PyDoc_STRVAR_shared(_Py_istitle__doc__,
194"B.istitle() -> bool\n\
195\n\
196Return True if B is a titlecased string and there is at least one\n\
197character in B, i.e. uppercase characters may only follow uncased\n\
198characters and lowercase characters only cased ones. Return False\n\
199otherwise.");
200
201PyObject*
202_Py_bytes_istitle(const char *cptr, Py_ssize_t len)
203{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200204 const unsigned char *p
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000205 = (unsigned char *) cptr;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200206 const unsigned char *e;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000207 int cased, previous_is_cased;
208
209 /* Shortcut for single character strings */
210 if (len == 1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 return PyBool_FromLong(Py_ISUPPER(*p));
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000212
213 /* Special case for empty strings */
214 if (len == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000215 Py_RETURN_FALSE;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000216
217 e = p + len;
218 cased = 0;
219 previous_is_cased = 0;
220 for (; p < e; p++) {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200221 const unsigned char ch = *p;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000222
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000223 if (Py_ISUPPER(ch)) {
224 if (previous_is_cased)
225 Py_RETURN_FALSE;
226 previous_is_cased = 1;
227 cased = 1;
228 }
229 else if (Py_ISLOWER(ch)) {
230 if (!previous_is_cased)
231 Py_RETURN_FALSE;
232 previous_is_cased = 1;
233 cased = 1;
234 }
235 else
236 previous_is_cased = 0;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000237 }
238 return PyBool_FromLong(cased);
239}
240
241
242PyDoc_STRVAR_shared(_Py_lower__doc__,
243"B.lower() -> copy of B\n\
244\n\
245Return a copy of B with all ASCII characters converted to lowercase.");
246
247void
248_Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
249{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000250 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000251
Antoine Pitrou9b491922010-08-15 17:38:46 +0000252 for (i = 0; i < len; i++) {
Antoine Pitrou94f6fa62012-01-08 16:22:46 +0100253 result[i] = Py_TOLOWER((unsigned char) cptr[i]);
Antoine Pitrou9b491922010-08-15 17:38:46 +0000254 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000255}
256
257
258PyDoc_STRVAR_shared(_Py_upper__doc__,
259"B.upper() -> copy of B\n\
260\n\
261Return a copy of B with all ASCII characters converted to uppercase.");
262
263void
264_Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
265{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000266 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000267
Antoine Pitrou9b491922010-08-15 17:38:46 +0000268 for (i = 0; i < len; i++) {
Antoine Pitrou94f6fa62012-01-08 16:22:46 +0100269 result[i] = Py_TOUPPER((unsigned char) cptr[i]);
Antoine Pitrou9b491922010-08-15 17:38:46 +0000270 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000271}
272
273
274PyDoc_STRVAR_shared(_Py_title__doc__,
275"B.title() -> copy of B\n\
276\n\
277Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
278characters, all remaining cased characters have lowercase.");
279
280void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200281_Py_bytes_title(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000282{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000283 Py_ssize_t i;
284 int previous_is_cased = 0;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000285
Antoine Pitrou9b491922010-08-15 17:38:46 +0000286 for (i = 0; i < len; i++) {
287 int c = Py_CHARMASK(*s++);
288 if (Py_ISLOWER(c)) {
289 if (!previous_is_cased)
290 c = Py_TOUPPER(c);
291 previous_is_cased = 1;
292 } else if (Py_ISUPPER(c)) {
293 if (previous_is_cased)
294 c = Py_TOLOWER(c);
295 previous_is_cased = 1;
296 } else
297 previous_is_cased = 0;
298 *result++ = c;
299 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000300}
301
302
303PyDoc_STRVAR_shared(_Py_capitalize__doc__,
304"B.capitalize() -> copy of B\n\
305\n\
Senthil Kumarane51ee8a2010-07-05 12:00:56 +0000306Return a copy of B with only its first character capitalized (ASCII)\n\
307and the rest lower-cased.");
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000308
309void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200310_Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000311{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000312 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000313
Antoine Pitrou9b491922010-08-15 17:38:46 +0000314 if (0 < len) {
315 int c = Py_CHARMASK(*s++);
316 if (Py_ISLOWER(c))
317 *result = Py_TOUPPER(c);
318 else
319 *result = c;
320 result++;
321 }
322 for (i = 1; i < len; i++) {
323 int c = Py_CHARMASK(*s++);
324 if (Py_ISUPPER(c))
325 *result = Py_TOLOWER(c);
326 else
327 *result = c;
328 result++;
329 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000330}
331
332
333PyDoc_STRVAR_shared(_Py_swapcase__doc__,
334"B.swapcase() -> copy of B\n\
335\n\
336Return a copy of B with uppercase ASCII characters converted\n\
337to lowercase ASCII and vice versa.");
338
339void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200340_Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000341{
Antoine Pitrou9b491922010-08-15 17:38:46 +0000342 Py_ssize_t i;
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000343
Antoine Pitrou9b491922010-08-15 17:38:46 +0000344 for (i = 0; i < len; i++) {
345 int c = Py_CHARMASK(*s++);
346 if (Py_ISLOWER(c)) {
347 *result = Py_TOUPPER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 }
Antoine Pitrou9b491922010-08-15 17:38:46 +0000349 else if (Py_ISUPPER(c)) {
350 *result = Py_TOLOWER(c);
351 }
352 else
353 *result = c;
354 result++;
355 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000356}
357
Georg Brandlabc38772009-04-12 15:51:51 +0000358
359PyDoc_STRVAR_shared(_Py_maketrans__doc__,
360"B.maketrans(frm, to) -> translation table\n\
361\n\
Senthil Kumaran84e3ccc2011-06-27 09:06:45 -0700362Return a translation table (a bytes object of length 256) suitable\n\
363for use in the bytes or bytearray translate method where each byte\n\
364in frm is mapped to the byte at the same position in to.\n\
365The bytes objects frm and to must be of the same length.");
Georg Brandlabc38772009-04-12 15:51:51 +0000366
Georg Brandlabc38772009-04-12 15:51:51 +0000367PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200368_Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
Georg Brandlabc38772009-04-12 15:51:51 +0000369{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +0200370 PyObject *res = NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000371 Py_ssize_t i;
372 char *p;
Georg Brandlabc38772009-04-12 15:51:51 +0000373
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200374 if (frm->len != to->len) {
Antoine Pitrou9b491922010-08-15 17:38:46 +0000375 PyErr_Format(PyExc_ValueError,
376 "maketrans arguments must have same length");
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200377 return NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000378 }
379 res = PyBytes_FromStringAndSize(NULL, 256);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200380 if (!res)
381 return NULL;
Antoine Pitrou9b491922010-08-15 17:38:46 +0000382 p = PyBytes_AS_STRING(res);
383 for (i = 0; i < 256; i++)
Antoine Pitrou47019e52010-08-15 17:41:31 +0000384 p[i] = (char) i;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +0200385 for (i = 0; i < frm->len; i++) {
386 p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
Antoine Pitrou9b491922010-08-15 17:38:46 +0000387 }
Georg Brandlabc38772009-04-12 15:51:51 +0000388
Antoine Pitrou9b491922010-08-15 17:38:46 +0000389 return res;
Georg Brandlabc38772009-04-12 15:51:51 +0000390}
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300391
392#define FASTSEARCH fastsearch
393#define STRINGLIB(F) stringlib_##F
394#define STRINGLIB_CHAR char
395#define STRINGLIB_SIZEOF_CHAR 1
396
397#include "stringlib/fastsearch.h"
398#include "stringlib/count.h"
399#include "stringlib/find.h"
400
401/*
402Wraps stringlib_parse_args_finds() and additionally checks whether the
403first argument is an integer in range(0, 256).
404
405If this is the case, writes the integer value to the byte parameter
406and sets subobj to NULL. Otherwise, sets the first argument to subobj
407and doesn't touch byte. The other parameters are similar to those of
408stringlib_parse_args_finds().
409*/
410
411Py_LOCAL_INLINE(int)
412parse_args_finds_byte(const char *function_name, PyObject *args,
413 PyObject **subobj, char *byte,
414 Py_ssize_t *start, Py_ssize_t *end)
415{
416 PyObject *tmp_subobj;
417 Py_ssize_t ival;
418 PyObject *err;
419
420 if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
421 start, end))
422 return 0;
423
424 if (!PyNumber_Check(tmp_subobj)) {
425 *subobj = tmp_subobj;
426 return 1;
427 }
428
429 ival = PyNumber_AsSsize_t(tmp_subobj, PyExc_OverflowError);
430 if (ival == -1) {
431 err = PyErr_Occurred();
432 if (err && !PyErr_GivenExceptionMatches(err, PyExc_OverflowError)) {
433 PyErr_Clear();
434 *subobj = tmp_subobj;
435 return 1;
436 }
437 }
438
439 if (ival < 0 || ival > 255) {
440 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
441 return 0;
442 }
443
444 *subobj = NULL;
445 *byte = (char)ival;
446 return 1;
447}
448
/* Helper macro to fix up start/end slice values in place.
 *
 * end:   clamped to len when larger; when negative, len is added and the
 *        result is clamped to 0.
 * start: when negative, len is added and the result is clamped to 0.
 *        A start greater than len is left as is, so callers must cope
 *        with start > end.
 *
 * start and end must be modifiable lvalues.  The body is wrapped in
 * do { } while (0) so the macro expands to exactly one statement and is
 * safe inside an unbraced if/else; arguments are parenthesized.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
463
464Py_LOCAL_INLINE(Py_ssize_t)
465find_internal(const char *str, Py_ssize_t len,
466 const char *function_name, PyObject *args, int dir)
467{
468 PyObject *subobj;
469 char byte;
470 Py_buffer subbuf;
471 const char *sub;
472 Py_ssize_t sub_len;
473 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
474 Py_ssize_t res;
475
476 if (!parse_args_finds_byte(function_name, args,
477 &subobj, &byte, &start, &end))
478 return -2;
479
480 if (subobj) {
481 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
482 return -2;
483
484 sub = subbuf.buf;
485 sub_len = subbuf.len;
486 }
487 else {
488 sub = &byte;
489 sub_len = 1;
490 }
491
492 ADJUST_INDICES(start, end, len);
493 if (end - start < sub_len)
494 res = -1;
495 else if (sub_len == 1) {
496 if (dir > 0)
497 res = stringlib_find_char(
498 str + start, end - start,
499 *sub);
500 else
501 res = stringlib_rfind_char(
502 str + start, end - start,
503 *sub);
504 if (res >= 0)
505 res += start;
506 }
507 else {
508 if (dir > 0)
509 res = stringlib_find_slice(
510 str, len,
511 sub, sub_len, start, end);
512 else
513 res = stringlib_rfind_slice(
514 str, len,
515 sub, sub_len, start, end);
516 }
517
518 if (subobj)
519 PyBuffer_Release(&subbuf);
520
521 return res;
522}
523
524PyDoc_STRVAR_shared(_Py_find__doc__,
525"B.find(sub[, start[, end]]) -> int\n\
526\n\
527Return the lowest index in B where subsection sub is found,\n\
528such that sub is contained within B[start,end]. Optional\n\
529arguments start and end are interpreted as in slice notation.\n\
530\n\
531Return -1 on failure.");
532
533PyObject *
534_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
535{
536 Py_ssize_t result = find_internal(str, len, "find", args, +1);
537 if (result == -2)
538 return NULL;
539 return PyLong_FromSsize_t(result);
540}
541
542PyDoc_STRVAR_shared(_Py_index__doc__,
543"B.index(sub[, start[, end]]) -> int\n\
544\n\
545Like B.find() but raise ValueError when the subsection is not found.");
546
547PyObject *
548_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
549{
550 Py_ssize_t result = find_internal(str, len, "index", args, +1);
551 if (result == -2)
552 return NULL;
553 if (result == -1) {
554 PyErr_SetString(PyExc_ValueError,
555 "subsection not found");
556 return NULL;
557 }
558 return PyLong_FromSsize_t(result);
559}
560
561PyDoc_STRVAR_shared(_Py_rfind__doc__,
562"B.rfind(sub[, start[, end]]) -> int\n\
563\n\
564Return the highest index in B where subsection sub is found,\n\
565such that sub is contained within B[start,end]. Optional\n\
566arguments start and end are interpreted as in slice notation.\n\
567\n\
568Return -1 on failure.");
569
570PyObject *
571_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
572{
573 Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
574 if (result == -2)
575 return NULL;
576 return PyLong_FromSsize_t(result);
577}
578
579PyDoc_STRVAR_shared(_Py_rindex__doc__,
580"B.rindex(sub[, start[, end]]) -> int\n\
581\n\
582Like B.rfind() but raise ValueError when the subsection is not found.");
583
584PyObject *
585_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
586{
587 Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
588 if (result == -2)
589 return NULL;
590 if (result == -1) {
591 PyErr_SetString(PyExc_ValueError,
592 "subsection not found");
593 return NULL;
594 }
595 return PyLong_FromSsize_t(result);
596}
597
598PyDoc_STRVAR_shared(_Py_count__doc__,
599"B.count(sub[, start[, end]]) -> int\n\
600\n\
601Return the number of non-overlapping occurrences of subsection sub in\n\
602bytes B[start:end]. Optional arguments start and end are interpreted\n\
603as in slice notation.");
604
605PyObject *
606_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
607{
608 PyObject *sub_obj;
609 const char *sub;
610 Py_ssize_t sub_len;
611 char byte;
612 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
613
614 Py_buffer vsub;
615 PyObject *count_obj;
616
617 if (!parse_args_finds_byte("count", args,
618 &sub_obj, &byte, &start, &end))
619 return NULL;
620
621 if (sub_obj) {
622 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
623 return NULL;
624
625 sub = vsub.buf;
626 sub_len = vsub.len;
627 }
628 else {
629 sub = &byte;
630 sub_len = 1;
631 }
632
633 ADJUST_INDICES(start, end, len);
634
635 count_obj = PyLong_FromSsize_t(
636 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
637 );
638
639 if (sub_obj)
640 PyBuffer_Release(&vsub);
641
642 return count_obj;
643}
644
645int
646_Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
647{
Serhiy Storchakaf9efb8b2016-07-10 12:37:30 +0300648 Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300649 if (ival == -1 && PyErr_Occurred()) {
650 Py_buffer varg;
651 Py_ssize_t pos;
652 PyErr_Clear();
653 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
654 return -1;
655 pos = stringlib_find(str, len,
656 varg.buf, varg.len, 0);
657 PyBuffer_Release(&varg);
658 return pos >= 0;
659 }
660 if (ival < 0 || ival >= 256) {
661 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
662 return -1;
663 }
664
665 return memchr(str, (int) ival, len) != NULL;
666}
667
668
669/* Matches the end (direction >= 0) or start (direction < 0) of the buffer
670 * against substr, using the start and end arguments. Returns
671 * -1 on error, 0 if not found and 1 if found.
672 */
Benjamin Peterson621b4302016-09-09 13:54:34 -0700673static int
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300674tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
675 Py_ssize_t start, Py_ssize_t end, int direction)
676{
677 Py_buffer sub_view = {NULL, NULL};
678 const char *sub;
679 Py_ssize_t slen;
680
681 if (PyBytes_Check(substr)) {
682 sub = PyBytes_AS_STRING(substr);
683 slen = PyBytes_GET_SIZE(substr);
684 }
685 else {
686 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
687 return -1;
688 sub = sub_view.buf;
689 slen = sub_view.len;
690 }
691
692 ADJUST_INDICES(start, end, len);
693
694 if (direction < 0) {
695 /* startswith */
696 if (start + slen > len)
697 goto notfound;
698 } else {
699 /* endswith */
700 if (end - start < slen || start > len)
701 goto notfound;
702
703 if (end - slen > start)
704 start = end - slen;
705 }
706 if (end - start < slen)
707 goto notfound;
708 if (memcmp(str + start, sub, slen) != 0)
709 goto notfound;
710
711 PyBuffer_Release(&sub_view);
712 return 1;
713
714notfound:
715 PyBuffer_Release(&sub_view);
716 return 0;
717}
718
Benjamin Peterson621b4302016-09-09 13:54:34 -0700719static PyObject *
Serhiy Storchakadd40fc32016-05-04 22:23:26 +0300720_Py_bytes_tailmatch(const char *str, Py_ssize_t len,
721 const char *function_name, PyObject *args,
722 int direction)
723{
724 Py_ssize_t start = 0;
725 Py_ssize_t end = PY_SSIZE_T_MAX;
726 PyObject *subobj;
727 int result;
728
729 if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
730 return NULL;
731 if (PyTuple_Check(subobj)) {
732 Py_ssize_t i;
733 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
734 result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
735 start, end, direction);
736 if (result == -1)
737 return NULL;
738 else if (result) {
739 Py_RETURN_TRUE;
740 }
741 }
742 Py_RETURN_FALSE;
743 }
744 result = tailmatch(str, len, subobj, start, end, direction);
745 if (result == -1) {
746 if (PyErr_ExceptionMatches(PyExc_TypeError))
747 PyErr_Format(PyExc_TypeError,
748 "%s first arg must be bytes or a tuple of bytes, "
749 "not %s",
750 function_name, Py_TYPE(subobj)->tp_name);
751 return NULL;
752 }
753 else
754 return PyBool_FromLong(result);
755}
756
757PyDoc_STRVAR_shared(_Py_startswith__doc__,
758"B.startswith(prefix[, start[, end]]) -> bool\n\
759\n\
760Return True if B starts with the specified prefix, False otherwise.\n\
761With optional start, test B beginning at that position.\n\
762With optional end, stop comparing B at that position.\n\
763prefix can also be a tuple of bytes to try.");
764
765PyObject *
766_Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
767{
768 return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
769}
770
771PyDoc_STRVAR_shared(_Py_endswith__doc__,
772"B.endswith(suffix[, start[, end]]) -> bool\n\
773\n\
774Return True if B ends with the specified suffix, False otherwise.\n\
775With optional start, test B beginning at that position.\n\
776With optional end, stop comparing B at that position.\n\
777suffix can also be a tuple of bytes to try.");
778
779PyObject *
780_Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
781{
782 return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
783}
784
785PyDoc_STRVAR_shared(_Py_expandtabs__doc__,
786"B.expandtabs(tabsize=8) -> copy of B\n\
787\n\
788Return a copy of B where all tab characters are expanded using spaces.\n\
789If tabsize is not given, a tab size of 8 characters is assumed.");
790
791PyDoc_STRVAR_shared(_Py_ljust__doc__,
792"B.ljust(width[, fillchar]) -> copy of B\n"
793"\n"
794"Return B left justified in a string of length width. Padding is\n"
795"done using the specified fill character (default is a space).");
796
797PyDoc_STRVAR_shared(_Py_rjust__doc__,
798"B.rjust(width[, fillchar]) -> copy of B\n"
799"\n"
800"Return B right justified in a string of length width. Padding is\n"
801"done using the specified fill character (default is a space)");
802
803PyDoc_STRVAR_shared(_Py_center__doc__,
804"B.center(width[, fillchar]) -> copy of B\n"
805"\n"
806"Return B centered in a string of length width. Padding is\n"
807"done using the specified fill character (default is a space).");
808
809PyDoc_STRVAR_shared(_Py_zfill__doc__,
810"B.zfill(width) -> copy of B\n"
811"\n"
812"Pad a numeric string B with zeros on the left, to fill a field\n"
813"of the specified width. B is never truncated.");
814