blob: 518e012b759f8485fec4193c2537f28ebcf72c77 [file] [log] [blame]
Thomas Wouters477c8d52006-05-27 19:21:47 +00001/* stringlib: find/index implementation */
2
Thomas Wouters477c8d52006-05-27 19:21:47 +00003#ifndef STRINGLIB_FASTSEARCH_H
4#error must include "stringlib/fastsearch.h" before including this module
5#endif
6
7Py_LOCAL_INLINE(Py_ssize_t)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02008STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00009 const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
10 Py_ssize_t offset)
11{
12 Py_ssize_t pos;
13
Amaury Forgeot d'Arcf2e93682008-09-26 22:48:41 +000014 if (str_len < 0)
15 return -1;
16 if (sub_len == 0)
Thomas Wouters477c8d52006-05-27 19:21:47 +000017 return offset;
18
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020019 pos = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_SEARCH);
Thomas Wouters477c8d52006-05-27 19:21:47 +000020
21 if (pos >= 0)
22 pos += offset;
23
24 return pos;
25}
26
27Py_LOCAL_INLINE(Py_ssize_t)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020028STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +000029 const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
30 Py_ssize_t offset)
31{
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +000032 Py_ssize_t pos;
33
34 if (str_len < 0)
35 return -1;
36 if (sub_len == 0)
37 return str_len + offset;
38
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020039 pos = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +000040
41 if (pos >= 0)
42 pos += offset;
43
44 return pos;
Thomas Wouters477c8d52006-05-27 19:21:47 +000045}
46
/* Helper macro to fix up start/end slice values against len.  Both start
   and end are modified in place:
     - an end greater than len is clamped to len;
     - a negative end has len added to it, then is clamped to 0 if it is
       still negative;
     - a negative start has len added to it, then is clamped to 0 if it is
       still negative.
   start is never clamped from above, so start may exceed end afterwards.

   The body is wrapped in do { ... } while (0) so the macro expands to a
   single statement and can be used safely under an unbraced if/else.
   Every argument is parenthesized, but arguments are still evaluated more
   than once, so they must be free of side effects.  Invoke the macro with
   a trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
61
Thomas Wouters477c8d52006-05-27 19:21:47 +000062Py_LOCAL_INLINE(Py_ssize_t)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020063STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +000064 const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
65 Py_ssize_t start, Py_ssize_t end)
66{
Antoine Pitrouf2c54842010-01-13 08:07:53 +000067 ADJUST_INDICES(start, end, str_len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020068 return STRINGLIB(find)(str + start, end - start, sub, sub_len, start);
Thomas Wouters477c8d52006-05-27 19:21:47 +000069}
70
71Py_LOCAL_INLINE(Py_ssize_t)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020072STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +000073 const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
74 Py_ssize_t start, Py_ssize_t end)
75{
Antoine Pitrouf2c54842010-01-13 08:07:53 +000076 ADJUST_INDICES(start, end, str_len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020077 return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start);
Thomas Wouters477c8d52006-05-27 19:21:47 +000078}
79
Christian Heimes4f3c5612008-08-28 14:55:10 +000080#ifdef STRINGLIB_WANT_CONTAINS_OBJ
Thomas Wouters477c8d52006-05-27 19:21:47 +000081
82Py_LOCAL_INLINE(int)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020083STRINGLIB(contains_obj)(PyObject* str, PyObject* sub)
Thomas Wouters477c8d52006-05-27 19:21:47 +000084{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020085 return STRINGLIB(find)(
Thomas Wouters477c8d52006-05-27 19:21:47 +000086 STRINGLIB_STR(str), STRINGLIB_LEN(str),
87 STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0
88 ) != -1;
89}
90
Antoine Pitrouf2c54842010-01-13 08:07:53 +000091#endif /* STRINGLIB_WANT_CONTAINS_OBJ */
Thomas Wouters477c8d52006-05-27 19:21:47 +000092
Christian Heimes9cd17752007-11-18 19:35:23 +000093/*
94This function is a helper for the "find" family (find, rfind, index,
Jesus Ceaac451502011-04-20 17:09:23 +020095rindex) and for count, startswith and endswith, because they all have
96the same behaviour for the arguments.
Christian Heimes9cd17752007-11-18 19:35:23 +000097
Victor Stinner9db1a8b2011-10-23 20:04:37 +020098It does not touch the variables received until it knows everything
Christian Heimes9cd17752007-11-18 19:35:23 +000099is ok.
Christian Heimes9cd17752007-11-18 19:35:23 +0000100*/
101
Jesus Ceaac451502011-04-20 17:09:23 +0200102#define FORMAT_BUFFER_SIZE 50
103
Christian Heimes9cd17752007-11-18 19:35:23 +0000104Py_LOCAL_INLINE(int)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200105STRINGLIB(parse_args_finds)(const char * function_name, PyObject *args,
Jesus Ceaac451502011-04-20 17:09:23 +0200106 PyObject **subobj,
107 Py_ssize_t *start, Py_ssize_t *end)
108{
109 PyObject *tmp_subobj;
Christian Heimes9cd17752007-11-18 19:35:23 +0000110 Py_ssize_t tmp_start = 0;
111 Py_ssize_t tmp_end = PY_SSIZE_T_MAX;
112 PyObject *obj_start=Py_None, *obj_end=Py_None;
Jesus Ceaac451502011-04-20 17:09:23 +0200113 char format[FORMAT_BUFFER_SIZE] = "O|OO:";
114 size_t len = strlen(format);
Christian Heimes9cd17752007-11-18 19:35:23 +0000115
Jesus Ceaac451502011-04-20 17:09:23 +0200116 strncpy(format + len, function_name, FORMAT_BUFFER_SIZE - len - 1);
117 format[FORMAT_BUFFER_SIZE - 1] = '\0';
118
119 if (!PyArg_ParseTuple(args, format, &tmp_subobj, &obj_start, &obj_end))
Christian Heimes9cd17752007-11-18 19:35:23 +0000120 return 0;
121
122 /* To support None in "start" and "end" arguments, meaning
123 the same as if they were not passed.
124 */
125 if (obj_start != Py_None)
126 if (!_PyEval_SliceIndex(obj_start, &tmp_start))
127 return 0;
128 if (obj_end != Py_None)
129 if (!_PyEval_SliceIndex(obj_end, &tmp_end))
130 return 0;
131
Christian Heimes9cd17752007-11-18 19:35:23 +0000132 *start = tmp_start;
133 *end = tmp_end;
Jesus Ceaac451502011-04-20 17:09:23 +0200134 *subobj = tmp_subobj;
Christian Heimes9cd17752007-11-18 19:35:23 +0000135 return 1;
136}
137
Jesus Ceaac451502011-04-20 17:09:23 +0200138#undef FORMAT_BUFFER_SIZE
139
Jesus Cea6159ee32011-04-20 17:42:50 +0200140#if STRINGLIB_IS_UNICODE
Jesus Ceaac451502011-04-20 17:09:23 +0200141
142/*
143Wraps stringlib_parse_args_finds() and additionally ensures that the
144first argument is a unicode object.
145
146Note that we receive a pointer to the pointer of the substring object,
147so when we create that object in this function we don't DECREF it,
Victor Stinner9db1a8b2011-10-23 20:04:37 +0200148because it continues living in the caller functions (those functions,
Jesus Ceaac451502011-04-20 17:09:23 +0200149after finishing using the substring, must DECREF it).
150*/
151
152Py_LOCAL_INLINE(int)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200153STRINGLIB(parse_args_finds_unicode)(const char * function_name, PyObject *args,
Victor Stinner9db1a8b2011-10-23 20:04:37 +0200154 PyObject **substring,
Jesus Ceaac451502011-04-20 17:09:23 +0200155 Py_ssize_t *start, Py_ssize_t *end)
156{
157 PyObject *tmp_substring;
158
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200159 if(STRINGLIB(parse_args_finds)(function_name, args, &tmp_substring,
Jesus Ceaac451502011-04-20 17:09:23 +0200160 start, end)) {
161 tmp_substring = PyUnicode_FromObject(tmp_substring);
162 if (!tmp_substring)
163 return 0;
Victor Stinner9db1a8b2011-10-23 20:04:37 +0200164 *substring = tmp_substring;
Jesus Ceaac451502011-04-20 17:09:23 +0200165 return 1;
166 }
167 return 0;
168}
169
Antoine Pitrouac65d962011-10-20 23:54:17 +0200170#else /* !STRINGLIB_IS_UNICODE */
171
172/*
173Wraps stringlib_parse_args_finds() and additionally checks whether the
174first argument is an integer in range(0, 256).
175
176If this is the case, writes the integer value to the byte parameter
177and sets subobj to NULL. Otherwise, sets the first argument to subobj
178and doesn't touch byte. The other parameters are similar to those of
179stringlib_parse_args_finds().
180*/
181
182Py_LOCAL_INLINE(int)
183STRINGLIB(parse_args_finds_byte)(const char *function_name, PyObject *args,
184 PyObject **subobj, char *byte,
185 Py_ssize_t *start, Py_ssize_t *end)
186{
187 PyObject *tmp_subobj;
188 Py_ssize_t ival;
Victor Stinnerf8eac002011-12-18 01:17:41 +0100189 PyObject *err;
Antoine Pitrouac65d962011-10-20 23:54:17 +0200190
191 if(!STRINGLIB(parse_args_finds)(function_name, args, &tmp_subobj,
192 start, end))
193 return 0;
194
Victor Stinnerf8eac002011-12-18 01:17:41 +0100195 if (!PyNumber_Check(tmp_subobj)) {
Antoine Pitrouac65d962011-10-20 23:54:17 +0200196 *subobj = tmp_subobj;
Victor Stinnerf8eac002011-12-18 01:17:41 +0100197 return 1;
Antoine Pitrouac65d962011-10-20 23:54:17 +0200198 }
Victor Stinnerf8eac002011-12-18 01:17:41 +0100199
200 ival = PyNumber_AsSsize_t(tmp_subobj, PyExc_OverflowError);
201 if (ival == -1) {
202 err = PyErr_Occurred();
203 if (err && !PyErr_GivenExceptionMatches(err, PyExc_OverflowError)) {
204 PyErr_Clear();
205 *subobj = tmp_subobj;
206 return 1;
Antoine Pitrouac65d962011-10-20 23:54:17 +0200207 }
Antoine Pitrouac65d962011-10-20 23:54:17 +0200208 }
209
Victor Stinnerf8eac002011-12-18 01:17:41 +0100210 if (ival < 0 || ival > 255) {
211 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
212 return 0;
213 }
214
215 *subobj = NULL;
216 *byte = (char)ival;
Antoine Pitrouac65d962011-10-20 23:54:17 +0200217 return 1;
218}
219
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000220#endif /* STRINGLIB_IS_UNICODE */