Issue #7622: Improve the split(), rsplit(), splitlines() and replace()
methods of bytes, bytearray and unicode objects by using a common
implementation based on stringlib's fast search. Patch by Florent Xicluna.
diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h
index b5bace7..f915296c 100644
--- a/Objects/stringlib/find.h
+++ b/Objects/stringlib/find.h
@@ -19,7 +19,7 @@
if (sub_len == 0)
return offset;
- pos = fastsearch(str, str_len, sub, sub_len, FAST_SEARCH);
+ pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_SEARCH);
if (pos >= 0)
pos += offset;
@@ -39,7 +39,7 @@
if (sub_len == 0)
return str_len + offset;
- pos = fastsearch(str, str_len, sub, sub_len, FAST_RSEARCH);
+ pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
if (pos >= 0)
pos += offset;
@@ -47,22 +47,27 @@
return pos;
}
+/* helper macro to fixup start/end slice values */
+#define ADJUST_INDICES(start, end, len) \
+ if (end > len) \
+ end = len; \
+ else if (end < 0) { \
+ end += len; \
+ if (end < 0) \
+ end = 0; \
+ } \
+ if (start < 0) { \
+ start += len; \
+ if (start < 0) \
+ start = 0; \
+ }
+
Py_LOCAL_INLINE(Py_ssize_t)
stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end)
{
- if (start < 0)
- start += str_len;
- if (start < 0)
- start = 0;
- if (end > str_len)
- end = str_len;
- if (end < 0)
- end += str_len;
- if (end < 0)
- end = 0;
-
+ ADJUST_INDICES(start, end, str_len);
return stringlib_find(str + start, end - start, sub, sub_len, start);
}
@@ -71,21 +76,11 @@
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end)
{
- if (start < 0)
- start += str_len;
- if (start < 0)
- start = 0;
- if (end > str_len)
- end = str_len;
- if (end < 0)
- end += str_len;
- if (end < 0)
- end = 0;
-
+ ADJUST_INDICES(start, end, str_len);
return stringlib_rfind(str + start, end - start, sub, sub_len, start);
}
-#if defined(STRINGLIB_STR) && !defined(FROM_BYTEARRAY)
+#ifdef STRINGLIB_WANT_CONTAINS_OBJ
Py_LOCAL_INLINE(int)
stringlib_contains_obj(PyObject* str, PyObject* sub)
@@ -96,9 +91,9 @@
) != -1;
}
-#endif /* STRINGLIB_STR */
+#endif /* STRINGLIB_WANT_CONTAINS_OBJ */
-#ifdef FROM_UNICODE
+#if STRINGLIB_IS_UNICODE
/*
This function is a helper for the "find" family (find, rfind, index,
@@ -146,13 +141,6 @@
return 1;
}
-#endif /* FROM_UNICODE */
+#endif /* STRINGLIB_IS_UNICODE */
#endif /* STRINGLIB_FIND_H */
-
-/*
-Local variables:
-c-basic-offset: 4
-indent-tabs-mode: nil
-End:
-*/