blob: 8c3456aa0ac97bc6fb4ea7fb2b1640b61937e930 [file] [log] [blame]
Ben Murdoch3ef787d2012-04-12 10:51:47 +01001// Copyright 2011 the V8 project authors. All rights reserved.
Steve Block59151502010-09-22 15:07:15 +01002// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_STRING_SEARCH_H_
29#define V8_STRING_SEARCH_H_
30
31namespace v8 {
32namespace internal {
33
34
Ben Murdochf87a2032010-10-22 12:50:53 +010035//---------------------------------------------------------------------
36// String Search object.
37//---------------------------------------------------------------------
Steve Block59151502010-09-22 15:07:15 +010038
Ben Murdochf87a2032010-10-22 12:50:53 +010039// Class holding constants and methods that apply to all string search variants,
40// independently of subject and pattern char size.
41class StringSearchBase {
42 protected:
43 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a
44 // limit, we can fix the size of tables. For a needle longer than this limit,
45 // search will not be optimal, since we only build tables for a suffix
46 // of the string, but it is a safe approximation.
Steve Block44f0eee2011-05-26 01:26:41 +010047 static const int kBMMaxShift = Isolate::kBMMaxShift;
Ben Murdochf87a2032010-10-22 12:50:53 +010048
49 // Reduce alphabet to this size.
50 // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size
51 // proportional to the input alphabet. We reduce the alphabet size by
52 // equating input characters modulo a smaller alphabet size. This gives
53 // a potentially less efficient searching, but is a safe approximation.
54 // For needles using only characters in the same Unicode 256-code point page,
55 // there is no search speed degradation.
56 static const int kAsciiAlphabetSize = 128;
Steve Block44f0eee2011-05-26 01:26:41 +010057 static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize;
Ben Murdochf87a2032010-10-22 12:50:53 +010058
59 // Bad-char shift table stored in the state. It's length is the alphabet size.
60 // For patterns below this length, the skip length of Boyer-Moore is too short
61 // to compensate for the algorithmic overhead compared to simple brute force.
62 static const int kBMMinPatternLength = 7;
63
64 static inline bool IsAsciiString(Vector<const char>) {
65 return true;
66 }
67
68 static inline bool IsAsciiString(Vector<const uc16> string) {
Steve Block9fac8402011-05-12 15:51:54 +010069 return String::IsAscii(string.start(), string.length());
Ben Murdochf87a2032010-10-22 12:50:53 +010070 }
71
Steve Block44f0eee2011-05-26 01:26:41 +010072 friend class Isolate;
Ben Murdochf87a2032010-10-22 12:50:53 +010073};
74
75
76template <typename PatternChar, typename SubjectChar>
77class StringSearch : private StringSearchBase {
Steve Block59151502010-09-22 15:07:15 +010078 public:
Steve Block44f0eee2011-05-26 01:26:41 +010079 StringSearch(Isolate* isolate, Vector<const PatternChar> pattern)
80 : isolate_(isolate),
81 pattern_(pattern),
Ben Murdochf87a2032010-10-22 12:50:53 +010082 start_(Max(0, pattern.length() - kBMMaxShift)) {
83 if (sizeof(PatternChar) > sizeof(SubjectChar)) {
84 if (!IsAsciiString(pattern_)) {
85 strategy_ = &FailSearch;
86 return;
87 }
88 }
89 int pattern_length = pattern_.length();
90 if (pattern_length < kBMMinPatternLength) {
91 if (pattern_length == 1) {
92 strategy_ = &SingleCharSearch;
93 return;
94 }
95 strategy_ = &LinearSearch;
96 return;
97 }
98 strategy_ = &InitialSearch;
99 }
100
101 int Search(Vector<const SubjectChar> subject, int index) {
102 return strategy_(this, subject, index);
103 }
104
105 static inline int AlphabetSize() {
106 if (sizeof(PatternChar) == 1) {
107 // ASCII needle.
108 return kAsciiAlphabetSize;
109 } else {
110 ASSERT(sizeof(PatternChar) == 2);
111 // UC16 needle.
112 return kUC16AlphabetSize;
Steve Block59151502010-09-22 15:07:15 +0100113 }
114 }
Ben Murdochf87a2032010-10-22 12:50:53 +0100115
Steve Block59151502010-09-22 15:07:15 +0100116 private:
Ben Murdochf87a2032010-10-22 12:50:53 +0100117 typedef int (*SearchFunction)( // NOLINT - it's not a cast!
118 StringSearch<PatternChar, SubjectChar>*,
119 Vector<const SubjectChar>,
120 int);
121
122 static int FailSearch(StringSearch<PatternChar, SubjectChar>*,
123 Vector<const SubjectChar>,
124 int) {
125 return -1;
126 }
127
128 static int SingleCharSearch(StringSearch<PatternChar, SubjectChar>* search,
129 Vector<const SubjectChar> subject,
130 int start_index);
131
132 static int LinearSearch(StringSearch<PatternChar, SubjectChar>* search,
133 Vector<const SubjectChar> subject,
134 int start_index);
135
136 static int InitialSearch(StringSearch<PatternChar, SubjectChar>* search,
137 Vector<const SubjectChar> subject,
138 int start_index);
139
140 static int BoyerMooreHorspoolSearch(
141 StringSearch<PatternChar, SubjectChar>* search,
142 Vector<const SubjectChar> subject,
143 int start_index);
144
145 static int BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search,
146 Vector<const SubjectChar> subject,
147 int start_index);
148
149 void PopulateBoyerMooreHorspoolTable();
150
151 void PopulateBoyerMooreTable();
152
153 static inline int CharOccurrence(int* bad_char_occurrence,
154 SubjectChar char_code) {
155 if (sizeof(SubjectChar) == 1) {
156 return bad_char_occurrence[static_cast<int>(char_code)];
157 }
158 if (sizeof(PatternChar) == 1) {
159 if (static_cast<unsigned int>(char_code) > String::kMaxAsciiCharCodeU) {
160 return -1;
161 }
162 return bad_char_occurrence[static_cast<unsigned int>(char_code)];
163 }
164 // Both pattern and subject are UC16. Reduce character to equivalence class.
165 int equiv_class = char_code % kUC16AlphabetSize;
166 return bad_char_occurrence[equiv_class];
167 }
168
Steve Block44f0eee2011-05-26 01:26:41 +0100169 // The following tables are shared by all searches.
170 // TODO(lrn): Introduce a way for a pattern to keep its tables
171 // between searches (e.g., for an Atom RegExp).
172
173 // Store for the BoyerMoore(Horspool) bad char shift table.
Ben Murdochf87a2032010-10-22 12:50:53 +0100174 // Return a table covering the last kBMMaxShift+1 positions of
175 // pattern.
176 int* bad_char_table() {
Steve Block44f0eee2011-05-26 01:26:41 +0100177 return isolate_->bad_char_shift_table();
Ben Murdochf87a2032010-10-22 12:50:53 +0100178 }
179
Steve Block44f0eee2011-05-26 01:26:41 +0100180 // Store for the BoyerMoore good suffix shift table.
Ben Murdochf87a2032010-10-22 12:50:53 +0100181 int* good_suffix_shift_table() {
182 // Return biased pointer that maps the range [start_..pattern_.length()
183 // to the kGoodSuffixShiftTable array.
Steve Block44f0eee2011-05-26 01:26:41 +0100184 return isolate_->good_suffix_shift_table() - start_;
Ben Murdochf87a2032010-10-22 12:50:53 +0100185 }
186
Steve Block44f0eee2011-05-26 01:26:41 +0100187 // Table used temporarily while building the BoyerMoore good suffix
188 // shift table.
Ben Murdochf87a2032010-10-22 12:50:53 +0100189 int* suffix_table() {
190 // Return biased pointer that maps the range [start_..pattern_.length()
191 // to the kSuffixTable array.
Steve Block44f0eee2011-05-26 01:26:41 +0100192 return isolate_->suffix_table() - start_;
Ben Murdochf87a2032010-10-22 12:50:53 +0100193 }
194
Steve Block44f0eee2011-05-26 01:26:41 +0100195 Isolate* isolate_;
Ben Murdochf87a2032010-10-22 12:50:53 +0100196 // The pattern to search for.
197 Vector<const PatternChar> pattern_;
198 // Pointer to implementation of the search.
199 SearchFunction strategy_;
200 // Cache value of Max(0, pattern_length() - kBMMaxShift)
201 int start_;
Steve Block59151502010-09-22 15:07:15 +0100202};
203
Steve Block59151502010-09-22 15:07:15 +0100204
Ben Murdochf87a2032010-10-22 12:50:53 +0100205//---------------------------------------------------------------------
206// Single Character Pattern Search Strategy
207//---------------------------------------------------------------------
Steve Block59151502010-09-22 15:07:15 +0100208
Ben Murdochf87a2032010-10-22 12:50:53 +0100209template <typename PatternChar, typename SubjectChar>
210int StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
211 StringSearch<PatternChar, SubjectChar>* search,
212 Vector<const SubjectChar> subject,
213 int index) {
214 ASSERT_EQ(1, search->pattern_.length());
215 PatternChar pattern_first_char = search->pattern_[0];
216 int i = index;
217 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
218 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
219 memchr(subject.start() + i,
220 pattern_first_char,
221 subject.length() - i));
222 if (pos == NULL) return -1;
223 return static_cast<int>(pos - subject.start());
Steve Block59151502010-09-22 15:07:15 +0100224 } else {
Ben Murdochf87a2032010-10-22 12:50:53 +0100225 if (sizeof(PatternChar) > sizeof(SubjectChar)) {
226 if (static_cast<uc16>(pattern_first_char) > String::kMaxAsciiCharCodeU) {
Steve Block59151502010-09-22 15:07:15 +0100227 return -1;
228 }
229 }
Ben Murdochf87a2032010-10-22 12:50:53 +0100230 SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char);
231 int n = subject.length();
232 while (i < n) {
233 if (subject[i++] == search_char) return i - 1;
234 }
235 return -1;
236 }
237}
238
239//---------------------------------------------------------------------
240// Linear Search Strategy
241//---------------------------------------------------------------------
242
243
244template <typename PatternChar, typename SubjectChar>
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100245inline bool CharCompare(const PatternChar* pattern,
246 const SubjectChar* subject,
247 int length) {
Ben Murdochf87a2032010-10-22 12:50:53 +0100248 ASSERT(length > 0);
249 int pos = 0;
250 do {
251 if (pattern[pos] != subject[pos]) {
252 return false;
253 }
254 pos++;
255 } while (pos < length);
256 return true;
257}
258
259
260// Simple linear search for short patterns. Never bails out.
261template <typename PatternChar, typename SubjectChar>
262int StringSearch<PatternChar, SubjectChar>::LinearSearch(
263 StringSearch<PatternChar, SubjectChar>* search,
264 Vector<const SubjectChar> subject,
265 int index) {
266 Vector<const PatternChar> pattern = search->pattern_;
267 ASSERT(pattern.length() > 1);
268 int pattern_length = pattern.length();
269 PatternChar pattern_first_char = pattern[0];
270 int i = index;
271 int n = subject.length() - pattern_length;
272 while (i <= n) {
273 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
274 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
275 memchr(subject.start() + i,
276 pattern_first_char,
277 n - i + 1));
278 if (pos == NULL) return -1;
279 i = static_cast<int>(pos - subject.start()) + 1;
Steve Block59151502010-09-22 15:07:15 +0100280 } else {
Ben Murdochf87a2032010-10-22 12:50:53 +0100281 if (subject[i++] != pattern_first_char) continue;
282 }
283 // Loop extracted to separate function to allow using return to do
284 // a deeper break.
285 if (CharCompare(pattern.start() + 1,
286 subject.start() + i,
287 pattern_length - 1)) {
288 return i - 1;
Steve Block59151502010-09-22 15:07:15 +0100289 }
290 }
Steve Block59151502010-09-22 15:07:15 +0100291 return -1;
292}
293
Ben Murdochf87a2032010-10-22 12:50:53 +0100294//---------------------------------------------------------------------
295// Boyer-Moore string search
296//---------------------------------------------------------------------
Steve Block59151502010-09-22 15:07:15 +0100297
Ben Murdochf87a2032010-10-22 12:50:53 +0100298template <typename PatternChar, typename SubjectChar>
299int StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
300 StringSearch<PatternChar, SubjectChar>* search,
301 Vector<const SubjectChar> subject,
302 int start_index) {
303 Vector<const PatternChar> pattern = search->pattern_;
304 int subject_length = subject.length();
305 int pattern_length = pattern.length();
Steve Block59151502010-09-22 15:07:15 +0100306 // Only preprocess at most kBMMaxShift last characters of pattern.
Ben Murdochf87a2032010-10-22 12:50:53 +0100307 int start = search->start_;
Steve Block59151502010-09-22 15:07:15 +0100308
Ben Murdochf87a2032010-10-22 12:50:53 +0100309 int* bad_char_occurence = search->bad_char_table();
310 int* good_suffix_shift = search->good_suffix_shift_table();
311
312 PatternChar last_char = pattern[pattern_length - 1];
313 int index = start_index;
Steve Block59151502010-09-22 15:07:15 +0100314 // Continue search from i.
Ben Murdochf87a2032010-10-22 12:50:53 +0100315 while (index <= subject_length - pattern_length) {
316 int j = pattern_length - 1;
317 int c;
318 while (last_char != (c = subject[index + j])) {
319 int shift =
320 j - CharOccurrence(bad_char_occurence, c);
321 index += shift;
322 if (index > subject_length - pattern_length) {
Steve Block59151502010-09-22 15:07:15 +0100323 return -1;
324 }
325 }
Ben Murdochf87a2032010-10-22 12:50:53 +0100326 while (j >= 0 && pattern[j] == (c = subject[index + j])) j--;
Steve Block59151502010-09-22 15:07:15 +0100327 if (j < 0) {
Ben Murdochf87a2032010-10-22 12:50:53 +0100328 return index;
Steve Block59151502010-09-22 15:07:15 +0100329 } else if (j < start) {
330 // we have matched more than our tables allow us to be smart about.
331 // Fall back on BMH shift.
Ben Murdochf87a2032010-10-22 12:50:53 +0100332 index += pattern_length - 1
333 - CharOccurrence(bad_char_occurence,
334 static_cast<SubjectChar>(last_char));
Steve Block59151502010-09-22 15:07:15 +0100335 } else {
Ben Murdochf87a2032010-10-22 12:50:53 +0100336 int gs_shift = good_suffix_shift[j + 1];
337 int bc_occ =
338 CharOccurrence(bad_char_occurence, c);
Steve Block59151502010-09-22 15:07:15 +0100339 int shift = j - bc_occ;
340 if (gs_shift > shift) {
341 shift = gs_shift;
342 }
Ben Murdochf87a2032010-10-22 12:50:53 +0100343 index += shift;
Steve Block59151502010-09-22 15:07:15 +0100344 }
345 }
346
347 return -1;
348}
349
350
Steve Block59151502010-09-22 15:07:15 +0100351template <typename PatternChar, typename SubjectChar>
Ben Murdochf87a2032010-10-22 12:50:53 +0100352void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
353 int pattern_length = pattern_.length();
354 const PatternChar* pattern = pattern_.start();
355 // Only look at the last kBMMaxShift characters of pattern (from start_
356 // to pattern_length).
357 int start = start_;
358 int length = pattern_length - start;
359
360 // Biased tables so that we can use pattern indices as table indices,
361 // even if we only cover the part of the pattern from offset start.
362 int* shift_table = good_suffix_shift_table();
363 int* suffix_table = this->suffix_table();
364
365 // Initialize table.
366 for (int i = start; i < pattern_length; i++) {
367 shift_table[i] = length;
368 }
369 shift_table[pattern_length] = 1;
370 suffix_table[pattern_length] = pattern_length + 1;
371
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100372 if (pattern_length <= start) {
373 return;
374 }
375
Ben Murdochf87a2032010-10-22 12:50:53 +0100376 // Find suffixes.
377 PatternChar last_char = pattern[pattern_length - 1];
378 int suffix = pattern_length + 1;
379 {
380 int i = pattern_length;
381 while (i > start) {
382 PatternChar c = pattern[i - 1];
383 while (suffix <= pattern_length && c != pattern[suffix - 1]) {
384 if (shift_table[suffix] == length) {
385 shift_table[suffix] = suffix - i;
386 }
387 suffix = suffix_table[suffix];
388 }
389 suffix_table[--i] = --suffix;
390 if (suffix == pattern_length) {
391 // No suffix to extend, so we check against last_char only.
392 while ((i > start) && (pattern[i - 1] != last_char)) {
393 if (shift_table[pattern_length] == length) {
394 shift_table[pattern_length] = pattern_length - i;
395 }
396 suffix_table[--i] = pattern_length;
397 }
398 if (i > start) {
399 suffix_table[--i] = --suffix;
400 }
401 }
402 }
403 }
404 // Build shift table using suffixes.
405 if (suffix < pattern_length) {
406 for (int i = start; i <= pattern_length; i++) {
407 if (shift_table[i] == length) {
408 shift_table[i] = suffix - start;
409 }
410 if (i == suffix) {
411 suffix = suffix_table[suffix];
412 }
413 }
414 }
415}
416
417//---------------------------------------------------------------------
418// Boyer-Moore-Horspool string search.
419//---------------------------------------------------------------------
420
421template <typename PatternChar, typename SubjectChar>
422int StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
423 StringSearch<PatternChar, SubjectChar>* search,
424 Vector<const SubjectChar> subject,
425 int start_index) {
426 Vector<const PatternChar> pattern = search->pattern_;
427 int subject_length = subject.length();
428 int pattern_length = pattern.length();
429 int* char_occurrences = search->bad_char_table();
430 int badness = -pattern_length;
431
432 // How bad we are doing without a good-suffix table.
433 PatternChar last_char = pattern[pattern_length - 1];
434 int last_char_shift = pattern_length - 1 -
435 CharOccurrence(char_occurrences, static_cast<SubjectChar>(last_char));
436 // Perform search
437 int index = start_index; // No matches found prior to this index.
438 while (index <= subject_length - pattern_length) {
439 int j = pattern_length - 1;
440 int subject_char;
441 while (last_char != (subject_char = subject[index + j])) {
442 int bc_occ = CharOccurrence(char_occurrences, subject_char);
443 int shift = j - bc_occ;
444 index += shift;
445 badness += 1 - shift; // at most zero, so badness cannot increase.
446 if (index > subject_length - pattern_length) {
447 return -1;
448 }
449 }
450 j--;
451 while (j >= 0 && pattern[j] == (subject[index + j])) j--;
452 if (j < 0) {
453 return index;
454 } else {
455 index += last_char_shift;
456 // Badness increases by the number of characters we have
457 // checked, and decreases by the number of characters we
458 // can skip by shifting. It's a measure of how we are doing
459 // compared to reading each character exactly once.
460 badness += (pattern_length - j) - last_char_shift;
461 if (badness > 0) {
462 search->PopulateBoyerMooreTable();
463 search->strategy_ = &BoyerMooreSearch;
464 return BoyerMooreSearch(search, subject, index);
465 }
466 }
467 }
468 return -1;
469}
470
471
472template <typename PatternChar, typename SubjectChar>
473void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
474 int pattern_length = pattern_.length();
475
476 int* bad_char_occurrence = bad_char_table();
477
478 // Only preprocess at most kBMMaxShift last characters of pattern.
479 int start = start_;
480 // Run forwards to populate bad_char_table, so that *last* instance
481 // of character equivalence class is the one registered.
482 // Notice: Doesn't include the last character.
483 int table_size = AlphabetSize();
484 if (start == 0) { // All patterns less than kBMMaxShift in length.
485 memset(bad_char_occurrence,
486 -1,
487 table_size * sizeof(*bad_char_occurrence));
488 } else {
489 for (int i = 0; i < table_size; i++) {
490 bad_char_occurrence[i] = start - 1;
491 }
492 }
493 for (int i = start; i < pattern_length - 1; i++) {
494 PatternChar c = pattern_[i];
495 int bucket = (sizeof(PatternChar) == 1) ? c : c % AlphabetSize();
496 bad_char_occurrence[bucket] = i;
497 }
498}
499
500//---------------------------------------------------------------------
501// Linear string search with bailout to BMH.
502//---------------------------------------------------------------------
503
504// Simple linear search for short patterns, which bails out if the string
505// isn't found very early in the subject. Upgrades to BoyerMooreHorspool.
506template <typename PatternChar, typename SubjectChar>
507int StringSearch<PatternChar, SubjectChar>::InitialSearch(
508 StringSearch<PatternChar, SubjectChar>* search,
509 Vector<const SubjectChar> subject,
510 int index) {
511 Vector<const PatternChar> pattern = search->pattern_;
Steve Block59151502010-09-22 15:07:15 +0100512 int pattern_length = pattern.length();
513 // Badness is a count of how much work we have done. When we have
514 // done enough work we decide it's probably worth switching to a better
515 // algorithm.
516 int badness = -10 - (pattern_length << 2);
517
518 // We know our pattern is at least 2 characters, we cache the first so
519 // the common case of the first character not matching is faster.
520 PatternChar pattern_first_char = pattern[0];
Ben Murdochf87a2032010-10-22 12:50:53 +0100521 for (int i = index, n = subject.length() - pattern_length; i <= n; i++) {
Steve Block59151502010-09-22 15:07:15 +0100522 badness++;
Ben Murdochf87a2032010-10-22 12:50:53 +0100523 if (badness <= 0) {
524 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
525 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
526 memchr(subject.start() + i,
527 pattern_first_char,
528 n - i + 1));
529 if (pos == NULL) {
530 return -1;
531 }
532 i = static_cast<int>(pos - subject.start());
533 } else {
534 if (subject[i] != pattern_first_char) continue;
Steve Block59151502010-09-22 15:07:15 +0100535 }
Ben Murdochf87a2032010-10-22 12:50:53 +0100536 int j = 1;
537 do {
538 if (pattern[j] != subject[i + j]) {
539 break;
540 }
541 j++;
542 } while (j < pattern_length);
543 if (j == pattern_length) {
544 return i;
545 }
546 badness += j;
Steve Block59151502010-09-22 15:07:15 +0100547 } else {
Ben Murdochf87a2032010-10-22 12:50:53 +0100548 search->PopulateBoyerMooreHorspoolTable();
549 search->strategy_ = &BoyerMooreHorspoolSearch;
550 return BoyerMooreHorspoolSearch(search, subject, i);
Steve Block59151502010-09-22 15:07:15 +0100551 }
552 }
553 return -1;
554}
555
556
Ben Murdochf87a2032010-10-22 12:50:53 +0100557// Perform a a single stand-alone search.
558// If searching multiple times for the same pattern, a search
559// object should be constructed once and the Search function then called
560// for each search.
Steve Block59151502010-09-22 15:07:15 +0100561template <typename SubjectChar, typename PatternChar>
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100562int SearchString(Isolate* isolate,
563 Vector<const SubjectChar> subject,
564 Vector<const PatternChar> pattern,
565 int start_index) {
Steve Block44f0eee2011-05-26 01:26:41 +0100566 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
Ben Murdochf87a2032010-10-22 12:50:53 +0100567 return search.Search(subject, start_index);
Steve Block59151502010-09-22 15:07:15 +0100568}
569
570}} // namespace v8::internal
571
572#endif // V8_STRING_SEARCH_H_