blob: bc685ffe5846d9c5336c271e6312a9ba841695ec [file] [log] [blame]
mstarzinger@chromium.org1b3afd12011-11-29 14:28:56 +00001// Copyright 2011 the V8 project authors. All rights reserved.
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +00002// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_STRING_SEARCH_H_
29#define V8_STRING_SEARCH_H_
30
31namespace v8 {
32namespace internal {
33
34
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +000035//---------------------------------------------------------------------
36// String Search object.
37//---------------------------------------------------------------------
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +000038
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +000039// Class holding constants and methods that apply to all string search variants,
40// independently of subject and pattern char size.
41class StringSearchBase {
42 protected:
43 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a
44 // limit, we can fix the size of tables. For a needle longer than this limit,
45 // search will not be optimal, since we only build tables for a suffix
46 // of the string, but it is a safe approximation.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000047 static const int kBMMaxShift = Isolate::kBMMaxShift;
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +000048
49 // Reduce alphabet to this size.
50 // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size
51 // proportional to the input alphabet. We reduce the alphabet size by
52 // equating input characters modulo a smaller alphabet size. This gives
53 // a potentially less efficient searching, but is a safe approximation.
54 // For needles using only characters in the same Unicode 256-code point page,
55 // there is no search speed degradation.
jkummerow@chromium.org59297c72013-01-09 16:32:23 +000056 static const int kAsciiAlphabetSize = 256;
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000057 static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize;
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +000058
59 // Bad-char shift table stored in the state. It's length is the alphabet size.
60 // For patterns below this length, the skip length of Boyer-Moore is too short
61 // to compensate for the algorithmic overhead compared to simple brute force.
62 static const int kBMMinPatternLength = 7;
63
jkummerow@chromium.org59297c72013-01-09 16:32:23 +000064 static inline bool IsOneByteString(Vector<const uint8_t> string) {
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +000065 return true;
66 }
67
jkummerow@chromium.org59297c72013-01-09 16:32:23 +000068 static inline bool IsOneByteString(Vector<const uc16> string) {
69 return String::IsOneByte(string.start(), string.length());
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +000070 }
71
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000072 friend class Isolate;
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +000073};
74
75
76template <typename PatternChar, typename SubjectChar>
77class StringSearch : private StringSearchBase {
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +000078 public:
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +000079 StringSearch(Isolate* isolate, Vector<const PatternChar> pattern)
80 : isolate_(isolate),
81 pattern_(pattern),
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +000082 start_(Max(0, pattern.length() - kBMMaxShift)) {
83 if (sizeof(PatternChar) > sizeof(SubjectChar)) {
jkummerow@chromium.org59297c72013-01-09 16:32:23 +000084 if (!IsOneByteString(pattern_)) {
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +000085 strategy_ = &FailSearch;
86 return;
87 }
88 }
89 int pattern_length = pattern_.length();
90 if (pattern_length < kBMMinPatternLength) {
91 if (pattern_length == 1) {
92 strategy_ = &SingleCharSearch;
93 return;
94 }
95 strategy_ = &LinearSearch;
96 return;
97 }
98 strategy_ = &InitialSearch;
99 }
100
101 int Search(Vector<const SubjectChar> subject, int index) {
102 return strategy_(this, subject, index);
103 }
104
105 static inline int AlphabetSize() {
106 if (sizeof(PatternChar) == 1) {
107 // ASCII needle.
108 return kAsciiAlphabetSize;
109 } else {
110 ASSERT(sizeof(PatternChar) == 2);
111 // UC16 needle.
112 return kUC16AlphabetSize;
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000113 }
114 }
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000115
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000116 private:
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000117 typedef int (*SearchFunction)( // NOLINT - it's not a cast!
118 StringSearch<PatternChar, SubjectChar>*,
119 Vector<const SubjectChar>,
120 int);
121
122 static int FailSearch(StringSearch<PatternChar, SubjectChar>*,
123 Vector<const SubjectChar>,
124 int) {
125 return -1;
126 }
127
128 static int SingleCharSearch(StringSearch<PatternChar, SubjectChar>* search,
129 Vector<const SubjectChar> subject,
130 int start_index);
131
132 static int LinearSearch(StringSearch<PatternChar, SubjectChar>* search,
133 Vector<const SubjectChar> subject,
134 int start_index);
135
136 static int InitialSearch(StringSearch<PatternChar, SubjectChar>* search,
137 Vector<const SubjectChar> subject,
138 int start_index);
139
140 static int BoyerMooreHorspoolSearch(
141 StringSearch<PatternChar, SubjectChar>* search,
142 Vector<const SubjectChar> subject,
143 int start_index);
144
145 static int BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search,
146 Vector<const SubjectChar> subject,
147 int start_index);
148
149 void PopulateBoyerMooreHorspoolTable();
150
151 void PopulateBoyerMooreTable();
152
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000153 static inline bool exceedsOneByte(uint8_t c) {
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000154 return false;
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000155 }
156
157 static inline bool exceedsOneByte(uint16_t c) {
158 return c > String::kMaxOneByteCharCodeU;
159 }
160
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000161 static inline int CharOccurrence(int* bad_char_occurrence,
162 SubjectChar char_code) {
163 if (sizeof(SubjectChar) == 1) {
164 return bad_char_occurrence[static_cast<int>(char_code)];
165 }
166 if (sizeof(PatternChar) == 1) {
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000167 if (exceedsOneByte(char_code)) {
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000168 return -1;
169 }
kmillikin@chromium.orgec7067e2010-10-01 09:22:58 +0000170 return bad_char_occurrence[static_cast<unsigned int>(char_code)];
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000171 }
kmillikin@chromium.orgec7067e2010-10-01 09:22:58 +0000172 // Both pattern and subject are UC16. Reduce character to equivalence class.
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000173 int equiv_class = char_code % kUC16AlphabetSize;
174 return bad_char_occurrence[equiv_class];
175 }
176
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000177 // The following tables are shared by all searches.
178 // TODO(lrn): Introduce a way for a pattern to keep its tables
179 // between searches (e.g., for an Atom RegExp).
180
181 // Store for the BoyerMoore(Horspool) bad char shift table.
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000182 // Return a table covering the last kBMMaxShift+1 positions of
183 // pattern.
184 int* bad_char_table() {
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000185 return isolate_->bad_char_shift_table();
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000186 }
187
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000188 // Store for the BoyerMoore good suffix shift table.
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000189 int* good_suffix_shift_table() {
190 // Return biased pointer that maps the range [start_..pattern_.length()
191 // to the kGoodSuffixShiftTable array.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000192 return isolate_->good_suffix_shift_table() - start_;
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000193 }
194
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000195 // Table used temporarily while building the BoyerMoore good suffix
196 // shift table.
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000197 int* suffix_table() {
198 // Return biased pointer that maps the range [start_..pattern_.length()
199 // to the kSuffixTable array.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000200 return isolate_->suffix_table() - start_;
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000201 }
202
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000203 Isolate* isolate_;
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000204 // The pattern to search for.
205 Vector<const PatternChar> pattern_;
206 // Pointer to implementation of the search.
207 SearchFunction strategy_;
208 // Cache value of Max(0, pattern_length() - kBMMaxShift)
209 int start_;
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000210};
211
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000212
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000213//---------------------------------------------------------------------
214// Single Character Pattern Search Strategy
215//---------------------------------------------------------------------
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000216
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000217template <typename PatternChar, typename SubjectChar>
218int StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
219 StringSearch<PatternChar, SubjectChar>* search,
220 Vector<const SubjectChar> subject,
221 int index) {
222 ASSERT_EQ(1, search->pattern_.length());
223 PatternChar pattern_first_char = search->pattern_[0];
224 int i = index;
225 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
226 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
227 memchr(subject.start() + i,
228 pattern_first_char,
229 subject.length() - i));
230 if (pos == NULL) return -1;
231 return static_cast<int>(pos - subject.start());
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000232 } else {
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000233 if (sizeof(PatternChar) > sizeof(SubjectChar)) {
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000234 if (exceedsOneByte(pattern_first_char)) {
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000235 return -1;
236 }
237 }
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000238 SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char);
239 int n = subject.length();
240 while (i < n) {
241 if (subject[i++] == search_char) return i - 1;
242 }
243 return -1;
244 }
245}
246
247//---------------------------------------------------------------------
248// Linear Search Strategy
249//---------------------------------------------------------------------
250
251
252template <typename PatternChar, typename SubjectChar>
mstarzinger@chromium.org1b3afd12011-11-29 14:28:56 +0000253inline bool CharCompare(const PatternChar* pattern,
254 const SubjectChar* subject,
255 int length) {
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000256 ASSERT(length > 0);
257 int pos = 0;
258 do {
259 if (pattern[pos] != subject[pos]) {
260 return false;
261 }
262 pos++;
263 } while (pos < length);
264 return true;
265}
266
267
268// Simple linear search for short patterns. Never bails out.
269template <typename PatternChar, typename SubjectChar>
270int StringSearch<PatternChar, SubjectChar>::LinearSearch(
271 StringSearch<PatternChar, SubjectChar>* search,
272 Vector<const SubjectChar> subject,
273 int index) {
274 Vector<const PatternChar> pattern = search->pattern_;
275 ASSERT(pattern.length() > 1);
276 int pattern_length = pattern.length();
277 PatternChar pattern_first_char = pattern[0];
278 int i = index;
279 int n = subject.length() - pattern_length;
280 while (i <= n) {
281 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
282 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
283 memchr(subject.start() + i,
284 pattern_first_char,
285 n - i + 1));
286 if (pos == NULL) return -1;
287 i = static_cast<int>(pos - subject.start()) + 1;
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000288 } else {
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000289 if (subject[i++] != pattern_first_char) continue;
290 }
291 // Loop extracted to separate function to allow using return to do
292 // a deeper break.
293 if (CharCompare(pattern.start() + 1,
294 subject.start() + i,
295 pattern_length - 1)) {
296 return i - 1;
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000297 }
298 }
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000299 return -1;
300}
301
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000302//---------------------------------------------------------------------
303// Boyer-Moore string search
304//---------------------------------------------------------------------
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000305
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000306template <typename PatternChar, typename SubjectChar>
307int StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
308 StringSearch<PatternChar, SubjectChar>* search,
309 Vector<const SubjectChar> subject,
310 int start_index) {
311 Vector<const PatternChar> pattern = search->pattern_;
312 int subject_length = subject.length();
313 int pattern_length = pattern.length();
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000314 // Only preprocess at most kBMMaxShift last characters of pattern.
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000315 int start = search->start_;
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000316
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000317 int* bad_char_occurence = search->bad_char_table();
318 int* good_suffix_shift = search->good_suffix_shift_table();
319
320 PatternChar last_char = pattern[pattern_length - 1];
321 int index = start_index;
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000322 // Continue search from i.
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000323 while (index <= subject_length - pattern_length) {
324 int j = pattern_length - 1;
325 int c;
326 while (last_char != (c = subject[index + j])) {
327 int shift =
328 j - CharOccurrence(bad_char_occurence, c);
329 index += shift;
330 if (index > subject_length - pattern_length) {
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000331 return -1;
332 }
333 }
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000334 while (j >= 0 && pattern[j] == (c = subject[index + j])) j--;
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000335 if (j < 0) {
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000336 return index;
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000337 } else if (j < start) {
338 // we have matched more than our tables allow us to be smart about.
339 // Fall back on BMH shift.
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000340 index += pattern_length - 1
341 - CharOccurrence(bad_char_occurence,
342 static_cast<SubjectChar>(last_char));
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000343 } else {
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000344 int gs_shift = good_suffix_shift[j + 1];
345 int bc_occ =
346 CharOccurrence(bad_char_occurence, c);
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000347 int shift = j - bc_occ;
348 if (gs_shift > shift) {
349 shift = gs_shift;
350 }
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000351 index += shift;
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000352 }
353 }
354
355 return -1;
356}
357
358
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000359template <typename PatternChar, typename SubjectChar>
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000360void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
361 int pattern_length = pattern_.length();
362 const PatternChar* pattern = pattern_.start();
363 // Only look at the last kBMMaxShift characters of pattern (from start_
364 // to pattern_length).
365 int start = start_;
366 int length = pattern_length - start;
367
368 // Biased tables so that we can use pattern indices as table indices,
369 // even if we only cover the part of the pattern from offset start.
370 int* shift_table = good_suffix_shift_table();
371 int* suffix_table = this->suffix_table();
372
373 // Initialize table.
374 for (int i = start; i < pattern_length; i++) {
375 shift_table[i] = length;
376 }
377 shift_table[pattern_length] = 1;
378 suffix_table[pattern_length] = pattern_length + 1;
379
danno@chromium.orgbf0c8202011-12-27 10:09:42 +0000380 if (pattern_length <= start) {
381 return;
382 }
383
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000384 // Find suffixes.
385 PatternChar last_char = pattern[pattern_length - 1];
386 int suffix = pattern_length + 1;
387 {
388 int i = pattern_length;
389 while (i > start) {
390 PatternChar c = pattern[i - 1];
391 while (suffix <= pattern_length && c != pattern[suffix - 1]) {
392 if (shift_table[suffix] == length) {
393 shift_table[suffix] = suffix - i;
394 }
395 suffix = suffix_table[suffix];
396 }
397 suffix_table[--i] = --suffix;
398 if (suffix == pattern_length) {
399 // No suffix to extend, so we check against last_char only.
400 while ((i > start) && (pattern[i - 1] != last_char)) {
401 if (shift_table[pattern_length] == length) {
402 shift_table[pattern_length] = pattern_length - i;
403 }
404 suffix_table[--i] = pattern_length;
405 }
406 if (i > start) {
407 suffix_table[--i] = --suffix;
408 }
409 }
410 }
411 }
412 // Build shift table using suffixes.
413 if (suffix < pattern_length) {
414 for (int i = start; i <= pattern_length; i++) {
415 if (shift_table[i] == length) {
416 shift_table[i] = suffix - start;
417 }
418 if (i == suffix) {
419 suffix = suffix_table[suffix];
420 }
421 }
422 }
423}
424
425//---------------------------------------------------------------------
426// Boyer-Moore-Horspool string search.
427//---------------------------------------------------------------------
428
429template <typename PatternChar, typename SubjectChar>
430int StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
431 StringSearch<PatternChar, SubjectChar>* search,
432 Vector<const SubjectChar> subject,
433 int start_index) {
434 Vector<const PatternChar> pattern = search->pattern_;
435 int subject_length = subject.length();
436 int pattern_length = pattern.length();
437 int* char_occurrences = search->bad_char_table();
438 int badness = -pattern_length;
439
440 // How bad we are doing without a good-suffix table.
441 PatternChar last_char = pattern[pattern_length - 1];
442 int last_char_shift = pattern_length - 1 -
443 CharOccurrence(char_occurrences, static_cast<SubjectChar>(last_char));
444 // Perform search
445 int index = start_index; // No matches found prior to this index.
446 while (index <= subject_length - pattern_length) {
447 int j = pattern_length - 1;
448 int subject_char;
449 while (last_char != (subject_char = subject[index + j])) {
450 int bc_occ = CharOccurrence(char_occurrences, subject_char);
451 int shift = j - bc_occ;
452 index += shift;
453 badness += 1 - shift; // at most zero, so badness cannot increase.
454 if (index > subject_length - pattern_length) {
455 return -1;
456 }
457 }
458 j--;
459 while (j >= 0 && pattern[j] == (subject[index + j])) j--;
460 if (j < 0) {
461 return index;
462 } else {
463 index += last_char_shift;
464 // Badness increases by the number of characters we have
465 // checked, and decreases by the number of characters we
466 // can skip by shifting. It's a measure of how we are doing
467 // compared to reading each character exactly once.
468 badness += (pattern_length - j) - last_char_shift;
469 if (badness > 0) {
470 search->PopulateBoyerMooreTable();
471 search->strategy_ = &BoyerMooreSearch;
472 return BoyerMooreSearch(search, subject, index);
473 }
474 }
475 }
476 return -1;
477}
478
479
480template <typename PatternChar, typename SubjectChar>
481void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
482 int pattern_length = pattern_.length();
483
484 int* bad_char_occurrence = bad_char_table();
485
486 // Only preprocess at most kBMMaxShift last characters of pattern.
487 int start = start_;
488 // Run forwards to populate bad_char_table, so that *last* instance
489 // of character equivalence class is the one registered.
490 // Notice: Doesn't include the last character.
491 int table_size = AlphabetSize();
492 if (start == 0) { // All patterns less than kBMMaxShift in length.
493 memset(bad_char_occurrence,
494 -1,
495 table_size * sizeof(*bad_char_occurrence));
496 } else {
497 for (int i = 0; i < table_size; i++) {
498 bad_char_occurrence[i] = start - 1;
499 }
500 }
501 for (int i = start; i < pattern_length - 1; i++) {
502 PatternChar c = pattern_[i];
503 int bucket = (sizeof(PatternChar) == 1) ? c : c % AlphabetSize();
504 bad_char_occurrence[bucket] = i;
505 }
506}
507
508//---------------------------------------------------------------------
509// Linear string search with bailout to BMH.
510//---------------------------------------------------------------------
511
512// Simple linear search for short patterns, which bails out if the string
513// isn't found very early in the subject. Upgrades to BoyerMooreHorspool.
514template <typename PatternChar, typename SubjectChar>
515int StringSearch<PatternChar, SubjectChar>::InitialSearch(
516 StringSearch<PatternChar, SubjectChar>* search,
517 Vector<const SubjectChar> subject,
518 int index) {
519 Vector<const PatternChar> pattern = search->pattern_;
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000520 int pattern_length = pattern.length();
521 // Badness is a count of how much work we have done. When we have
522 // done enough work we decide it's probably worth switching to a better
523 // algorithm.
524 int badness = -10 - (pattern_length << 2);
525
526 // We know our pattern is at least 2 characters, we cache the first so
527 // the common case of the first character not matching is faster.
528 PatternChar pattern_first_char = pattern[0];
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000529 for (int i = index, n = subject.length() - pattern_length; i <= n; i++) {
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000530 badness++;
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000531 if (badness <= 0) {
532 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
533 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
534 memchr(subject.start() + i,
535 pattern_first_char,
536 n - i + 1));
537 if (pos == NULL) {
538 return -1;
539 }
540 i = static_cast<int>(pos - subject.start());
541 } else {
542 if (subject[i] != pattern_first_char) continue;
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000543 }
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000544 int j = 1;
545 do {
546 if (pattern[j] != subject[i + j]) {
547 break;
548 }
549 j++;
550 } while (j < pattern_length);
551 if (j == pattern_length) {
552 return i;
553 }
554 badness += j;
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000555 } else {
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000556 search->PopulateBoyerMooreHorspoolTable();
557 search->strategy_ = &BoyerMooreHorspoolSearch;
558 return BoyerMooreHorspoolSearch(search, subject, i);
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000559 }
560 }
561 return -1;
562}
563
564
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000565// Perform a a single stand-alone search.
566// If searching multiple times for the same pattern, a search
567// object should be constructed once and the Search function then called
568// for each search.
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000569template <typename SubjectChar, typename PatternChar>
mstarzinger@chromium.org1b3afd12011-11-29 14:28:56 +0000570int SearchString(Isolate* isolate,
571 Vector<const SubjectChar> subject,
572 Vector<const PatternChar> pattern,
573 int start_index) {
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000574 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
kmillikin@chromium.orgf05f2912010-09-30 10:07:24 +0000575 return search.Search(subject, start_index);
sgjesse@chromium.org2ec107f2010-09-13 09:19:46 +0000576}
577
578}} // namespace v8::internal
579
580#endif // V8_STRING_SEARCH_H_