blob: 7fcd2bd93fef9d814c923838d083210ae015fec6 [file] [log] [blame]
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2001-2014 IBM and others. All rights reserved.
**********************************************************************
*   Date         Name        Description
*   03/22/2000   helena      Creation.
**********************************************************************
*/
11
12#include "unicode/utypes.h"
13
Claire Ho85bf2e22009-11-24 14:23:02 -080014#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -070015
16#include "unicode/stsearch.h"
17#include "usrchimp.h"
18#include "cmemory.h"
19
20U_NAMESPACE_BEGIN
21
22UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
23
24// public constructors and destructors -----------------------------------
25
26StringSearch::StringSearch(const UnicodeString &pattern,
27 const UnicodeString &text,
28 const Locale &locale,
29 BreakIterator *breakiter,
30 UErrorCode &status) :
31 SearchIterator(text, breakiter),
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -070032 m_pattern_(pattern)
33{
34 if (U_FAILURE(status)) {
35 m_strsrch_ = NULL;
36 return;
37 }
38
39 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
40 m_text_.getBuffer(), m_text_.length(),
41 locale.getName(), (UBreakIterator *)breakiter,
42 &status);
43 uprv_free(m_search_);
44 m_search_ = NULL;
45
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -070046 if (U_SUCCESS(status)) {
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -070047 // m_search_ has been created by the base SearchIterator class
48 m_search_ = m_strsrch_->search;
49 }
50}
51
52StringSearch::StringSearch(const UnicodeString &pattern,
53 const UnicodeString &text,
54 RuleBasedCollator *coll,
55 BreakIterator *breakiter,
56 UErrorCode &status) :
57 SearchIterator(text, breakiter),
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -070058 m_pattern_(pattern)
59{
60 if (U_FAILURE(status)) {
61 m_strsrch_ = NULL;
62 return;
63 }
64 if (coll == NULL) {
65 status = U_ILLEGAL_ARGUMENT_ERROR;
66 m_strsrch_ = NULL;
67 return;
68 }
69 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
70 m_pattern_.length(),
71 m_text_.getBuffer(),
ccorneliusfceb3982014-04-16 12:27:14 -070072 m_text_.length(), coll->toUCollator(),
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -070073 (UBreakIterator *)breakiter,
74 &status);
75 uprv_free(m_search_);
76 m_search_ = NULL;
77
78 if (U_SUCCESS(status)) {
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -070079 // m_search_ has been created by the base SearchIterator class
80 m_search_ = m_strsrch_->search;
81 }
82}
83
84StringSearch::StringSearch(const UnicodeString &pattern,
85 CharacterIterator &text,
86 const Locale &locale,
87 BreakIterator *breakiter,
88 UErrorCode &status) :
89 SearchIterator(text, breakiter),
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -070090 m_pattern_(pattern)
91{
92 if (U_FAILURE(status)) {
93 m_strsrch_ = NULL;
94 return;
95 }
96 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
97 m_text_.getBuffer(), m_text_.length(),
98 locale.getName(), (UBreakIterator *)breakiter,
99 &status);
100 uprv_free(m_search_);
101 m_search_ = NULL;
102
103 if (U_SUCCESS(status)) {
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700104 // m_search_ has been created by the base SearchIterator class
105 m_search_ = m_strsrch_->search;
106 }
107}
108
109StringSearch::StringSearch(const UnicodeString &pattern,
110 CharacterIterator &text,
111 RuleBasedCollator *coll,
112 BreakIterator *breakiter,
113 UErrorCode &status) :
114 SearchIterator(text, breakiter),
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700115 m_pattern_(pattern)
116{
117 if (U_FAILURE(status)) {
118 m_strsrch_ = NULL;
119 return;
120 }
121 if (coll == NULL) {
122 status = U_ILLEGAL_ARGUMENT_ERROR;
123 m_strsrch_ = NULL;
124 return;
125 }
126 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
127 m_pattern_.length(),
128 m_text_.getBuffer(),
ccorneliusfceb3982014-04-16 12:27:14 -0700129 m_text_.length(), coll->toUCollator(),
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700130 (UBreakIterator *)breakiter,
131 &status);
132 uprv_free(m_search_);
133 m_search_ = NULL;
134
135 if (U_SUCCESS(status)) {
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700136 // m_search_ has been created by the base SearchIterator class
137 m_search_ = m_strsrch_->search;
138 }
139}
140
141StringSearch::StringSearch(const StringSearch &that) :
142 SearchIterator(that.m_text_, that.m_breakiterator_),
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700143 m_pattern_(that.m_pattern_)
144{
145 UErrorCode status = U_ZERO_ERROR;
146
147 // Free m_search_ from the superclass
148 uprv_free(m_search_);
149 m_search_ = NULL;
150
151 if (that.m_strsrch_ == NULL) {
152 // This was not a good copy
153 m_strsrch_ = NULL;
154 }
155 else {
156 // Make a deep copy
157 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
158 m_pattern_.length(),
159 m_text_.getBuffer(),
160 m_text_.length(),
161 that.m_strsrch_->collator,
162 (UBreakIterator *)that.m_breakiterator_,
163 &status);
164 if (U_SUCCESS(status)) {
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700165 // m_search_ has been created by the base SearchIterator class
166 m_search_ = m_strsrch_->search;
167 }
168 }
169}
170
171StringSearch::~StringSearch()
172{
173 if (m_strsrch_ != NULL) {
174 usearch_close(m_strsrch_);
175 m_search_ = NULL;
176 }
177}
178
179StringSearch *
180StringSearch::clone() const {
181 return new StringSearch(*this);
182}
183
184// operator overloading ---------------------------------------------
185StringSearch & StringSearch::operator=(const StringSearch &that)
186{
Victor Chang92c98b52021-04-27 16:37:23 +0100187 if (this != &that) {
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700188 UErrorCode status = U_ZERO_ERROR;
189 m_text_ = that.m_text_;
190 m_breakiterator_ = that.m_breakiterator_;
191 m_pattern_ = that.m_pattern_;
192 // all m_search_ in the parent class is linked up with m_strsrch_
193 usearch_close(m_strsrch_);
194 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
195 m_pattern_.length(),
196 m_text_.getBuffer(),
197 m_text_.length(),
198 that.m_strsrch_->collator,
199 NULL, &status);
Claire Ho85bf2e22009-11-24 14:23:02 -0800200 // Check null pointer
201 if (m_strsrch_ != NULL) {
ccorneliusfceb3982014-04-16 12:27:14 -0700202 m_search_ = m_strsrch_->search;
Claire Ho85bf2e22009-11-24 14:23:02 -0800203 }
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700204 }
205 return *this;
206}
207
208UBool StringSearch::operator==(const SearchIterator &that) const
209{
210 if (this == &that) {
211 return TRUE;
212 }
213 if (SearchIterator::operator ==(that)) {
214 StringSearch &thatsrch = (StringSearch &)that;
215 return (this->m_pattern_ == thatsrch.m_pattern_ &&
216 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
217 }
218 return FALSE;
219}
220
221// public get and set methods ----------------------------------------
222
223void StringSearch::setOffset(int32_t position, UErrorCode &status)
224{
225 // status checked in usearch_setOffset
226 usearch_setOffset(m_strsrch_, position, &status);
227}
228
229int32_t StringSearch::getOffset(void) const
230{
231 return usearch_getOffset(m_strsrch_);
232}
233
234void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
235{
236 if (U_SUCCESS(status)) {
237 m_text_ = text;
238 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
239 }
240}
241
242void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
243{
244 if (U_SUCCESS(status)) {
245 text.getText(m_text_);
246 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
247 }
248}
249
250RuleBasedCollator * StringSearch::getCollator() const
251{
ccorneliusfceb3982014-04-16 12:27:14 -0700252 // Note the const_cast. It would be cleaner if this const method returned a const collator.
253 return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700254}
255
256void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
257{
258 if (U_SUCCESS(status)) {
ccorneliusfceb3982014-04-16 12:27:14 -0700259 usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700260 }
261}
262
263void StringSearch::setPattern(const UnicodeString &pattern,
264 UErrorCode &status)
265{
266 if (U_SUCCESS(status)) {
267 m_pattern_ = pattern;
268 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
269 &status);
270 }
271}
272
273const UnicodeString & StringSearch::getPattern() const
274{
275 return m_pattern_;
276}
277
278// public methods ----------------------------------------------------
279
280void StringSearch::reset()
281{
282 usearch_reset(m_strsrch_);
283}
284
Nikita Iashchenko4c0e2862019-11-05 16:38:00 +0000285StringSearch * StringSearch::safeClone() const
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700286{
287 UErrorCode status = U_ZERO_ERROR;
288 StringSearch *result = new StringSearch(m_pattern_, m_text_,
ccorneliusfceb3982014-04-16 12:27:14 -0700289 getCollator(),
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700290 m_breakiterator_,
291 status);
292 /* test for NULL */
293 if (result == 0) {
294 status = U_MEMORY_ALLOCATION_ERROR;
295 return 0;
296 }
297 result->setOffset(getOffset(), status);
298 result->setMatchStart(m_strsrch_->search->matchedIndex);
299 result->setMatchLength(m_strsrch_->search->matchedLength);
300 if (U_FAILURE(status)) {
301 return NULL;
302 }
303 return result;
304}
305
306// protected method -------------------------------------------------
307
308int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
309{
310 // values passed here are already in the pre-shift position
311 if (U_SUCCESS(status)) {
ccorneliusf9878a22014-11-20 18:09:39 -0800312 if (m_strsrch_->pattern.cesLength == 0) {
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700313 m_search_->matchedIndex =
314 m_search_->matchedIndex == USEARCH_DONE ?
315 getOffset() : m_search_->matchedIndex + 1;
316 m_search_->matchedLength = 0;
317 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
318 &status);
319 if (m_search_->matchedIndex == m_search_->textLength) {
320 m_search_->matchedIndex = USEARCH_DONE;
321 }
322 }
323 else {
324 // looking at usearch.cpp, this part is shifted out to
325 // StringSearch instead of SearchIterator because m_strsrch_ is
326 // not accessible in SearchIterator
Claire Ho85bf2e22009-11-24 14:23:02 -0800327#if 0
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700328 if (position + m_strsrch_->pattern.defaultShiftSize
329 > m_search_->textLength) {
330 setMatchNotFound();
331 return USEARCH_DONE;
332 }
Claire Ho85bf2e22009-11-24 14:23:02 -0800333#endif
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700334 if (m_search_->matchedLength <= 0) {
335 // the flipping direction issue has already been handled
336 // in next()
337 // for boundary check purposes. this will ensure that the
338 // next match will not preceed the current offset
339 // note search->matchedIndex will always be set to something
340 // in the code
341 m_search_->matchedIndex = position - 1;
342 }
343
344 ucol_setOffset(m_strsrch_->textIter, position, &status);
Claire Ho85bf2e22009-11-24 14:23:02 -0800345
346#if 0
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700347 for (;;) {
348 if (m_search_->isCanonicalMatch) {
349 // can't use exact here since extra accents are allowed.
350 usearch_handleNextCanonical(m_strsrch_, &status);
351 }
352 else {
353 usearch_handleNextExact(m_strsrch_, &status);
354 }
355 if (U_FAILURE(status)) {
356 return USEARCH_DONE;
357 }
358 if (m_breakiterator_ == NULL
359#if !UCONFIG_NO_BREAK_ITERATION
360 ||
361 m_search_->matchedIndex == USEARCH_DONE ||
362 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
363 m_breakiterator_->isBoundary(m_search_->matchedIndex +
364 m_search_->matchedLength))
365#endif
366 ) {
367 if (m_search_->matchedIndex == USEARCH_DONE) {
368 ucol_setOffset(m_strsrch_->textIter,
369 m_search_->textLength, &status);
370 }
371 else {
372 ucol_setOffset(m_strsrch_->textIter,
373 m_search_->matchedIndex, &status);
374 }
375 return m_search_->matchedIndex;
376 }
377 }
Claire Ho85bf2e22009-11-24 14:23:02 -0800378#else
379 // if m_strsrch_->breakIter is always the same as m_breakiterator_
380 // then we don't need to check the match boundaries here because
381 // usearch_handleNextXXX will already have done it.
382 if (m_search_->isCanonicalMatch) {
383 // *could* actually use exact here 'cause no extra accents allowed...
384 usearch_handleNextCanonical(m_strsrch_, &status);
385 } else {
386 usearch_handleNextExact(m_strsrch_, &status);
387 }
388
389 if (U_FAILURE(status)) {
390 return USEARCH_DONE;
391 }
392
393 if (m_search_->matchedIndex == USEARCH_DONE) {
394 ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
395 } else {
396 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
397 }
398
399 return m_search_->matchedIndex;
400#endif
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700401 }
402 }
403 return USEARCH_DONE;
404}
405
406int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
407{
408 // values passed here are already in the pre-shift position
409 if (U_SUCCESS(status)) {
ccorneliusf9878a22014-11-20 18:09:39 -0800410 if (m_strsrch_->pattern.cesLength == 0) {
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700411 m_search_->matchedIndex =
412 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
413 m_search_->matchedIndex);
414 if (m_search_->matchedIndex == 0) {
415 setMatchNotFound();
416 }
417 else {
418 m_search_->matchedIndex --;
419 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
420 &status);
421 m_search_->matchedLength = 0;
422 }
423 }
424 else {
425 // looking at usearch.cpp, this part is shifted out to
426 // StringSearch instead of SearchIterator because m_strsrch_ is
427 // not accessible in SearchIterator
Claire Ho85bf2e22009-11-24 14:23:02 -0800428#if 0
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700429 if (!m_search_->isOverlap &&
430 position - m_strsrch_->pattern.defaultShiftSize < 0) {
431 setMatchNotFound();
432 return USEARCH_DONE;
433 }
Claire Ho85bf2e22009-11-24 14:23:02 -0800434
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700435 for (;;) {
436 if (m_search_->isCanonicalMatch) {
437 // can't use exact here since extra accents are allowed.
438 usearch_handlePreviousCanonical(m_strsrch_, &status);
439 }
440 else {
441 usearch_handlePreviousExact(m_strsrch_, &status);
442 }
443 if (U_FAILURE(status)) {
444 return USEARCH_DONE;
445 }
446 if (m_breakiterator_ == NULL
447#if !UCONFIG_NO_BREAK_ITERATION
448 ||
449 m_search_->matchedIndex == USEARCH_DONE ||
450 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
451 m_breakiterator_->isBoundary(m_search_->matchedIndex +
452 m_search_->matchedLength))
453#endif
454 ) {
455 return m_search_->matchedIndex;
456 }
457 }
Claire Ho85bf2e22009-11-24 14:23:02 -0800458#else
459 ucol_setOffset(m_strsrch_->textIter, position, &status);
460
461 if (m_search_->isCanonicalMatch) {
462 // *could* use exact match here since extra accents *not* allowed!
463 usearch_handlePreviousCanonical(m_strsrch_, &status);
464 } else {
465 usearch_handlePreviousExact(m_strsrch_, &status);
466 }
467
468 if (U_FAILURE(status)) {
469 return USEARCH_DONE;
470 }
471
472 return m_search_->matchedIndex;
473#endif
Jean-Baptiste Queruac04d0b2009-07-17 17:11:19 -0700474 }
475
476 return m_search_->matchedIndex;
477 }
478 return USEARCH_DONE;
479}
480
481U_NAMESPACE_END
482
483#endif /* #if !UCONFIG_NO_COLLATION */