// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 1999-2010, International Business Machines Corporation and others.
* All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
| 12 | #ifndef UNIFILT_H |
| 13 | #define UNIFILT_H |
| 14 | |
| 15 | #include "unicode/utypes.h" |
| 16 | |
| 17 | #if U_SHOW_CPLUSPLUS_API |
| 18 | |
| 19 | #include "unicode/unifunct.h" |
| 20 | #include "unicode/unimatch.h" |
| 21 | |
/**
 * \file
 * \brief C++ API: Unicode Filter
 */
| 26 | |
| 27 | U_NAMESPACE_BEGIN |
| 28 | |
/**
 * U_ETHER is used to represent character values for positions outside
 * a range.  For example, transliterator uses this to represent
 * characters outside the range contextStart..contextLimit-1.  This
 * allows explicit matching by rules and UnicodeSets of text outside a
 * defined range.
 *
 * The macro expands to a constant expression of type char16_t with the
 * value 0xFFFF.
 * @stable ICU 3.0
 */
#define U_ETHER (static_cast<char16_t>(0xFFFF))
| 38 | |
| 39 | /** |
| 40 | * |
| 41 | * <code>UnicodeFilter</code> defines a protocol for selecting a |
| 42 | * subset of the full range (U+0000 to U+10FFFF) of Unicode characters. |
| 43 | * Currently, filters are used in conjunction with classes like {@link |
| 44 | * Transliterator} to only process selected characters through a |
| 45 | * transformation. |
| 46 | * |
| 47 | * <p>Note: UnicodeFilter currently stubs out two pure virtual methods |
| 48 | * of its base class, UnicodeMatcher. These methods are toPattern() |
| 49 | * and matchesIndexValue(). This is done so that filter classes that |
| 50 | * are not actually used as matchers -- specifically, those in the |
| 51 | * UnicodeFilterLogic component, and those in tests -- can continue to |
| 52 | * work without defining these methods. As long as a filter is not |
| 53 | * used in an RBT during real transliteration, these methods will not |
| 54 | * be called. However, this breaks the UnicodeMatcher base class |
| 55 | * protocol, and it is not a correct solution. |
| 56 | * |
| 57 | * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter |
| 58 | * hierarchy and either redesign it, or simply remove the stubs in |
| 59 | * UnicodeFilter and force subclasses to implement the full |
| 60 | * UnicodeMatcher protocol. |
| 61 | * |
| 62 | * @see UnicodeFilterLogic |
| 63 | * @stable ICU 2.0 |
| 64 | */ |
| 65 | class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher { |
| 66 | |
| 67 | public: |
| 68 | /** |
| 69 | * Destructor |
| 70 | * @stable ICU 2.0 |
| 71 | */ |
| 72 | virtual ~UnicodeFilter(); |
| 73 | |
| 74 | /** |
| 75 | * Clones this object polymorphically. |
| 76 | * The caller owns the result and should delete it when done. |
| 77 | * @return clone, or nullptr if an error occurred |
| 78 | * @stable ICU 2.4 |
| 79 | */ |
| 80 | virtual UnicodeFilter* clone() const = 0; |
| 81 | |
| 82 | /** |
| 83 | * Returns <tt>true</tt> for characters that are in the selected |
| 84 | * subset. In other words, if a character is <b>to be |
| 85 | * filtered</b>, then <tt>contains()</tt> returns |
| 86 | * <b><tt>false</tt></b>. |
| 87 | * @stable ICU 2.0 |
| 88 | */ |
| 89 | virtual UBool contains(UChar32 c) const = 0; |
| 90 | |
| 91 | /** |
| 92 | * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer |
| 93 | * and return the pointer. |
| 94 | * @stable ICU 2.4 |
| 95 | */ |
| 96 | virtual UnicodeMatcher* toMatcher() const; |
| 97 | |
| 98 | /** |
| 99 | * Implement UnicodeMatcher API. |
| 100 | * @stable ICU 2.4 |
| 101 | */ |
| 102 | virtual UMatchDegree matches(const Replaceable& text, |
| 103 | int32_t& offset, |
| 104 | int32_t limit, |
| 105 | UBool incremental); |
| 106 | |
| 107 | /** |
| 108 | * UnicodeFunctor API. Nothing to do. |
| 109 | * @stable ICU 2.4 |
| 110 | */ |
| 111 | virtual void setData(const TransliterationRuleData*); |
| 112 | |
| 113 | /** |
| 114 | * ICU "poor man's RTTI", returns a UClassID for this class. |
| 115 | * |
| 116 | * @stable ICU 2.2 |
| 117 | */ |
| 118 | static UClassID U_EXPORT2 getStaticClassID(); |
| 119 | |
| 120 | protected: |
| 121 | |
| 122 | /* |
| 123 | * Since this class has pure virtual functions, |
| 124 | * a constructor can't be used. |
| 125 | * @stable ICU 2.0 |
| 126 | */ |
| 127 | /* UnicodeFilter();*/ |
| 128 | }; |
| 129 | |
| 130 | /*inline UnicodeFilter::UnicodeFilter() {}*/ |
| 131 | |
| 132 | U_NAMESPACE_END |
| 133 | |
| 134 | #endif /* U_SHOW_CPLUSPLUS_API */ |
| 135 | |
| 136 | #endif |