| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /* |
| ******************************************************************************* |
| * Copyright (C) 2013-2014, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ******************************************************************************* |
| * collationsets.h |
| * |
| * created on: 2013feb09 |
| * created by: Markus W. Scherer |
| */ |
| |
| #ifndef __COLLATIONSETS_H__ |
| #define __COLLATIONSETS_H__ |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_COLLATION |
| |
| #include "unicode/uniset.h" |
| #include "collation.h" |
| |
| U_NAMESPACE_BEGIN |
| |
| struct CollationData; |
| |
| /** |
| * Finds the set of characters and strings that sort differently in the tailoring |
| * from the base data. |
| * |
| * Every mapping in the tailoring needs to be compared to the base, |
| * because some mappings are copied for optimization, and |
| * all contractions for a character are copied if any contractions for that character |
| * are added, modified or removed. |
| * |
| * It might be simpler to re-parse the rule string, but: |
| * - That would require duplicating some of the from-rules builder code. |
| * - That would make the runtime code depend on the builder. |
| * - That would only work if we have the rule string, and we allow users to |
| * omit the rule string from data files. |
| */ |
| class TailoredSet : public UMemory { |
| public: |
| TailoredSet(UnicodeSet *t) |
| : data(NULL), baseData(NULL), |
| tailored(t), |
| suffix(NULL), |
| errorCode(U_ZERO_ERROR) {} |
| |
| void forData(const CollationData *d, UErrorCode &errorCode); |
| |
| /** |
| * @return U_SUCCESS(errorCode) in C++, void in Java |
| * @internal only public for access by callback |
| */ |
| UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32); |
| |
| private: |
| void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32); |
| void comparePrefixes(UChar32 c, const UChar *p, const UChar *q); |
| void compareContractions(UChar32 c, const UChar *p, const UChar *q); |
| |
| void addPrefixes(const CollationData *d, UChar32 c, const UChar *p); |
| void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32); |
| void addContractions(UChar32 c, const UChar *p); |
| void addSuffix(UChar32 c, const UnicodeString &sfx); |
| void add(UChar32 c); |
| |
| /** Prefixes are reversed in the data structure. */ |
| void setPrefix(const UnicodeString &pfx) { |
| unreversedPrefix = pfx; |
| unreversedPrefix.reverse(); |
| } |
| void resetPrefix() { |
| unreversedPrefix.remove(); |
| } |
| |
| const CollationData *data; |
| const CollationData *baseData; |
| UnicodeSet *tailored; |
| UnicodeString unreversedPrefix; |
| const UnicodeString *suffix; |
| UErrorCode errorCode; |
| }; |
| |
| class ContractionsAndExpansions : public UMemory { |
| public: |
| class CESink : public UMemory { |
| public: |
| virtual ~CESink(); |
| virtual void handleCE(int64_t ce) = 0; |
| virtual void handleExpansion(const int64_t ces[], int32_t length) = 0; |
| }; |
| |
| ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes) |
| : data(NULL), |
| contractions(con), expansions(exp), |
| sink(s), |
| addPrefixes(prefixes), |
| checkTailored(0), |
| suffix(NULL), |
| errorCode(U_ZERO_ERROR) {} |
| |
| void forData(const CollationData *d, UErrorCode &errorCode); |
| void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec); |
| |
| // all following: @internal, only public for access by callback |
| |
| void handleCE32(UChar32 start, UChar32 end, uint32_t ce32); |
| |
| void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32); |
| void handleContractions(UChar32 start, UChar32 end, uint32_t ce32); |
| |
| void addExpansions(UChar32 start, UChar32 end); |
| void addStrings(UChar32 start, UChar32 end, UnicodeSet *set); |
| |
| /** Prefixes are reversed in the data structure. */ |
| void setPrefix(const UnicodeString &pfx) { |
| unreversedPrefix = pfx; |
| unreversedPrefix.reverse(); |
| } |
| void resetPrefix() { |
| unreversedPrefix.remove(); |
| } |
| |
| const CollationData *data; |
| UnicodeSet *contractions; |
| UnicodeSet *expansions; |
| CESink *sink; |
| UBool addPrefixes; |
| int8_t checkTailored; // -1: collected tailored +1: exclude tailored |
| UnicodeSet tailored; |
| UnicodeSet ranges; |
| UnicodeString unreversedPrefix; |
| const UnicodeString *suffix; |
| int64_t ces[Collation::MAX_EXPANSION_LENGTH]; |
| UErrorCode errorCode; |
| }; |
| |
| U_NAMESPACE_END |
| |
| #endif // !UCONFIG_NO_COLLATION |
| #endif // __COLLATIONSETS_H__ |