| // © 2020 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| |
| // uniquecharstr.h |
| // created: 2020sep01 Frank Yung-Fong Tang |
| |
| #ifndef __UNIQUECHARSTR_H__ |
| #define __UNIQUECHARSTR_H__ |
| |
| #include "charstr.h" |
| #include "uassert.h" |
| #include "uhash.h" |
| |
| U_NAMESPACE_BEGIN |
| |
| /** |
| * Stores NUL-terminated strings with duplicate elimination. |
| * Checks for unique UTF-16 string pointers and converts to invariant characters. |
| * |
| * Intended to be stack-allocated. Add strings, get a unique number for each, |
| * freeze the object, get a char * pointer for each string, |
| * call orphanCharStrings() to capture the string storage, and let this object go out of scope. |
| */ |
| class UniqueCharStrings { |
| public: |
| UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) { |
| // Note: We hash on string contents but store stable char16_t * pointers. |
| // If the strings are stored in resource bundles which should be built with |
| // duplicate elimination, then we should be able to hash on just the pointer values. |
| uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode); |
| if (U_FAILURE(errorCode)) { return; } |
| strings = new CharString(); |
| if (strings == nullptr) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| } |
| } |
| ~UniqueCharStrings() { |
| uhash_close(&map); |
| delete strings; |
| } |
| |
| /** Returns/orphans the CharString that contains all strings. */ |
| CharString *orphanCharStrings() { |
| CharString *result = strings; |
| strings = nullptr; |
| return result; |
| } |
| |
| /** |
| * Adds a string and returns a unique number for it. |
| * The string's buffer contents must not change, nor move around in memory, |
| * while this UniqueCharStrings is in use. |
| * The string contents must be NUL-terminated exactly at s.length(). |
| * |
| * Best used with read-only-alias UnicodeString objects that point to |
| * stable storage, such as strings returned by resource bundle functions. |
| */ |
| int32_t add(const UnicodeString &s, UErrorCode &errorCode) { |
| if (U_FAILURE(errorCode)) { return 0; } |
| if (isFrozen) { |
| errorCode = U_NO_WRITE_PERMISSION; |
| return 0; |
| } |
| // The string points into the resource bundle. |
| const char16_t *p = s.getBuffer(); |
| int32_t oldIndex = uhash_geti(&map, p); |
| if (oldIndex != 0) { // found duplicate |
| return oldIndex; |
| } |
| // Explicit NUL terminator for the previous string. |
| // The strings object is also terminated with one implicit NUL. |
| strings->append(0, errorCode); |
| int32_t newIndex = strings->length(); |
| strings->appendInvariantChars(s, errorCode); |
| uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode); |
| return newIndex; |
| } |
| |
| void freeze() { isFrozen = true; } |
| |
| /** |
| * Returns a string pointer for its unique number, if this object is frozen. |
| * Otherwise nullptr. |
| */ |
| const char *get(int32_t i) const { |
| U_ASSERT(isFrozen); |
| return isFrozen && i > 0 ? strings->data() + i : nullptr; |
| } |
| |
| private: |
| UHashtable map; |
| CharString *strings; |
| bool isFrozen = false; |
| }; |
| |
| U_NAMESPACE_END |
| |
| #endif // __UNIQUECHARSTR_H__ |