| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /* |
| ********************************************************************** |
| * Copyright (c) 2001-2014, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ********************************************************************** |
| * Date Name Description |
| * 08/10/2001 aliu Creation. |
| ********************************************************************** |
| */ |
| #ifndef _TRANSREG_H |
| #define _TRANSREG_H |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_TRANSLITERATION |
| |
| #include "unicode/uobject.h" |
| #include "unicode/translit.h" |
| #include "hash.h" |
| #include "uvector.h" |
| |
| U_NAMESPACE_BEGIN |
| |
| class TransliteratorEntry; |
| class TransliteratorSpec; |
| class UnicodeString; |
| |
| //------------------------------------------------------------------ |
| // TransliteratorAlias |
| //------------------------------------------------------------------ |
| |
| /** |
| * A TransliteratorAlias object is returned by get() if the given ID |
| * actually translates into something else. The caller then invokes |
| * the create() method on the alias to create the actual |
| * transliterator, and deletes the alias. |
| * |
| * Why all the shenanigans? To prevent circular calls between |
| * the registry code and the transliterator code that deadlocks. |
| */ |
| class TransliteratorAlias : public UMemory { |
| public: |
| /** |
| * Construct a simple alias (type == SIMPLE) |
| * @param aliasID the given id. |
| */ |
| TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter); |
| |
| /** |
| * Construct a compound RBT alias (type == COMPOUND) |
| */ |
| TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks, |
| UVector* adoptedTransliterators, |
| const UnicodeSet* compoundFilter); |
| |
| /** |
| * Construct a rules alias (type = RULES) |
| */ |
| TransliteratorAlias(const UnicodeString& theID, |
| const UnicodeString& rules, |
| UTransDirection dir); |
| |
| ~TransliteratorAlias(); |
| |
| /** |
| * The whole point of create() is that the caller must invoke |
| * it when the registry mutex is NOT held, to prevent deadlock. |
| * It may only be called once. |
| * |
| * Note: Only call create() if isRuleBased() returns FALSE. |
| * |
| * This method must be called *outside* of the TransliteratorRegistry |
| * mutex. |
| */ |
| Transliterator* create(UParseError&, UErrorCode&); |
| |
| /** |
| * Return TRUE if this alias is rule-based. If so, the caller |
| * must call parse() on it, then call TransliteratorRegistry::reget(). |
| */ |
| UBool isRuleBased() const; |
| |
| /** |
| * If isRuleBased() returns TRUE, then the caller must call this |
| * method, followed by TransliteratorRegistry::reget(). The latter |
| * method must be called inside the TransliteratorRegistry mutex. |
| * |
| * Note: Only call parse() if isRuleBased() returns TRUE. |
| * |
| * This method must be called *outside* of the TransliteratorRegistry |
| * mutex, because it can instantiate Transliterators embedded in |
| * the rules via the "&Latin-Arabic()" syntax. |
| */ |
| void parse(TransliteratorParser& parser, |
| UParseError& pe, UErrorCode& ec) const; |
| |
| private: |
| // We actually come in three flavors: |
| // 1. Simple alias |
| // Here aliasID is the alias string. Everything else is |
| // null, zero, empty. |
| // 2. CompoundRBT |
| // Here ID is the ID, aliasID is the idBlock, trans is the |
| // contained RBT, and idSplitPoint is the offet in aliasID |
| // where the contained RBT goes. compoundFilter is the |
| // compound filter, and it is _not_ owned. |
| // 3. Rules |
| // Here ID is the ID, aliasID is the rules string. |
| // idSplitPoint is the UTransDirection. |
| UnicodeString ID; |
| UnicodeString aliasesOrRules; |
| UVector* transes; // owned |
| const UnicodeSet* compoundFilter; // alias |
| UTransDirection direction; |
| enum { SIMPLE, COMPOUND, RULES } type; |
| |
| TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class |
| TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class |
| }; |
| |
| |
| /** |
| * A registry of system transliterators. This is the data structure |
| * that implements the mapping between transliterator IDs and the data |
| * or function pointers used to create the corresponding |
| * transliterators. There is one instance of the registry that is |
| * created statically. |
| * |
| * The registry consists of a dynamic component -- a hashtable -- and |
| * a static component -- locale resource bundles. The dynamic store |
| * is semantically overlaid on the static store, so the static mapping |
| * can be dynamically overridden. |
| * |
| * This is an internal class that is only used by Transliterator. |
| * Transliterator maintains one static instance of this class and |
| * delegates all registry-related operations to it. |
| * |
| * @author Alan Liu |
| */ |
| class TransliteratorRegistry : public UMemory { |
| |
| public: |
| |
| /** |
| * Contructor |
| * @param status Output param set to success/failure code. |
| */ |
| TransliteratorRegistry(UErrorCode& status); |
| |
| /** |
| * Nonvirtual destructor -- this class is not subclassable. |
| */ |
| ~TransliteratorRegistry(); |
| |
| //------------------------------------------------------------------ |
| // Basic public API |
| //------------------------------------------------------------------ |
| |
| /** |
| * Given a simple ID (forward direction, no inline filter, not |
| * compound) attempt to instantiate it from the registry. Return |
| * 0 on failure. |
| * |
| * Return a non-NULL aliasReturn value if the ID points to an alias. |
| * We cannot instantiate it ourselves because the alias may contain |
| * filters or compounds, which we do not understand. Caller should |
| * make aliasReturn NULL before calling. |
| * @param ID the given ID |
| * @param aliasReturn output param to receive TransliteratorAlias; |
| * should be NULL on entry |
| * @param parseError Struct to recieve information on position |
| * of error if an error is encountered |
| * @param status Output param set to success/failure code. |
| */ |
| Transliterator* get(const UnicodeString& ID, |
| TransliteratorAlias*& aliasReturn, |
| UErrorCode& status); |
| |
| /** |
| * The caller must call this after calling get(), if [a] calling get() |
| * returns an alias, and [b] the alias is rule based. In that |
| * situation the caller must call alias->parse() to do the parsing |
| * OUTSIDE THE REGISTRY MUTEX, then call this method to retry |
| * instantiating the transliterator. |
| * |
| * Note: Another alias might be returned by this method. |
| * |
| * This method (like all public methods of this class) must be called |
| * from within the TransliteratorRegistry mutex. |
| * |
| * @param aliasReturn output param to receive TransliteratorAlias; |
| * should be NULL on entry |
| */ |
| Transliterator* reget(const UnicodeString& ID, |
| TransliteratorParser& parser, |
| TransliteratorAlias*& aliasReturn, |
| UErrorCode& status); |
| |
| /** |
| * Register a prototype (adopted). This adds an entry to the |
| * dynamic store, or replaces an existing entry. Any entry in the |
| * underlying static locale resource store is masked. |
| */ |
| void put(Transliterator* adoptedProto, |
| UBool visible, |
| UErrorCode& ec); |
| |
| /** |
| * Register an ID and a factory function pointer. This adds an |
| * entry to the dynamic store, or replaces an existing entry. Any |
| * entry in the underlying static locale resource store is masked. |
| */ |
| void put(const UnicodeString& ID, |
| Transliterator::Factory factory, |
| Transliterator::Token context, |
| UBool visible, |
| UErrorCode& ec); |
| |
| /** |
| * Register an ID and a resource name. This adds an entry to the |
| * dynamic store, or replaces an existing entry. Any entry in the |
| * underlying static locale resource store is masked. |
| */ |
| void put(const UnicodeString& ID, |
| const UnicodeString& resourceName, |
| UTransDirection dir, |
| UBool readonlyResourceAlias, |
| UBool visible, |
| UErrorCode& ec); |
| |
| /** |
| * Register an ID and an alias ID. This adds an entry to the |
| * dynamic store, or replaces an existing entry. Any entry in the |
| * underlying static locale resource store is masked. |
| */ |
| void put(const UnicodeString& ID, |
| const UnicodeString& alias, |
| UBool readonlyAliasAlias, |
| UBool visible, |
| UErrorCode& ec); |
| |
| /** |
| * Unregister an ID. This removes an entry from the dynamic store |
| * if there is one. The static locale resource store is |
| * unaffected. |
| * @param ID the given ID. |
| */ |
| void remove(const UnicodeString& ID); |
| |
| //------------------------------------------------------------------ |
| // Public ID and spec management |
| //------------------------------------------------------------------ |
| |
| /** |
| * Return a StringEnumeration over the IDs currently registered |
| * with the system. |
| * @internal |
| */ |
| StringEnumeration* getAvailableIDs() const; |
| |
| /** |
| * == OBSOLETE - remove in ICU 3.4 == |
| * Return the number of IDs currently registered with the system. |
| * To retrieve the actual IDs, call getAvailableID(i) with |
| * i from 0 to countAvailableIDs() - 1. |
| * @return the number of IDs currently registered with the system. |
| * @internal |
| */ |
| int32_t countAvailableIDs(void) const; |
| |
| /** |
| * == OBSOLETE - remove in ICU 3.4 == |
| * Return the index-th available ID. index must be between 0 |
| * and countAvailableIDs() - 1, inclusive. If index is out of |
| * range, the result of getAvailableID(0) is returned. |
| * @param index the given index. |
| * @return the index-th available ID. index must be between 0 |
| * and countAvailableIDs() - 1, inclusive. If index is out of |
| * range, the result of getAvailableID(0) is returned. |
| * @internal |
| */ |
| const UnicodeString& getAvailableID(int32_t index) const; |
| |
| /** |
| * Return the number of registered source specifiers. |
| * @return the number of registered source specifiers. |
| */ |
| int32_t countAvailableSources(void) const; |
| |
| /** |
| * Return a registered source specifier. |
| * @param index which specifier to return, from 0 to n-1, where |
| * n = countAvailableSources() |
| * @param result fill-in paramter to receive the source specifier. |
| * If index is out of range, result will be empty. |
| * @return reference to result |
| */ |
| UnicodeString& getAvailableSource(int32_t index, |
| UnicodeString& result) const; |
| |
| /** |
| * Return the number of registered target specifiers for a given |
| * source specifier. |
| * @param source the given source specifier. |
| * @return the number of registered target specifiers for a given |
| * source specifier. |
| */ |
| int32_t countAvailableTargets(const UnicodeString& source) const; |
| |
| /** |
| * Return a registered target specifier for a given source. |
| * @param index which specifier to return, from 0 to n-1, where |
| * n = countAvailableTargets(source) |
| * @param source the source specifier |
| * @param result fill-in paramter to receive the target specifier. |
| * If source is invalid or if index is out of range, result will |
| * be empty. |
| * @return reference to result |
| */ |
| UnicodeString& getAvailableTarget(int32_t index, |
| const UnicodeString& source, |
| UnicodeString& result) const; |
| |
| /** |
| * Return the number of registered variant specifiers for a given |
| * source-target pair. There is always at least one variant: If |
| * just source-target is registered, then the single variant |
| * NO_VARIANT is returned. If source-target/variant is registered |
| * then that variant is returned. |
| * @param source the source specifiers |
| * @param target the target specifiers |
| * @return the number of registered variant specifiers for a given |
| * source-target pair. |
| */ |
| int32_t countAvailableVariants(const UnicodeString& source, |
| const UnicodeString& target) const; |
| |
| /** |
| * Return a registered variant specifier for a given source-target |
| * pair. If NO_VARIANT is one of the variants, then it will be |
| * at index 0. |
| * @param index which specifier to return, from 0 to n-1, where |
| * n = countAvailableVariants(source, target) |
| * @param source the source specifier |
| * @param target the target specifier |
| * @param result fill-in paramter to receive the variant |
| * specifier. If source is invalid or if target is invalid or if |
| * index is out of range, result will be empty. |
| * @return reference to result |
| */ |
| UnicodeString& getAvailableVariant(int32_t index, |
| const UnicodeString& source, |
| const UnicodeString& target, |
| UnicodeString& result) const; |
| |
| private: |
| |
| //---------------------------------------------------------------- |
| // Private implementation |
| //---------------------------------------------------------------- |
| |
| TransliteratorEntry* find(const UnicodeString& ID); |
| |
| TransliteratorEntry* find(UnicodeString& source, |
| UnicodeString& target, |
| UnicodeString& variant); |
| |
| TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src, |
| const TransliteratorSpec& trg, |
| const UnicodeString& variant) const; |
| |
| TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src, |
| const TransliteratorSpec& trg, |
| const UnicodeString& variant); |
| |
| static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen, |
| const TransliteratorSpec& specToFind, |
| const UnicodeString& variant, |
| UTransDirection direction); |
| |
| void registerEntry(const UnicodeString& source, |
| const UnicodeString& target, |
| const UnicodeString& variant, |
| TransliteratorEntry* adopted, |
| UBool visible); |
| |
| void registerEntry(const UnicodeString& ID, |
| TransliteratorEntry* adopted, |
| UBool visible); |
| |
| void registerEntry(const UnicodeString& ID, |
| const UnicodeString& source, |
| const UnicodeString& target, |
| const UnicodeString& variant, |
| TransliteratorEntry* adopted, |
| UBool visible); |
| |
| void registerSTV(const UnicodeString& source, |
| const UnicodeString& target, |
| const UnicodeString& variant); |
| |
| void removeSTV(const UnicodeString& source, |
| const UnicodeString& target, |
| const UnicodeString& variant); |
| |
| Transliterator* instantiateEntry(const UnicodeString& ID, |
| TransliteratorEntry *entry, |
| TransliteratorAlias*& aliasReturn, |
| UErrorCode& status); |
| |
| /** |
| * A StringEnumeration over the registered IDs in this object. |
| */ |
| class Enumeration : public StringEnumeration { |
| public: |
| Enumeration(const TransliteratorRegistry& reg); |
| virtual ~Enumeration(); |
| virtual int32_t count(UErrorCode& status) const; |
| virtual const UnicodeString* snext(UErrorCode& status); |
| virtual void reset(UErrorCode& status); |
| static UClassID U_EXPORT2 getStaticClassID(); |
| virtual UClassID getDynamicClassID() const; |
| private: |
| int32_t index; |
| const TransliteratorRegistry& reg; |
| }; |
| friend class Enumeration; |
| |
| private: |
| |
| /** |
| * Dynamic registry mapping full IDs to Entry objects. This |
| * contains both public and internal entities. The visibility is |
| * controlled by whether an entry is listed in availableIDs and |
| * specDAG or not. |
| */ |
| Hashtable registry; |
| |
| /** |
| * DAG of visible IDs by spec. Hashtable: source => (Hashtable: |
| * target => variant bitmask) |
| */ |
| Hashtable specDAG; |
| |
| /** |
| * Vector of all variant names |
| */ |
| UVector variantList; |
| |
| /** |
| * Vector of public full IDs. |
| */ |
| UVector availableIDs; |
| |
| TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class |
| TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class |
| }; |
| |
| U_NAMESPACE_END |
| |
| U_CFUNC UBool utrans_transliterator_cleanup(void); |
| |
| #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
| |
| #endif |
| //eof |