// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (c) 2001-2014, International Business Machines
*   Corporation and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   08/10/2001  aliu        Creation.
**********************************************************************
*/
12#ifndef _TRANSREG_H
13#define _TRANSREG_H
14
15#include "unicode/utypes.h"
16
17#if !UCONFIG_NO_TRANSLITERATION
18
19#include "unicode/uobject.h"
20#include "unicode/translit.h"
21#include "hash.h"
22#include "uvector.h"
23
24U_NAMESPACE_BEGIN
25
26class TransliteratorEntry;
27class TransliteratorSpec;
28class UnicodeString;
29
30//------------------------------------------------------------------
31// TransliteratorAlias
32//------------------------------------------------------------------
33
34/**
35 * A TransliteratorAlias object is returned by get() if the given ID
36 * actually translates into something else. The caller then invokes
37 * the create() method on the alias to create the actual
38 * transliterator, and deletes the alias.
39 *
40 * Why all the shenanigans? To prevent circular calls between
41 * the registry code and the transliterator code that deadlocks.
42 */
43class TransliteratorAlias : public UMemory {
44 public:
45 /**
46 * Construct a simple alias (type == SIMPLE)
47 * @param aliasID the given id.
48 */
49 TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
50
51 /**
52 * Construct a compound RBT alias (type == COMPOUND)
53 */
54 TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
55 UVector* adoptedTransliterators,
56 const UnicodeSet* compoundFilter);
57
58 /**
59 * Construct a rules alias (type = RULES)
60 */
61 TransliteratorAlias(const UnicodeString& theID,
62 const UnicodeString& rules,
63 UTransDirection dir);
64
65 ~TransliteratorAlias();
66
67 /**
68 * The whole point of create() is that the caller must invoke
69 * it when the registry mutex is NOT held, to prevent deadlock.
70 * It may only be called once.
71 *
Victor Changce4bf3c2021-01-19 16:34:24 +000072 * Note: Only call create() if isRuleBased() returns false.
Victor Chang73229502020-09-17 13:39:19 +010073 *
74 * This method must be called *outside* of the TransliteratorRegistry
75 * mutex.
76 */
77 Transliterator* create(UParseError&, UErrorCode&);
78
79 /**
Victor Changce4bf3c2021-01-19 16:34:24 +000080 * Return true if this alias is rule-based. If so, the caller
Victor Chang73229502020-09-17 13:39:19 +010081 * must call parse() on it, then call TransliteratorRegistry::reget().
82 */
83 UBool isRuleBased() const;
84
85 /**
Victor Changce4bf3c2021-01-19 16:34:24 +000086 * If isRuleBased() returns true, then the caller must call this
Victor Chang73229502020-09-17 13:39:19 +010087 * method, followed by TransliteratorRegistry::reget(). The latter
88 * method must be called inside the TransliteratorRegistry mutex.
89 *
Victor Changce4bf3c2021-01-19 16:34:24 +000090 * Note: Only call parse() if isRuleBased() returns true.
Victor Chang73229502020-09-17 13:39:19 +010091 *
92 * This method must be called *outside* of the TransliteratorRegistry
93 * mutex, because it can instantiate Transliterators embedded in
94 * the rules via the "&Latin-Arabic()" syntax.
95 */
96 void parse(TransliteratorParser& parser,
97 UParseError& pe, UErrorCode& ec) const;
98
99 private:
100 // We actually come in three flavors:
101 // 1. Simple alias
102 // Here aliasID is the alias string. Everything else is
103 // null, zero, empty.
104 // 2. CompoundRBT
105 // Here ID is the ID, aliasID is the idBlock, trans is the
106 // contained RBT, and idSplitPoint is the offet in aliasID
107 // where the contained RBT goes. compoundFilter is the
108 // compound filter, and it is _not_ owned.
109 // 3. Rules
110 // Here ID is the ID, aliasID is the rules string.
111 // idSplitPoint is the UTransDirection.
112 UnicodeString ID;
113 UnicodeString aliasesOrRules;
114 UVector* transes; // owned
115 const UnicodeSet* compoundFilter; // alias
116 UTransDirection direction;
117 enum { SIMPLE, COMPOUND, RULES } type;
118
119 TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
120 TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
121};
122
123
124/**
125 * A registry of system transliterators. This is the data structure
126 * that implements the mapping between transliterator IDs and the data
127 * or function pointers used to create the corresponding
128 * transliterators. There is one instance of the registry that is
129 * created statically.
130 *
131 * The registry consists of a dynamic component -- a hashtable -- and
132 * a static component -- locale resource bundles. The dynamic store
133 * is semantically overlaid on the static store, so the static mapping
134 * can be dynamically overridden.
135 *
136 * This is an internal class that is only used by Transliterator.
137 * Transliterator maintains one static instance of this class and
138 * delegates all registry-related operations to it.
139 *
140 * @author Alan Liu
141 */
142class TransliteratorRegistry : public UMemory {
143
144 public:
145
146 /**
147 * Contructor
148 * @param status Output param set to success/failure code.
149 */
150 TransliteratorRegistry(UErrorCode& status);
151
152 /**
153 * Nonvirtual destructor -- this class is not subclassable.
154 */
155 ~TransliteratorRegistry();
156
157 //------------------------------------------------------------------
158 // Basic public API
159 //------------------------------------------------------------------
160
161 /**
162 * Given a simple ID (forward direction, no inline filter, not
163 * compound) attempt to instantiate it from the registry. Return
164 * 0 on failure.
165 *
166 * Return a non-NULL aliasReturn value if the ID points to an alias.
167 * We cannot instantiate it ourselves because the alias may contain
168 * filters or compounds, which we do not understand. Caller should
169 * make aliasReturn NULL before calling.
170 * @param ID the given ID
171 * @param aliasReturn output param to receive TransliteratorAlias;
172 * should be NULL on entry
173 * @param parseError Struct to recieve information on position
174 * of error if an error is encountered
175 * @param status Output param set to success/failure code.
176 */
177 Transliterator* get(const UnicodeString& ID,
178 TransliteratorAlias*& aliasReturn,
179 UErrorCode& status);
180
181 /**
182 * The caller must call this after calling get(), if [a] calling get()
183 * returns an alias, and [b] the alias is rule based. In that
184 * situation the caller must call alias->parse() to do the parsing
185 * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
186 * instantiating the transliterator.
187 *
188 * Note: Another alias might be returned by this method.
189 *
190 * This method (like all public methods of this class) must be called
191 * from within the TransliteratorRegistry mutex.
192 *
193 * @param aliasReturn output param to receive TransliteratorAlias;
194 * should be NULL on entry
195 */
196 Transliterator* reget(const UnicodeString& ID,
197 TransliteratorParser& parser,
198 TransliteratorAlias*& aliasReturn,
199 UErrorCode& status);
200
201 /**
202 * Register a prototype (adopted). This adds an entry to the
203 * dynamic store, or replaces an existing entry. Any entry in the
204 * underlying static locale resource store is masked.
205 */
206 void put(Transliterator* adoptedProto,
207 UBool visible,
208 UErrorCode& ec);
209
210 /**
211 * Register an ID and a factory function pointer. This adds an
212 * entry to the dynamic store, or replaces an existing entry. Any
213 * entry in the underlying static locale resource store is masked.
214 */
215 void put(const UnicodeString& ID,
216 Transliterator::Factory factory,
217 Transliterator::Token context,
218 UBool visible,
219 UErrorCode& ec);
220
221 /**
222 * Register an ID and a resource name. This adds an entry to the
223 * dynamic store, or replaces an existing entry. Any entry in the
224 * underlying static locale resource store is masked.
225 */
226 void put(const UnicodeString& ID,
227 const UnicodeString& resourceName,
228 UTransDirection dir,
229 UBool readonlyResourceAlias,
230 UBool visible,
231 UErrorCode& ec);
232
233 /**
234 * Register an ID and an alias ID. This adds an entry to the
235 * dynamic store, or replaces an existing entry. Any entry in the
236 * underlying static locale resource store is masked.
237 */
238 void put(const UnicodeString& ID,
239 const UnicodeString& alias,
240 UBool readonlyAliasAlias,
241 UBool visible,
242 UErrorCode& ec);
243
244 /**
245 * Unregister an ID. This removes an entry from the dynamic store
246 * if there is one. The static locale resource store is
247 * unaffected.
248 * @param ID the given ID.
249 */
250 void remove(const UnicodeString& ID);
251
252 //------------------------------------------------------------------
253 // Public ID and spec management
254 //------------------------------------------------------------------
255
256 /**
257 * Return a StringEnumeration over the IDs currently registered
258 * with the system.
259 * @internal
260 */
261 StringEnumeration* getAvailableIDs() const;
262
263 /**
264 * == OBSOLETE - remove in ICU 3.4 ==
265 * Return the number of IDs currently registered with the system.
266 * To retrieve the actual IDs, call getAvailableID(i) with
267 * i from 0 to countAvailableIDs() - 1.
268 * @return the number of IDs currently registered with the system.
269 * @internal
270 */
271 int32_t countAvailableIDs(void) const;
272
273 /**
274 * == OBSOLETE - remove in ICU 3.4 ==
275 * Return the index-th available ID. index must be between 0
276 * and countAvailableIDs() - 1, inclusive. If index is out of
277 * range, the result of getAvailableID(0) is returned.
278 * @param index the given index.
279 * @return the index-th available ID. index must be between 0
280 * and countAvailableIDs() - 1, inclusive. If index is out of
281 * range, the result of getAvailableID(0) is returned.
282 * @internal
283 */
284 const UnicodeString& getAvailableID(int32_t index) const;
285
286 /**
287 * Return the number of registered source specifiers.
288 * @return the number of registered source specifiers.
289 */
290 int32_t countAvailableSources(void) const;
291
292 /**
293 * Return a registered source specifier.
294 * @param index which specifier to return, from 0 to n-1, where
295 * n = countAvailableSources()
296 * @param result fill-in paramter to receive the source specifier.
297 * If index is out of range, result will be empty.
298 * @return reference to result
299 */
300 UnicodeString& getAvailableSource(int32_t index,
301 UnicodeString& result) const;
302
303 /**
304 * Return the number of registered target specifiers for a given
305 * source specifier.
306 * @param source the given source specifier.
307 * @return the number of registered target specifiers for a given
308 * source specifier.
309 */
310 int32_t countAvailableTargets(const UnicodeString& source) const;
311
312 /**
313 * Return a registered target specifier for a given source.
314 * @param index which specifier to return, from 0 to n-1, where
315 * n = countAvailableTargets(source)
316 * @param source the source specifier
317 * @param result fill-in paramter to receive the target specifier.
318 * If source is invalid or if index is out of range, result will
319 * be empty.
320 * @return reference to result
321 */
322 UnicodeString& getAvailableTarget(int32_t index,
323 const UnicodeString& source,
324 UnicodeString& result) const;
325
326 /**
327 * Return the number of registered variant specifiers for a given
328 * source-target pair. There is always at least one variant: If
329 * just source-target is registered, then the single variant
330 * NO_VARIANT is returned. If source-target/variant is registered
331 * then that variant is returned.
332 * @param source the source specifiers
333 * @param target the target specifiers
334 * @return the number of registered variant specifiers for a given
335 * source-target pair.
336 */
337 int32_t countAvailableVariants(const UnicodeString& source,
338 const UnicodeString& target) const;
339
340 /**
341 * Return a registered variant specifier for a given source-target
342 * pair. If NO_VARIANT is one of the variants, then it will be
343 * at index 0.
344 * @param index which specifier to return, from 0 to n-1, where
345 * n = countAvailableVariants(source, target)
346 * @param source the source specifier
347 * @param target the target specifier
348 * @param result fill-in paramter to receive the variant
349 * specifier. If source is invalid or if target is invalid or if
350 * index is out of range, result will be empty.
351 * @return reference to result
352 */
353 UnicodeString& getAvailableVariant(int32_t index,
354 const UnicodeString& source,
355 const UnicodeString& target,
356 UnicodeString& result) const;
357
358 private:
359
360 //----------------------------------------------------------------
361 // Private implementation
362 //----------------------------------------------------------------
363
364 TransliteratorEntry* find(const UnicodeString& ID);
365
366 TransliteratorEntry* find(UnicodeString& source,
367 UnicodeString& target,
368 UnicodeString& variant);
369
370 TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
371 const TransliteratorSpec& trg,
372 const UnicodeString& variant) const;
373
374 TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
375 const TransliteratorSpec& trg,
376 const UnicodeString& variant);
377
378 static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
379 const TransliteratorSpec& specToFind,
380 const UnicodeString& variant,
381 UTransDirection direction);
382
383 void registerEntry(const UnicodeString& source,
384 const UnicodeString& target,
385 const UnicodeString& variant,
386 TransliteratorEntry* adopted,
387 UBool visible);
388
389 void registerEntry(const UnicodeString& ID,
390 TransliteratorEntry* adopted,
391 UBool visible);
392
393 void registerEntry(const UnicodeString& ID,
394 const UnicodeString& source,
395 const UnicodeString& target,
396 const UnicodeString& variant,
397 TransliteratorEntry* adopted,
398 UBool visible);
399
400 void registerSTV(const UnicodeString& source,
401 const UnicodeString& target,
402 const UnicodeString& variant);
403
404 void removeSTV(const UnicodeString& source,
405 const UnicodeString& target,
406 const UnicodeString& variant);
407
408 Transliterator* instantiateEntry(const UnicodeString& ID,
409 TransliteratorEntry *entry,
410 TransliteratorAlias*& aliasReturn,
411 UErrorCode& status);
412
413 /**
414 * A StringEnumeration over the registered IDs in this object.
415 */
416 class Enumeration : public StringEnumeration {
417 public:
418 Enumeration(const TransliteratorRegistry& reg);
419 virtual ~Enumeration();
420 virtual int32_t count(UErrorCode& status) const;
421 virtual const UnicodeString* snext(UErrorCode& status);
422 virtual void reset(UErrorCode& status);
423 static UClassID U_EXPORT2 getStaticClassID();
424 virtual UClassID getDynamicClassID() const;
425 private:
426 int32_t index;
427 const TransliteratorRegistry& reg;
428 };
429 friend class Enumeration;
430
431 private:
432
433 /**
434 * Dynamic registry mapping full IDs to Entry objects. This
435 * contains both public and internal entities. The visibility is
436 * controlled by whether an entry is listed in availableIDs and
437 * specDAG or not.
438 */
439 Hashtable registry;
440
441 /**
442 * DAG of visible IDs by spec. Hashtable: source => (Hashtable:
443 * target => variant bitmask)
444 */
445 Hashtable specDAG;
446
447 /**
448 * Vector of all variant names
449 */
450 UVector variantList;
451
452 /**
453 * Vector of public full IDs.
454 */
455 UVector availableIDs;
456
457 TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
458 TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
459};
460
461U_NAMESPACE_END
462
463U_CFUNC UBool utrans_transliterator_cleanup(void);
464
465#endif /* #if !UCONFIG_NO_TRANSLITERATION */
466
467#endif
468//eof