// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 1999-2007, International Business Machines Corporation
*   and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
#ifndef RBT_SET_H
#define RBT_SET_H

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/uobject.h"
#include "unicode/utrans.h"
#include "uvector.h"

U_NAMESPACE_BEGIN

// Forward declarations of the types that the declarations below refer to
// only by pointer or reference; their full definitions are not needed here.
class Replaceable;
class TransliterationRule;
class TransliterationRuleData;
class UnicodeFilter;
class UnicodeString;
class UnicodeSet;

32/**
33 * A set of rules for a <code>RuleBasedTransliterator</code>.
34 * @author Alan Liu
35 */
36class TransliterationRuleSet : public UMemory {
37 /**
38 * Vector of rules, in the order added. This is used while the
39 * rule set is getting built. After that, freeze() reorders and
40 * indexes the rules into rules[]. Any given rule is stored once
41 * in ruleVector, and one or more times in rules[]. ruleVector
42 * owns and deletes the rules.
43 */
44 UVector* ruleVector;
45
46 /**
47 * Sorted and indexed table of rules. This is created by freeze()
48 * from the rules in ruleVector. It contains alias pointers to
49 * the rules in ruleVector. It is zero before freeze() is called
50 * and non-zero thereafter.
51 */
52 TransliterationRule** rules;
53
54 /**
55 * Index table. For text having a first character c, compute x = c&0xFF.
56 * Now use rules[index[x]..index[x+1]-1]. This index table is created by
57 * freeze(). Before freeze() is called it contains garbage.
58 */
59 int32_t index[257];
60
61 /**
62 * Length of the longest preceding context
63 */
64 int32_t maxContextLength;
65
66public:
67
68 /**
69 * Construct a new empty rule set.
70 * @param status Output parameter filled in with success or failure status.
71 */
72 TransliterationRuleSet(UErrorCode& status);
73
74 /**
75 * Copy constructor.
76 */
77 TransliterationRuleSet(const TransliterationRuleSet&);
78
79 /**
80 * Destructor.
81 */
82 virtual ~TransliterationRuleSet();
83
84 /**
85 * Change the data object that this rule belongs to. Used
86 * internally by the TransliterationRuleData copy constructor.
87 * @param data the new data value to be set.
88 */
89 void setData(const TransliterationRuleData* data);
90
91 /**
92 * Return the maximum context length.
93 * @return the length of the longest preceding context.
94 */
95 virtual int32_t getMaximumContextLength(void) const;
96
97 /**
98 * Add a rule to this set. Rules are added in order, and order is
99 * significant. The last call to this method must be followed by
100 * a call to <code>freeze()</code> before the rule set is used.
101 * This method must <em>not</em> be called after freeze() has been
102 * called.
103 *
104 * @param adoptedRule the rule to add
105 */
106 virtual void addRule(TransliterationRule* adoptedRule,
107 UErrorCode& status);
108
109 /**
110 * Check this for masked rules and index it to optimize performance.
111 * The sequence of operations is: (1) add rules to a set using
112 * <code>addRule()</code>; (2) freeze the set using
113 * <code>freeze()</code>; (3) use the rule set. If
114 * <code>addRule()</code> is called after calling this method, it
115 * invalidates this object, and this method must be called again.
116 * That is, <code>freeze()</code> may be called multiple times,
117 * although for optimal performance it shouldn't be.
118 * @param parseError A pointer to UParseError to receive information about errors
119 * occurred.
120 * @param status Output parameter filled in with success or failure status.
121 */
122 virtual void freeze(UParseError& parseError, UErrorCode& status);
123
124 /**
125 * Transliterate the given text with the given UTransPosition
Victor Changce4bf3c2021-01-19 16:34:24 +0000126 * indices. Return true if the transliteration should continue
127 * or false if it should halt (because of a U_PARTIAL_MATCH match).
128 * Note that false is only ever returned if isIncremental is true.
Victor Chang73229502020-09-17 13:39:19 +0100129 * @param text the text to be transliterated
130 * @param index the position indices, which will be updated
Victor Changce4bf3c2021-01-19 16:34:24 +0000131 * @param isIncremental if true, assume new text may be inserted
132 * at index.limit, and return false if thre is a partial match.
133 * @return true unless a U_PARTIAL_MATCH has been obtained,
Victor Chang73229502020-09-17 13:39:19 +0100134 * indicating that transliteration should stop until more text
135 * arrives.
136 */
137 UBool transliterate(Replaceable& text,
138 UTransPosition& index,
139 UBool isIncremental);
140
141 /**
142 * Create rule strings that represents this rule set.
143 * @param result string to receive the rule strings. Current
144 * contents will be deleted.
145 * @param escapeUnprintable True, will escape the unprintable characters
146 * @return A reference to 'result'.
147 */
148 virtual UnicodeString& toRules(UnicodeString& result,
149 UBool escapeUnprintable) const;
150
151 /**
152 * Return the set of all characters that may be modified
153 * (getTarget=false) or emitted (getTarget=true) by this set.
154 */
155 UnicodeSet& getSourceTargetSet(UnicodeSet& result,
156 UBool getTarget) const;
157
158private:
159
160 TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid copying of this class
161};
162
U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

#endif