blob: 1b69e204b85f16638c13320921770a3e40533048 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
3 *
4 * This code is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 only, as
6 * published by the Free Software Foundation. Sun designates this
7 * particular file as subject to the "Classpath" exception as provided
8 * by Sun in the LICENSE file that accompanied this code.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 */
25
26/*
27 *
28 * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
29 *
30 */
31
32#ifndef __INDICREORDERING_H
33#define __INDICREORDERING_H
34
35#include "LETypes.h"
36#include "OpenTypeTables.h"
37
38// Characters that get referred to by name...
// U+200C is ZERO WIDTH NON-JOINER and U+200D is ZERO WIDTH JOINER.
39#define C_SIGN_ZWNJ 0x200C
40#define C_SIGN_ZWJ 0x200D
41
42// Character class values
// These are the values the IndicClassTable predicates compare against after
// masking a CharClass with CF_CLASS_MASK (e.g. isVowelModifier, isNukta,
// isVirama).
43#define CC_RESERVED 0U
44#define CC_VOWEL_MODIFIER 1U
45#define CC_STRESS_MARK 2U
46#define CC_INDEPENDENT_VOWEL 3U
47#define CC_INDEPENDENT_VOWEL_2 4U
48#define CC_INDEPENDENT_VOWEL_3 5U
49#define CC_CONSONANT 6U
50#define CC_CONSONANT_WITH_NUKTA 7U
51#define CC_NUKTA 8U
// IndicClassTable::isMatra() tests for the contiguous range
// CC_DEPENDENT_VOWEL..CC_SPLIT_VOWEL_PIECE_3, so these four values must stay
// consecutive and in this order.
52#define CC_DEPENDENT_VOWEL 9U
53#define CC_SPLIT_VOWEL_PIECE_1 10U
54#define CC_SPLIT_VOWEL_PIECE_2 11U
55#define CC_SPLIT_VOWEL_PIECE_3 12U
56#define CC_VIRAMA 13U
57#define CC_ZERO_WIDTH_MARK 14U
// One more than the largest class value above; presumably the number of
// character classes (its use is not visible in this header).
58#define CC_COUNT 15U
59
60// Character class flags
// A CharClass is a 32-bit value: the low 16 bits (CF_CLASS_MASK) carry one of
// the CC_* class values, the remaining bits carry the CF_* flags below.
61#define CF_CLASS_MASK 0x0000FFFFU
62
// Tested by IndicClassTable::isConsonant().
63#define CF_CONSONANT 0x80000000U
64
// Single-bit flags tested by isReph(), isVattu(), hasBelowBaseForm(),
// hasPostBaseForm() and isLengthMark() respectively.
65#define CF_REPH 0x40000000U
66#define CF_VATTU 0x20000000U
67#define CF_BELOW_BASE 0x10000000U
68#define CF_POST_BASE 0x08000000U
69#define CF_LENGTH_MARK 0x04000000U
70
// Two-bit position field (bits 20-21), selected with CF_POS_MASK. Note that
// CF_POS_BEFORE has the same value as the mask and CF_POS_AFTER is zero.
71#define CF_POS_BEFORE 0x00300000U
72#define CF_POS_BELOW 0x00200000U
73#define CF_POS_ABOVE 0x00100000U
74#define CF_POS_AFTER 0x00000000U
75#define CF_POS_MASK 0x00300000U
76
// Four-bit index field (bits 16-19). IndicClassTable::getSplitMatra() shifts
// it down by CF_INDEX_SHIFT and uses (value - 1) as an index into
// splitMatraTable; isSplitMatra() treats a non-zero field as "split matra".
77#define CF_INDEX_MASK 0x000F0000U
78#define CF_INDEX_SHIFT 16
79
80// Script flag bits
// NOTE(review): presumably stored in IndicClassTable::scriptFlags; the code
// that tests these bits is not visible in this header - confirm in the
// implementation file.
81#define SF_MATRAS_AFTER_BASE 0x80000000U
82#define SF_REPH_AFTER_BELOW 0x40000000U
83#define SF_EYELASH_RA 0x20000000U
84#define SF_MPRE_FIXUP 0x10000000U
85
// The low 16 bits form a numeric field selected by SF_POST_BASE_LIMIT_MASK;
// SF_NO_POST_BASE_LIMIT (0x7FFF) looks like the "no limit" value for that
// field - TODO confirm against the users of these macros.
86#define SF_POST_BASE_LIMIT_MASK 0x0000FFFFU
87#define SF_NO_POST_BASE_LIMIT 0x00007FFFU
88
// A SplitMatra is a fixed array of three LEUnicode values;
// IndicClassTable::splitMatraTable points at an array of these.
89typedef LEUnicode SplitMatra[3];
90
// Forward declarations: this header only uses these types through pointers
// and references (see IndicReordering::reorder and adjustMPres).
91class MPreFixups;
92class LEGlyphStorage;
93
94struct IndicClassTable
95{
96 typedef le_uint32 CharClass;
97 typedef le_uint32 ScriptFlags;
98
99 LEUnicode firstChar;
100 LEUnicode lastChar;
101 le_int32 worstCaseExpansion;
102 ScriptFlags scriptFlags;
103 const CharClass *classTable;
104 const SplitMatra *splitMatraTable;
105
106 inline le_int32 getWorstCaseExpansion() const;
107
108 CharClass getCharClass(LEUnicode ch) const;
109
110 inline const SplitMatra *getSplitMatra(CharClass charClass) const;
111
112 inline le_bool isVowelModifier(LEUnicode ch) const;
113 inline le_bool isStressMark(LEUnicode ch) const;
114 inline le_bool isConsonant(LEUnicode ch) const;
115 inline le_bool isReph(LEUnicode ch) const;
116 inline le_bool isVirama(LEUnicode ch) const;
117 inline le_bool isNukta(LEUnicode ch) const;
118 inline le_bool isVattu(LEUnicode ch) const;
119 inline le_bool isMatra(LEUnicode ch) const;
120 inline le_bool isSplitMatra(LEUnicode ch) const;
121 inline le_bool isLengthMark(LEUnicode ch) const;
122 inline le_bool hasPostOrBelowBaseForm(LEUnicode ch) const;
123 inline le_bool hasPostBaseForm(LEUnicode ch) const;
124 inline le_bool hasBelowBaseForm(LEUnicode ch) const;
125
126 inline static le_bool isVowelModifier(CharClass charClass);
127 inline static le_bool isStressMark(CharClass charClass);
128 inline static le_bool isConsonant(CharClass charClass);
129 inline static le_bool isReph(CharClass charClass);
130 inline static le_bool isVirama(CharClass charClass);
131 inline static le_bool isNukta(CharClass charClass);
132 inline static le_bool isVattu(CharClass charClass);
133 inline static le_bool isMatra(CharClass charClass);
134 inline static le_bool isSplitMatra(CharClass charClass);
135 inline static le_bool isLengthMark(CharClass charClass);
136 inline static le_bool hasPostOrBelowBaseForm(CharClass charClass);
137 inline static le_bool hasPostBaseForm(CharClass charClass);
138 inline static le_bool hasBelowBaseForm(CharClass charClass);
139
140 static const IndicClassTable *getScriptClassTable(le_int32 scriptCode);
141};
142
143class IndicReordering {
144public:
145 static le_int32 getWorstCaseExpansion(le_int32 scriptCode);
146
147 static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
148 LEUnicode *outChars, LEGlyphStorage &glyphStorage,
149 MPreFixups **outMPreFixups);
150
151 static void adjustMPres(MPreFixups *mpreFixups, LEGlyphStorage &glyphStorage);
152
153 static const FeatureMap *getFeatureMap(le_int32 &count);
154
155private:
156 // do not instantiate
157 IndicReordering();
158
159 static le_int32 findSyllable(const IndicClassTable *classTable, const LEUnicode *chars,
160 le_int32 prev, le_int32 charCount);
161
162};
163
164inline le_int32 IndicClassTable::getWorstCaseExpansion() const
165{
166 return worstCaseExpansion;
167}
168
169inline const SplitMatra *IndicClassTable::getSplitMatra(CharClass charClass) const
170{
171 le_int32 index = (charClass & CF_INDEX_MASK) >> CF_INDEX_SHIFT;
172
173 return &splitMatraTable[index - 1];
174}
175
176inline le_bool IndicClassTable::isVowelModifier(CharClass charClass)
177{
178 return (charClass & CF_CLASS_MASK) == CC_VOWEL_MODIFIER;
179}
180
181inline le_bool IndicClassTable::isStressMark(CharClass charClass)
182{
183 return (charClass & CF_CLASS_MASK) == CC_STRESS_MARK;
184}
185
186inline le_bool IndicClassTable::isConsonant(CharClass charClass)
187{
188 return (charClass & CF_CONSONANT) != 0;
189}
190
191inline le_bool IndicClassTable::isReph(CharClass charClass)
192{
193 return (charClass & CF_REPH) != 0;
194}
195
196inline le_bool IndicClassTable::isNukta(CharClass charClass)
197{
198 return (charClass & CF_CLASS_MASK) == CC_NUKTA;
199}
200
201inline le_bool IndicClassTable::isVirama(CharClass charClass)
202{
203 return (charClass & CF_CLASS_MASK) == CC_VIRAMA;
204}
205
206inline le_bool IndicClassTable::isVattu(CharClass charClass)
207{
208 return (charClass & CF_VATTU) != 0;
209}
210
211inline le_bool IndicClassTable::isMatra(CharClass charClass)
212{
213 charClass &= CF_CLASS_MASK;
214
215 return charClass >= CC_DEPENDENT_VOWEL && charClass <= CC_SPLIT_VOWEL_PIECE_3;
216}
217
218inline le_bool IndicClassTable::isSplitMatra(CharClass charClass)
219{
220 return (charClass & CF_INDEX_MASK) != 0;
221}
222
223inline le_bool IndicClassTable::isLengthMark(CharClass charClass)
224{
225 return (charClass & CF_LENGTH_MARK) != 0;
226}
227
228inline le_bool IndicClassTable::hasPostOrBelowBaseForm(CharClass charClass)
229{
230 return (charClass & (CF_POST_BASE | CF_BELOW_BASE)) != 0;
231}
232
233inline le_bool IndicClassTable::hasPostBaseForm(CharClass charClass)
234{
235 return (charClass & CF_POST_BASE) != 0;
236}
237
238inline le_bool IndicClassTable::hasBelowBaseForm(CharClass charClass)
239{
240 return (charClass & CF_BELOW_BASE) != 0;
241}
242
243inline le_bool IndicClassTable::isVowelModifier(LEUnicode ch) const
244{
245 return isVowelModifier(getCharClass(ch));
246}
247
248inline le_bool IndicClassTable::isStressMark(LEUnicode ch) const
249{
250 return isStressMark(getCharClass(ch));
251}
252
253inline le_bool IndicClassTable::isConsonant(LEUnicode ch) const
254{
255 return isConsonant(getCharClass(ch));
256}
257
258inline le_bool IndicClassTable::isReph(LEUnicode ch) const
259{
260 return isReph(getCharClass(ch));
261}
262
263inline le_bool IndicClassTable::isVirama(LEUnicode ch) const
264{
265 return isVirama(getCharClass(ch));
266}
267
268inline le_bool IndicClassTable::isNukta(LEUnicode ch) const
269{
270 return isNukta(getCharClass(ch));
271}
272
273inline le_bool IndicClassTable::isVattu(LEUnicode ch) const
274{
275 return isVattu(getCharClass(ch));
276}
277
278inline le_bool IndicClassTable::isMatra(LEUnicode ch) const
279{
280 return isMatra(getCharClass(ch));
281}
282
283inline le_bool IndicClassTable::isSplitMatra(LEUnicode ch) const
284{
285 return isSplitMatra(getCharClass(ch));
286}
287
288inline le_bool IndicClassTable::isLengthMark(LEUnicode ch) const
289{
290 return isLengthMark(getCharClass(ch));
291}
292
293inline le_bool IndicClassTable::hasPostOrBelowBaseForm(LEUnicode ch) const
294{
295 return hasPostOrBelowBaseForm(getCharClass(ch));
296}
297
298inline le_bool IndicClassTable::hasPostBaseForm(LEUnicode ch) const
299{
300 return hasPostBaseForm(getCharClass(ch));
301}
302
303inline le_bool IndicClassTable::hasBelowBaseForm(LEUnicode ch) const
304{
305 return hasBelowBaseForm(getCharClass(ch));
306}
307
308#endif