blob: 33f535fd1a7ca346b6083ca0bf966a9e8b78b840 [file] [log] [blame]
The Android Open Source Project7c1b96a2008-10-21 07:00:00 -07001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "utils/AndroidUnicode.h"
18#include "characterData.h"
19
20#define LOG_TAG "Unicode"
21#include "utils/Log.h"
22
23// ICU headers for using macros
24#include <unicode/utf16.h>
25
// Radix limits accepted by getDigitValue().
#define MIN_RADIX 2
#define MAX_RADIX 36

// Layout of one 32-bit PACKED_DATA entry, from the least significant bit up.
// Each *_SHIFT is the bit offset of a field; each *_MASK is applied after the
// shift and therefore encodes only the field width.

// General category: 5 bits.
#define TYPE_SHIFT 0
#define TYPE_MASK 0x1F

// Directionality: 5 bits.
#define DIRECTION_SHIFT (TYPE_SHIFT + 5)
#define DIRECTION_MASK 0x1F

// "Is mirrored" flag: 1 bit.
#define MIRRORED_SHIFT (DIRECTION_SHIFT + 5)
#define MIRRORED_MASK 0x1

// Index into the upper-case difference table: 6 bits.
#define TOUPPER_SHIFT (MIRRORED_SHIFT + 1)
#define TOUPPER_MASK 0x3F

// Index into the lower-case difference table: 6 bits.
#define TOLOWER_SHIFT (TOUPPER_SHIFT + 6)
#define TOLOWER_MASK 0x3F

// Index into the title-case difference table: 2 bits.
#define TOTITLE_SHIFT (TOLOWER_SHIFT + 6)
#define TOTITLE_MASK 0x3

// The mirror index (5 bits) and the numeric index (7 bits) start at the same
// bit position. The accessors in this file read the field as a mirror index
// only when the mirrored flag is set, and as a numeric index only when it is
// clear.
#define MIRROR_SHIFT (TOTITLE_SHIFT + 2)
#define MIRROR_MASK 0x1F

#define NUMERIC_SHIFT (TOTITLE_SHIFT + 2)
#define NUMERIC_MASK 0x7F

// The decomposition type is not stored in PACKED_DATA; it occupies the top
// 5 bits of the 16-bit value returned by findCharacterValue().
#define DECOMPOSITION_SHIFT 11
#define DECOMPOSITION_MASK 0x1F
55
56/*
57 * Returns the value stored in the CharacterData tables that contains
58 * an index into the packed data table and the decomposition type.
59 */
60static uint16_t findCharacterValue(UChar32 c)
61{
62 LOG_ASSERT(c >= 0 && c <= 0x10FFFF, "findCharacterValue received an invalid codepoint");
63 if (c < 256)
64 return CharacterData::LATIN1_DATA[c];
65
66 // Rotate the bits because the tables are separated into even and odd codepoints
67 c = (c >> 1) | ((c & 1) << 20);
68
69 CharacterData::Range search = CharacterData::FULL_DATA[c >> 16];
70 const uint32_t* array = search.array;
71
72 // This trick is so that that compare in the while loop does not
73 // need to shift the array entry down by 16
74 c <<= 16;
75 c |= 0xFFFF;
76
77 int high = (int)search.length - 1;
78 int low = 0;
79
80 if (high < 0)
81 return 0;
82
83 while (low < high - 1)
84 {
85 int probe = (high + low) >> 1;
86
87 // The entries contain the codepoint in the high 16 bits and the index
88 // into PACKED_DATA in the low 16.
89 if (array[probe] > (unsigned)c)
90 high = probe;
91 else
92 low = probe;
93 }
94
95 LOG_ASSERT((array[low] <= (unsigned)c), "A suitable range was not found");
96 return array[low] & 0xFFFF;
97}
98
99uint32_t android::Unicode::getPackedData(UChar32 c)
100{
101 // findCharacterValue returns a 16-bit value with the top 5 bits containing a decomposition type
102 // and the remaining bits containing an index.
103 return CharacterData::PACKED_DATA[findCharacterValue(c) & 0x7FF];
104}
105
106android::Unicode::CharType android::Unicode::getType(UChar32 c)
107{
108 if (c < 0 || c >= 0x10FFFF)
109 return CHARTYPE_UNASSIGNED;
110 return (CharType)((getPackedData(c) >> TYPE_SHIFT) & TYPE_MASK);
111}
112
113android::Unicode::DecompositionType android::Unicode::getDecompositionType(UChar32 c)
114{
115 // findCharacterValue returns a 16-bit value with the top 5 bits containing a decomposition type
116 // and the remaining bits containing an index.
117 return (DecompositionType)((findCharacterValue(c) >> DECOMPOSITION_SHIFT) & DECOMPOSITION_MASK);
118}
119
120int android::Unicode::getDigitValue(UChar32 c, int radix)
121{
122 if (radix < MIN_RADIX || radix > MAX_RADIX)
123 return -1;
124
125 int tempValue = radix;
126
127 if (c >= '0' && c <= '9')
128 tempValue = c - '0';
129 else if (c >= 'a' && c <= 'z')
130 tempValue = c - 'a' + 10;
131 else if (c >= 'A' && c <= 'Z')
132 tempValue = c - 'A' + 10;
133
134 return tempValue < radix ? tempValue : -1;
135}
136
137int android::Unicode::getNumericValue(UChar32 c)
138{
139 if (isMirrored(c))
140 return -1;
141
142 return (int) CharacterData::NUMERICS[((getPackedData(c) >> NUMERIC_SHIFT) & NUMERIC_MASK)];
143}
144
145UChar32 android::Unicode::toLower(UChar32 c)
146{
147 return c + CharacterData::LCDIFF[(getPackedData(c) >> TOLOWER_SHIFT) & TOLOWER_MASK];
148}
149
150UChar32 android::Unicode::toUpper(UChar32 c)
151{
152 return c + CharacterData::UCDIFF[(getPackedData(c) >> TOUPPER_SHIFT) & TOUPPER_MASK];
153}
154
155android::Unicode::Direction android::Unicode::getDirectionality(UChar32 c)
156{
157 uint32_t data = getPackedData(c);
158
159 if (0 == data)
160 return DIRECTIONALITY_UNDEFINED;
161
162 Direction d = (Direction) ((data >> DIRECTION_SHIFT) & DIRECTION_MASK);
163
164 if (DIRECTION_MASK == d)
165 return DIRECTIONALITY_UNDEFINED;
166
167 return d;
168}
169
170bool android::Unicode::isMirrored(UChar32 c)
171{
172 return ((getPackedData(c) >> MIRRORED_SHIFT) & MIRRORED_MASK) != 0;
173}
174
175UChar32 android::Unicode::toMirror(UChar32 c)
176{
177 if (!isMirrored(c))
178 return c;
179
180 return c + CharacterData::MIRROR_DIFF[(getPackedData(c) >> MIRROR_SHIFT) & MIRROR_MASK];
181}
182
183UChar32 android::Unicode::toTitle(UChar32 c)
184{
185 int32_t diff = CharacterData::TCDIFF[(getPackedData(c) >> TOTITLE_SHIFT) & TOTITLE_MASK];
186
187 if (TOTITLE_MASK == diff)
188 return toUpper(c);
189
190 return c + diff;
191}
192
193