/*
 * Copyright 2003-2006 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
25
26package java.lang;
27
/** The CharacterData class encapsulates the large tables once found in
 *  java.lang.Character.
 */
31
32class CharacterData01 extends CharacterData {
33 /* The character properties are currently encoded into 32 bits in the following manner:
34 1 bit mirrored property
35 4 bits directionality property
36 9 bits signed offset used for converting case
37 1 bit if 1, adding the signed offset converts the character to lowercase
38 1 bit if 1, subtracting the signed offset converts the character to uppercase
39 1 bit if 1, this character has a titlecase equivalent (possibly itself)
40 3 bits 0 may not be part of an identifier
41 1 ignorable control; may continue a Unicode identifier or Java identifier
42 2 may continue a Java identifier but not a Unicode identifier (unused)
43 3 may continue a Unicode identifier or Java identifier
44 4 is a Java whitespace character
45 5 may start or continue a Java identifier;
46 may continue but not start a Unicode identifier (underscores)
47 6 may start or continue a Java identifier but not a Unicode identifier ($)
48 7 may start or continue a Unicode identifier or Java identifier
49 Thus:
50 5, 6, 7 may start a Java identifier
51 1, 2, 3, 5, 6, 7 may continue a Java identifier
52 7 may start a Unicode identifier
53 1, 3, 5, 7 may continue a Unicode identifier
54 1 is ignorable within an identifier
55 4 is Java whitespace
56 2 bits 0 this character has no numeric property
57 1 adding the digit offset to the character code and then
58 masking with 0x1F will produce the desired numeric value
59 2 this character has a "strange" numeric value
60 3 a Java supradecimal digit: adding the digit offset to the
61 character code, then masking with 0x1F, then adding 10
62 will produce the desired numeric value
63 5 bits digit offset
64 5 bits character type
65
66 The encoding of character properties is subject to change at any time.
67 */
68
69 int getProperties(int ch) {
70 char offset = (char)ch;
71 int props = $$Lookup(offset);
72 return props;
73 }
74
75 int getType(int ch) {
76 int props = getProperties(ch);
77 return (props & $$maskType);
78 }
79
80 boolean isJavaIdentifierStart(int ch) {
81 int props = getProperties(ch);
82 return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
83 }
84
85 boolean isJavaIdentifierPart(int ch) {
86 int props = getProperties(ch);
87 return ((props & $$nonzeroJavaPart) != 0);
88 }
89
90 boolean isUnicodeIdentifierStart(int ch) {
91 int props = getProperties(ch);
92 return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
93 }
94
95 boolean isUnicodeIdentifierPart(int ch) {
96 int props = getProperties(ch);
97 return ((props & $$maskUnicodePart) != 0);
98 }
99
100 boolean isIdentifierIgnorable(int ch) {
101 int props = getProperties(ch);
102 return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
103 }
104
105 int toLowerCase(int ch) {
106 int mapChar = ch;
107 int val = getProperties(ch);
108
109 if ((val & $$maskLowerCase) != 0) {
110 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
111 mapChar = ch + offset;
112 }
113 return mapChar;
114 }
115
116 int toUpperCase(int ch) {
117 int mapChar = ch;
118 int val = getProperties(ch);
119
120 if ((val & $$maskUpperCase) != 0) {
121 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
122 mapChar = ch - offset;
123 }
124 return mapChar;
125 }
126
127 int toTitleCase(int ch) {
128 int mapChar = ch;
129 int val = getProperties(ch);
130
131 if ((val & $$maskTitleCase) != 0) {
132 // There is a titlecase equivalent. Perform further checks:
133 if ((val & $$maskUpperCase) == 0) {
134 // The character does not have an uppercase equivalent, so it must
135 // already be uppercase; so add 1 to get the titlecase form.
136 mapChar = ch + 1;
137 }
138 else if ((val & $$maskLowerCase) == 0) {
139 // The character does not have a lowercase equivalent, so it must
140 // already be lowercase; so subtract 1 to get the titlecase form.
141 mapChar = ch - 1;
142 }
143 // else {
144 // The character has both an uppercase equivalent and a lowercase
145 // equivalent, so it must itself be a titlecase form; return it.
146 // return ch;
147 //}
148 }
149 else if ((val & $$maskUpperCase) != 0) {
150 // This character has no titlecase equivalent but it does have an
151 // uppercase equivalent, so use that (subtract the signed case offset).
152 mapChar = toUpperCase(ch);
153 }
154 return mapChar;
155 }
156
157 int digit(int ch, int radix) {
158 int value = -1;
159 if (radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX) {
160 int val = getProperties(ch);
161 int kind = val & $$maskType;
162 if (kind == Character.DECIMAL_DIGIT_NUMBER) {
163 value = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
164 }
165 else if ((val & $$maskNumericType) == $$valueJavaSupradecimal) {
166 // Java supradecimal digit
167 value = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
168 }
169 }
170 return (value < radix) ? value : -1;
171 }
172
173 int getNumericValue(int ch) {
174 int val = getProperties(ch);
175 int retval = -1;
176
177 switch (val & $$maskNumericType) {
178 default: // cannot occur
179 case ($$valueNotNumeric): // not numeric
180 retval = -1;
181 break;
182 case ($$valueDigit): // simple numeric
183 retval = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
184 break;
185 case ($$valueStrangeNumeric) : // "strange" numeric
186 switch(ch) {
187 case 0x10113: retval = 40; break; // AEGEAN NUMBER FORTY
188 case 0x10114: retval = 50; break; // AEGEAN NUMBER FIFTY
189 case 0x10115: retval = 60; break; // AEGEAN NUMBER SIXTY
190 case 0x10116: retval = 70; break; // AEGEAN NUMBER SEVENTY
191 case 0x10117: retval = 80; break; // AEGEAN NUMBER EIGHTY
192 case 0x10118: retval = 90; break; // AEGEAN NUMBER NINETY
193 case 0x10119: retval = 100; break; // AEGEAN NUMBER ONE HUNDRED
194 case 0x1011A: retval = 200; break; // AEGEAN NUMBER TWO HUNDRED
195 case 0x1011B: retval = 300; break; // AEGEAN NUMBER THREE HUNDRED
196 case 0x1011C: retval = 400; break; // AEGEAN NUMBER FOUR HUNDRED
197 case 0x1011D: retval = 500; break; // AEGEAN NUMBER FIVE HUNDRED
198 case 0x1011E: retval = 600; break; // AEGEAN NUMBER SIX HUNDRED
199 case 0x1011F: retval = 700; break; // AEGEAN NUMBER SEVEN HUNDRED
200 case 0x10120: retval = 800; break; // AEGEAN NUMBER EIGHT HUNDRED
201 case 0x10121: retval = 900; break; // AEGEAN NUMBER NINE HUNDRED
202 case 0x10122: retval = 1000; break; // AEGEAN NUMBER ONE THOUSAND
203 case 0x10123: retval = 2000; break; // AEGEAN NUMBER TWO THOUSAND
204 case 0x10124: retval = 3000; break; // AEGEAN NUMBER THREE THOUSAND
205 case 0x10125: retval = 4000; break; // AEGEAN NUMBER FOUR THOUSAND
206 case 0x10126: retval = 5000; break; // AEGEAN NUMBER FIVE THOUSAND
207 case 0x10127: retval = 6000; break; // AEGEAN NUMBER SIX THOUSAND
208 case 0x10128: retval = 7000; break; // AEGEAN NUMBER SEVEN THOUSAND
209 case 0x10129: retval = 8000; break; // AEGEAN NUMBER EIGHT THOUSAND
210 case 0x1012A: retval = 9000; break; // AEGEAN NUMBER NINE THOUSAND
211 case 0x1012B: retval = 10000; break; // AEGEAN NUMBER TEN THOUSAND
212 case 0x1012C: retval = 20000; break; // AEGEAN NUMBER TWENTY THOUSAND
213 case 0x1012D: retval = 30000; break; // AEGEAN NUMBER THIRTY THOUSAND
214 case 0x1012E: retval = 40000; break; // AEGEAN NUMBER FORTY THOUSAND
215 case 0x1012F: retval = 50000; break; // AEGEAN NUMBER FIFTY THOUSAND
216 case 0x10130: retval = 60000; break; // AEGEAN NUMBER SIXTY THOUSAND
217 case 0x10131: retval = 70000; break; // AEGEAN NUMBER SEVENTY THOUSAND
218 case 0x10132: retval = 80000; break; // AEGEAN NUMBER EIGHTY THOUSAND
219 case 0x10133: retval = 90000; break; // AEGEAN NUMBER NINETY THOUSAND
220 case 0x10323: retval = 50; break; // OLD ITALIC NUMERAL FIFTY
221 default: retval = -2; break;
222 }
223
224 break;
225 case ($$valueJavaSupradecimal): // Java supradecimal
226 retval = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
227 break;
228 }
229 return retval;
230 }
231
232 boolean isWhitespace(int ch) {
233 int props = getProperties(ch);
234 return ((props & $$maskIdentifierInfo) == $$valueJavaWhitespace);
235 }
236
237 byte getDirectionality(int ch) {
238 int val = getProperties(ch);
239 byte directionality = (byte)((val & $$maskBidi) >> $$shiftBidi);
240 if (directionality == 0xF ) {
241 directionality = Character.DIRECTIONALITY_UNDEFINED;
242 }
243 return directionality;
244 }
245
246 boolean isMirrored(int ch) {
247 int props = getProperties(ch);
248 return ((props & $$maskMirrored) != 0);
249 }
250
251 static final CharacterData instance = new CharacterData01();
252 private CharacterData01() {};
253
254 $$Tables
255
256 static {
257 $$Initializers
258 }
259}