blob: 38a63b4c6f38134ed7325ba9ac1952d675bd0936 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 2003-2006 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26package java.lang;
27
/** The CharacterData class encapsulates the large tables found in
    java.lang.Character. */
30
31class CharacterData02 extends CharacterData {
32 /* The character properties are currently encoded into 32 bits in the following manner:
33 1 bit mirrored property
34 4 bits directionality property
35 9 bits signed offset used for converting case
36 1 bit if 1, adding the signed offset converts the character to lowercase
37 1 bit if 1, subtracting the signed offset converts the character to uppercase
38 1 bit if 1, this character has a titlecase equivalent (possibly itself)
39 3 bits 0 may not be part of an identifier
40 1 ignorable control; may continue a Unicode identifier or Java identifier
41 2 may continue a Java identifier but not a Unicode identifier (unused)
42 3 may continue a Unicode identifier or Java identifier
43 4 is a Java whitespace character
44 5 may start or continue a Java identifier;
45 may continue but not start a Unicode identifier (underscores)
46 6 may start or continue a Java identifier but not a Unicode identifier ($)
47 7 may start or continue a Unicode identifier or Java identifier
48 Thus:
49 5, 6, 7 may start a Java identifier
50 1, 2, 3, 5, 6, 7 may continue a Java identifier
51 7 may start a Unicode identifier
52 1, 3, 5, 7 may continue a Unicode identifier
53 1 is ignorable within an identifier
54 4 is Java whitespace
55 2 bits 0 this character has no numeric property
56 1 adding the digit offset to the character code and then
57 masking with 0x1F will produce the desired numeric value
58 2 this character has a "strange" numeric value
59 3 a Java supradecimal digit: adding the digit offset to the
60 character code, then masking with 0x1F, then adding 10
61 will produce the desired numeric value
62 5 bits digit offset
63 5 bits character type
64
65 The encoding of character properties is subject to change at any time.
66 */
67
68 int getProperties(int ch) {
69 char offset = (char)ch;
70 int props = $$Lookup(offset);
71 return props;
72 }
73
74 int getType(int ch) {
75 int props = getProperties(ch);
76 return (props & $$maskType);
77 }
78
79 boolean isJavaIdentifierStart(int ch) {
80 int props = getProperties(ch);
81 return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
82 }
83
84 boolean isJavaIdentifierPart(int ch) {
85 int props = getProperties(ch);
86 return ((props & $$nonzeroJavaPart) != 0);
87 }
88
89 boolean isUnicodeIdentifierStart(int ch) {
90 int props = getProperties(ch);
91 return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
92 }
93
94 boolean isUnicodeIdentifierPart(int ch) {
95 int props = getProperties(ch);
96 return ((props & $$maskUnicodePart) != 0);
97 }
98
99 boolean isIdentifierIgnorable(int ch) {
100 int props = getProperties(ch);
101 return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
102 }
103
104 int toLowerCase(int ch) {
105 int mapChar = ch;
106 int val = getProperties(ch);
107
108 if ((val & $$maskLowerCase) != 0) {
109 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
110 mapChar = ch + offset;
111 }
112 return mapChar;
113 }
114
115 int toUpperCase(int ch) {
116 int mapChar = ch;
117 int val = getProperties(ch);
118
119 if ((val & $$maskUpperCase) != 0) {
120 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
121 mapChar = ch - offset;
122 }
123 return mapChar;
124 }
125
126 int toTitleCase(int ch) {
127 int mapChar = ch;
128 int val = getProperties(ch);
129
130 if ((val & $$maskTitleCase) != 0) {
131 // There is a titlecase equivalent. Perform further checks:
132 if ((val & $$maskUpperCase) == 0) {
133 // The character does not have an uppercase equivalent, so it must
134 // already be uppercase; so add 1 to get the titlecase form.
135 mapChar = ch + 1;
136 }
137 else if ((val & $$maskLowerCase) == 0) {
138 // The character does not have a lowercase equivalent, so it must
139 // already be lowercase; so subtract 1 to get the titlecase form.
140 mapChar = ch - 1;
141 }
142 // else {
143 // The character has both an uppercase equivalent and a lowercase
144 // equivalent, so it must itself be a titlecase form; return it.
145 // return ch;
146 //}
147 }
148 else if ((val & $$maskUpperCase) != 0) {
149 // This character has no titlecase equivalent but it does have an
150 // uppercase equivalent, so use that (subtract the signed case offset).
151 mapChar = toUpperCase(ch);
152 }
153 return mapChar;
154 }
155
156 int digit(int ch, int radix) {
157 int value = -1;
158 if (radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX) {
159 int val = getProperties(ch);
160 int kind = val & $$maskType;
161 if (kind == Character.DECIMAL_DIGIT_NUMBER) {
162 value = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
163 }
164 else if ((val & $$maskNumericType) == $$valueJavaSupradecimal) {
165 // Java supradecimal digit
166 value = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
167 }
168 }
169 return (value < radix) ? value : -1;
170 }
171
172 int getNumericValue(int ch) {
173 int val = getProperties(ch);
174 int retval = -1;
175
176 switch (val & $$maskNumericType) {
177 default: // cannot occur
178 case ($$valueNotNumeric): // not numeric
179 retval = -1;
180 break;
181 case ($$valueDigit): // simple numeric
182 retval = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
183 break;
184 case ($$valueStrangeNumeric) : // "strange" numeric
185 retval = -2;
186 break;
187 case ($$valueJavaSupradecimal): // Java supradecimal
188 retval = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
189 break;
190 }
191 return retval;
192 }
193
194 boolean isWhitespace(int ch) {
195 return (getProperties(ch) & $$maskIdentifierInfo) == $$valueJavaWhitespace;
196 }
197
198 byte getDirectionality(int ch) {
199 int val = getProperties(ch);
200 byte directionality = (byte)((val & $$maskBidi) >> $$shiftBidi);
201 if (directionality == 0xF ) {
202 directionality = Character.DIRECTIONALITY_UNDEFINED;
203 }
204 return directionality;
205 }
206
207 boolean isMirrored(int ch) {
208 return (getProperties(ch) & $$maskMirrored) != 0;
209 }
210
211 static final CharacterData instance = new CharacterData02();
212 private CharacterData02() {};
213
214 $$Tables
215
216 static {
217 $$Initializers
218 }
219}