/*
 * Copyright 2002-2006 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
25
26package java.lang;
27
28/** The CharacterData class encapsulates the large tables found in
29 Java.lang.Character. */
30
31class CharacterDataLatin1 extends CharacterData {
32
33 /* The character properties are currently encoded into 32 bits in the following manner:
34 1 bit mirrored property
35 4 bits directionality property
36 9 bits signed offset used for converting case
37 1 bit if 1, adding the signed offset converts the character to lowercase
38 1 bit if 1, subtracting the signed offset converts the character to uppercase
39 1 bit if 1, this character has a titlecase equivalent (possibly itself)
40 3 bits 0 may not be part of an identifier
41 1 ignorable control; may continue a Unicode identifier or Java identifier
42 2 may continue a Java identifier but not a Unicode identifier (unused)
43 3 may continue a Unicode identifier or Java identifier
44 4 is a Java whitespace character
45 5 may start or continue a Java identifier;
46 may continue but not start a Unicode identifier (underscores)
47 6 may start or continue a Java identifier but not a Unicode identifier ($)
48 7 may start or continue a Unicode identifier or Java identifier
49 Thus:
50 5, 6, 7 may start a Java identifier
51 1, 2, 3, 5, 6, 7 may continue a Java identifier
52 7 may start a Unicode identifier
53 1, 3, 5, 7 may continue a Unicode identifier
54 1 is ignorable within an identifier
55 4 is Java whitespace
56 2 bits 0 this character has no numeric property
57 1 adding the digit offset to the character code and then
58 masking with 0x1F will produce the desired numeric value
59 2 this character has a "strange" numeric value
60 3 a Java supradecimal digit: adding the digit offset to the
61 character code, then masking with 0x1F, then adding 10
62 will produce the desired numeric value
63 5 bits digit offset
64 5 bits character type
65
66 The encoding of character properties is subject to change at any time.
67 */
68
69 int getProperties(int ch) {
70 char offset = (char)ch;
71 int props = $$Lookup(offset);
72 return props;
73 }
74
75 int getType(int ch) {
76 int props = getProperties(ch);
77 return (props & $$maskType);
78 }
79
80 boolean isJavaIdentifierStart(int ch) {
81 int props = getProperties(ch);
82 return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
83 }
84
85 boolean isJavaIdentifierPart(int ch) {
86 int props = getProperties(ch);
87 return ((props & $$nonzeroJavaPart) != 0);
88 }
89
90 boolean isUnicodeIdentifierStart(int ch) {
91 int props = getProperties(ch);
92 return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
93 }
94
95 boolean isUnicodeIdentifierPart(int ch) {
96 int props = getProperties(ch);
97 return ((props & $$maskUnicodePart) != 0);
98 }
99
100 boolean isIdentifierIgnorable(int ch) {
101 int props = getProperties(ch);
102 return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
103 }
104
105 int toLowerCase(int ch) {
106 int mapChar = ch;
107 int val = getProperties(ch);
108
109 if (((val & $$maskLowerCase) != 0) &&
110 ((val & $$maskCaseOffset) != $$maskCaseOffset)) {
111 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
112 mapChar = ch + offset;
113 }
114 return mapChar;
115 }
116
117 int toUpperCase(int ch) {
118 int mapChar = ch;
119 int val = getProperties(ch);
120
121 if ((val & $$maskUpperCase) != 0) {
122 if ((val & $$maskCaseOffset) != $$maskCaseOffset) {
123 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
124 mapChar = ch - offset;
125 } else if (ch == 0x00B5) {
126 mapChar = 0x039C;
127 }
128 }
129 return mapChar;
130 }
131
132 int toTitleCase(int ch) {
133 return toUpperCase(ch);
134 }
135
136 int digit(int ch, int radix) {
137 int value = -1;
138 if (radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX) {
139 int val = getProperties(ch);
140 int kind = val & $$maskType;
141 if (kind == Character.DECIMAL_DIGIT_NUMBER) {
142 value = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
143 }
144 else if ((val & $$maskNumericType) == $$valueJavaSupradecimal) {
145 // Java supradecimal digit
146 value = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
147 }
148 }
149 return (value < radix) ? value : -1;
150 }
151
152 int getNumericValue(int ch) {
153 int val = getProperties(ch);
154 int retval = -1;
155
156 switch (val & $$maskNumericType) {
157 default: // cannot occur
158 case ($$valueNotNumeric): // not numeric
159 retval = -1;
160 break;
161 case ($$valueDigit): // simple numeric
162 retval = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
163 break;
164 case ($$valueStrangeNumeric) : // "strange" numeric
165 retval = -2;
166 break;
167 case ($$valueJavaSupradecimal): // Java supradecimal
168 retval = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
169 break;
170 }
171 return retval;
172 }
173
174 boolean isWhitespace(int ch) {
175 int props = getProperties(ch);
176 return ((props & $$maskIdentifierInfo) == $$valueJavaWhitespace);
177 }
178
179 byte getDirectionality(int ch) {
180 int val = getProperties(ch);
181 byte directionality = (byte)((val & $$maskBidi) >> $$shiftBidi);
182
183 if (directionality == 0xF ) {
184 directionality = -1;
185 }
186 return directionality;
187 }
188
189 boolean isMirrored(int ch) {
190 int props = getProperties(ch);
191 return ((props & $$maskMirrored) != 0);
192 }
193
194 int toUpperCaseEx(int ch) {
195 int mapChar = ch;
196 int val = getProperties(ch);
197
198 if ((val & $$maskUpperCase) != 0) {
199 if ((val & $$maskCaseOffset) != $$maskCaseOffset) {
200 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
201 mapChar = ch - offset;
202 }
203 else {
204 switch(ch) {
205 // map overflow characters
206 case 0x00B5 : mapChar = 0x039C; break;
207 default : mapChar = Character.ERROR; break;
208 }
209 }
210 }
211 return mapChar;
212 }
213
214 static char[] sharpsMap = new char[] {'S', 'S'};
215
216 char[] toUpperCaseCharArray(int ch) {
217 char[] upperMap = {(char)ch};
218 if (ch == 0x00DF) {
219 upperMap = sharpsMap;
220 }
221 return upperMap;
222 }
223
224 static final CharacterDataLatin1 instance = new CharacterDataLatin1();
225 private CharacterDataLatin1() {};
226
227 $$Tables
228
229 static {
230 $$Initializers
231 }
232}
233