blob: b681b42fdc017c355f9698851a5c1169d8834ec2 [file] [log] [blame]
duke6e45e102007-12-01 00:00:00 +00001/*
alanb0d058232012-11-02 15:50:11 +00002 * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
duke6e45e102007-12-01 00:00:00 +00003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
ohair2283b9d2010-05-25 15:58:33 -07007 * published by the Free Software Foundation. Oracle designates this
duke6e45e102007-12-01 00:00:00 +00008 * particular file as subject to the "Classpath" exception as provided
ohair2283b9d2010-05-25 15:58:33 -07009 * by Oracle in the LICENSE file that accompanied this code.
duke6e45e102007-12-01 00:00:00 +000010 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
ohair2283b9d2010-05-25 15:58:33 -070021 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
duke6e45e102007-12-01 00:00:00 +000024 */
25
26package java.lang;
27
/** The CharacterDataLatin1 class, a CharacterData subclass, encapsulates
    the large tables found in java.lang.Character. */
30
31class CharacterDataLatin1 extends CharacterData {
32
/* The character properties are currently encoded into 32 bits in the following manner:
    1 bit   mirrored property
    4 bits  directionality property
    9 bits  signed offset used for converting case
    1 bit   if 1, adding the signed offset converts the character to lowercase
    1 bit   if 1, subtracting the signed offset converts the character to uppercase
    1 bit   if 1, this character has a titlecase equivalent (possibly itself)
    3 bits  0  may not be part of an identifier
            1  ignorable control; may continue a Unicode identifier or Java identifier
            2  may continue a Java identifier but not a Unicode identifier (unused)
            3  may continue a Unicode identifier or Java identifier
            4  is a Java whitespace character
            5  may start or continue a Java identifier;
               may continue but not start a Unicode identifier (underscores)
            6  may start or continue a Java identifier but not a Unicode identifier ($)
            7  may start or continue a Unicode identifier or Java identifier
            Thus:
               5, 6, 7 may start a Java identifier
               1, 2, 3, 5, 6, 7 may continue a Java identifier
               7 may start a Unicode identifier
               1, 3, 5, 7 may continue a Unicode identifier
               1 is ignorable within an identifier
               4 is Java whitespace
    2 bits  0  this character has no numeric property
            1  adding the digit offset to the character code and then
               masking with 0x1F will produce the desired numeric value
            2  this character has a "strange" numeric value
            3  a Java supradecimal digit: adding the digit offset to the
               character code, then masking with 0x1F, then adding 10
               will produce the desired numeric value
    5 bits  digit offset
    5 bits  character type

    The encoding of character properties is subject to change at any time.
 */
68
69 int getProperties(int ch) {
sherman47083992011-04-28 20:18:57 -070070 char offset = (char)ch;
duke6e45e102007-12-01 00:00:00 +000071 int props = $$Lookup(offset);
72 return props;
73 }
74
sherman47083992011-04-28 20:18:57 -070075 int getPropertiesEx(int ch) {
76 char offset = (char)ch;
77 int props = $$LookupEx(offset);
78 return props;
79 }
80
81 boolean isOtherLowercase(int ch) {
82 int props = getPropertiesEx(ch);
83 return (props & $$maskOtherLowercase) != 0;
84 }
85
86 boolean isOtherUppercase(int ch) {
87 int props = getPropertiesEx(ch);
88 return (props & $$maskOtherUppercase) != 0;
89 }
90
91 boolean isOtherAlphabetic(int ch) {
92 int props = getPropertiesEx(ch);
93 return (props & $$maskOtherAlphabetic) != 0;
94 }
95
96 boolean isIdeographic(int ch) {
97 int props = getPropertiesEx(ch);
98 return (props & $$maskIdeographic) != 0;
99 }
100
duke6e45e102007-12-01 00:00:00 +0000101 int getType(int ch) {
102 int props = getProperties(ch);
103 return (props & $$maskType);
104 }
105
106 boolean isJavaIdentifierStart(int ch) {
107 int props = getProperties(ch);
108 return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
109 }
110
111 boolean isJavaIdentifierPart(int ch) {
112 int props = getProperties(ch);
113 return ((props & $$nonzeroJavaPart) != 0);
114 }
115
116 boolean isUnicodeIdentifierStart(int ch) {
117 int props = getProperties(ch);
118 return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
119 }
120
121 boolean isUnicodeIdentifierPart(int ch) {
122 int props = getProperties(ch);
123 return ((props & $$maskUnicodePart) != 0);
124 }
125
126 boolean isIdentifierIgnorable(int ch) {
127 int props = getProperties(ch);
128 return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
129 }
130
131 int toLowerCase(int ch) {
132 int mapChar = ch;
133 int val = getProperties(ch);
134
135 if (((val & $$maskLowerCase) != 0) &&
136 ((val & $$maskCaseOffset) != $$maskCaseOffset)) {
137 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
138 mapChar = ch + offset;
139 }
140 return mapChar;
141 }
142
143 int toUpperCase(int ch) {
144 int mapChar = ch;
145 int val = getProperties(ch);
146
147 if ((val & $$maskUpperCase) != 0) {
148 if ((val & $$maskCaseOffset) != $$maskCaseOffset) {
149 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
150 mapChar = ch - offset;
151 } else if (ch == 0x00B5) {
152 mapChar = 0x039C;
153 }
154 }
155 return mapChar;
156 }
157
158 int toTitleCase(int ch) {
159 return toUpperCase(ch);
160 }
161
162 int digit(int ch, int radix) {
163 int value = -1;
164 if (radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX) {
165 int val = getProperties(ch);
166 int kind = val & $$maskType;
167 if (kind == Character.DECIMAL_DIGIT_NUMBER) {
168 value = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
169 }
170 else if ((val & $$maskNumericType) == $$valueJavaSupradecimal) {
171 // Java supradecimal digit
172 value = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
173 }
174 }
175 return (value < radix) ? value : -1;
176 }
177
178 int getNumericValue(int ch) {
179 int val = getProperties(ch);
180 int retval = -1;
181
182 switch (val & $$maskNumericType) {
183 default: // cannot occur
184 case ($$valueNotNumeric): // not numeric
185 retval = -1;
186 break;
187 case ($$valueDigit): // simple numeric
188 retval = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
189 break;
190 case ($$valueStrangeNumeric) : // "strange" numeric
191 retval = -2;
192 break;
193 case ($$valueJavaSupradecimal): // Java supradecimal
194 retval = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
195 break;
196 }
197 return retval;
198 }
199
200 boolean isWhitespace(int ch) {
201 int props = getProperties(ch);
202 return ((props & $$maskIdentifierInfo) == $$valueJavaWhitespace);
203 }
204
205 byte getDirectionality(int ch) {
206 int val = getProperties(ch);
207 byte directionality = (byte)((val & $$maskBidi) >> $$shiftBidi);
208
209 if (directionality == 0xF ) {
210 directionality = -1;
211 }
212 return directionality;
213 }
214
215 boolean isMirrored(int ch) {
216 int props = getProperties(ch);
217 return ((props & $$maskMirrored) != 0);
218 }
219
220 int toUpperCaseEx(int ch) {
221 int mapChar = ch;
222 int val = getProperties(ch);
223
224 if ((val & $$maskUpperCase) != 0) {
225 if ((val & $$maskCaseOffset) != $$maskCaseOffset) {
226 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
227 mapChar = ch - offset;
228 }
229 else {
230 switch(ch) {
231 // map overflow characters
232 case 0x00B5 : mapChar = 0x039C; break;
233 default : mapChar = Character.ERROR; break;
234 }
235 }
236 }
237 return mapChar;
238 }
239
240 static char[] sharpsMap = new char[] {'S', 'S'};
241
242 char[] toUpperCaseCharArray(int ch) {
243 char[] upperMap = {(char)ch};
244 if (ch == 0x00DF) {
245 upperMap = sharpsMap;
246 }
247 return upperMap;
248 }
249
/** The single shared instance of this class. */
static final CharacterDataLatin1 instance = new CharacterDataLatin1();

/** Private: the only instance is the one held in the instance field. */
private CharacterDataLatin1() {
}
252
// Template placeholder for the lookup table declarations. It is not Java
// source; presumably a build-time generator substitutes it -- confirm
// against the build.
$$Tables

// Static initializer whose body is another template placeholder,
// presumably replaced with the code that fills the tables declared above
// -- confirm against the build.
static {
    $$Initializers
}
258}
259