/*
 * Copyright 1996-2002 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */

package sun.io;

28/**
29* A table driven conversion from char to byte for single byte
30* character sets. Tables will reside in the class CharToByteYYYYY,
31* where YYYYY is a unique character set identifier
32
33 < TBD: Tables are of the form... >
34
35*
36* @author Lloyd Honomichl
37* @author Asmus Freytag
38*/
39
40public abstract class CharToByteSingleByte extends CharToByteConverter {
41
42 /*
43 * 1st level index, provided by subclass
44 */
45 protected short index1[];
46
47 /*
48 * 2nd level index, provided by subclass
49 */
50 protected String index2;
51
52 /*
53 * Mask to isolate bits for 1st level index, from subclass
54 */
55 protected int mask1;
56
57 /*
58 * Mask to isolate bits for 2nd level index, from subclass
59 */
60 protected int mask2;
61
62 /*
63 * Shift to isolate bits for 1st level index, from subclass
64 */
65 protected int shift;
66
67 private char highHalfZoneCode;
68
69 public short[] getIndex1() {
70 return index1;
71 }
72
73 public String getIndex2() {
74 return index2;
75 }
76 public int flush(byte[] output, int outStart, int outEnd)
77 throws MalformedInputException
78 {
79 if (highHalfZoneCode != 0) {
80 highHalfZoneCode = 0;
81 badInputLength = 0;
82 throw new MalformedInputException();
83 }
84 byteOff = charOff = 0;
85 return 0;
86 }
87
88 /**
89 * Converts characters to sequences of bytes.
90 * Conversions that result in Exceptions can be restarted by calling
91 * convert again, with appropriately modified parameters.
92 * @return the characters written to output.
93 * @param input char array containing text in Unicode
94 * @param inStart offset in input array
95 * @param inEnd offset of last byte to be converted
96 * @param output byte array to receive conversion result
97 * @param outStart starting offset
98 * @param outEnd offset of last byte to be written to
99 * @throw MalformedInputException for any sequence of chars that is
100 * illegal in Unicode (principally unpaired surrogates
101 * and \uFFFF or \uFFFE), including any partial surrogate pair
102 * which occurs at the end of an input buffer.
103 * @throw UnsupportedCharacterException for any character that
104 * that cannot be converted to the external character set.
105 */
106 public int convert(char[] input, int inOff, int inEnd,
107 byte[] output, int outOff, int outEnd)
108 throws MalformedInputException,
109 UnknownCharacterException,
110 ConversionBufferFullException
111 {
112 char inputChar; // Input character to be converted
113 byte[] outputByte; // Output byte written to output
114 int inputSize; // Size of input
115 int outputSize; // Size of output
116
117 byte[] tmpArray = new byte[1];
118
119 // Record beginning offsets
120 charOff = inOff;
121 byteOff = outOff;
122
123 if (highHalfZoneCode != 0) {
124 inputChar = highHalfZoneCode;
125 highHalfZoneCode = 0;
126 if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
127 // This is legal UTF16 sequence.
128 badInputLength = 1;
129 throw new UnknownCharacterException();
130 } else {
131 // This is illegal UTF16 sequence.
132 badInputLength = 0;
133 throw new MalformedInputException();
134 }
135 }
136
137 // Loop until we hit the end of the input
138 while(charOff < inEnd) {
139
140 outputByte = tmpArray;
141
142 // Get the input character
143 inputChar = input[charOff];
144
145 // Default output size
146 outputSize = 1;
147
148 // Assume this is a simple character
149 inputSize = 1;
150
151 // Is this a high surrogate?
152 if(inputChar >= '\uD800' && inputChar <= '\uDBFF') {
153 // Is this the last character in the input?
154 if (charOff + 1 >= inEnd) {
155 highHalfZoneCode = inputChar;
156 break;
157 }
158
159 // Is there a low surrogate following?
160 inputChar = input[charOff + 1];
161 if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
162 // We have a valid surrogate pair. Too bad we don't map
163 // surrogates. Is substitution enabled?
164 if (subMode) {
165 outputByte = subBytes;
166 outputSize = subBytes.length;
167 inputSize = 2;
168 } else {
169 badInputLength = 2;
170 throw new UnknownCharacterException();
171 }
172 } else {
173 // We have a malformed surrogate pair
174 badInputLength = 1;
175 throw new MalformedInputException();
176 }
177 }
178
179 // Is this an unaccompanied low surrogate?
180 else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
181 badInputLength = 1;
182 throw new MalformedInputException();
183 }
184
185 // Not part of a surrogate, so look it up
186 else {
187 // Get output using two level lookup
188 outputByte[0] = getNative(inputChar);
189
190 // Might this character be unmappable?
191 if (outputByte[0] == 0) {
192 // If outputByte is zero because the input was zero
193 // then this character is actually mappable
194 if (input[charOff] != '\u0000') {
195 // We have an unmappable character
196 // Is substitution enabled?
197 if (subMode) {
198 outputByte = subBytes;
199 outputSize = subBytes.length;
200 } else {
201 badInputLength = 1;
202 throw new UnknownCharacterException();
203 }
204 }
205 }
206 }
207
208 // If we don't have room for the output, throw an exception
209 if (byteOff + outputSize > outEnd)
210 throw new ConversionBufferFullException();
211
212 // Put the byte in the output buffer
213 for (int i = 0; i < outputSize; i++) {
214 output[byteOff++] = outputByte[i];
215 }
216 charOff += inputSize;
217
218 }
219
220 // Return the length written to the output buffer
221 return byteOff - outOff;
222 }
223
224 /**
225 * the maximum number of bytes needed to hold a converted char
226 * @returns the maximum number of bytes needed for a converted char
227 */
228 public int getMaxBytesPerChar() {
229 return 1;
230 }
231
232 public byte getNative(char inputChar) {
233 return (byte)index2.charAt(index1[(inputChar & mask1) >> shift]
234 + (inputChar & mask2));
235 }
236
237 /**
238 * Resets the converter.
239 * Call this method to reset the converter to its initial state
240 */
241 public void reset() {
242 byteOff = charOff = 0;
243 highHalfZoneCode = 0;
244 }
245
246 /**
247 * Return whether a character is mappable or not
248 * @return true if a character is mappable
249 */
250 public boolean canConvert(char ch) {
251 // Look it up in the table
252 if (index2.charAt(index1[((ch & mask1) >> shift)] + (ch & mask2)) != '\u0000')
253 return true;
254
255 // Nulls are always mappable
256 return (ch == '\u0000');
257 }
258}