blob: 2def9b0cdaf0f8a59833e798b6b9a898ef238f81 [file] [log] [blame]
/*
 * Copyright 1996-1999 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
25
26package sun.io;
27import java.io.*;
28
29public class CharToByteISO2022JP extends CharToByteJIS0208 {
30
31 private static final int ASCII = 0; // ESC ( B
32 private static final int JISX0201_1976 = 1; // ESC ( J
33 private static final int JISX0208_1978 = 2; // ESC $ @
34 private static final int JISX0208_1983 = 3; // ESC $ B
35 private static final int JISX0201_1976_KANA = 4; // ESC ( I
36
37 private char highHalfZoneCode;
38 private boolean flushed = true;
39
40 // JIS is state full encoding, so currentMode keep the
41 // current codeset
42 private int currentMode = ASCII;
43
44 /**
45 * Bytes for substitute for unmappable input.
46 */
47 // XXX: Assumes subBytes are ASCII string. Need to change Escape sequence
48 // for other character sets.
49 protected byte[] subBytesEscape = { (byte)0x1b, (byte)0x28, (byte)0x42 }; // ESC ( B
50 protected int subBytesMode = ASCII;
51
52 public int flush(byte[] output, int outStart, int outEnd)
53 throws MalformedInputException, ConversionBufferFullException
54 {
55 if (highHalfZoneCode != 0) {
56 highHalfZoneCode = 0;
57 badInputLength = 0;
58 throw new MalformedInputException();
59 }
60
61 if (!flushed && (currentMode != ASCII)) {
62 if (outEnd - outStart < 3) {
63 throw new ConversionBufferFullException();
64 }
65 output[outStart] = (byte)0x1b;
66 output[outStart + 1] = (byte)0x28;
67 output[outStart + 2] = (byte)0x42;
68 byteOff += 3;
69 byteOff = charOff = 0;
70 flushed = true;
71 currentMode = ASCII;
72 return 3;
73 }
74 return 0;
75 }
76
77 public int convert(char[] input, int inOff, int inEnd,
78 byte[] output, int outOff, int outEnd)
79 throws MalformedInputException, UnknownCharacterException,
80 ConversionBufferFullException
81
82 {
83 char inputChar; // Input character to be converted
84 int inputSize; // Size of the input
85 int outputSize; // Size of the output
86
87 // Buffer for output bytes
88 byte[] tmpArray = new byte[6];
89 byte[] outputByte;
90
91 flushed = false;
92
93 // Make copies of input and output indexes
94 charOff = inOff;
95 byteOff = outOff;
96
97 if (highHalfZoneCode != 0) {
98 inputChar = highHalfZoneCode;
99 highHalfZoneCode = 0;
100 if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
101 // This is legal UTF16 sequence.
102 badInputLength = 1;
103 throw new UnknownCharacterException();
104 } else {
105 // This is illegal UTF16 sequence.
106 badInputLength = 0;
107 throw new MalformedInputException();
108 }
109 }
110
111 // Loop until we run out of input
112 while(charOff < inEnd) {
113 outputByte = tmpArray;
114 int newMode = currentMode; // Trace character mode changing
115
116 // Get the input character
117 inputChar = input[charOff];
118 inputSize = 1;
119 outputSize = 1;
120
121 // Is this a high surrogate?
122 if(inputChar >= '\uD800' && inputChar <= '\uDBFF') {
123 // Is this the last character of the input?
124 if (charOff + 1 >= inEnd) {
125 highHalfZoneCode = inputChar;
126 break;
127 }
128
129 // Is there a low surrogate following?
130 inputChar = input[charOff + 1];
131 if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
132 // We have a valid surrogate pair. Too bad we don't do
133 // surrogates. Is substitution enabled?
134 if (subMode) {
135 if (currentMode != subBytesMode) {
136 System.arraycopy(subBytesEscape, 0, outputByte, 0,
137 subBytesEscape.length);
138 outputSize = subBytesEscape.length;
139 System.arraycopy(subBytes, 0, outputByte,
140 outputSize, subBytes.length);
141 outputSize += subBytes.length;
142 newMode = subBytesMode;
143 } else {
144 outputByte = subBytes;
145 outputSize = subBytes.length;
146 }
147 inputSize = 2;
148 } else {
149 badInputLength = 2;
150 throw new UnknownCharacterException();
151 }
152 } else {
153 // We have a malformed surrogate pair
154 badInputLength = 1;
155 throw new MalformedInputException();
156 }
157 }
158
159 // Is this an unaccompanied low surrogate?
160 else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
161 badInputLength = 1;
162 throw new MalformedInputException();
163 } else {
164 // Not part of a surrogate
165
166 // Does this map to the Roman range?
167 if (inputChar <= '\u007F') {
168 if (currentMode != ASCII) {
169 outputByte[0] = (byte)0x1b;
170 outputByte[1] = (byte)0x28;
171 outputByte[2] = (byte)0x42;
172 outputByte[3] = (byte)inputChar;
173 outputSize = 4;
174 newMode = ASCII;
175 } else {
176 outputByte[0] = (byte)inputChar;
177 outputSize = 1;
178 }
179 }
180 // Is it a single byte kana?
181 else if (inputChar >= 0xFF61 && inputChar <= 0xFF9F) {
182 if (currentMode != JISX0201_1976_KANA) {
183 outputByte[0] = (byte)0x1b;
184 outputByte[1] = (byte)0x28;
185 outputByte[2] = (byte)0x49;
186 outputByte[3] = (byte)(inputChar - 0xff40);
187 outputSize = 4;
188 newMode = JISX0201_1976_KANA;
189 } else {
190 outputByte[0] = (byte)(inputChar - 0xff40);
191 outputSize = 1;
192 }
193 }
194 // Is it a yen sign?
195 else if (inputChar == '\u00A5') {
196 if (currentMode != JISX0201_1976) {
197 outputByte[0] = (byte)0x1b;
198 outputByte[1] = (byte)0x28;
199 outputByte[2] = (byte)0x4a;
200 outputByte[3] = (byte)0x5c;
201 outputSize = 4;
202 newMode = JISX0201_1976;
203 } else {
204 outputByte[0] = (byte)0x5C;
205 outputSize = 1;
206 }
207 }
208 // Is it a tilde?
209 else if (inputChar == '\u203E')
210 {
211 if (currentMode != JISX0201_1976) {
212 outputByte[0] = (byte)0x1b;
213 outputByte[1] = (byte)0x28;
214 outputByte[2] = (byte)0x4a;
215 outputByte[3] = (byte)0x7e;
216 outputSize = 4;
217 newMode = JISX0201_1976;
218 } else {
219 outputByte[0] = (byte)0x7e;
220 outputSize = 1;
221 }
222 }
223 // Is it a JIS-X-0208 character?
224 else {
225 int index = getNative(inputChar);
226 if (index != 0) {
227 if (currentMode != JISX0208_1983) {
228 outputByte[0] = (byte)0x1b;
229 outputByte[1] = (byte)0x24;
230 outputByte[2] = (byte)0x42;
231 outputByte[3] = (byte)(index >> 8);
232 outputByte[4] = (byte)(index & 0xff);
233 outputSize = 5;
234 newMode = JISX0208_1983;
235 } else {
236 outputByte[0] = (byte)(index >> 8);
237 outputByte[1] = (byte)(index & 0xff);
238 outputSize = 2;
239 }
240 }
241 // It doesn't map to JIS-0208!
242 else {
243 if (subMode) {
244 if (currentMode != subBytesMode) {
245 System.arraycopy(subBytesEscape, 0, outputByte, 0,
246 subBytesEscape.length);
247 outputSize = subBytesEscape.length;
248 System.arraycopy(subBytes, 0, outputByte,
249 outputSize, subBytes.length);
250 outputSize += subBytes.length;
251 newMode = subBytesMode;
252 } else {
253 outputByte = subBytes;
254 outputSize = subBytes.length;
255 }
256 } else {
257 badInputLength = 1;
258 throw new UnknownCharacterException();
259 }
260 }
261 }
262 }
263
264 // Is there room in the output buffer?
265 // XXX: The code assumes output buffer can hold at least 5 bytes,
266 // in this coverter case. However, there is no way for apps to
267 // see how many bytes will be necessary for next call.
268 // getMaxBytesPerChar() should be overriden in every subclass of
269 // CharToByteConverter and reflect real value (5 for this).
270 if (byteOff + outputSize > outEnd)
271 throw new ConversionBufferFullException();
272
273 // Put the output into the buffer
274 for ( int i = 0 ; i < outputSize ; i++ )
275 output[byteOff++] = outputByte[i];
276
277 // Advance the input pointer
278 charOff += inputSize;
279
280 // We can successfuly output the characters, changes
281 // current mode. Fix for 4251646.
282 currentMode = newMode;
283 }
284
285 // return mode ASCII at the end
286 if (currentMode != ASCII){
287 if (byteOff + 3 > outEnd)
288 throw new ConversionBufferFullException();
289
290 output[byteOff++] = 0x1b;
291 output[byteOff++] = 0x28;
292 output[byteOff++] = 0x42;
293 currentMode = ASCII;
294 }
295
296 // Return the length written to the output buffer
297 return byteOff-outOff;
298 }
299
300 // Reset
301 public void reset() {
302 highHalfZoneCode = 0;
303 byteOff = charOff = 0;
304 currentMode = ASCII;
305 }
306
307 /**
308 * returns the maximum number of bytes needed to convert a char
309 */
310 public int getMaxBytesPerChar() {
311 return 8;
312 }
313
314 // Return the character set ID
315 public String getCharacterEncoding() {
316 return "ISO2022JP";
317 }
318
319}