/*
 * Copyright 1996-2004 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */

package sun.io;

import sun.nio.cs.ext.EUC_TW;

/*
 * @author Limin Shi
 */
33public class ByteToCharEUC_TW extends ByteToCharConverter
34{
35 private final byte G0 = 0;
36 private final byte G1 = 1;
37 private final byte G2 = 2;
38 private final byte G3 = 3;
39 private final byte G4 = 4;
40 private final byte MSB = (byte) 0x80;
41 private final byte SS2 = (byte) 0x8E;
42 private final byte P2 = (byte) 0xA2;
43 private final byte P3 = (byte) 0xA3;
44
45 protected final char REPLACE_CHAR = '\uFFFD';
46
47 private byte firstByte = 0, state = G0;
48 public static String unicodeCNS2, unicodeCNS3;
49 private static String unicodeCNS4, unicodeCNS5, unicodeCNS6;
50 private static String unicodeCNS7, unicodeCNS15;
51
52 private int cnsPlane = 0;
53 private final static EUC_TW nioCoder = new EUC_TW();
54
55 public static String unicodeCNS1 = nioCoder.getUnicodeCNS1();
56
57 static String[] cnsChars = {
58 unicodeCNS2 = nioCoder.getUnicodeCNS2(),
59 unicodeCNS3 = nioCoder.getUnicodeCNS3(),
60 unicodeCNS4 = nioCoder.getUnicodeCNS4(),
61 unicodeCNS5 = nioCoder.getUnicodeCNS5(),
62 unicodeCNS6 = nioCoder.getUnicodeCNS6(),
63 unicodeCNS7 = nioCoder.getUnicodeCNS7(),
64 unicodeCNS15 = nioCoder.getUnicodeCNS15()
65 };
66
67 public ByteToCharEUC_TW() {
68 }
69
70 public int flush(char[] output, int outStart, int outEnd)
71 throws MalformedInputException
72 {
73 if (state != G0) {
74 state = G0;
75 firstByte = 0;
76 badInputLength = 0;
77 throw new MalformedInputException();
78 }
79 reset();
80 return 0;
81 }
82
83 public void reset() {
84 state = G0;
85 firstByte = 0;
86 byteOff = charOff = 0;
87 }
88
89 /**
90 * Character conversion
91 */
92 public int convert(byte[] input, int inOff, int inEnd,
93 char[] output, int outOff, int outEnd)
94 throws UnknownCharacterException, MalformedInputException,
95 ConversionBufferFullException
96 {
97 int inputSize = 0;
98 char outputChar = (char) 0;
99
100 byteOff = inOff;
101 charOff = outOff;
102
103 cnsPlane = 3;
104 while (byteOff < inEnd) {
105 if (charOff >= outEnd)
106 throw new ConversionBufferFullException();
107
108 switch (state) {
109 case G0:
110 if ( (input[byteOff] & MSB) == 0) { // ASCII
111 outputChar = (char) input[byteOff];
112 } else if (input[byteOff] == SS2) { // Codeset 2
113 state = G2;
114 } else { // Codeset 1
115 firstByte = input[byteOff];
116 state = G1;
117 }
118 break;
119 case G1:
120 inputSize = 2;
121 if ( (input[byteOff] & MSB) != 0) { // 2nd byte
122 cnsPlane = 1;
123 outputChar = convToUnicode(firstByte,
124 input[byteOff], unicodeCNS1);
125 } else { // Error
126 badInputLength = 1;
127 throw new MalformedInputException();
128 }
129 firstByte = 0;
130 state = G0;
131 break;
132 case G2:
133 cnsPlane = (input[byteOff] & (byte)0x0f);
134 // Adjust String array index for plan 15
135 cnsPlane = (cnsPlane == 15)? 8 : cnsPlane;
136
137 if (cnsPlane < 15) {
138 state = G3;
139 } else {
140 badInputLength = 2;
141 throw new MalformedInputException();
142 }
143
144 break;
145 case G3:
146 if ( (input[byteOff] & MSB) != 0) { // 1st byte
147 firstByte = input[byteOff];
148 state = G4;
149 } else { // Error
150 state = G0;
151 badInputLength = 2;
152 throw new MalformedInputException();
153 }
154 break;
155 case G4:
156 if ( (input[byteOff] & MSB) != 0) { // 2nd byte
157 outputChar = convToUnicode(firstByte,
158 input[byteOff],
159 cnsChars[cnsPlane - 2]);
160 } else { // Error
161 badInputLength = 3;
162 throw new MalformedInputException();
163 }
164 firstByte = 0;
165 state = G0;
166 break;
167 }
168 byteOff++;
169
170 if (outputChar != (char) 0) {
171 if (outputChar == REPLACE_CHAR) {
172 if (subMode) // substitution enabled
173 outputChar = subChars[0];
174 else {
175 badInputLength = inputSize;
176 throw new UnknownCharacterException();
177 }
178 }
179 output[charOff++] = outputChar;
180 outputChar = 0;
181 }
182 }
183
184 return charOff - outOff;
185 }
186
187
188 /**
189 * Return the character set ID
190 */
191 public String getCharacterEncoding() {
192 return "EUC_TW";
193 }
194
195 protected char convToUnicode(byte byte1, byte byte2, String table)
196 {
197 int index;
198
199 if ((byte1 & 0xff) < 0xa1 || (byte2 & 0xff) < 0xa1 ||
200 (byte1 & 0xff) > 0xfe || (byte2 & 0xff) > 0xfe)
201 return REPLACE_CHAR;
202 index = (((byte1 & 0xff) - 0xa1) * 94) + (byte2 & 0xff) - 0xa1;
203 if (index < 0 || index >= table.length())
204 return REPLACE_CHAR;
205
206 // Planes 3 and above containing zero value lead byte
207 // to accommodate surrogates for mappings which decode to a surrogate
208 // pair
209
210 if (this.cnsPlane >= 3)
211 index = (index * 2) + 1;
212
213 return table.charAt(index);
214 }
215}