/*
 * Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
25package sun.io;
26
27import java.io.UnsupportedEncodingException;
28import sun.nio.cs.ext.JISAutoDetect;
29
30public class ByteToCharJISAutoDetect extends ByteToCharConverter {
31
32 private final static int EUCJP_MASK = 0x01;
33 private final static int SJIS2B_MASK = 0x02;
34 private final static int SJIS1B_MASK = 0x04;
35 private final static int EUCJP_KANA1_MASK = 0x08;
36 private final static int EUCJP_KANA2_MASK = 0x10;
37 private static byte[] maskTable1;
38 private static byte[] maskTable2;
39
40 private final static int SS2 = 0x8e;
41 private final static int SS3 = 0x8f;
42
43 private final static JISAutoDetect nioCoder = new JISAutoDetect();
44
45 // SJISName is set to either "SJIS" or "MS932"
46 private String SJISName;
47 private String EUCJPName;
48
49 private String convName = null;
50 private ByteToCharConverter detectedConv = null;
51 private ByteToCharConverter defaultConv = null;
52
53 public ByteToCharJISAutoDetect() {
54 super();
55 SJISName = CharacterEncoding.getSJISName();
56 EUCJPName = CharacterEncoding.getEUCJPName();
57 defaultConv = new ByteToCharISO8859_1();
58 defaultConv.subChars = subChars;
59 defaultConv.subMode = subMode;
60 maskTable1 = nioCoder.getByteMask1();
61 maskTable2 = nioCoder.getByteMask2();
62 }
63
64 public int flush(char [] output, int outStart, int outEnd)
65 throws MalformedInputException, ConversionBufferFullException
66 {
67 badInputLength = 0;
68 if(detectedConv != null)
69 return detectedConv.flush(output, outStart, outEnd);
70 else
71 return defaultConv.flush(output, outStart, outEnd);
72 }
73
74
75 /**
76 * Character conversion
77 */
78 public int convert(byte[] input, int inOff, int inEnd,
79 char[] output, int outOff, int outEnd)
80 throws UnknownCharacterException, MalformedInputException,
81 ConversionBufferFullException
82 {
83 int num = 0;
84
85 charOff = outOff;
86 byteOff = inOff;
87
88 try {
89 if (detectedConv == null) {
90 int euckana = 0;
91 int ss2count = 0;
92 int firstmask = 0;
93 int secondmask = 0;
94 int cnt;
95 boolean nonAsciiFound = false;
96
97 for (cnt = inOff; cnt < inEnd; cnt++) {
98 firstmask = 0;
99 secondmask = 0;
100 int byte1 = input[cnt]&0xff;
101 int byte2;
102
103 // TODO: should check valid escape sequences!
104 if (byte1 == 0x1b) {
105 convName = "ISO2022JP";
106 break;
107 }
108
109 // Try to convert all leading ASCII characters.
110 if ((nonAsciiFound == false) && (byte1 < 0x80)) {
111 if (charOff >= outEnd)
112 throw new ConversionBufferFullException();
113 output[charOff++] = (char) byte1;
114 byteOff++;
115 num++;
116 continue;
117 }
118
119 // We can no longer convert ASCII.
120 nonAsciiFound = true;
121
122 firstmask = maskTable1[byte1];
123 if (byte1 == SS2)
124 ss2count++;
125
126 if (firstmask != 0) {
127 if (cnt+1 < inEnd) {
128 byte2 = input[++cnt] & 0xff;
129 secondmask = maskTable2[byte2];
130 int mask = firstmask & secondmask;
131 if (mask == EUCJP_MASK) {
132 convName = EUCJPName;
133 break;
134 }
135 if ((mask == SJIS2B_MASK) || (mask == SJIS1B_MASK)
136 || (nioCoder.canBeSJIS1B(firstmask) && secondmask == 0)) {
137 convName = SJISName;
138 break;
139 }
140
141 // If the first byte is a SS3 and the third byte
142 // is not an EUC byte, it should be SJIS.
143 // Otherwise, we can't determine it yet, but it's
144 // very likely SJIS. So we don't take the EUCJP CS3
145 // character boundary. If we tried both
146 // possibilities here, it might be able to be
147 // determined correctly.
148 if ((byte1 == SS3) && nioCoder.canBeEUCJP(secondmask)) {
149 if (cnt+1 < inEnd) {
150 int nextbyte = input[cnt+1] & 0xff;
151 if (! nioCoder.canBeEUCJP(maskTable2[nextbyte]))
152 convName = SJISName;
153 } else
154 convName = SJISName;
155 }
156 if (nioCoder.canBeEUCKana(firstmask, secondmask))
157 euckana++;
158 } else {
159 if ((firstmask & SJIS1B_MASK) != 0) {
160 convName = SJISName;
161 break;
162 }
163 }
164 }
165 }
166
167 if (nonAsciiFound && (convName == null)) {
168 if ((euckana > 1) || (ss2count > 1))
169 convName = EUCJPName;
170 else
171 convName = SJISName;
172 }
173
174 if (convName != null) {
175 try {
176 detectedConv = ByteToCharConverter.getConverter(convName);
177 detectedConv.subChars = subChars;
178 detectedConv.subMode = subMode;
179 } catch (UnsupportedEncodingException e){
180 detectedConv = null;
181 convName = null;
182 }
183 }
184 }
185 } catch (ConversionBufferFullException bufferFullException) {
186 throw bufferFullException;
187 } catch (Exception e) {
188 // If we fail to detect the converter needed for any reason,
189 // use the default converter.
190 detectedConv = defaultConv;
191 }
192
193 // If we've converted all ASCII characters, then return.
194 if (byteOff == inEnd) {
195 return num;
196 }
197
198 if(detectedConv != null) {
199 try {
200 num += detectedConv.convert(input, inOff + num, inEnd,
201 output, outOff + num, outEnd);
202 } finally {
203 charOff = detectedConv.nextCharIndex();
204 byteOff = detectedConv.nextByteIndex();
205 badInputLength = detectedConv.badInputLength;
206 }
207 } else {
208 try {
209 num += defaultConv.convert(input, inOff + num, inEnd,
210 output, outOff + num, outEnd);
211 } finally {
212 charOff = defaultConv.nextCharIndex();
213 byteOff = defaultConv.nextByteIndex();
214 badInputLength = defaultConv.badInputLength;
215 }
216 }
217 return num;
218 }
219
220 public void reset() {
221 if(detectedConv != null) {
222 detectedConv.reset();
223 detectedConv = null;
224 convName = null;
225 } else
226 defaultConv.reset();
227 charOff = byteOff = 0;
228 }
229
230 public String getCharacterEncoding() {
231 return "JISAutoDetect";
232 }
233
234 public String toString() {
235 String s = getCharacterEncoding();
236 if (detectedConv != null) {
237 s += "[" + detectedConv.getCharacterEncoding() + "]";
238 } else {
239 s += "[unknown]";
240 }
241 return s;
242 }
243}