/*
 * Copyright 2001-2003 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
25
/*
 */
28
29package sun.io;
30
31import sun.nio.cs.ext.GB18030;
32
33public class ByteToCharGB18030 extends ByteToCharGB18030DB {
34
35 private static final int GB18030_SINGLE_BYTE = 1;
36 private static final int GB18030_DOUBLE_BYTE = 2;
37 private static final int GB18030_FOUR_BYTE = 3;
38 private static short[] decoderIndex1;
39 private static String[] decoderIndex2;
40
41 private int currentState;
42 private int savedSize;
43 private byte[] savedBytes;
44
45 public ByteToCharGB18030() {
46 super();
47 GB18030 nioCoder = new GB18030();
48 savedBytes = new byte[3];
49 currentState = GB18030_DOUBLE_BYTE;
50 decoderIndex1 = nioCoder.getDecoderIndex1();
51 decoderIndex2 = nioCoder.getDecoderIndex2();
52 savedSize = 0;
53 }
54
55 public short[] getOuter() {
56 return(index1);
57 }
58
59 public String[] getInner() {
60 return(index2);
61 }
62
63 public short[] getDBIndex1() {
64 return(super.index1);
65 }
66
67 public String[] getDBIndex2() {
68 return(super.index2);
69 }
70
71 public int flush(char [] output, int outStart, int outEnd)
72 throws MalformedInputException
73 {
74 if (savedSize != 0) {
75 savedSize = 0;
76 currentState = GB18030_DOUBLE_BYTE;
77 badInputLength = 0;
78 throw new MalformedInputException();
79 }
80 byteOff = charOff = 0;
81 return 0;
82 }
83
84
85 /**
86 * Character conversion
87 */
88 public int convert(byte[] input, int inOff, int inEnd,
89 char[] output, int outOff, int outEnd)
90 throws UnknownCharacterException, MalformedInputException,
91 ConversionBufferFullException
92 {
93 int inputSize = 0;
94 char outputChar = '\uFFFD';
95 // readOff keeps the actual buffer's pointer.
96 // byteOff keeps original buffer's pointer.
97 int readOff = byteOff = inOff;
98
99 if (savedSize != 0) {
100 // Filter illegal bytes when they are detected in saved
101 // partial input from a previous conversion attempt.
102 if (((savedBytes[0] & 0xFF) < 0x81 || savedBytes[0] > 0xFE) ||
103 (savedSize > 1 &&
104 (savedBytes[1] & 0xFF) < 0x30 ) ||
105 (savedSize > 2 &&
106 ((savedBytes[2] & 0xFF) < 0x81 ||
107 (savedBytes[2] & 0xFF) > 0xFE ))) {
108 badInputLength = 0;
109 throw new MalformedInputException();
110 }
111
112 byte[] newBuf = new byte[inEnd - inOff + savedSize];
113 for (int i = 0; i < savedSize; i++) {
114 newBuf[i] = savedBytes[i];
115 }
116 System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff);
117 byteOff -= savedSize;
118 input = newBuf;
119 inOff = 0;
120 inEnd = newBuf.length;
121 savedSize = 0;
122 }
123
124 charOff = outOff;
125 readOff = inOff;
126
127 while(readOff < inEnd) {
128 int byte1 = 0 , byte2 = 0, byte3 = 0, byte4 = 0;
129
130 // Is there room in the output buffer for the result?
131 if (charOff >= outEnd) {
132 throw new ConversionBufferFullException();
133 }
134
135 // Get the input byte
136 byte1 = input[readOff++] & 0xFF;
137 inputSize = 1;
138
139 if ((byte1 & (byte)0x80) == 0){ // US-ASCII range
140 outputChar = (char)byte1;
141 currentState = GB18030_SINGLE_BYTE;
142 }
143
144 else if (byte1 < 0x81 || byte1 > 0xfe) {
145 if (subMode)
146 outputChar = subChars[0];
147 else {
148 badInputLength = 1;
149 throw new UnknownCharacterException();
150 }
151 }
152 else {
153 // Either 2 or 4 byte sequence follows
154 // If an underrun is detected save for later
155 // replay.
156
157 if (readOff + inputSize > inEnd) {
158 savedBytes[0]=(byte)byte1;
159 savedSize = 1;
160 break;
161 }
162
163 byte2 = input[readOff++] & 0xFF;
164 inputSize = 2;
165
166 if (byte2 < 0x30) {
167 badInputLength = 1;
168 throw new MalformedInputException();
169 }
170 else if (byte2 >= 0x30 && byte2 <= 0x39) {
171 currentState = GB18030_FOUR_BYTE;
172 inputSize = 4;
173
174 if (readOff + 2 > inEnd) {
175 if (readOff + 1 > inEnd) {
176 savedBytes[0] = (byte)byte1;
177 savedBytes[1] = (byte)byte2;
178 savedSize = 2;
179 }
180 else {
181 savedBytes[0] = (byte)byte1;
182 savedBytes[1] = (byte)byte2;
183 savedBytes[2] = input[readOff++];
184 savedSize = 3;
185 }
186 break;
187 }
188 byte3 = input[readOff++] & 0xFF;
189 if (byte3 < 0x81 || byte3 > 0xfe) {
190 badInputLength = 3;
191 throw new MalformedInputException();
192 }
193
194 byte4 = input[readOff++] & 0xFF;
195
196 if (byte4 < 0x30 || byte4 > 0x39) {
197 badInputLength = 4;
198 throw new MalformedInputException();
199 }
200 }
201 else if (byte2 == 0x7f || byte2 == 0xff ||
202 (byte2 < 0x40 )) {
203 badInputLength = 2;
204 throw new MalformedInputException();
205 }
206 else
207 currentState = GB18030_DOUBLE_BYTE;
208 }
209
210 switch (currentState){
211 case GB18030_SINGLE_BYTE:
212 output[charOff++] = (char)(byte1);
213 break;
214 case GB18030_DOUBLE_BYTE:
215 output[charOff++] = super.getUnicode(byte1, byte2);
216 break;
217 case GB18030_FOUR_BYTE:
218 int offset = (((byte1 - 0x81) * 10 +
219 (byte2 - 0x30)) * 126 +
220 byte3 - 0x81) * 10 + byte4 - 0x30;
221 int hiByte = (offset >>8) & 0xFF;
222 int lowByte = (offset & 0xFF);
223
224 // Mixture of table lookups and algorithmic calculation
225 // of character values.
226
227 // BMP Ranges
228
229 if (offset <= 0x4A62)
230 output[charOff++] = getChar(offset);
231 else if (offset > 0x4A62 && offset <= 0x82BC)
232 output[charOff++] = (char) (offset + 0x5543);
233 else if (offset >= 0x82BD && offset <= 0x830D)
234 output[charOff++] = getChar(offset);
235 else if (offset >= 0x830D && offset <= 0x93A8)
236 output[charOff++] = (char) (offset + 0x6557);
237 else if (offset >= 0x93A9 && offset <= 0x99FB)
238 output[charOff++] = getChar(offset);
239 // Supplemental UCS planes handled via surrogates
240 else if (offset >= 0x2E248 && offset < 0x12E248) {
241 if (offset >= 0x12E248) {
242 if (subMode)
243 return subChars[0];
244 else {
245 badInputLength = 4;
246 throw new UnknownCharacterException();
247 }
248 }
249
250 if (charOff +2 > outEnd) {
251 throw new ConversionBufferFullException();
252 }
253 offset -= 0x1e248;
254 char highSurr = (char) ((offset - 0x10000) / 0x400 + 0xD800);
255 char lowSurr = (char) ((offset - 0x10000) % 0x400 + 0xDC00);
256 output[charOff++] = highSurr;
257 output[charOff++] = lowSurr;
258 }
259 else {
260 badInputLength = 4;
261 throw new MalformedInputException();
262 }
263 break;
264 }
265 byteOff += inputSize;
266 }
267
268 byteOff += savedSize;
269 return charOff - outOff;
270 }
271
272 public void reset() {
273 byteOff = charOff = 0;
274 currentState = GB18030_DOUBLE_BYTE;
275 savedSize = 0;
276 }
277
278 public String getCharacterEncoding() {
279 return "GB18030";
280 }
281
282 private char getChar(int offset) throws UnknownCharacterException {
283 int byte1 = (offset >>8) & 0xFF;
284 int byte2 = (offset & 0xFF);
285 int start = 0, end = 0xFF;
286
287 if (((byte1 < 0) || (byte1 > getOuter().length))
288 || ((byte2 < start) || (byte2 > end))) {
289 if (subMode)
290 return subChars[0];
291 else {
292 badInputLength = 1;
293 throw new UnknownCharacterException();
294 }
295 }
296
297 int n = (decoderIndex1[byte1] & 0xf) * (end - start + 1) + (byte2 - start);
298 return decoderIndex2[decoderIndex1[byte1] >> 4].charAt(n);
299 }
300}