blob: 337d12e0cd58ba229a5809488e1754b3451d40c2 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 2003-2006 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26/*
27 */
28
29package sun.nio.cs.ext;
30
31import java.nio.ByteBuffer;
32import java.nio.CharBuffer;
33import java.nio.charset.Charset;
34import java.nio.charset.CharsetDecoder;
35import java.nio.charset.CharsetEncoder;
36import java.nio.charset.CoderResult;
37import java.nio.charset.CharacterCodingException;
38import sun.nio.cs.HistoricallyNamedCharset;
39import sun.nio.cs.US_ASCII;
40
41public class ISO2022_CN
42 extends Charset
43 implements HistoricallyNamedCharset
44{
45 private static final byte ISO_ESC = 0x1b;
46 private static final byte ISO_SI = 0x0f;
47 private static final byte ISO_SO = 0x0e;
48 private static final byte ISO_SS2_7 = 0x4e;
49 private static final byte ISO_SS3_7 = 0x4f;
50 private static final byte MSB = (byte)0x80;
51 private static final char REPLACE_CHAR = '\uFFFD';
52
53 private static final byte SODesigGB = 0;
54 private static final byte SODesigCNS = 1;
55
56 public ISO2022_CN() {
57 super("ISO-2022-CN", ExtendedCharsets.aliasesFor("ISO-2022-CN"));
58 }
59
60 public String historicalName() {
61 return "ISO2022CN";
62 }
63
64 public boolean contains(Charset cs) {
65 return ((cs instanceof EUC_CN) // GB2312-80 repertoire
66 || (cs instanceof US_ASCII)
67 || (cs instanceof EUC_TW) // CNS11643 repertoire
68 || (cs instanceof ISO2022_CN));
69 }
70
71 public CharsetDecoder newDecoder() {
72 return new Decoder(this);
73 }
74
75 public CharsetEncoder newEncoder() {
76 throw new UnsupportedOperationException();
77 }
78
79 public boolean canEncode() {
80 return false;
81 }
82
83 static class Decoder extends CharsetDecoder {
84 private boolean shiftOut;
85 private byte currentSODesig;
86
87 private static final Charset gb2312 = new EUC_CN();
88 private static final Charset cns = new EUC_TW();
89 private final EUC_CN.Decoder gb2312Decoder;
90 private final EUC_TW.Decoder cnsDecoder;
91
92 Decoder(Charset cs) {
93 super(cs, 1.0f, 1.0f);
94 shiftOut = false;
95 currentSODesig = SODesigGB;
96 gb2312Decoder = (EUC_CN.Decoder)gb2312.newDecoder();
97 cnsDecoder = (EUC_TW.Decoder)cns.newDecoder();
98 }
99
100 protected void implReset() {
101 shiftOut= false;
102 currentSODesig = SODesigGB;
103 }
104
105 private char cnsDecode(byte byte1, byte byte2, byte SS) {
106 byte1 |= MSB;
107 byte2 |= MSB;
108 if (SS == ISO_SS2_7) {
109 return cnsDecoder.convToUnicode(byte1, byte2,
110 cnsDecoder.unicodeCNS2);
111
112 } else { //SS == ISO_SS3_7
113 char[] outSurr = cnsDecoder.convToSurrogate(byte1, byte2,
114 cnsDecoder.unicodeCNS3);
115 if (outSurr == null || outSurr[0] != '\u0000')
116 return REPLACE_CHAR;
117 return outSurr[1];
118 }
119 }
120
121 private char SODecode(byte byte1, byte byte2, byte SOD) {
122 byte1 |= MSB;
123 byte2 |= MSB;
124 if (SOD == SODesigGB) {
125 return gb2312Decoder.decodeDouble(byte1 & 0xff,
126 byte2 & 0xff);
127 } else { // SOD == SODesigCNS
128 return cnsDecoder.convToUnicode(byte1,
129 byte2,
130 cnsDecoder.unicodeCNS1);
131 }
132 }
133
134 private CoderResult decodeBufferLoop(ByteBuffer src,
135 CharBuffer dst)
136 {
137 int mark = src.position();
138 byte b1 = 0, b2 = 0, b3 = 0, b4 = 0;
139 int inputSize = 0;
140 char c = REPLACE_CHAR;
141 try {
142 while (src.hasRemaining()) {
143 b1 = src.get();
144 inputSize = 1;
145
146 while (b1 == ISO_ESC ||
147 b1 == ISO_SO ||
148 b1 == ISO_SI) {
149 if (b1 == ISO_ESC) { // ESC
150 currentSODesig = SODesigGB;
151
152 if (src.remaining() < 1)
153 return CoderResult.UNDERFLOW;
154
155 b2 = src.get();
156 inputSize++;
157
158 if ((b2 & (byte)0x80) != 0)
159 return CoderResult.malformedForLength(inputSize);
160
161 if (b2 == (byte)0x24) {
162 if (src.remaining() < 1)
163 return CoderResult.UNDERFLOW;
164
165 b3 = src.get();
166 inputSize++;
167
168 if ((b3 & (byte)0x80) != 0)
169 return CoderResult.malformedForLength(inputSize);
170 if (b3 == 'A'){ // "$A"
171 currentSODesig = SODesigGB;
172 } else if (b3 == ')') {
173 if (src.remaining() < 1)
174 return CoderResult.UNDERFLOW;
175 b4 = src.get();
176 inputSize++;
177 if (b4 == 'A'){ // "$)A"
178 currentSODesig = SODesigGB;
179 } else if (b4 == 'G'){ // "$)G"
180 currentSODesig = SODesigCNS;
181 } else {
182 return CoderResult.malformedForLength(inputSize);
183 }
184 } else if (b3 == '*') {
185 if (src.remaining() < 1)
186 return CoderResult.UNDERFLOW;
187 b4 = src.get();
188 inputSize++;
189 if (b4 != 'H') { // "$*H"
190 //SS2Desig -> CNS-P1
191 return CoderResult.malformedForLength(inputSize);
192 }
193 } else if (b3 == '+') {
194 if (src.remaining() < 1)
195 return CoderResult.UNDERFLOW;
196 b4 = src.get();
197 inputSize++;
198 if (b4 != 'I'){ // "$+I"
199 //SS3Desig -> CNS-P2.
200 return CoderResult.malformedForLength(inputSize);
201 }
202 } else {
203 return CoderResult.malformedForLength(inputSize);
204 }
205 } else if (b2 == ISO_SS2_7 || b2 == ISO_SS3_7) {
206 if (src.remaining() < 2)
207 return CoderResult.UNDERFLOW;
208 b3 = src.get();
209 b4 = src.get();
210 inputSize += 2;
211 if (dst.remaining() < 1)
212 return CoderResult.OVERFLOW;
213 //SS2->CNS-P2, SS3->CNS-P3
214 c = cnsDecode(b3, b4, b2);
215 if (c == REPLACE_CHAR)
216 return CoderResult.unmappableForLength(inputSize);
217 dst.put(c);
218 } else {
219 return CoderResult.malformedForLength(inputSize);
220 }
221 } else if (b1 == ISO_SO) {
222 shiftOut = true;
223 } else if (b1 == ISO_SI) { // shift back in
224 shiftOut = false;
225 }
226 mark += inputSize;
227 if (src.remaining() < 1)
228 return CoderResult.UNDERFLOW;
229 b1 = src.get();
230 inputSize = 1;
231 }
232
233 if (dst.remaining() < 1)
234 return CoderResult.OVERFLOW;
235
236 if (!shiftOut) {
237 dst.put((char)(b1 & 0xff)); //clear the upper byte
238 mark += inputSize;
239 } else {
240 if (src.remaining() < 1)
241 return CoderResult.UNDERFLOW;
242 b2 = src.get();
243 inputSize++;
244 c = SODecode(b1, b2, currentSODesig);
245 if (c == REPLACE_CHAR)
246 return CoderResult.unmappableForLength(inputSize);
247 dst.put(c);
248 mark += inputSize;
249 }
250 }
251 return CoderResult.UNDERFLOW;
252 } finally {
253 src.position(mark);
254 }
255 }
256
257 private CoderResult decodeArrayLoop(ByteBuffer src,
258 CharBuffer dst)
259 {
260 int inputSize = 0;
261 byte b1 = 0, b2 = 0, b3 = 0, b4 = 0;
262 char c = REPLACE_CHAR;
263
264 byte[] sa = src.array();
265 int sp = src.arrayOffset() + src.position();
266 int sl = src.arrayOffset() + src.limit();
267 assert (sp <= sl);
268 sp = (sp <= sl ? sp : sl);
269
270 char[] da = dst.array();
271 int dp = dst.arrayOffset() + dst.position();
272 int dl = dst.arrayOffset() + dst.limit();
273 assert (dp <= dl);
274 dp = (dp <= dl ? dp : dl);
275
276 try {
277 while (sp < sl) {
278 b1 = sa[sp];
279 inputSize = 1;
280
281 while (b1 == ISO_ESC || b1 == ISO_SO || b1 == ISO_SI) {
282 if (b1 == ISO_ESC) { // ESC
283 currentSODesig = SODesigGB;
284
285 if (sp + 2 > sl)
286 return CoderResult.UNDERFLOW;
287
288 b2 = sa[sp + 1];
289 inputSize++;
290
291 if ((b2 & (byte)0x80) != 0)
292 return CoderResult.malformedForLength(inputSize);
293 if (b2 == (byte)0x24) {
294 if (sp + 3 > sl)
295 return CoderResult.UNDERFLOW;
296
297 b3 = sa[sp + 2];
298 inputSize++;
299
300 if ((b3 & (byte)0x80) != 0)
301 return CoderResult.malformedForLength(inputSize);
302 if (b3 == 'A'){ // "$A"
303 /* <ESC>$A is not a legal designator sequence for
304 ISO2022_CN, it is listed as an escape sequence
305 for GB2312 in ISO2022-JP-2. Keep it here just for
306 the sake of "compatibility".
307 */
308 currentSODesig = SODesigGB;
309 } else if (b3 == ')') {
310 if (sp + 4 > sl)
311 return CoderResult.UNDERFLOW;
312 b4 = sa[sp + 3];
313 inputSize++;
314
315 if (b4 == 'A'){ // "$)A"
316 currentSODesig = SODesigGB;
317 } else if (b4 == 'G'){ // "$)G"
318 currentSODesig = SODesigCNS;
319 } else {
320 return CoderResult.malformedForLength(inputSize);
321 }
322 } else if (b3 == '*') {
323 if (sp + 4 > sl)
324 return CoderResult.UNDERFLOW;
325 b4 = sa[sp + 3];
326 inputSize++;
327 if (b4 != 'H'){ // "$*H"
328 return CoderResult.malformedForLength(inputSize);
329 }
330 } else if (b3 == '+') {
331 if (sp + 4 > sl)
332 return CoderResult.UNDERFLOW;
333 b4 = sa[sp + 3];
334 inputSize++;
335 if (b4 != 'I'){ // "$+I"
336 return CoderResult.malformedForLength(inputSize);
337 }
338 } else {
339 return CoderResult.malformedForLength(inputSize);
340 }
341 } else if (b2 == ISO_SS2_7 || b2 == ISO_SS3_7) {
342 if (sp + 4 > sl) {
343 return CoderResult.UNDERFLOW;
344 }
345 b3 = sa[sp + 2];
346 b4 = sa[sp + 3];
347 if (dl - dp < 1) {
348 return CoderResult.OVERFLOW;
349 }
350 inputSize += 2;
351 c = cnsDecode(b3, b4, b2);
352 if (c == REPLACE_CHAR)
353 return CoderResult.unmappableForLength(inputSize);
354 da[dp++] = c;
355 } else {
356 return CoderResult.malformedForLength(inputSize);
357 }
358 } else if (b1 == ISO_SO) {
359 shiftOut = true;
360 } else if (b1 == ISO_SI) { // shift back in
361 shiftOut = false;
362 }
363 sp += inputSize;
364 if (sp + 1 > sl)
365 return CoderResult.UNDERFLOW;
366 b1 = sa[sp];
367 inputSize = 1;
368 }
369
370 if (dl - dp < 1) {
371 return CoderResult.OVERFLOW;
372 }
373
374 if (!shiftOut) {
375 da[dp++] = (char)(b1 & 0xff); //clear the upper byte
376 } else {
377 if (sp + 2 > sl)
378 return CoderResult.UNDERFLOW;
379 b2 = sa[sp + 1];
380 inputSize++;
381 c = SODecode(b1, b2, currentSODesig);
382 if (c == REPLACE_CHAR)
383 return CoderResult.unmappableForLength(inputSize);
384 da[dp++] = c;
385 }
386 sp += inputSize;
387 }
388 return CoderResult.UNDERFLOW;
389 } finally {
390 src.position(sp - src.arrayOffset());
391 dst.position(dp - dst.arrayOffset());
392 }
393 }
394
395 protected CoderResult decodeLoop(ByteBuffer src,
396 CharBuffer dst)
397 {
398 if (src.hasArray() && dst.hasArray())
399 return decodeArrayLoop(src, dst);
400 else
401 return decodeBufferLoop(src, dst);
402 }
403 }
404}