J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 2003-2006 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | /* |
| 27 | */ |
| 28 | |
| 29 | package sun.nio.cs.ext; |
| 30 | |
| 31 | import java.nio.ByteBuffer; |
| 32 | import java.nio.CharBuffer; |
| 33 | import java.nio.charset.Charset; |
| 34 | import java.nio.charset.CharsetDecoder; |
| 35 | import java.nio.charset.CharsetEncoder; |
| 36 | import java.nio.charset.CoderResult; |
| 37 | import java.nio.charset.CharacterCodingException; |
| 38 | import sun.nio.cs.HistoricallyNamedCharset; |
| 39 | import sun.nio.cs.US_ASCII; |
| 40 | |
| 41 | public class ISO2022_CN |
| 42 | extends Charset |
| 43 | implements HistoricallyNamedCharset |
| 44 | { |
| 45 | private static final byte ISO_ESC = 0x1b; |
| 46 | private static final byte ISO_SI = 0x0f; |
| 47 | private static final byte ISO_SO = 0x0e; |
| 48 | private static final byte ISO_SS2_7 = 0x4e; |
| 49 | private static final byte ISO_SS3_7 = 0x4f; |
| 50 | private static final byte MSB = (byte)0x80; |
| 51 | private static final char REPLACE_CHAR = '\uFFFD'; |
| 52 | |
| 53 | private static final byte SODesigGB = 0; |
| 54 | private static final byte SODesigCNS = 1; |
| 55 | |
| 56 | public ISO2022_CN() { |
| 57 | super("ISO-2022-CN", ExtendedCharsets.aliasesFor("ISO-2022-CN")); |
| 58 | } |
| 59 | |
| 60 | public String historicalName() { |
| 61 | return "ISO2022CN"; |
| 62 | } |
| 63 | |
| 64 | public boolean contains(Charset cs) { |
| 65 | return ((cs instanceof EUC_CN) // GB2312-80 repertoire |
| 66 | || (cs instanceof US_ASCII) |
| 67 | || (cs instanceof EUC_TW) // CNS11643 repertoire |
| 68 | || (cs instanceof ISO2022_CN)); |
| 69 | } |
| 70 | |
| 71 | public CharsetDecoder newDecoder() { |
| 72 | return new Decoder(this); |
| 73 | } |
| 74 | |
| 75 | public CharsetEncoder newEncoder() { |
| 76 | throw new UnsupportedOperationException(); |
| 77 | } |
| 78 | |
| 79 | public boolean canEncode() { |
| 80 | return false; |
| 81 | } |
| 82 | |
| 83 | static class Decoder extends CharsetDecoder { |
| 84 | private boolean shiftOut; |
| 85 | private byte currentSODesig; |
| 86 | |
| 87 | private static final Charset gb2312 = new EUC_CN(); |
| 88 | private static final Charset cns = new EUC_TW(); |
| 89 | private final EUC_CN.Decoder gb2312Decoder; |
| 90 | private final EUC_TW.Decoder cnsDecoder; |
| 91 | |
| 92 | Decoder(Charset cs) { |
| 93 | super(cs, 1.0f, 1.0f); |
| 94 | shiftOut = false; |
| 95 | currentSODesig = SODesigGB; |
| 96 | gb2312Decoder = (EUC_CN.Decoder)gb2312.newDecoder(); |
| 97 | cnsDecoder = (EUC_TW.Decoder)cns.newDecoder(); |
| 98 | } |
| 99 | |
| 100 | protected void implReset() { |
| 101 | shiftOut= false; |
| 102 | currentSODesig = SODesigGB; |
| 103 | } |
| 104 | |
| 105 | private char cnsDecode(byte byte1, byte byte2, byte SS) { |
| 106 | byte1 |= MSB; |
| 107 | byte2 |= MSB; |
| 108 | if (SS == ISO_SS2_7) { |
| 109 | return cnsDecoder.convToUnicode(byte1, byte2, |
| 110 | cnsDecoder.unicodeCNS2); |
| 111 | |
| 112 | } else { //SS == ISO_SS3_7 |
| 113 | char[] outSurr = cnsDecoder.convToSurrogate(byte1, byte2, |
| 114 | cnsDecoder.unicodeCNS3); |
| 115 | if (outSurr == null || outSurr[0] != '\u0000') |
| 116 | return REPLACE_CHAR; |
| 117 | return outSurr[1]; |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | private char SODecode(byte byte1, byte byte2, byte SOD) { |
| 122 | byte1 |= MSB; |
| 123 | byte2 |= MSB; |
| 124 | if (SOD == SODesigGB) { |
| 125 | return gb2312Decoder.decodeDouble(byte1 & 0xff, |
| 126 | byte2 & 0xff); |
| 127 | } else { // SOD == SODesigCNS |
| 128 | return cnsDecoder.convToUnicode(byte1, |
| 129 | byte2, |
| 130 | cnsDecoder.unicodeCNS1); |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | private CoderResult decodeBufferLoop(ByteBuffer src, |
| 135 | CharBuffer dst) |
| 136 | { |
| 137 | int mark = src.position(); |
| 138 | byte b1 = 0, b2 = 0, b3 = 0, b4 = 0; |
| 139 | int inputSize = 0; |
| 140 | char c = REPLACE_CHAR; |
| 141 | try { |
| 142 | while (src.hasRemaining()) { |
| 143 | b1 = src.get(); |
| 144 | inputSize = 1; |
| 145 | |
| 146 | while (b1 == ISO_ESC || |
| 147 | b1 == ISO_SO || |
| 148 | b1 == ISO_SI) { |
| 149 | if (b1 == ISO_ESC) { // ESC |
| 150 | currentSODesig = SODesigGB; |
| 151 | |
| 152 | if (src.remaining() < 1) |
| 153 | return CoderResult.UNDERFLOW; |
| 154 | |
| 155 | b2 = src.get(); |
| 156 | inputSize++; |
| 157 | |
| 158 | if ((b2 & (byte)0x80) != 0) |
| 159 | return CoderResult.malformedForLength(inputSize); |
| 160 | |
| 161 | if (b2 == (byte)0x24) { |
| 162 | if (src.remaining() < 1) |
| 163 | return CoderResult.UNDERFLOW; |
| 164 | |
| 165 | b3 = src.get(); |
| 166 | inputSize++; |
| 167 | |
| 168 | if ((b3 & (byte)0x80) != 0) |
| 169 | return CoderResult.malformedForLength(inputSize); |
| 170 | if (b3 == 'A'){ // "$A" |
| 171 | currentSODesig = SODesigGB; |
| 172 | } else if (b3 == ')') { |
| 173 | if (src.remaining() < 1) |
| 174 | return CoderResult.UNDERFLOW; |
| 175 | b4 = src.get(); |
| 176 | inputSize++; |
| 177 | if (b4 == 'A'){ // "$)A" |
| 178 | currentSODesig = SODesigGB; |
| 179 | } else if (b4 == 'G'){ // "$)G" |
| 180 | currentSODesig = SODesigCNS; |
| 181 | } else { |
| 182 | return CoderResult.malformedForLength(inputSize); |
| 183 | } |
| 184 | } else if (b3 == '*') { |
| 185 | if (src.remaining() < 1) |
| 186 | return CoderResult.UNDERFLOW; |
| 187 | b4 = src.get(); |
| 188 | inputSize++; |
| 189 | if (b4 != 'H') { // "$*H" |
| 190 | //SS2Desig -> CNS-P1 |
| 191 | return CoderResult.malformedForLength(inputSize); |
| 192 | } |
| 193 | } else if (b3 == '+') { |
| 194 | if (src.remaining() < 1) |
| 195 | return CoderResult.UNDERFLOW; |
| 196 | b4 = src.get(); |
| 197 | inputSize++; |
| 198 | if (b4 != 'I'){ // "$+I" |
| 199 | //SS3Desig -> CNS-P2. |
| 200 | return CoderResult.malformedForLength(inputSize); |
| 201 | } |
| 202 | } else { |
| 203 | return CoderResult.malformedForLength(inputSize); |
| 204 | } |
| 205 | } else if (b2 == ISO_SS2_7 || b2 == ISO_SS3_7) { |
| 206 | if (src.remaining() < 2) |
| 207 | return CoderResult.UNDERFLOW; |
| 208 | b3 = src.get(); |
| 209 | b4 = src.get(); |
| 210 | inputSize += 2; |
| 211 | if (dst.remaining() < 1) |
| 212 | return CoderResult.OVERFLOW; |
| 213 | //SS2->CNS-P2, SS3->CNS-P3 |
| 214 | c = cnsDecode(b3, b4, b2); |
| 215 | if (c == REPLACE_CHAR) |
| 216 | return CoderResult.unmappableForLength(inputSize); |
| 217 | dst.put(c); |
| 218 | } else { |
| 219 | return CoderResult.malformedForLength(inputSize); |
| 220 | } |
| 221 | } else if (b1 == ISO_SO) { |
| 222 | shiftOut = true; |
| 223 | } else if (b1 == ISO_SI) { // shift back in |
| 224 | shiftOut = false; |
| 225 | } |
| 226 | mark += inputSize; |
| 227 | if (src.remaining() < 1) |
| 228 | return CoderResult.UNDERFLOW; |
| 229 | b1 = src.get(); |
| 230 | inputSize = 1; |
| 231 | } |
| 232 | |
| 233 | if (dst.remaining() < 1) |
| 234 | return CoderResult.OVERFLOW; |
| 235 | |
| 236 | if (!shiftOut) { |
| 237 | dst.put((char)(b1 & 0xff)); //clear the upper byte |
| 238 | mark += inputSize; |
| 239 | } else { |
| 240 | if (src.remaining() < 1) |
| 241 | return CoderResult.UNDERFLOW; |
| 242 | b2 = src.get(); |
| 243 | inputSize++; |
| 244 | c = SODecode(b1, b2, currentSODesig); |
| 245 | if (c == REPLACE_CHAR) |
| 246 | return CoderResult.unmappableForLength(inputSize); |
| 247 | dst.put(c); |
| 248 | mark += inputSize; |
| 249 | } |
| 250 | } |
| 251 | return CoderResult.UNDERFLOW; |
| 252 | } finally { |
| 253 | src.position(mark); |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | private CoderResult decodeArrayLoop(ByteBuffer src, |
| 258 | CharBuffer dst) |
| 259 | { |
| 260 | int inputSize = 0; |
| 261 | byte b1 = 0, b2 = 0, b3 = 0, b4 = 0; |
| 262 | char c = REPLACE_CHAR; |
| 263 | |
| 264 | byte[] sa = src.array(); |
| 265 | int sp = src.arrayOffset() + src.position(); |
| 266 | int sl = src.arrayOffset() + src.limit(); |
| 267 | assert (sp <= sl); |
| 268 | sp = (sp <= sl ? sp : sl); |
| 269 | |
| 270 | char[] da = dst.array(); |
| 271 | int dp = dst.arrayOffset() + dst.position(); |
| 272 | int dl = dst.arrayOffset() + dst.limit(); |
| 273 | assert (dp <= dl); |
| 274 | dp = (dp <= dl ? dp : dl); |
| 275 | |
| 276 | try { |
| 277 | while (sp < sl) { |
| 278 | b1 = sa[sp]; |
| 279 | inputSize = 1; |
| 280 | |
| 281 | while (b1 == ISO_ESC || b1 == ISO_SO || b1 == ISO_SI) { |
| 282 | if (b1 == ISO_ESC) { // ESC |
| 283 | currentSODesig = SODesigGB; |
| 284 | |
| 285 | if (sp + 2 > sl) |
| 286 | return CoderResult.UNDERFLOW; |
| 287 | |
| 288 | b2 = sa[sp + 1]; |
| 289 | inputSize++; |
| 290 | |
| 291 | if ((b2 & (byte)0x80) != 0) |
| 292 | return CoderResult.malformedForLength(inputSize); |
| 293 | if (b2 == (byte)0x24) { |
| 294 | if (sp + 3 > sl) |
| 295 | return CoderResult.UNDERFLOW; |
| 296 | |
| 297 | b3 = sa[sp + 2]; |
| 298 | inputSize++; |
| 299 | |
| 300 | if ((b3 & (byte)0x80) != 0) |
| 301 | return CoderResult.malformedForLength(inputSize); |
| 302 | if (b3 == 'A'){ // "$A" |
| 303 | /* <ESC>$A is not a legal designator sequence for |
| 304 | ISO2022_CN, it is listed as an escape sequence |
| 305 | for GB2312 in ISO2022-JP-2. Keep it here just for |
| 306 | the sake of "compatibility". |
| 307 | */ |
| 308 | currentSODesig = SODesigGB; |
| 309 | } else if (b3 == ')') { |
| 310 | if (sp + 4 > sl) |
| 311 | return CoderResult.UNDERFLOW; |
| 312 | b4 = sa[sp + 3]; |
| 313 | inputSize++; |
| 314 | |
| 315 | if (b4 == 'A'){ // "$)A" |
| 316 | currentSODesig = SODesigGB; |
| 317 | } else if (b4 == 'G'){ // "$)G" |
| 318 | currentSODesig = SODesigCNS; |
| 319 | } else { |
| 320 | return CoderResult.malformedForLength(inputSize); |
| 321 | } |
| 322 | } else if (b3 == '*') { |
| 323 | if (sp + 4 > sl) |
| 324 | return CoderResult.UNDERFLOW; |
| 325 | b4 = sa[sp + 3]; |
| 326 | inputSize++; |
| 327 | if (b4 != 'H'){ // "$*H" |
| 328 | return CoderResult.malformedForLength(inputSize); |
| 329 | } |
| 330 | } else if (b3 == '+') { |
| 331 | if (sp + 4 > sl) |
| 332 | return CoderResult.UNDERFLOW; |
| 333 | b4 = sa[sp + 3]; |
| 334 | inputSize++; |
| 335 | if (b4 != 'I'){ // "$+I" |
| 336 | return CoderResult.malformedForLength(inputSize); |
| 337 | } |
| 338 | } else { |
| 339 | return CoderResult.malformedForLength(inputSize); |
| 340 | } |
| 341 | } else if (b2 == ISO_SS2_7 || b2 == ISO_SS3_7) { |
| 342 | if (sp + 4 > sl) { |
| 343 | return CoderResult.UNDERFLOW; |
| 344 | } |
| 345 | b3 = sa[sp + 2]; |
| 346 | b4 = sa[sp + 3]; |
| 347 | if (dl - dp < 1) { |
| 348 | return CoderResult.OVERFLOW; |
| 349 | } |
| 350 | inputSize += 2; |
| 351 | c = cnsDecode(b3, b4, b2); |
| 352 | if (c == REPLACE_CHAR) |
| 353 | return CoderResult.unmappableForLength(inputSize); |
| 354 | da[dp++] = c; |
| 355 | } else { |
| 356 | return CoderResult.malformedForLength(inputSize); |
| 357 | } |
| 358 | } else if (b1 == ISO_SO) { |
| 359 | shiftOut = true; |
| 360 | } else if (b1 == ISO_SI) { // shift back in |
| 361 | shiftOut = false; |
| 362 | } |
| 363 | sp += inputSize; |
| 364 | if (sp + 1 > sl) |
| 365 | return CoderResult.UNDERFLOW; |
| 366 | b1 = sa[sp]; |
| 367 | inputSize = 1; |
| 368 | } |
| 369 | |
| 370 | if (dl - dp < 1) { |
| 371 | return CoderResult.OVERFLOW; |
| 372 | } |
| 373 | |
| 374 | if (!shiftOut) { |
| 375 | da[dp++] = (char)(b1 & 0xff); //clear the upper byte |
| 376 | } else { |
| 377 | if (sp + 2 > sl) |
| 378 | return CoderResult.UNDERFLOW; |
| 379 | b2 = sa[sp + 1]; |
| 380 | inputSize++; |
| 381 | c = SODecode(b1, b2, currentSODesig); |
| 382 | if (c == REPLACE_CHAR) |
| 383 | return CoderResult.unmappableForLength(inputSize); |
| 384 | da[dp++] = c; |
| 385 | } |
| 386 | sp += inputSize; |
| 387 | } |
| 388 | return CoderResult.UNDERFLOW; |
| 389 | } finally { |
| 390 | src.position(sp - src.arrayOffset()); |
| 391 | dst.position(dp - dst.arrayOffset()); |
| 392 | } |
| 393 | } |
| 394 | |
| 395 | protected CoderResult decodeLoop(ByteBuffer src, |
| 396 | CharBuffer dst) |
| 397 | { |
| 398 | if (src.hasArray() && dst.hasArray()) |
| 399 | return decodeArrayLoop(src, dst); |
| 400 | else |
| 401 | return decodeBufferLoop(src, dst); |
| 402 | } |
| 403 | } |
| 404 | } |