| /* |
| * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| package sun.nio.cs; |
| |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.charset.Charset; |
| import java.nio.charset.CharsetDecoder; |
| import java.nio.charset.CharsetEncoder; |
| import java.nio.charset.CoderResult; |
| import java.util.Arrays; |
| import sun.nio.cs.Surrogate; |
| import sun.nio.cs.ArrayDecoder; |
| import sun.nio.cs.ArrayEncoder; |
| import static sun.nio.cs.CharsetMapping.*; |
| |
| /* |
| * Four types of "DoubleByte" charsets are implemented in this class |
| * (1)DoubleByte |
| * The "mostly widely used" multibyte charset, a combination of |
| * a singlebyte character set (usually the ASCII charset) and a |
| * doublebyte character set. The codepoint values of singlebyte |
| * and doublebyte don't overlap. Microsoft's multibyte charsets |
| * and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943, |
| * 948, 949 and 950 are such charsets. |
| * |
| * (2)DoubleByte_EBCDIC |
| * IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch) |
| * in and out between the singlebyte character set and doublebyte |
| * character set. |
| * |
| * (3)DoubleByte_SIMPLE_EUC |
| * It's a "simple" form of the EUC encoding scheme: only the |
| * singlebyte character set G0 and one doublebyte character set |
| * G1 are defined; G2 (with SS2) and G3 (with SS3) are not used. |
| * So it is actually the same as the "typical" type (1) mentioned |
| * above, except that it returns "malformed" for SS2 and SS3 when |
| * decoding. |
| * |
| * (4)DoubleByte ONLY |
| * A "pure" doublebyte only character set. From implementation |
| * point of view, this is the type (1) with "decodeSingle" always |
| * returning unmappable. |
| * |
| * For simplicity, all implementations share the same decoding and |
| * encoding data structure. |
| * |
| * Decoding: |
| * |
| * char[][] b2c; |
| * char[] b2cSB; |
| * int b2Min, b2Max |
| * |
| * public char decodeSingle(int b) { |
| * return b2cSB[b]; |
| * } |
| * |
| * public char decodeDouble(int b1, int b2) { |
| * if (b2 < b2Min || b2 > b2Max) |
| * return UNMAPPABLE_DECODING; |
| * return b2c[b1][b2 - b2Min]; |
| * } |
| * |
| * (1)b2Min, b2Max are the corresponding min and max value of the |
| * low-half of the double-byte. |
| * (2)The high 8 bits (b1) of the double-byte are used to index into |
| * the b2c array. |
| * |
| * Encoding: |
| * |
| * char[] c2b; |
| * char[] c2bIndex; |
| * |
| * public int encodeChar(char ch) { |
| * return c2b[c2bIndex[ch >> 8] + (ch & 0xff)]; |
| * } |
| * |
| */ |
| |
| public class DoubleByte { |
| |
/**
 * Shared 256-entry row in which every slot holds UNMAPPABLE_DECODING.
 * The decoders in this class compare rows of their b2c table against this
 * array by identity to decide whether a byte can act as a lead byte.
 */
public static final char[] B2C_UNMAPPABLE;
static {
    final char[] unmappedRow = new char[0x100];
    Arrays.fill(unmappedRow, UNMAPPABLE_DECODING);
    B2C_UNMAPPABLE = unmappedRow;
}
| |
| public static class Decoder extends CharsetDecoder |
| implements DelegatableDecoder, ArrayDecoder |
| { |
| final char[][] b2c; |
| final char[] b2cSB; |
| final int b2Min; |
| final int b2Max; |
| final boolean isASCIICompatible; |
| |
| // for SimpleEUC override |
| protected CoderResult crMalformedOrUnderFlow(int b) { |
| return CoderResult.UNDERFLOW; |
| } |
| |
| protected CoderResult crMalformedOrUnmappable(int b1, int b2) { |
| if (b2c[b1] == B2C_UNMAPPABLE || // isNotLeadingByte(b1) |
| b2c[b2] != B2C_UNMAPPABLE || // isLeadingByte(b2) |
| decodeSingle(b2) != UNMAPPABLE_DECODING) { // isSingle(b2) |
| return CoderResult.malformedForLength(1); |
| } |
| return CoderResult.unmappableForLength(2); |
| } |
| |
/**
 * Creates a decoder with explicit chars-per-byte estimates.
 *
 * @param cs                the charset this decoder belongs to
 * @param avgcpb            average chars per byte, passed to CharsetDecoder
 * @param maxcpb            maximum chars per byte, passed to CharsetDecoder
 * @param b2c               double-byte table, indexed [b1][b2 - b2Min]
 * @param b2cSB             single-byte table, indexed by unsigned byte value
 * @param b2Min             smallest valid second byte
 * @param b2Max             largest valid second byte
 * @param isASCIICompatible value returned by {@link #isASCIICompatible()}
 */
public Decoder(Charset cs, float avgcpb, float maxcpb,
               char[][] b2c, char[] b2cSB,
               int b2Min, int b2Max,
               boolean isASCIICompatible) {
    super(cs, avgcpb, maxcpb);
    this.isASCIICompatible = isASCIICompatible;
    this.b2Min = b2Min;
    this.b2Max = b2Max;
    this.b2cSB = b2cSB;
    this.b2c = b2c;
}

/** Same as the full constructor with avgcpb = 0.5 and maxcpb = 1.0. */
public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
               boolean isASCIICompatible) {
    this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
}

/** Same as above with isASCIICompatible = false. */
public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
    this(cs, b2c, b2cSB, b2Min, b2Max, false);
}
| |
| protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) { |
| byte[] sa = src.array(); |
| int sp = src.arrayOffset() + src.position(); |
| int sl = src.arrayOffset() + src.limit(); |
| |
| char[] da = dst.array(); |
| int dp = dst.arrayOffset() + dst.position(); |
| int dl = dst.arrayOffset() + dst.limit(); |
| |
| try { |
| while (sp < sl && dp < dl) { |
| // inline the decodeSingle/Double() for better performance |
| int inSize = 1; |
| int b1 = sa[sp] & 0xff; |
| char c = b2cSB[b1]; |
| if (c == UNMAPPABLE_DECODING) { |
| if (sl - sp < 2) |
| return crMalformedOrUnderFlow(b1); |
| int b2 = sa[sp + 1] & 0xff; |
| if (b2 < b2Min || b2 > b2Max || |
| (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { |
| return crMalformedOrUnmappable(b1, b2); |
| } |
| inSize++; |
| } |
| da[dp++] = c; |
| sp += inSize; |
| } |
| return (sp >= sl) ? CoderResult.UNDERFLOW |
| : CoderResult.OVERFLOW; |
| } finally { |
| src.position(sp - src.arrayOffset()); |
| dst.position(dp - dst.arrayOffset()); |
| } |
| } |
| |
| protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) { |
| int mark = src.position(); |
| try { |
| |
| while (src.hasRemaining() && dst.hasRemaining()) { |
| int b1 = src.get() & 0xff; |
| char c = b2cSB[b1]; |
| int inSize = 1; |
| if (c == UNMAPPABLE_DECODING) { |
| if (src.remaining() < 1) |
| return crMalformedOrUnderFlow(b1); |
| int b2 = src.get() & 0xff; |
| if (b2 < b2Min || b2 > b2Max || |
| (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) |
| return crMalformedOrUnmappable(b1, b2); |
| inSize++; |
| } |
| dst.put(c); |
| mark += inSize; |
| } |
| return src.hasRemaining()? CoderResult.OVERFLOW |
| : CoderResult.UNDERFLOW; |
| } finally { |
| src.position(mark); |
| } |
| } |
| |
| // Make some protected methods public for use by JISAutoDetect |
| public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) { |
| if (src.hasArray() && dst.hasArray()) |
| return decodeArrayLoop(src, dst); |
| else |
| return decodeBufferLoop(src, dst); |
| } |
| |
| @Override |
| public int decode(byte[] src, int sp, int len, char[] dst) { |
| int dp = 0; |
| int sl = sp + len; |
| char repl = replacement().charAt(0); |
| while (sp < sl) { |
| int b1 = src[sp++] & 0xff; |
| char c = b2cSB[b1]; |
| if (c == UNMAPPABLE_DECODING) { |
| if (sp < sl) { |
| int b2 = src[sp++] & 0xff; |
| if (b2 < b2Min || b2 > b2Max || |
| (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { |
| if (b2c[b1] == B2C_UNMAPPABLE || // isNotLeadingByte |
| b2c[b2] != B2C_UNMAPPABLE || // isLeadingByte |
| decodeSingle(b2) != UNMAPPABLE_DECODING) { |
| sp--; |
| } |
| } |
| } |
| if (c == UNMAPPABLE_DECODING) { |
| c = repl; |
| } |
| } |
| dst[dp++] = c; |
| } |
| return dp; |
| } |
| |
| @Override |
| public boolean isASCIICompatible() { |
| return isASCIICompatible; |
| } |
| |
| public void implReset() { |
| super.implReset(); |
| } |
| |
| public CoderResult implFlush(CharBuffer out) { |
| return super.implFlush(out); |
| } |
| |
| // decode loops are not using decodeSingle/Double() for performance |
| // reason. |
| public char decodeSingle(int b) { |
| return b2cSB[b]; |
| } |
| |
| public char decodeDouble(int b1, int b2) { |
| if (b1 < 0 || b1 > b2c.length || |
| b2 < b2Min || b2 > b2Max) |
| return UNMAPPABLE_DECODING; |
| return b2c[b1][b2 - b2Min]; |
| } |
| } |
| |
| // IBM_EBCDIC_DBCS |
| public static class Decoder_EBCDIC extends Decoder { |
| private static final int SBCS = 0; |
| private static final int DBCS = 1; |
| private static final int SO = 0x0e; |
| private static final int SI = 0x0f; |
| private int currentState; |
| |
/**
 * Creates an EBCDIC shift-state decoder over the given tables.
 *
 * @param cs                the charset this decoder belongs to
 * @param b2c               double-byte table, indexed [b1][b2 - b2Min]
 * @param b2cSB             single-byte table
 * @param b2Min             smallest valid second byte
 * @param b2Max             largest valid second byte
 * @param isASCIICompatible value reported by isASCIICompatible()
 */
public Decoder_EBCDIC(Charset cs,
                      char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
                      boolean isASCIICompatible) {
    super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
}

/** Same as above with isASCIICompatible = false. */
public Decoder_EBCDIC(Charset cs,
                      char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
    this(cs, b2c, b2cSB, b2Min, b2Max, false);
}
| |
| public void implReset() { |
| currentState = SBCS; |
| } |
| |
| // Check validity of dbcs ebcdic byte pair values |
| // |
| // First byte : 0x41 -- 0xFE |
| // Second byte: 0x41 -- 0xFE |
| // Doublebyte blank: 0x4040 |
| // |
| // The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io |
| // as |
| // if ((b1 != 0x40 || b2 != 0x40) && |
| // (b2 < 0x41 || b2 > 0xfe)) {...} |
| // is not correct/complete (range check for b1) |
| // |
| private static boolean isDoubleByte(int b1, int b2) { |
| return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe) |
| || (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE |
| } |
| |
| protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) { |
| byte[] sa = src.array(); |
| int sp = src.arrayOffset() + src.position(); |
| int sl = src.arrayOffset() + src.limit(); |
| char[] da = dst.array(); |
| int dp = dst.arrayOffset() + dst.position(); |
| int dl = dst.arrayOffset() + dst.limit(); |
| |
| try { |
| // don't check dp/dl together here, it's possible to |
| // decdoe a SO/SI without space in output buffer. |
| while (sp < sl) { |
| int b1 = sa[sp] & 0xff; |
| int inSize = 1; |
| if (b1 == SO) { // Shift out |
| if (currentState != SBCS) |
| return CoderResult.malformedForLength(1); |
| else |
| currentState = DBCS; |
| } else if (b1 == SI) { |
| if (currentState != DBCS) |
| return CoderResult.malformedForLength(1); |
| else |
| currentState = SBCS; |
| } else { |
| char c = UNMAPPABLE_DECODING; |
| if (currentState == SBCS) { |
| c = b2cSB[b1]; |
| if (c == UNMAPPABLE_DECODING) |
| return CoderResult.unmappableForLength(1); |
| } else { |
| if (sl - sp < 2) |
| return CoderResult.UNDERFLOW; |
| int b2 = sa[sp + 1] & 0xff; |
| if (b2 < b2Min || b2 > b2Max || |
| (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { |
| if (!isDoubleByte(b1, b2)) |
| return CoderResult.malformedForLength(2); |
| return CoderResult.unmappableForLength(2); |
| } |
| inSize++; |
| } |
| if (dl - dp < 1) |
| return CoderResult.OVERFLOW; |
| |
| da[dp++] = c; |
| } |
| sp += inSize; |
| } |
| return CoderResult.UNDERFLOW; |
| } finally { |
| src.position(sp - src.arrayOffset()); |
| dst.position(dp - dst.arrayOffset()); |
| } |
| } |
| |
| protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) { |
| int mark = src.position(); |
| try { |
| while (src.hasRemaining()) { |
| int b1 = src.get() & 0xff; |
| int inSize = 1; |
| if (b1 == SO) { // Shift out |
| if (currentState != SBCS) |
| return CoderResult.malformedForLength(1); |
| else |
| currentState = DBCS; |
| } else if (b1 == SI) { |
| if (currentState != DBCS) |
| return CoderResult.malformedForLength(1); |
| else |
| currentState = SBCS; |
| } else { |
| char c = UNMAPPABLE_DECODING; |
| if (currentState == SBCS) { |
| c = b2cSB[b1]; |
| if (c == UNMAPPABLE_DECODING) |
| return CoderResult.unmappableForLength(1); |
| } else { |
| if (src.remaining() < 1) |
| return CoderResult.UNDERFLOW; |
| int b2 = src.get()&0xff; |
| if (b2 < b2Min || b2 > b2Max || |
| (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { |
| if (!isDoubleByte(b1, b2)) |
| return CoderResult.malformedForLength(2); |
| return CoderResult.unmappableForLength(2); |
| } |
| inSize++; |
| } |
| |
| if (dst.remaining() < 1) |
| return CoderResult.OVERFLOW; |
| |
| dst.put(c); |
| } |
| mark += inSize; |
| } |
| return CoderResult.UNDERFLOW; |
| } finally { |
| src.position(mark); |
| } |
| } |
| |
| @Override |
| public int decode(byte[] src, int sp, int len, char[] dst) { |
| int dp = 0; |
| int sl = sp + len; |
| currentState = SBCS; |
| char repl = replacement().charAt(0); |
| while (sp < sl) { |
| int b1 = src[sp++] & 0xff; |
| if (b1 == SO) { // Shift out |
| if (currentState != SBCS) |
| dst[dp++] = repl; |
| else |
| currentState = DBCS; |
| } else if (b1 == SI) { |
| if (currentState != DBCS) |
| dst[dp++] = repl; |
| else |
| currentState = SBCS; |
| } else { |
| char c = UNMAPPABLE_DECODING; |
| if (currentState == SBCS) { |
| c = b2cSB[b1]; |
| if (c == UNMAPPABLE_DECODING) |
| c = repl; |
| } else { |
| if (sl == sp) { |
| c = repl; |
| } else { |
| int b2 = src[sp++] & 0xff; |
| if (b2 < b2Min || b2 > b2Max || |
| (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { |
| c = repl; |
| } |
| } |
| } |
| dst[dp++] = c; |
| } |
| } |
| return dp; |
| } |
| } |
| |
| // DBCS_ONLY |
| public static class Decoder_DBCSONLY extends Decoder { |
| static final char[] b2cSB_UNMAPPABLE; |
| static { |
| b2cSB_UNMAPPABLE = new char[0x100]; |
| Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING); |
| } |
| public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max, |
| boolean isASCIICompatible) { |
| super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, isASCIICompatible); |
| } |
| |
| public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) { |
| super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, false); |
| } |
| } |
| |
| // EUC_SIMPLE |
| // The only thing we need to "override" is to check SS2/SS3 and |
| // return "malformed" if found |
| public static class Decoder_EUC_SIM extends Decoder { |
| private final int SS2 = 0x8E; |
| private final int SS3 = 0x8F; |
| |
| public Decoder_EUC_SIM(Charset cs, |
| char[][] b2c, char[] b2cSB, int b2Min, int b2Max, |
| boolean isASCIICompatible) { |
| super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible); |
| } |
| |
| // No support provided for G2/G3 for SimpleEUC |
| protected CoderResult crMalformedOrUnderFlow(int b) { |
| if (b == SS2 || b == SS3 ) |
| return CoderResult.malformedForLength(1); |
| return CoderResult.UNDERFLOW; |
| } |
| |
| protected CoderResult crMalformedOrUnmappable(int b1, int b2) { |
| if (b1 == SS2 || b1 == SS3 ) |
| return CoderResult.malformedForLength(1); |
| return CoderResult.unmappableForLength(2); |
| } |
| |
| @Override |
| public int decode(byte[] src, int sp, int len, char[] dst) { |
| int dp = 0; |
| int sl = sp + len; |
| char repl = replacement().charAt(0); |
| while (sp < sl) { |
| int b1 = src[sp++] & 0xff; |
| char c = b2cSB[b1]; |
| if (c == UNMAPPABLE_DECODING) { |
| if (sp < sl) { |
| int b2 = src[sp++] & 0xff; |
| if (b2 < b2Min || b2 > b2Max || |
| (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { |
| if (b1 == SS2 || b1 == SS3) { |
| sp--; |
| } |
| c = repl; |
| } |
| } else { |
| c = repl; |
| } |
| } |
| dst[dp++] = c; |
| } |
| return dp; |
| } |
| } |
| |
| public static class Encoder extends CharsetEncoder |
| implements ArrayEncoder |
| { |
| protected final int MAX_SINGLEBYTE = 0xff; |
| private final char[] c2b; |
| private final char[] c2bIndex; |
| protected Surrogate.Parser sgp; |
| final boolean isASCIICompatible; |
| |
/** Same as the four-argument constructor with isASCIICompatible = false. */
public Encoder(Charset cs, char[] c2b, char[] c2bIndex) {
    this(cs, c2b, c2bIndex, false);
}

/**
 * Creates an encoder with average and maximum bytes-per-char of 2.0 and
 * the superclass default replacement.
 *
 * @param cs                the charset this encoder belongs to
 * @param c2b               char-to-bytes table
 * @param c2bIndex          per-high-byte offsets into c2b
 * @param isASCIICompatible value returned by {@link #isASCIICompatible()}
 */
public Encoder(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible) {
    super(cs, 2.0f, 2.0f);
    this.isASCIICompatible = isASCIICompatible;
    this.c2bIndex = c2bIndex;
    this.c2b = c2b;
}

/**
 * Creates an encoder with explicit size estimates and replacement bytes.
 *
 * @param avg  average bytes per char, passed to CharsetEncoder
 * @param max  maximum bytes per char, passed to CharsetEncoder
 * @param repl initial replacement bytes, passed to CharsetEncoder
 */
public Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex,
               boolean isASCIICompatible) {
    super(cs, avg, max, repl);
    this.isASCIICompatible = isASCIICompatible;
    this.c2bIndex = c2bIndex;
    this.c2b = c2b;
}
| |
| public boolean canEncode(char c) { |
| return encodeChar(c) != UNMAPPABLE_ENCODING; |
| } |
| |
| protected Surrogate.Parser sgp() { |
| if (sgp == null) |
| sgp = new Surrogate.Parser(); |
| return sgp; |
| } |
| |
| protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) { |
| char[] sa = src.array(); |
| int sp = src.arrayOffset() + src.position(); |
| int sl = src.arrayOffset() + src.limit(); |
| |
| byte[] da = dst.array(); |
| int dp = dst.arrayOffset() + dst.position(); |
| int dl = dst.arrayOffset() + dst.limit(); |
| |
| try { |
| while (sp < sl) { |
| char c = sa[sp]; |
| int bb = encodeChar(c); |
| if (bb == UNMAPPABLE_ENCODING) { |
| if (Character.isSurrogate(c)) { |
| if (sgp().parse(c, sa, sp, sl) < 0) |
| return sgp.error(); |
| return sgp.unmappableResult(); |
| } |
| return CoderResult.unmappableForLength(1); |
| } |
| |
| if (bb > MAX_SINGLEBYTE) { // DoubleByte |
| if (dl - dp < 2) |
| return CoderResult.OVERFLOW; |
| da[dp++] = (byte)(bb >> 8); |
| da[dp++] = (byte)bb; |
| } else { // SingleByte |
| if (dl - dp < 1) |
| return CoderResult.OVERFLOW; |
| da[dp++] = (byte)bb; |
| } |
| |
| sp++; |
| } |
| return CoderResult.UNDERFLOW; |
| } finally { |
| src.position(sp - src.arrayOffset()); |
| dst.position(dp - dst.arrayOffset()); |
| } |
| } |
| |
| protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) { |
| int mark = src.position(); |
| try { |
| while (src.hasRemaining()) { |
| char c = src.get(); |
| int bb = encodeChar(c); |
| if (bb == UNMAPPABLE_ENCODING) { |
| if (Character.isSurrogate(c)) { |
| if (sgp().parse(c, src) < 0) |
| return sgp.error(); |
| return sgp.unmappableResult(); |
| } |
| return CoderResult.unmappableForLength(1); |
| } |
| if (bb > MAX_SINGLEBYTE) { // DoubleByte |
| if (dst.remaining() < 2) |
| return CoderResult.OVERFLOW; |
| dst.put((byte)(bb >> 8)); |
| dst.put((byte)(bb)); |
| } else { |
| if (dst.remaining() < 1) |
| return CoderResult.OVERFLOW; |
| dst.put((byte)bb); |
| } |
| mark++; |
| } |
| return CoderResult.UNDERFLOW; |
| } finally { |
| src.position(mark); |
| } |
| } |
| |
| protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) { |
| if (src.hasArray() && dst.hasArray()) |
| return encodeArrayLoop(src, dst); |
| else |
| return encodeBufferLoop(src, dst); |
| } |
| |
| protected byte[] repl = replacement(); |
| protected void implReplaceWith(byte[] newReplacement) { |
| repl = newReplacement; |
| } |
| |
| @Override |
| public int encode(char[] src, int sp, int len, byte[] dst) { |
| int dp = 0; |
| int sl = sp + len; |
| int dl = dst.length; |
| while (sp < sl) { |
| char c = src[sp++]; |
| int bb = encodeChar(c); |
| if (bb == UNMAPPABLE_ENCODING) { |
| if (Character.isHighSurrogate(c) && sp < sl && |
| Character.isLowSurrogate(src[sp])) { |
| sp++; |
| } |
| dst[dp++] = repl[0]; |
| if (repl.length > 1) |
| dst[dp++] = repl[1]; |
| continue; |
| } //else |
| if (bb > MAX_SINGLEBYTE) { // DoubleByte |
| dst[dp++] = (byte)(bb >> 8); |
| dst[dp++] = (byte)bb; |
| } else { // SingleByte |
| dst[dp++] = (byte)bb; |
| } |
| } |
| return dp; |
| } |
| |
| @Override |
| public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) { |
| int dp = 0; |
| int sl = sp + len; |
| while (sp < sl) { |
| char c = (char)(src[sp++] & 0xff); |
| int bb = encodeChar(c); |
| if (bb == UNMAPPABLE_ENCODING) { |
| // no surrogate pair in latin1 string |
| dst[dp++] = repl[0]; |
| if (repl.length > 1) { |
| dst[dp++] = repl[1]; |
| } |
| continue; |
| } //else |
| if (bb > MAX_SINGLEBYTE) { // DoubleByte |
| dst[dp++] = (byte)(bb >> 8); |
| dst[dp++] = (byte)bb; |
| } else { // SingleByte |
| dst[dp++] = (byte)bb; |
| } |
| |
| } |
| return dp; |
| } |
| |
| @Override |
| public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) { |
| int dp = 0; |
| int sl = sp + len; |
| while (sp < sl) { |
| char c = StringUTF16.getChar(src, sp++); |
| int bb = encodeChar(c); |
| if (bb == UNMAPPABLE_ENCODING) { |
| if (Character.isHighSurrogate(c) && sp < sl && |
| Character.isLowSurrogate(StringUTF16.getChar(src, sp))) { |
| sp++; |
| } |
| dst[dp++] = repl[0]; |
| if (repl.length > 1) { |
| dst[dp++] = repl[1]; |
| } |
| continue; |
| } //else |
| if (bb > MAX_SINGLEBYTE) { // DoubleByte |
| dst[dp++] = (byte)(bb >> 8); |
| dst[dp++] = (byte)bb; |
| } else { // SingleByte |
| dst[dp++] = (byte)bb; |
| } |
| } |
| return dp; |
| } |
| |
| @Override |
| public boolean isASCIICompatible() { |
| return isASCIICompatible; |
| } |
| |
| public int encodeChar(char ch) { |
| return c2b[c2bIndex[ch >> 8] + (ch & 0xff)]; |
| } |
| |
| // init the c2b and c2bIndex tables from b2c. |
| public static void initC2B(String[] b2c, String b2cSB, String b2cNR, String c2bNR, |
| int b2Min, int b2Max, |
| char[] c2b, char[] c2bIndex) |
| { |
| Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING); |
| int off = 0x100; |
| |
| char[][] b2c_ca = new char[b2c.length][]; |
| char[] b2cSB_ca = null; |
| if (b2cSB != null) |
| b2cSB_ca = b2cSB.toCharArray(); |
| |
| for (int i = 0; i < b2c.length; i++) { |
| if (b2c[i] == null) |
| continue; |
| b2c_ca[i] = b2c[i].toCharArray(); |
| } |
| |
| if (b2cNR != null) { |
| int j = 0; |
| while (j < b2cNR.length()) { |
| char b = b2cNR.charAt(j++); |
| char c = b2cNR.charAt(j++); |
| if (b < 0x100 && b2cSB_ca != null) { |
| if (b2cSB_ca[b] == c) |
| b2cSB_ca[b] = UNMAPPABLE_DECODING; |
| } else { |
| if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c) |
| b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING; |
| } |
| } |
| } |
| |
| if (b2cSB_ca != null) { // SingleByte |
| for (int b = 0; b < b2cSB_ca.length; b++) { |
| char c = b2cSB_ca[b]; |
| if (c == UNMAPPABLE_DECODING) |
| continue; |
| int index = c2bIndex[c >> 8]; |
| if (index == 0) { |
| index = off; |
| off += 0x100; |
| c2bIndex[c >> 8] = (char)index; |
| } |
| c2b[index + (c & 0xff)] = (char)b; |
| } |
| } |
| |
| for (int b1 = 0; b1 < b2c.length; b1++) { // DoubleByte |
| char[] db = b2c_ca[b1]; |
| if (db == null) |
| continue; |
| for (int b2 = b2Min; b2 <= b2Max; b2++) { |
| char c = db[b2 - b2Min]; |
| if (c == UNMAPPABLE_DECODING) |
| continue; |
| int index = c2bIndex[c >> 8]; |
| if (index == 0) { |
| index = off; |
| off += 0x100; |
| c2bIndex[c >> 8] = (char)index; |
| } |
| c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2); |
| } |
| } |
| |
| if (c2bNR != null) { |
| // add c->b only nr entries |
| for (int i = 0; i < c2bNR.length(); i += 2) { |
| char b = c2bNR.charAt(i); |
| char c = c2bNR.charAt(i + 1); |
| int index = (c >> 8); |
| if (c2bIndex[index] == 0) { |
| c2bIndex[index] = (char)off; |
| off += 0x100; |
| } |
| index = c2bIndex[index] + (c & 0xff); |
| c2b[index] = b; |
| } |
| } |
| } |
| } |
| |
| public static class Encoder_DBCSONLY extends Encoder { |
| |
| public Encoder_DBCSONLY(Charset cs, byte[] repl, |
| char[] c2b, char[] c2bIndex, |
| boolean isASCIICompatible) { |
| super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex, isASCIICompatible); |
| } |
| |
| public int encodeChar(char ch) { |
| int bb = super.encodeChar(ch); |
| if (bb <= MAX_SINGLEBYTE) |
| return UNMAPPABLE_ENCODING; |
| return bb; |
| } |
| } |
| |
| public static class Encoder_EBCDIC extends Encoder { |
| static final int SBCS = 0; |
| static final int DBCS = 1; |
| static final byte SO = 0x0e; |
| static final byte SI = 0x0f; |
| |
| protected int currentState = SBCS; |
| |
/**
 * Creates an EBCDIC shift-state encoder with average 4.0 and maximum 5.0
 * bytes per char and the single replacement byte 0x6f.
 */
public Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex,
                      boolean isASCIICompatible) {
    super(cs, 4.0f, 5.0f, new byte[] { (byte)0x6f },
          c2b, c2bIndex, isASCIICompatible);
}
| |
| protected void implReset() { |
| currentState = SBCS; |
| } |
| |
| protected CoderResult implFlush(ByteBuffer out) { |
| if (currentState == DBCS) { |
| if (out.remaining() < 1) |
| return CoderResult.OVERFLOW; |
| out.put(SI); |
| } |
| implReset(); |
| return CoderResult.UNDERFLOW; |
| } |
| |
| protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) { |
| char[] sa = src.array(); |
| int sp = src.arrayOffset() + src.position(); |
| int sl = src.arrayOffset() + src.limit(); |
| byte[] da = dst.array(); |
| int dp = dst.arrayOffset() + dst.position(); |
| int dl = dst.arrayOffset() + dst.limit(); |
| |
| try { |
| while (sp < sl) { |
| char c = sa[sp]; |
| int bb = encodeChar(c); |
| if (bb == UNMAPPABLE_ENCODING) { |
| if (Character.isSurrogate(c)) { |
| if (sgp().parse(c, sa, sp, sl) < 0) |
| return sgp.error(); |
| return sgp.unmappableResult(); |
| } |
| return CoderResult.unmappableForLength(1); |
| } |
| if (bb > MAX_SINGLEBYTE) { // DoubleByte |
| if (currentState == SBCS) { |
| if (dl - dp < 1) |
| return CoderResult.OVERFLOW; |
| currentState = DBCS; |
| da[dp++] = SO; |
| } |
| if (dl - dp < 2) |
| return CoderResult.OVERFLOW; |
| da[dp++] = (byte)(bb >> 8); |
| da[dp++] = (byte)bb; |
| } else { // SingleByte |
| if (currentState == DBCS) { |
| if (dl - dp < 1) |
| return CoderResult.OVERFLOW; |
| currentState = SBCS; |
| da[dp++] = SI; |
| } |
| if (dl - dp < 1) |
| return CoderResult.OVERFLOW; |
| da[dp++] = (byte)bb; |
| |
| } |
| sp++; |
| } |
| return CoderResult.UNDERFLOW; |
| } finally { |
| src.position(sp - src.arrayOffset()); |
| dst.position(dp - dst.arrayOffset()); |
| } |
| } |
| |
| protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) { |
| int mark = src.position(); |
| try { |
| while (src.hasRemaining()) { |
| char c = src.get(); |
| int bb = encodeChar(c); |
| if (bb == UNMAPPABLE_ENCODING) { |
| if (Character.isSurrogate(c)) { |
| if (sgp().parse(c, src) < 0) |
| return sgp.error(); |
| return sgp.unmappableResult(); |
| } |
| return CoderResult.unmappableForLength(1); |
| } |
| if (bb > MAX_SINGLEBYTE) { // DoubleByte |
| if (currentState == SBCS) { |
| if (dst.remaining() < 1) |
| return CoderResult.OVERFLOW; |
| currentState = DBCS; |
| dst.put(SO); |
| } |
| if (dst.remaining() < 2) |
| return CoderResult.OVERFLOW; |
| dst.put((byte)(bb >> 8)); |
| dst.put((byte)(bb)); |
| } else { // Single-byte |
| if (currentState == DBCS) { |
| if (dst.remaining() < 1) |
| return CoderResult.OVERFLOW; |
| currentState = SBCS; |
| dst.put(SI); |
| } |
| if (dst.remaining() < 1) |
| return CoderResult.OVERFLOW; |
| dst.put((byte)bb); |
| } |
| mark++; |
| } |
| return CoderResult.UNDERFLOW; |
| } finally { |
| src.position(mark); |
| } |
| } |
| |
| @Override |
| public int encode(char[] src, int sp, int len, byte[] dst) { |
| int dp = 0; |
| int sl = sp + len; |
| while (sp < sl) { |
| char c = src[sp++]; |
| int bb = encodeChar(c); |
| |
| if (bb == UNMAPPABLE_ENCODING) { |
| if (Character.isHighSurrogate(c) && sp < sl && |
| Character.isLowSurrogate(src[sp])) { |
| sp++; |
| } |
| dst[dp++] = repl[0]; |
| if (repl.length > 1) |
| dst[dp++] = repl[1]; |
| continue; |
| } //else |
| if (bb > MAX_SINGLEBYTE) { // DoubleByte |
| if (currentState == SBCS) { |
| currentState = DBCS; |
| dst[dp++] = SO; |
| } |
| dst[dp++] = (byte)(bb >> 8); |
| dst[dp++] = (byte)bb; |
| } else { // SingleByte |
| if (currentState == DBCS) { |
| currentState = SBCS; |
| dst[dp++] = SI; |
| } |
| dst[dp++] = (byte)bb; |
| } |
| } |
| |
| if (currentState == DBCS) { |
| currentState = SBCS; |
| dst[dp++] = SI; |
| } |
| return dp; |
| } |
| |
| @Override |
| public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) { |
| int dp = 0; |
| int sl = sp + len; |
| while (sp < sl) { |
| char c = (char)(src[sp++] & 0xff); |
| int bb = encodeChar(c); |
| if (bb == UNMAPPABLE_ENCODING) { |
| // no surrogate pair in latin1 string |
| dst[dp++] = repl[0]; |
| if (repl.length > 1) |
| dst[dp++] = repl[1]; |
| continue; |
| } //else |
| if (bb > MAX_SINGLEBYTE) { // DoubleByte |
| if (currentState == SBCS) { |
| currentState = DBCS; |
| dst[dp++] = SO; |
| } |
| dst[dp++] = (byte)(bb >> 8); |
| dst[dp++] = (byte)bb; |
| } else { // SingleByte |
| if (currentState == DBCS) { |
| currentState = SBCS; |
| dst[dp++] = SI; |
| } |
| dst[dp++] = (byte)bb; |
| } |
| } |
| if (currentState == DBCS) { |
| currentState = SBCS; |
| dst[dp++] = SI; |
| } |
| return dp; |
| } |
| |
| @Override |
| public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) { |
| int dp = 0; |
| int sl = sp + len; |
| while (sp < sl) { |
| char c = StringUTF16.getChar(src, sp++); |
| int bb = encodeChar(c); |
| if (bb == UNMAPPABLE_ENCODING) { |
| if (Character.isHighSurrogate(c) && sp < sl && |
| Character.isLowSurrogate(StringUTF16.getChar(src, sp))) { |
| sp++; |
| } |
| dst[dp++] = repl[0]; |
| if (repl.length > 1) |
| dst[dp++] = repl[1]; |
| continue; |
| } //else |
| if (bb > MAX_SINGLEBYTE) { // DoubleByte |
| if (currentState == SBCS) { |
| currentState = DBCS; |
| dst[dp++] = SO; |
| } |
| dst[dp++] = (byte)(bb >> 8); |
| dst[dp++] = (byte)bb; |
| } else { // SingleByte |
| if (currentState == DBCS) { |
| currentState = SBCS; |
| dst[dp++] = SI; |
| } |
| dst[dp++] = (byte)bb; |
| } |
| } |
| if (currentState == DBCS) { |
| currentState = SBCS; |
| dst[dp++] = SI; |
| } |
| return dp; |
| } |
| } |
| |
| // EUC_SIMPLE |
| public static class Encoder_EUC_SIM extends Encoder { |
| public Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex, |
| boolean isASCIICompatible) { |
| super(cs, c2b, c2bIndex, isASCIICompatible); |
| } |
| } |
| |
| } |