| /* |
| * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Sun designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Sun in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| * CA 95054 USA or visit www.sun.com if you need additional information or |
| * have any questions. |
| */ |
| |
| package sun.nio.cs; |
| |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.BufferOverflowException; |
| import java.nio.BufferUnderflowException; |
| import java.nio.charset.Charset; |
| import java.nio.charset.CharsetDecoder; |
| import java.nio.charset.CharsetEncoder; |
| import java.nio.charset.CoderResult; |
| import java.nio.charset.CharacterCodingException; |
| import java.nio.charset.MalformedInputException; |
| import java.nio.charset.UnmappableCharacterException; |
| |
| |
| /* |
| * # Bits Bit pattern |
| * 1 7 0xxxxxxx |
| * 2 11 110xxxxx 10xxxxxx |
| * 3 16 1110xxxx 10xxxxxx 10xxxxxx |
| * 4 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
| * 5 26 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
| * 6 31 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
| * |
| * UCS-2 uses 1-3, UTF-16 uses 1-4, UCS-4 uses 1-6 |
| */ |
| |
| class UTF_8 extends Unicode |
| { |
| |
| public UTF_8() { |
| super("UTF-8", StandardCharsets.aliases_UTF_8); |
| } |
| |
| public String historicalName() { |
| return "UTF8"; |
| } |
| |
| public CharsetDecoder newDecoder() { |
| return new Decoder(this); |
| } |
| |
| public CharsetEncoder newEncoder() { |
| return new Encoder(this); |
| } |
| |
| |
| private static class Decoder extends CharsetDecoder { |
| private Decoder(Charset cs) { |
| super(cs, 1.0f, 1.0f); |
| } |
| |
| private boolean isContinuation(int b) { |
| return ((b & 0xc0) == 0x80); |
| } |
| |
| private final Surrogate.Generator sgg = new Surrogate.Generator(); |
| |
| private CoderResult decodeArrayLoop(ByteBuffer src, |
| CharBuffer dst) |
| { |
| byte[] sa = src.array(); |
| int sp = src.arrayOffset() + src.position(); |
| int sl = src.arrayOffset() + src.limit(); |
| assert (sp <= sl); |
| sp = (sp <= sl ? sp : sl); |
| char[] da = dst.array(); |
| int dp = dst.arrayOffset() + dst.position(); |
| int dl = dst.arrayOffset() + dst.limit(); |
| assert (dp <= dl); |
| dp = (dp <= dl ? dp : dl); |
| |
| try { |
| while (sp < sl) { |
| int b1 = sa[sp]; |
| int b2, b3; |
| switch ((b1 >> 4) & 0x0f) { |
| |
| case 0: case 1: case 2: case 3: |
| case 4: case 5: case 6: case 7: |
| // 1 byte, 7 bits: 0xxxxxxx |
| if (dl - dp < 1) |
| return CoderResult.OVERFLOW; |
| da[dp++] = (char)(b1 & 0x7f); |
| sp++; |
| continue; |
| |
| case 12: case 13: |
| // 2 bytes, 11 bits: 110xxxxx 10xxxxxx |
| if (sl - sp < 2) |
| return CoderResult.UNDERFLOW; |
| if (dl - dp < 1) |
| return CoderResult.OVERFLOW; |
| if (!isContinuation(b2 = sa[sp + 1])) |
| return CoderResult.malformedForLength(1); |
| da[dp++] = ((char)(((b1 & 0x1f) << 6) | |
| ((b2 & 0x3f) << 0))); |
| sp += 2; |
| continue; |
| |
| case 14: |
| // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx |
| if (sl - sp < 3) |
| return CoderResult.UNDERFLOW; |
| if (dl - dp < 1) |
| return CoderResult.OVERFLOW; |
| if (!isContinuation(b2 = sa[sp + 1])) |
| return CoderResult.malformedForLength(1); |
| if (!isContinuation(b3 = sa[sp + 2])) |
| return CoderResult.malformedForLength(2); |
| da[dp++] = ((char)(((b1 & 0x0f) << 12) | |
| ((b2 & 0x3f) << 06) | |
| ((b3 & 0x3f) << 0))); |
| sp += 3; |
| continue; |
| |
| case 15: |
| // 4, 5, or 6 bytes |
| |
| int b4, b5, b6, uc, n; |
| switch (b1 & 0x0f) { |
| |
| case 0: case 1: case 2: case 3: |
| case 4: case 5: case 6: case 7: |
| // 4 bytes, 21 bits |
| if (sl - sp < 4) |
| return CoderResult.UNDERFLOW; |
| if (!isContinuation(b2 = sa[sp + 1])) |
| return CoderResult.malformedForLength(1); |
| if (!isContinuation(b3 = sa[sp + 2])) |
| return CoderResult.malformedForLength(2); |
| if (!isContinuation(b4 = sa[sp + 3])) |
| return CoderResult.malformedForLength(3); |
| uc = (((b1 & 0x07) << 18) | |
| ((b2 & 0x3f) << 12) | |
| ((b3 & 0x3f) << 06) | |
| ((b4 & 0x3f) << 00)); |
| n = 4; |
| break; |
| |
| case 8: case 9: case 10: case 11: |
| // 5 bytes, 26 bits |
| if (sl - sp < 5) |
| return CoderResult.UNDERFLOW; |
| if (!isContinuation(b2 = sa[sp + 1])) |
| return CoderResult.malformedForLength(1); |
| if (!isContinuation(b3 = sa[sp + 2])) |
| return CoderResult.malformedForLength(2); |
| if (!isContinuation(b4 = sa[sp + 3])) |
| return CoderResult.malformedForLength(3); |
| if (!isContinuation(b5 = sa[sp + 4])) |
| return CoderResult.malformedForLength(4); |
| uc = (((b1 & 0x03) << 24) | |
| ((b2 & 0x3f) << 18) | |
| ((b3 & 0x3f) << 12) | |
| ((b4 & 0x3f) << 06) | |
| ((b5 & 0x3f) << 00)); |
| n = 5; |
| break; |
| |
| case 12: case 13: |
| // 6 bytes, 31 bits |
| if (sl - sp < 6) |
| return CoderResult.UNDERFLOW; |
| if (!isContinuation(b2 = sa[sp + 1])) |
| return CoderResult.malformedForLength(1); |
| if (!isContinuation(b3 = sa[sp + 2])) |
| return CoderResult.malformedForLength(2); |
| if (!isContinuation(b4 = sa[sp + 3])) |
| return CoderResult.malformedForLength(3); |
| if (!isContinuation(b5 = sa[sp + 4])) |
| return CoderResult.malformedForLength(4); |
| if (!isContinuation(b6 = sa[sp + 5])) |
| return CoderResult.malformedForLength(5); |
| uc = (((b1 & 0x01) << 30) | |
| ((b2 & 0x3f) << 24) | |
| ((b3 & 0x3f) << 18) | |
| ((b4 & 0x3f) << 12) | |
| ((b5 & 0x3f) << 06) | |
| ((b6 & 0x3f))); |
| n = 6; |
| break; |
| |
| default: |
| return CoderResult.malformedForLength(1); |
| |
| } |
| |
| int gn = sgg.generate(uc, n, da, dp, dl); |
| if (gn < 0) |
| return sgg.error(); |
| dp += gn; |
| sp += n; |
| continue; |
| |
| default: |
| return CoderResult.malformedForLength(1); |
| |
| } |
| |
| } |
| |
| return CoderResult.UNDERFLOW; |
| } finally { |
| src.position(sp - src.arrayOffset()); |
| dst.position(dp - dst.arrayOffset()); |
| } |
| } |
| |
| private CoderResult decodeBufferLoop(ByteBuffer src, |
| CharBuffer dst) |
| { |
| int mark = src.position(); |
| try { |
| while (src.hasRemaining()) { |
| int b1 = src.get(); |
| int b2, b3; |
| switch ((b1 >> 4) & 0x0f) { |
| |
| case 0: case 1: case 2: case 3: |
| case 4: case 5: case 6: case 7: |
| // 1 byte, 7 bits: 0xxxxxxx |
| if (dst.remaining() < 1) |
| return CoderResult.OVERFLOW; |
| dst.put((char)b1); |
| mark++; |
| continue; |
| |
| case 12: case 13: |
| // 2 bytes, 11 bits: 110xxxxx 10xxxxxx |
| if (src.remaining() < 1) |
| return CoderResult.UNDERFLOW; |
| if (dst.remaining() < 1) |
| return CoderResult.OVERFLOW; |
| if (!isContinuation(b2 = src.get())) |
| return CoderResult.malformedForLength(1); |
| dst.put((char)(((b1 & 0x1f) << 6) | |
| ((b2 & 0x3f) << 0))); |
| mark += 2; |
| continue; |
| |
| case 14: |
| // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx |
| if (src.remaining() < 2) |
| return CoderResult.UNDERFLOW; |
| if (dst.remaining() < 1) |
| return CoderResult.OVERFLOW; |
| if (!isContinuation(b2 = src.get())) |
| return CoderResult.malformedForLength(1); |
| if (!isContinuation(b3 = src.get())) |
| return CoderResult.malformedForLength(2); |
| dst.put((char)(((b1 & 0x0f) << 12) | |
| ((b2 & 0x3f) << 06) | |
| ((b3 & 0x3f) << 0))); |
| mark += 3; |
| continue; |
| |
| case 15: |
| // 4, 5, or 6 bytes |
| |
| int b4, b5, b6, uc, n; |
| switch (b1 & 0x0f) { |
| |
| case 0: case 1: case 2: case 3: |
| case 4: case 5: case 6: case 7: |
| // 4 bytes, 21 bits |
| if (src.remaining() < 3) |
| return CoderResult.UNDERFLOW; |
| if (!isContinuation(b2 = src.get())) |
| return CoderResult.malformedForLength(1); |
| if (!isContinuation(b3 = src.get())) |
| return CoderResult.malformedForLength(2); |
| if (!isContinuation(b4 = src.get())) |
| return CoderResult.malformedForLength(3); |
| uc = (((b1 & 0x07) << 18) | |
| ((b2 & 0x3f) << 12) | |
| ((b3 & 0x3f) << 06) | |
| ((b4 & 0x3f) << 00)); |
| n = 4; |
| break; |
| |
| case 8: case 9: case 10: case 11: |
| // 5 bytes, 26 bits |
| if (src.remaining() < 4) |
| return CoderResult.UNDERFLOW; |
| if (!isContinuation(b2 = src.get())) |
| return CoderResult.malformedForLength(1); |
| if (!isContinuation(b3 = src.get())) |
| return CoderResult.malformedForLength(2); |
| if (!isContinuation(b4 = src.get())) |
| return CoderResult.malformedForLength(3); |
| if (!isContinuation(b5 = src.get())) |
| return CoderResult.malformedForLength(4); |
| uc = (((b1 & 0x03) << 24) | |
| ((b2 & 0x3f) << 18) | |
| ((b3 & 0x3f) << 12) | |
| ((b4 & 0x3f) << 06) | |
| ((b5 & 0x3f) << 00)); |
| n = 5; |
| break; |
| |
| case 12: case 13: |
| // 6 bytes, 31 bits |
| if (src.remaining() < 4) |
| return CoderResult.UNDERFLOW; |
| if (!isContinuation(b2 = src.get())) |
| return CoderResult.malformedForLength(1); |
| if (!isContinuation(b3 = src.get())) |
| return CoderResult.malformedForLength(2); |
| if (!isContinuation(b4 = src.get())) |
| return CoderResult.malformedForLength(3); |
| if (!isContinuation(b5 = src.get())) |
| return CoderResult.malformedForLength(4); |
| if (!isContinuation(b6 = src.get())) |
| return CoderResult.malformedForLength(5); |
| uc = (((b1 & 0x01) << 30) | |
| ((b2 & 0x3f) << 24) | |
| ((b3 & 0x3f) << 18) | |
| ((b4 & 0x3f) << 12) | |
| ((b5 & 0x3f) << 06) | |
| ((b6 & 0x3f))); |
| n = 6; |
| break; |
| |
| default: |
| return CoderResult.malformedForLength(1); |
| |
| } |
| |
| if (sgg.generate(uc, n, dst) < 0) |
| return sgg.error(); |
| mark += n; |
| continue; |
| |
| default: |
| return CoderResult.malformedForLength(1); |
| |
| } |
| |
| } |
| return CoderResult.UNDERFLOW; |
| } finally { |
| src.position(mark); |
| } |
| } |
| |
| protected CoderResult decodeLoop(ByteBuffer src, |
| CharBuffer dst) |
| { |
| if (src.hasArray() && dst.hasArray()) |
| return decodeArrayLoop(src, dst); |
| else |
| return decodeBufferLoop(src, dst); |
| } |
| |
| } |
| |
| |
| private static class Encoder extends CharsetEncoder { |
| |
| private Encoder(Charset cs) { |
| super(cs, 1.1f, 4.0f); |
| } |
| |
| public boolean canEncode(char c) { |
| return !Surrogate.is(c); |
| } |
| |
| private final Surrogate.Parser sgp = new Surrogate.Parser(); |
| |
| private CoderResult encodeArrayLoop(CharBuffer src, |
| ByteBuffer dst) |
| { |
| char[] sa = src.array(); |
| int sp = src.arrayOffset() + src.position(); |
| int sl = src.arrayOffset() + src.limit(); |
| assert (sp <= sl); |
| sp = (sp <= sl ? sp : sl); |
| byte[] da = dst.array(); |
| int dp = dst.arrayOffset() + dst.position(); |
| int dl = dst.arrayOffset() + dst.limit(); |
| assert (dp <= dl); |
| dp = (dp <= dl ? dp : dl); |
| |
| try { |
| while (sp < sl) { |
| char c = sa[sp]; |
| |
| if (c < 0x80) { |
| // Have at most seven bits |
| if (dp >= dl) |
| return CoderResult.OVERFLOW; |
| da[dp++] = (byte)c; |
| sp++; |
| continue; |
| } |
| |
| if (!Surrogate.is(c)) { |
| // 2 bytes, 11 bits |
| if (c < 0x800) { |
| if (dl - dp < 2) |
| return CoderResult.OVERFLOW; |
| da[dp++] = (byte)(0xc0 | ((c >> 06))); |
| da[dp++] = (byte)(0x80 | ((c >> 00) & 0x3f)); |
| sp++; |
| continue; |
| } |
| if (c <= '\uFFFF') { |
| // 3 bytes, 16 bits |
| if (dl - dp < 3) |
| return CoderResult.OVERFLOW; |
| da[dp++] = (byte)(0xe0 | ((c >> 12))); |
| da[dp++] = (byte)(0x80 | ((c >> 06) & 0x3f)); |
| da[dp++] = (byte)(0x80 | ((c >> 00) & 0x3f)); |
| sp++; |
| continue; |
| } |
| } |
| |
| // Have a surrogate pair |
| int uc = sgp.parse(c, sa, sp, sl); |
| if (uc < 0) |
| return sgp.error(); |
| if (uc < 0x200000) { |
| if (dl - dp < 4) |
| return CoderResult.OVERFLOW; |
| da[dp++] = (byte)(0xf0 | ((uc >> 18))); |
| da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f)); |
| da[dp++] = (byte)(0x80 | ((uc >> 06) & 0x3f)); |
| da[dp++] = (byte)(0x80 | ((uc >> 00) & 0x3f)); |
| sp += sgp.increment(); |
| continue; |
| } |
| assert false; |
| |
| } |
| return CoderResult.UNDERFLOW; |
| } finally { |
| src.position(sp - src.arrayOffset()); |
| dst.position(dp - dst.arrayOffset()); |
| } |
| } |
| |
| private CoderResult encodeBufferLoop(CharBuffer src, |
| ByteBuffer dst) |
| { |
| int mark = src.position(); |
| try { |
| while (src.hasRemaining()) { |
| char c = src.get(); |
| |
| if (c < 0x80) { |
| // Have at most seven bits |
| if (!dst.hasRemaining()) |
| return CoderResult.OVERFLOW; |
| dst.put((byte)c); |
| mark++; |
| continue; |
| } |
| |
| if (!Surrogate.is(c)) { |
| if (c < 0x800) { |
| // 2 bytes, 11 bits |
| if (dst.remaining() < 2) |
| return CoderResult.OVERFLOW; |
| dst.put((byte)(0xc0 | ((c >> 06)))); |
| dst.put((byte)(0x80 | ((c >> 00) & 0x3f))); |
| mark++; |
| continue; |
| } |
| if (c <= '\uFFFF') { |
| // 3 bytes, 16 bits |
| if (dst.remaining() < 3) |
| return CoderResult.OVERFLOW; |
| dst.put((byte)(0xe0 | ((c >> 12)))); |
| dst.put((byte)(0x80 | ((c >> 06) & 0x3f))); |
| dst.put((byte)(0x80 | ((c >> 00) & 0x3f))); |
| mark++; |
| continue; |
| } |
| } |
| |
| // Have a surrogate pair |
| int uc = sgp.parse(c, src); |
| if (uc < 0) |
| return sgp.error(); |
| if (uc < 0x200000) { |
| if (dst.remaining() < 4) |
| return CoderResult.OVERFLOW; |
| dst.put((byte)(0xf0 | ((uc >> 18)))); |
| dst.put((byte)(0x80 | ((uc >> 12) & 0x3f))); |
| dst.put((byte)(0x80 | ((uc >> 06) & 0x3f))); |
| dst.put((byte)(0x80 | ((uc >> 00) & 0x3f))); |
| mark += sgp.increment(); |
| continue; |
| } |
| assert false; |
| |
| } |
| return CoderResult.UNDERFLOW; |
| } finally { |
| src.position(mark); |
| } |
| } |
| |
| protected final CoderResult encodeLoop(CharBuffer src, |
| ByteBuffer dst) |
| { |
| if (src.hasArray() && dst.hasArray()) |
| return encodeArrayLoop(src, dst); |
| else |
| return encodeBufferLoop(src, dst); |
| } |
| |
| } |
| |
| } |