blob: e54cffa5bb315bfd70ceff5722b898ed297dc3f3 [file] [log] [blame]
/*
* Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package sun.nio.cs;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.BufferOverflowException;
import java.nio.BufferUnderflowException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;
/*
* # Bits Bit pattern
* 1 7 0xxxxxxx
* 2 11 110xxxxx 10xxxxxx
* 3 16 1110xxxx 10xxxxxx 10xxxxxx
* 4 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
* 5 26 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
* 6 31 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*
* UCS-2 uses 1-3, UTF-16 uses 1-4, UCS-4 uses 1-6
*/
class UTF_8 extends Unicode
{
public UTF_8() {
super("UTF-8", StandardCharsets.aliases_UTF_8);
}
public String historicalName() {
return "UTF8";
}
public CharsetDecoder newDecoder() {
return new Decoder(this);
}
public CharsetEncoder newEncoder() {
return new Encoder(this);
}
private static class Decoder extends CharsetDecoder {
private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
}
private boolean isContinuation(int b) {
return ((b & 0xc0) == 0x80);
}
private final Surrogate.Generator sgg = new Surrogate.Generator();
private CoderResult decodeArrayLoop(ByteBuffer src,
CharBuffer dst)
{
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
try {
while (sp < sl) {
int b1 = sa[sp];
int b2, b3;
switch ((b1 >> 4) & 0x0f) {
case 0: case 1: case 2: case 3:
case 4: case 5: case 6: case 7:
// 1 byte, 7 bits: 0xxxxxxx
if (dl - dp < 1)
return CoderResult.OVERFLOW;
da[dp++] = (char)(b1 & 0x7f);
sp++;
continue;
case 12: case 13:
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
if (sl - sp < 2)
return CoderResult.UNDERFLOW;
if (dl - dp < 1)
return CoderResult.OVERFLOW;
if (!isContinuation(b2 = sa[sp + 1]))
return CoderResult.malformedForLength(1);
da[dp++] = ((char)(((b1 & 0x1f) << 6) |
((b2 & 0x3f) << 0)));
sp += 2;
continue;
case 14:
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
if (sl - sp < 3)
return CoderResult.UNDERFLOW;
if (dl - dp < 1)
return CoderResult.OVERFLOW;
if (!isContinuation(b2 = sa[sp + 1]))
return CoderResult.malformedForLength(1);
if (!isContinuation(b3 = sa[sp + 2]))
return CoderResult.malformedForLength(2);
da[dp++] = ((char)(((b1 & 0x0f) << 12) |
((b2 & 0x3f) << 06) |
((b3 & 0x3f) << 0)));
sp += 3;
continue;
case 15:
// 4, 5, or 6 bytes
int b4, b5, b6, uc, n;
switch (b1 & 0x0f) {
case 0: case 1: case 2: case 3:
case 4: case 5: case 6: case 7:
// 4 bytes, 21 bits
if (sl - sp < 4)
return CoderResult.UNDERFLOW;
if (!isContinuation(b2 = sa[sp + 1]))
return CoderResult.malformedForLength(1);
if (!isContinuation(b3 = sa[sp + 2]))
return CoderResult.malformedForLength(2);
if (!isContinuation(b4 = sa[sp + 3]))
return CoderResult.malformedForLength(3);
uc = (((b1 & 0x07) << 18) |
((b2 & 0x3f) << 12) |
((b3 & 0x3f) << 06) |
((b4 & 0x3f) << 00));
n = 4;
break;
case 8: case 9: case 10: case 11:
// 5 bytes, 26 bits
if (sl - sp < 5)
return CoderResult.UNDERFLOW;
if (!isContinuation(b2 = sa[sp + 1]))
return CoderResult.malformedForLength(1);
if (!isContinuation(b3 = sa[sp + 2]))
return CoderResult.malformedForLength(2);
if (!isContinuation(b4 = sa[sp + 3]))
return CoderResult.malformedForLength(3);
if (!isContinuation(b5 = sa[sp + 4]))
return CoderResult.malformedForLength(4);
uc = (((b1 & 0x03) << 24) |
((b2 & 0x3f) << 18) |
((b3 & 0x3f) << 12) |
((b4 & 0x3f) << 06) |
((b5 & 0x3f) << 00));
n = 5;
break;
case 12: case 13:
// 6 bytes, 31 bits
if (sl - sp < 6)
return CoderResult.UNDERFLOW;
if (!isContinuation(b2 = sa[sp + 1]))
return CoderResult.malformedForLength(1);
if (!isContinuation(b3 = sa[sp + 2]))
return CoderResult.malformedForLength(2);
if (!isContinuation(b4 = sa[sp + 3]))
return CoderResult.malformedForLength(3);
if (!isContinuation(b5 = sa[sp + 4]))
return CoderResult.malformedForLength(4);
if (!isContinuation(b6 = sa[sp + 5]))
return CoderResult.malformedForLength(5);
uc = (((b1 & 0x01) << 30) |
((b2 & 0x3f) << 24) |
((b3 & 0x3f) << 18) |
((b4 & 0x3f) << 12) |
((b5 & 0x3f) << 06) |
((b6 & 0x3f)));
n = 6;
break;
default:
return CoderResult.malformedForLength(1);
}
int gn = sgg.generate(uc, n, da, dp, dl);
if (gn < 0)
return sgg.error();
dp += gn;
sp += n;
continue;
default:
return CoderResult.malformedForLength(1);
}
}
return CoderResult.UNDERFLOW;
} finally {
src.position(sp - src.arrayOffset());
dst.position(dp - dst.arrayOffset());
}
}
private CoderResult decodeBufferLoop(ByteBuffer src,
CharBuffer dst)
{
int mark = src.position();
try {
while (src.hasRemaining()) {
int b1 = src.get();
int b2, b3;
switch ((b1 >> 4) & 0x0f) {
case 0: case 1: case 2: case 3:
case 4: case 5: case 6: case 7:
// 1 byte, 7 bits: 0xxxxxxx
if (dst.remaining() < 1)
return CoderResult.OVERFLOW;
dst.put((char)b1);
mark++;
continue;
case 12: case 13:
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
if (src.remaining() < 1)
return CoderResult.UNDERFLOW;
if (dst.remaining() < 1)
return CoderResult.OVERFLOW;
if (!isContinuation(b2 = src.get()))
return CoderResult.malformedForLength(1);
dst.put((char)(((b1 & 0x1f) << 6) |
((b2 & 0x3f) << 0)));
mark += 2;
continue;
case 14:
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
if (src.remaining() < 2)
return CoderResult.UNDERFLOW;
if (dst.remaining() < 1)
return CoderResult.OVERFLOW;
if (!isContinuation(b2 = src.get()))
return CoderResult.malformedForLength(1);
if (!isContinuation(b3 = src.get()))
return CoderResult.malformedForLength(2);
dst.put((char)(((b1 & 0x0f) << 12) |
((b2 & 0x3f) << 06) |
((b3 & 0x3f) << 0)));
mark += 3;
continue;
case 15:
// 4, 5, or 6 bytes
int b4, b5, b6, uc, n;
switch (b1 & 0x0f) {
case 0: case 1: case 2: case 3:
case 4: case 5: case 6: case 7:
// 4 bytes, 21 bits
if (src.remaining() < 3)
return CoderResult.UNDERFLOW;
if (!isContinuation(b2 = src.get()))
return CoderResult.malformedForLength(1);
if (!isContinuation(b3 = src.get()))
return CoderResult.malformedForLength(2);
if (!isContinuation(b4 = src.get()))
return CoderResult.malformedForLength(3);
uc = (((b1 & 0x07) << 18) |
((b2 & 0x3f) << 12) |
((b3 & 0x3f) << 06) |
((b4 & 0x3f) << 00));
n = 4;
break;
case 8: case 9: case 10: case 11:
// 5 bytes, 26 bits
if (src.remaining() < 4)
return CoderResult.UNDERFLOW;
if (!isContinuation(b2 = src.get()))
return CoderResult.malformedForLength(1);
if (!isContinuation(b3 = src.get()))
return CoderResult.malformedForLength(2);
if (!isContinuation(b4 = src.get()))
return CoderResult.malformedForLength(3);
if (!isContinuation(b5 = src.get()))
return CoderResult.malformedForLength(4);
uc = (((b1 & 0x03) << 24) |
((b2 & 0x3f) << 18) |
((b3 & 0x3f) << 12) |
((b4 & 0x3f) << 06) |
((b5 & 0x3f) << 00));
n = 5;
break;
case 12: case 13:
// 6 bytes, 31 bits
if (src.remaining() < 4)
return CoderResult.UNDERFLOW;
if (!isContinuation(b2 = src.get()))
return CoderResult.malformedForLength(1);
if (!isContinuation(b3 = src.get()))
return CoderResult.malformedForLength(2);
if (!isContinuation(b4 = src.get()))
return CoderResult.malformedForLength(3);
if (!isContinuation(b5 = src.get()))
return CoderResult.malformedForLength(4);
if (!isContinuation(b6 = src.get()))
return CoderResult.malformedForLength(5);
uc = (((b1 & 0x01) << 30) |
((b2 & 0x3f) << 24) |
((b3 & 0x3f) << 18) |
((b4 & 0x3f) << 12) |
((b5 & 0x3f) << 06) |
((b6 & 0x3f)));
n = 6;
break;
default:
return CoderResult.malformedForLength(1);
}
if (sgg.generate(uc, n, dst) < 0)
return sgg.error();
mark += n;
continue;
default:
return CoderResult.malformedForLength(1);
}
}
return CoderResult.UNDERFLOW;
} finally {
src.position(mark);
}
}
protected CoderResult decodeLoop(ByteBuffer src,
CharBuffer dst)
{
if (src.hasArray() && dst.hasArray())
return decodeArrayLoop(src, dst);
else
return decodeBufferLoop(src, dst);
}
}
private static class Encoder extends CharsetEncoder {
private Encoder(Charset cs) {
super(cs, 1.1f, 4.0f);
}
public boolean canEncode(char c) {
return !Surrogate.is(c);
}
private final Surrogate.Parser sgp = new Surrogate.Parser();
private CoderResult encodeArrayLoop(CharBuffer src,
ByteBuffer dst)
{
char[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
byte[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
try {
while (sp < sl) {
char c = sa[sp];
if (c < 0x80) {
// Have at most seven bits
if (dp >= dl)
return CoderResult.OVERFLOW;
da[dp++] = (byte)c;
sp++;
continue;
}
if (!Surrogate.is(c)) {
// 2 bytes, 11 bits
if (c < 0x800) {
if (dl - dp < 2)
return CoderResult.OVERFLOW;
da[dp++] = (byte)(0xc0 | ((c >> 06)));
da[dp++] = (byte)(0x80 | ((c >> 00) & 0x3f));
sp++;
continue;
}
if (c <= '\uFFFF') {
// 3 bytes, 16 bits
if (dl - dp < 3)
return CoderResult.OVERFLOW;
da[dp++] = (byte)(0xe0 | ((c >> 12)));
da[dp++] = (byte)(0x80 | ((c >> 06) & 0x3f));
da[dp++] = (byte)(0x80 | ((c >> 00) & 0x3f));
sp++;
continue;
}
}
// Have a surrogate pair
int uc = sgp.parse(c, sa, sp, sl);
if (uc < 0)
return sgp.error();
if (uc < 0x200000) {
if (dl - dp < 4)
return CoderResult.OVERFLOW;
da[dp++] = (byte)(0xf0 | ((uc >> 18)));
da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
da[dp++] = (byte)(0x80 | ((uc >> 06) & 0x3f));
da[dp++] = (byte)(0x80 | ((uc >> 00) & 0x3f));
sp += sgp.increment();
continue;
}
assert false;
}
return CoderResult.UNDERFLOW;
} finally {
src.position(sp - src.arrayOffset());
dst.position(dp - dst.arrayOffset());
}
}
private CoderResult encodeBufferLoop(CharBuffer src,
ByteBuffer dst)
{
int mark = src.position();
try {
while (src.hasRemaining()) {
char c = src.get();
if (c < 0x80) {
// Have at most seven bits
if (!dst.hasRemaining())
return CoderResult.OVERFLOW;
dst.put((byte)c);
mark++;
continue;
}
if (!Surrogate.is(c)) {
if (c < 0x800) {
// 2 bytes, 11 bits
if (dst.remaining() < 2)
return CoderResult.OVERFLOW;
dst.put((byte)(0xc0 | ((c >> 06))));
dst.put((byte)(0x80 | ((c >> 00) & 0x3f)));
mark++;
continue;
}
if (c <= '\uFFFF') {
// 3 bytes, 16 bits
if (dst.remaining() < 3)
return CoderResult.OVERFLOW;
dst.put((byte)(0xe0 | ((c >> 12))));
dst.put((byte)(0x80 | ((c >> 06) & 0x3f)));
dst.put((byte)(0x80 | ((c >> 00) & 0x3f)));
mark++;
continue;
}
}
// Have a surrogate pair
int uc = sgp.parse(c, src);
if (uc < 0)
return sgp.error();
if (uc < 0x200000) {
if (dst.remaining() < 4)
return CoderResult.OVERFLOW;
dst.put((byte)(0xf0 | ((uc >> 18))));
dst.put((byte)(0x80 | ((uc >> 12) & 0x3f)));
dst.put((byte)(0x80 | ((uc >> 06) & 0x3f)));
dst.put((byte)(0x80 | ((uc >> 00) & 0x3f)));
mark += sgp.increment();
continue;
}
assert false;
}
return CoderResult.UNDERFLOW;
} finally {
src.position(mark);
}
}
protected final CoderResult encodeLoop(CharBuffer src,
ByteBuffer dst)
{
if (src.hasArray() && dst.hasArray())
return encodeArrayLoop(src, dst);
else
return encodeBufferLoop(src, dst);
}
}
}