Blame - jdk/src/share/classes/sun/io/ByteToCharUTF8.java - platform/libcore

blob: 47da36bb4c900f78fa26c1dc1b572bb6895af36d [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 1996-1997 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25	package sun.io;
				26
				27
				28	/**
				29	* UCS Transformation Format 8 (UTF-8) -> UCS2 (UTF16) converter
				30	*
				31	* see CharToByteUTF8.java about UTF-8 format
				32	*/
				33
				34	public class ByteToCharUTF8 extends ByteToCharConverter {
				35
				36	private int savedSize;
				37	private byte[] savedBytes;
				38
				39	public ByteToCharUTF8() {
				40	super();
				41	savedSize = 0;
				42	savedBytes = new byte[5];
				43	}
				44
				45	public int flush(char[] output, int outStart, int outEnd)
				46	throws MalformedInputException
				47	{
				48	if (savedSize != 0) {
				49	savedSize = 0;
				50	badInputLength = 0;
				51	throw new MalformedInputException();
				52	}
				53	byteOff = charOff = 0;
				54	return 0;
				55	}
				56
				57	/**
				58	* Character converson
				59	*/
				60	public int convert(byte[] input, int inOff, int inEnd,
				61	char[] output, int outOff, int outEnd)
				62	throws MalformedInputException, ConversionBufferFullException
				63	{
				64	int byte1, byte2, byte3, byte4;
				65	char[] outputChar = new char[2];
				66	int outputSize;
				67	int byteOffAdjustment = 0;
				68
				69	if (savedSize != 0) {
				70	byte[] newBuf;
				71	newBuf = new byte[inEnd - inOff + savedSize];
				72	for (int i = 0; i < savedSize; i++) {
				73	newBuf[i] = savedBytes[i];
				74	}
				75	System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff);
				76	input = newBuf;
				77	inOff = 0;
				78	inEnd = newBuf.length;
				79	byteOffAdjustment = -savedSize;
				80	savedSize = 0;
				81	}
				82
				83	charOff = outOff;
				84	byteOff = inOff;
				85	int startByteOff;
				86
				87	while(byteOff < inEnd) {
				88
				89	startByteOff = byteOff;
				90	byte1 = input[byteOff++] & 0xff;
				91
				92	if ((byte1 & 0x80) == 0){
				93	outputChar[0] = (char)byte1;
				94	outputSize = 1;
				95	} else if ((byte1 & 0xe0) == 0xc0) {
				96	if (byteOff >= inEnd) {
				97	savedSize = 1;
				98	savedBytes[0] = (byte)byte1;
				99	break;
				100	}
				101	byte2 = input[byteOff++] & 0xff;
				102	if ((byte2 & 0xc0) != 0x80) {
				103	badInputLength = 2;
				104	byteOff += byteOffAdjustment;
				105	throw new MalformedInputException();
				106	}
				107	outputChar[0] = (char)(((byte1 & 0x1f) << 6) \| (byte2 & 0x3f));
				108	outputSize = 1;
				109	} else if ((byte1 & 0xf0) == 0xe0){
				110	if (byteOff + 1 >= inEnd) {
				111	savedBytes[0] = (byte)byte1;
				112	if (byteOff >= inEnd) {
				113	savedSize = 1;
				114	} else {
				115	savedSize = 2;
				116	savedBytes[1] = (byte)input[byteOff++];
				117	}
				118	break;
				119	}
				120	byte2 = input[byteOff++] & 0xff;
				121	byte3 = input[byteOff++] & 0xff;
				122	if ((byte2 & 0xc0) != 0x80 \|\| (byte3 & 0xc0) != 0x80) {
				123	badInputLength = 3;
				124	byteOff += byteOffAdjustment;
				125	throw new MalformedInputException();
				126	}
				127	outputChar[0] = (char)(((byte1 & 0x0f) << 12)
				128	\| ((byte2 & 0x3f) << 6)
				129	\| (byte3 & 0x3f));
				130	outputSize = 1;
				131	} else if ((byte1 & 0xf8) == 0xf0) {
				132	if (byteOff + 2 >= inEnd) {
				133	savedBytes[0] = (byte)byte1;
				134	if (byteOff >= inEnd) {
				135	savedSize = 1;
				136	} else if (byteOff + 1 >= inEnd) {
				137	savedSize = 2;
				138	savedBytes[1] = (byte)input[byteOff++];
				139	} else {
				140	savedSize = 3;
				141	savedBytes[1] = (byte)input[byteOff++];
				142	savedBytes[2] = (byte)input[byteOff++];
				143	}
				144	break;
				145	}
				146	byte2 = input[byteOff++] & 0xff;
				147	byte3 = input[byteOff++] & 0xff;
				148	byte4 = input[byteOff++] & 0xff;
				149	if ((byte2 & 0xc0) != 0x80 \|\|
				150	(byte3 & 0xc0) != 0x80 \|\|
				151	(byte4 & 0xc0) != 0x80) {
				152	badInputLength = 4;
				153	byteOff += byteOffAdjustment;
				154	throw new MalformedInputException();
				155	}
				156	// this byte sequence is UTF16 character
				157	int ucs4 = (int)(0x07 & byte1) << 18 \|
				158	(int)(0x3f & byte2) << 12 \|
				159	(int)(0x3f & byte3) << 6 \|
				160	(int)(0x3f & byte4);
				161	outputChar[0] = (char)((ucs4 - 0x10000) / 0x400 + 0xd800);
				162	outputChar[1] = (char)((ucs4 - 0x10000) % 0x400 + 0xdc00);
				163	outputSize = 2;
				164	} else {
				165	badInputLength = 1;
				166	byteOff += byteOffAdjustment;
				167	throw new MalformedInputException();
				168	}
				169
				170	if (charOff + outputSize > outEnd) {
				171	byteOff = startByteOff;
				172	byteOff += byteOffAdjustment;
				173	throw new ConversionBufferFullException();
				174	}
				175
				176	for (int i = 0; i < outputSize; i++) {
				177	output[charOff + i] = outputChar[i];
				178	}
				179	charOff += outputSize;
				180	}
				181
				182	byteOff += byteOffAdjustment;
				183	return charOff - outOff;
				184	}
				185
				186	/*
				187	* Return the character set id
				188	*/
				189	public String getCharacterEncoding() {
				190	return "UTF8";
				191	}
				192
				193	/*
				194	* Reset after finding bad input
				195	*/
				196	public void reset() {
				197	byteOff = charOff = 0;
				198	savedSize = 0;
				199	}
				200	}