Blame - jdk/src/share/classes/sun/io/ByteToCharEUC_TW.java - platform/libcore

blob: 064e0c3d0ab91159d508b7190861ca5780b8d873 [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 1996-2004 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25
				26	package sun.io;
				27
				28	import sun.nio.cs.ext.EUC_TW;
				29
				30	/*
				31	* @author Limin Shi
				32	*/
				33	public class ByteToCharEUC_TW extends ByteToCharConverter
				34	{
				35	private final byte G0 = 0;
				36	private final byte G1 = 1;
				37	private final byte G2 = 2;
				38	private final byte G3 = 3;
				39	private final byte G4 = 4;
				40	private final byte MSB = (byte) 0x80;
				41	private final byte SS2 = (byte) 0x8E;
				42	private final byte P2 = (byte) 0xA2;
				43	private final byte P3 = (byte) 0xA3;
				44
				45	protected final char REPLACE_CHAR = '\uFFFD';
				46
				47	private byte firstByte = 0, state = G0;
				48	public static String unicodeCNS2, unicodeCNS3;
				49	private static String unicodeCNS4, unicodeCNS5, unicodeCNS6;
				50	private static String unicodeCNS7, unicodeCNS15;
				51
				52	private int cnsPlane = 0;
				53	private final static EUC_TW nioCoder = new EUC_TW();
				54
				55	public static String unicodeCNS1 = nioCoder.getUnicodeCNS1();
				56
				57	static String[] cnsChars = {
				58	unicodeCNS2 = nioCoder.getUnicodeCNS2(),
				59	unicodeCNS3 = nioCoder.getUnicodeCNS3(),
				60	unicodeCNS4 = nioCoder.getUnicodeCNS4(),
				61	unicodeCNS5 = nioCoder.getUnicodeCNS5(),
				62	unicodeCNS6 = nioCoder.getUnicodeCNS6(),
				63	unicodeCNS7 = nioCoder.getUnicodeCNS7(),
				64	unicodeCNS15 = nioCoder.getUnicodeCNS15()
				65	};
				66
				67	public ByteToCharEUC_TW() {
				68	}
				69
				70	public int flush(char[] output, int outStart, int outEnd)
				71	throws MalformedInputException
				72	{
				73	if (state != G0) {
				74	state = G0;
				75	firstByte = 0;
				76	badInputLength = 0;
				77	throw new MalformedInputException();
				78	}
				79	reset();
				80	return 0;
				81	}
				82
				83	public void reset() {
				84	state = G0;
				85	firstByte = 0;
				86	byteOff = charOff = 0;
				87	}
				88
				89	/**
				90	* Character conversion
				91	*/
				92	public int convert(byte[] input, int inOff, int inEnd,
				93	char[] output, int outOff, int outEnd)
				94	throws UnknownCharacterException, MalformedInputException,
				95	ConversionBufferFullException
				96	{
				97	int inputSize = 0;
				98	char outputChar = (char) 0;
				99
				100	byteOff = inOff;
				101	charOff = outOff;
				102
				103	cnsPlane = 3;
				104	while (byteOff < inEnd) {
				105	if (charOff >= outEnd)
				106	throw new ConversionBufferFullException();
				107
				108	switch (state) {
				109	case G0:
				110	if ( (input[byteOff] & MSB) == 0) { // ASCII
				111	outputChar = (char) input[byteOff];
				112	} else if (input[byteOff] == SS2) { // Codeset 2
				113	state = G2;
				114	} else { // Codeset 1
				115	firstByte = input[byteOff];
				116	state = G1;
				117	}
				118	break;
				119	case G1:
				120	inputSize = 2;
				121	if ( (input[byteOff] & MSB) != 0) { // 2nd byte
				122	cnsPlane = 1;
				123	outputChar = convToUnicode(firstByte,
				124	input[byteOff], unicodeCNS1);
				125	} else { // Error
				126	badInputLength = 1;
				127	throw new MalformedInputException();
				128	}
				129	firstByte = 0;
				130	state = G0;
				131	break;
				132	case G2:
				133	cnsPlane = (input[byteOff] & (byte)0x0f);
				134	// Adjust String array index for plan 15
				135	cnsPlane = (cnsPlane == 15)? 8 : cnsPlane;
				136
				137	if (cnsPlane < 15) {
				138	state = G3;
				139	} else {
				140	badInputLength = 2;
				141	throw new MalformedInputException();
				142	}
				143
				144	break;
				145	case G3:
				146	if ( (input[byteOff] & MSB) != 0) { // 1st byte
				147	firstByte = input[byteOff];
				148	state = G4;
				149	} else { // Error
				150	state = G0;
				151	badInputLength = 2;
				152	throw new MalformedInputException();
				153	}
				154	break;
				155	case G4:
				156	if ( (input[byteOff] & MSB) != 0) { // 2nd byte
				157	outputChar = convToUnicode(firstByte,
				158	input[byteOff],
				159	cnsChars[cnsPlane - 2]);
				160	} else { // Error
				161	badInputLength = 3;
				162	throw new MalformedInputException();
				163	}
				164	firstByte = 0;
				165	state = G0;
				166	break;
				167	}
				168	byteOff++;
				169
				170	if (outputChar != (char) 0) {
				171	if (outputChar == REPLACE_CHAR) {
				172	if (subMode) // substitution enabled
				173	outputChar = subChars[0];
				174	else {
				175	badInputLength = inputSize;
				176	throw new UnknownCharacterException();
				177	}
				178	}
				179	output[charOff++] = outputChar;
				180	outputChar = 0;
				181	}
				182	}
				183
				184	return charOff - outOff;
				185	}
				186
				187
				188	/**
				189	* Return the character set ID
				190	*/
				191	public String getCharacterEncoding() {
				192	return "EUC_TW";
				193	}
				194
				195	protected char convToUnicode(byte byte1, byte byte2, String table)
				196	{
				197	int index;
				198
				199	if ((byte1 & 0xff) < 0xa1 \|\| (byte2 & 0xff) < 0xa1 \|\|
				200	(byte1 & 0xff) > 0xfe \|\| (byte2 & 0xff) > 0xfe)
				201	return REPLACE_CHAR;
				202	index = (((byte1 & 0xff) - 0xa1) * 94) + (byte2 & 0xff) - 0xa1;
				203	if (index < 0 \|\| index >= table.length())
				204	return REPLACE_CHAR;
				205
				206	// Planes 3 and above containing zero value lead byte
				207	// to accommodate surrogates for mappings which decode to a surrogate
				208	// pair
				209
				210	if (this.cnsPlane >= 3)
				211	index = (index * 2) + 1;
				212
				213	return table.charAt(index);
				214	}
				215	}