Blame - jdk/src/share/classes/sun/io/CharToByteSingleByte.java - platform/libcore

blob: 0812b6822e24db7d33a91cd1fe7f17e77a7d5b6b [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 1996-2002 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25
				26	package sun.io;
				27
				28	/**
				29	* A table driven conversion from char to byte for single byte
				30	* character sets. Tables will reside in the class CharToByteYYYYY,
				31	* where YYYYY is a unique character set identifier
				32
				33	< TBD: Tables are of the form... >
				34
				35	*
				36	* @author Lloyd Honomichl
				37	* @author Asmus Freytag
				38	*/
				39
				40	public abstract class CharToByteSingleByte extends CharToByteConverter {
				41
				42	/*
				43	* 1st level index, provided by subclass
				44	*/
				45	protected short index1[];
				46
				47	/*
				48	* 2nd level index, provided by subclass
				49	*/
				50	protected String index2;
				51
				52	/*
				53	* Mask to isolate bits for 1st level index, from subclass
				54	*/
				55	protected int mask1;
				56
				57	/*
				58	* Mask to isolate bits for 2nd level index, from subclass
				59	*/
				60	protected int mask2;
				61
				62	/*
				63	* Shift to isolate bits for 1st level index, from subclass
				64	*/
				65	protected int shift;
				66
				67	private char highHalfZoneCode;
				68
				69	public short[] getIndex1() {
				70	return index1;
				71	}
				72
				73	public String getIndex2() {
				74	return index2;
				75	}
				76	public int flush(byte[] output, int outStart, int outEnd)
				77	throws MalformedInputException
				78	{
				79	if (highHalfZoneCode != 0) {
				80	highHalfZoneCode = 0;
				81	badInputLength = 0;
				82	throw new MalformedInputException();
				83	}
				84	byteOff = charOff = 0;
				85	return 0;
				86	}
				87
				88	/**
				89	* Converts characters to sequences of bytes.
				90	* Conversions that result in Exceptions can be restarted by calling
				91	* convert again, with appropriately modified parameters.
				92	* @return the characters written to output.
				93	* @param input char array containing text in Unicode
				94	* @param inStart offset in input array
				95	* @param inEnd offset of last byte to be converted
				96	* @param output byte array to receive conversion result
				97	* @param outStart starting offset
				98	* @param outEnd offset of last byte to be written to
				99	* @throw MalformedInputException for any sequence of chars that is
				100	* illegal in Unicode (principally unpaired surrogates
				101	* and \uFFFF or \uFFFE), including any partial surrogate pair
				102	* which occurs at the end of an input buffer.
				103	* @throw UnsupportedCharacterException for any character that
				104	* that cannot be converted to the external character set.
				105	*/
				106	public int convert(char[] input, int inOff, int inEnd,
				107	byte[] output, int outOff, int outEnd)
				108	throws MalformedInputException,
				109	UnknownCharacterException,
				110	ConversionBufferFullException
				111	{
				112	char inputChar; // Input character to be converted
				113	byte[] outputByte; // Output byte written to output
				114	int inputSize; // Size of input
				115	int outputSize; // Size of output
				116
				117	byte[] tmpArray = new byte[1];
				118
				119	// Record beginning offsets
				120	charOff = inOff;
				121	byteOff = outOff;
				122
				123	if (highHalfZoneCode != 0) {
				124	inputChar = highHalfZoneCode;
				125	highHalfZoneCode = 0;
				126	if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
				127	// This is legal UTF16 sequence.
				128	badInputLength = 1;
				129	throw new UnknownCharacterException();
				130	} else {
				131	// This is illegal UTF16 sequence.
				132	badInputLength = 0;
				133	throw new MalformedInputException();
				134	}
				135	}
				136
				137	// Loop until we hit the end of the input
				138	while(charOff < inEnd) {
				139
				140	outputByte = tmpArray;
				141
				142	// Get the input character
				143	inputChar = input[charOff];
				144
				145	// Default output size
				146	outputSize = 1;
				147
				148	// Assume this is a simple character
				149	inputSize = 1;
				150
				151	// Is this a high surrogate?
				152	if(inputChar >= '\uD800' && inputChar <= '\uDBFF') {
				153	// Is this the last character in the input?
				154	if (charOff + 1 >= inEnd) {
				155	highHalfZoneCode = inputChar;
				156	break;
				157	}
				158
				159	// Is there a low surrogate following?
				160	inputChar = input[charOff + 1];
				161	if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
				162	// We have a valid surrogate pair. Too bad we don't map
				163	// surrogates. Is substitution enabled?
				164	if (subMode) {
				165	outputByte = subBytes;
				166	outputSize = subBytes.length;
				167	inputSize = 2;
				168	} else {
				169	badInputLength = 2;
				170	throw new UnknownCharacterException();
				171	}
				172	} else {
				173	// We have a malformed surrogate pair
				174	badInputLength = 1;
				175	throw new MalformedInputException();
				176	}
				177	}
				178
				179	// Is this an unaccompanied low surrogate?
				180	else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
				181	badInputLength = 1;
				182	throw new MalformedInputException();
				183	}
				184
				185	// Not part of a surrogate, so look it up
				186	else {
				187	// Get output using two level lookup
				188	outputByte[0] = getNative(inputChar);
				189
				190	// Might this character be unmappable?
				191	if (outputByte[0] == 0) {
				192	// If outputByte is zero because the input was zero
				193	// then this character is actually mappable
				194	if (input[charOff] != '\u0000') {
				195	// We have an unmappable character
				196	// Is substitution enabled?
				197	if (subMode) {
				198	outputByte = subBytes;
				199	outputSize = subBytes.length;
				200	} else {
				201	badInputLength = 1;
				202	throw new UnknownCharacterException();
				203	}
				204	}
				205	}
				206	}
				207
				208	// If we don't have room for the output, throw an exception
				209	if (byteOff + outputSize > outEnd)
				210	throw new ConversionBufferFullException();
				211
				212	// Put the byte in the output buffer
				213	for (int i = 0; i < outputSize; i++) {
				214	output[byteOff++] = outputByte[i];
				215	}
				216	charOff += inputSize;
				217
				218	}
				219
				220	// Return the length written to the output buffer
				221	return byteOff - outOff;
				222	}
				223
				224	/**
				225	* the maximum number of bytes needed to hold a converted char
				226	* @returns the maximum number of bytes needed for a converted char
				227	*/
				228	public int getMaxBytesPerChar() {
				229	return 1;
				230	}
				231
				232	public byte getNative(char inputChar) {
				233	return (byte)index2.charAt(index1[(inputChar & mask1) >> shift]
				234	+ (inputChar & mask2));
				235	}
				236
				237	/**
				238	* Resets the converter.
				239	* Call this method to reset the converter to its initial state
				240	*/
				241	public void reset() {
				242	byteOff = charOff = 0;
				243	highHalfZoneCode = 0;
				244	}
				245
				246	/**
				247	* Return whether a character is mappable or not
				248	* @return true if a character is mappable
				249	*/
				250	public boolean canConvert(char ch) {
				251	// Look it up in the table
				252	if (index2.charAt(index1[((ch & mask1) >> shift)] + (ch & mask2)) != '\u0000')
				253	return true;
				254
				255	// Nulls are always mappable
				256	return (ch == '\u0000');
				257	}
				258	}