Blame - jdk/src/share/classes/sun/io/CharToByteCp949C.java - platform/libcore

blob: c236d21d01bcfc9542eb4ce9f69004ff4c90b5c3 [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25
				26	package sun.io;
				27
				28	import sun.nio.cs.ext.IBM949C;
				29
				30	/**
				31	* @author Malcolm Ayres
				32	*/
				33
				34	/*
				35	Cp949C is a hand-modified version of Cp949 that
				36	maps Unicode U+005C <-> 0x5C (local code page).
				37	*/
				38
				39	public class CharToByteCp949C extends CharToByteConverter
				40	{
				41	private static final char SBase = '\uAC00';
				42	private static final char LBase = '\u1100';
				43	private static final char VBase = '\u1161';
				44	private static final char TBase = '\u11A7';
				45	private static final int VCount = 21;
				46	private static final int TCount = 28;
				47	private static final byte G0 = 0;
				48	private static final byte G1 = 1;
				49	private static final byte G2 = 2;
				50	private static final byte G3 = 3;
				51	private byte charState = G0;
				52	private char l, v, t;
				53
				54	private byte[] outputByte;
				55
				56	private char highHalfZoneCode;
				57	private int mask1;
				58	private int mask2;
				59	private int shift;
				60	private short[] index1;
				61	private String index2;
				62	private String index2a;
				63
				64	private final static IBM949C nioCoder = new IBM949C();
				65
				66	public CharToByteCp949C() {
				67	super();
				68	index1 = nioCoder.getEncoderIndex1();
				69	index2 = nioCoder.getEncoderIndex2();
				70	index2a = nioCoder.getEncoderIndex2a();
				71	highHalfZoneCode = 0;
				72	outputByte = new byte[2];
				73	mask1 = 0xFFF8;
				74	mask2 = 0x0007;
				75	shift = 3;
				76	}
				77
				78	/**
				79	* flush out any residual data and reset the buffer state
				80	*/
				81	public int flush(byte[] output, int outStart, int outEnd)
				82	throws MalformedInputException,
				83	ConversionBufferFullException
				84	{
				85	int bytesOut;
				86
				87	byteOff = outStart;
				88
				89	if (highHalfZoneCode != 0) {
				90	reset();
				91	badInputLength = 0;
				92	throw new MalformedInputException();
				93	}
				94
				95	if (charState != G0) {
				96	try {
				97	unicodeToBuffer(composeHangul() ,output, outEnd);
				98	}
				99	catch(UnknownCharacterException e) {
				100	reset();
				101	badInputLength = 0;
				102	throw new MalformedInputException();
				103	}
				104	charState = G0;
				105	}
				106
				107	bytesOut = byteOff - outStart;
				108
				109	reset();
				110	return bytesOut;
				111	}
				112
				113	/**
				114	* Resets converter to its initial state.
				115	*/
				116	public void reset() {
				117	highHalfZoneCode = 0;
				118	charState = G0;
				119	charOff = byteOff = 0;
				120	}
				121
				122	/**
				123	* Returns true if the given character can be converted to the
				124	* target character encoding.
				125	*/
				126	public boolean canConvert(char ch) {
				127	int index;
				128	int theBytes;
				129
				130	index = index1[((ch & mask1) >> shift)] + (ch & mask2);
				131	if (index < 15000)
				132	theBytes = (int)(index2.charAt(index));
				133	else
				134	theBytes = (int)(index2a.charAt(index-15000));
				135
				136	if (theBytes != 0)
				137	return (true);
				138
				139	// only return true if input char was unicode null - all others are
				140	// undefined
				141	return( ch == '\u0000');
				142	}
				143
				144	/**
				145	* Character conversion
				146	*/
				147
				148	public int convert(char[] input, int inOff, int inEnd,
				149	byte[] output, int outOff, int outEnd)
				150	throws UnknownCharacterException, MalformedInputException,
				151	ConversionBufferFullException
				152	{
				153	char inputChar;
				154	int inputSize;
				155
				156	charOff = inOff;
				157	byteOff = outOff;
				158
				159	while (charOff < inEnd) {
				160
				161	if (highHalfZoneCode == 0) {
				162	inputChar = input[charOff];
				163	inputSize = 1;
				164	} else {
				165	inputChar = highHalfZoneCode;
				166	inputSize = 0;
				167	highHalfZoneCode = 0;
				168	}
				169
				170	switch (charState) {
				171	case G0:
				172
				173	l = LBase;
				174	v = VBase;
				175	t = TBase;
				176
				177	if ( isLeadingC(inputChar) ) { // Leading Consonant
				178	l = inputChar;
				179	charState = G1;
				180	break;
				181	}
				182
				183	if ( isVowel(inputChar) ) { // Vowel
				184	v = inputChar;
				185	charState = G2;
				186	break;
				187	}
				188
				189	if ( isTrailingC(inputChar) ) { // Trailing Consonant
				190	t = inputChar;
				191	charState = G3;
				192	break;
				193	}
				194
				195	break;
				196
				197	case G1:
				198	if ( isLeadingC(inputChar) ) { // Leading Consonant
				199	l = composeLL(l, inputChar);
				200	break;
				201	}
				202
				203	if ( isVowel(inputChar) ) { // Vowel
				204	v = inputChar;
				205	charState = G2;
				206	break;
				207	}
				208
				209	if ( isTrailingC(inputChar) ) { // Trailing Consonant
				210	t = inputChar;
				211	charState = G3;
				212	break;
				213	}
				214
				215	unicodeToBuffer(composeHangul(), output, outEnd);
				216
				217	charState = G0;
				218	break;
				219
				220	case G2:
				221	if ( isLeadingC(inputChar) ) { // Leading Consonant
				222
				223	unicodeToBuffer(composeHangul(), output, outEnd);
				224
				225	l = inputChar;
				226	v = VBase;
				227	t = TBase;
				228	charState = G1;
				229	break;
				230	}
				231
				232	if ( isVowel(inputChar) ) { // Vowel
				233	v = composeVV(l, inputChar);
				234	charState = G2;
				235	break;
				236	}
				237
				238	if ( isTrailingC(inputChar) ) { // Trailing Consonant
				239	t = inputChar;
				240	charState = G3;
				241	break;
				242	}
				243
				244	unicodeToBuffer(composeHangul(), output, outEnd);
				245
				246	charState = G0;
				247
				248	break;
				249
				250	case G3:
				251	if ( isTrailingC(inputChar) ) { // Trailing Consonant
				252	t = composeTT(t, inputChar);
				253	charState = G3;
				254	break;
				255	}
				256
				257	unicodeToBuffer(composeHangul(), output, outEnd);
				258
				259	charState = G0;
				260
				261	break;
				262	}
				263
				264	if (charState != G0)
				265	charOff++;
				266	else {
				267
				268	// Is this a high surrogate?
				269	if(inputChar >= '\ud800' && inputChar <= '\udbff') {
				270	// Is this the last character of the input?
				271	if (charOff + inputSize >= inEnd) {
				272	highHalfZoneCode = inputChar;
				273	charOff += inputSize;
				274	break;
				275	}
				276
				277	// Is there a low surrogate following?
				278	inputChar = input[charOff + inputSize];
				279	if (inputChar >= '\udc00' && inputChar <= '\udfff') {
				280	// We have a valid surrogate pair. Too bad we don't do
				281	// surrogates. Is substitution enabled?
				282	if (subMode) {
				283	if (subBytes.length == 1) {
				284	outputByte[0] = 0x00;
				285	outputByte[1] = subBytes[0];
				286	} else {
				287	outputByte[0] = subBytes[0];
				288	outputByte[1] = subBytes[1];
				289	}
				290
				291	bytesToBuffer(outputByte, output, outEnd);
				292	inputSize++;
				293	} else {
				294	badInputLength = 2;
				295	throw new UnknownCharacterException();
				296	}
				297	} else {
				298	// We have a malformed surrogate pair
				299	badInputLength = 1;
				300	throw new MalformedInputException();
				301	}
				302	}
				303
				304	// Is this an unaccompanied low surrogate?
				305	else
				306	if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
				307	badInputLength = 1;
				308	throw new MalformedInputException();
				309	} else {
				310	unicodeToBuffer(inputChar, output, outEnd);
				311	}
				312
				313	charOff += inputSize;
				314
				315	}
				316
				317	}
				318
				319	return byteOff - outOff;
				320
				321	}
				322
				323	private char composeHangul() {
				324	int lIndex, vIndex, tIndex;
				325
				326	lIndex = l - LBase;
				327	vIndex = v - VBase;
				328	tIndex = t - TBase;
				329
				330	return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase);
				331	}
				332
				333	private char composeLL(char l1, char l2) {
				334	return l2;
				335	}
				336
				337	private char composeVV(char v1, char v2) {
				338	return v2;
				339	}
				340
				341	private char composeTT(char t1, char t2) {
				342	return t2;
				343	}
				344
				345	private boolean isLeadingC(char c) {
				346	return (c >= LBase && c <= '\u1159');
				347	}
				348
				349	private boolean isVowel(char c) {
				350	return (c >= VBase && c <= '\u11a2');
				351	}
				352
				353	private boolean isTrailingC(char c) {
				354	return (c >= TBase && c <= '\u11f9');
				355	}
				356
				357	/**
				358	* returns the maximum number of bytes needed to convert a char
				359	*/
				360	public int getMaxBytesPerChar() {
				361	return 2;
				362	}
				363
				364
				365	/**
				366	* Return the character set ID
				367	*/
				368	public String getCharacterEncoding() {
				369	return "Cp949C";
				370	}
				371
				372	/**
				373	* private function to add the bytes to the output buffer
				374	*/
				375	private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd)
				376	throws ConversionBufferFullException,
				377	UnknownCharacterException {
				378
				379	int spaceNeeded;
				380
				381	// ensure sufficient space for the bytes(s)
				382
				383	if (theBytes[0] == 0x00)
				384	spaceNeeded = 1;
				385	else
				386	spaceNeeded = 2;
				387
				388	if (byteOff + spaceNeeded > outEnd)
				389	throw new ConversionBufferFullException();
				390
				391	// move the data into the buffer
				392
				393	if (spaceNeeded == 1)
				394	output[byteOff++] = theBytes[1];
				395	else {
				396	output[byteOff++] = theBytes[0];
				397	output[byteOff++] = theBytes[1];
				398	}
				399
				400	}
				401
				402	/**
				403	* private function to add a unicode character to the output buffer
				404	*/
				405	private void unicodeToBuffer(char unicode, byte[] output, int outEnd)
				406	throws ConversionBufferFullException,
				407	UnknownCharacterException {
				408
				409	int index;
				410	int theBytes;
				411
				412	// first we convert the unicode to its byte representation
				413
				414	index = index1[((unicode & mask1) >> shift)] + (unicode & mask2);
				415	if (index < 15000)
				416	theBytes = (int)(index2.charAt(index));
				417	else
				418	theBytes = (int)(index2a.charAt(index-15000));
				419	outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8);
				420	outputByte[1] = (byte)(theBytes & 0x000000ff);
				421
				422	// if the unicode was not mappable - look for the substitution bytes
				423
				424	if (outputByte[0] == 0x00 && outputByte[1] == 0x00
				425	&& unicode != '\u0000') {
				426	if (subMode) {
				427	if (subBytes.length == 1) {
				428	outputByte[0] = 0x00;
				429	outputByte[1] = subBytes[0];
				430	} else {
				431	outputByte[0] = subBytes[0];
				432	outputByte[1] = subBytes[1];
				433	}
				434	} else {
				435	badInputLength = 1;
				436	throw new UnknownCharacterException();
				437	}
				438	}
				439
				440	// now put the bytes in the buffer
				441
				442	bytesToBuffer(outputByte, output, outEnd);
				443
				444	}
				445	}