Blame - jdk/src/share/classes/sun/io/CharToByteISO2022JP.java - platform/libcore

blob: 2def9b0cdaf0f8a59833e798b6b9a898ef238f81 [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 1996-1999 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25
				26	package sun.io;
				27	import java.io.*;
				28
				29	public class CharToByteISO2022JP extends CharToByteJIS0208 {
				30
				31	private static final int ASCII = 0; // ESC ( B
				32	private static final int JISX0201_1976 = 1; // ESC ( J
				33	private static final int JISX0208_1978 = 2; // ESC $ @
				34	private static final int JISX0208_1983 = 3; // ESC $ B
				35	private static final int JISX0201_1976_KANA = 4; // ESC ( I
				36
				37	private char highHalfZoneCode;
				38	private boolean flushed = true;
				39
				40	// JIS is state full encoding, so currentMode keep the
				41	// current codeset
				42	private int currentMode = ASCII;
				43
				44	/**
				45	* Bytes for substitute for unmappable input.
				46	*/
				47	// XXX: Assumes subBytes are ASCII string. Need to change Escape sequence
				48	// for other character sets.
				49	protected byte[] subBytesEscape = { (byte)0x1b, (byte)0x28, (byte)0x42 }; // ESC ( B
				50	protected int subBytesMode = ASCII;
				51
				52	public int flush(byte[] output, int outStart, int outEnd)
				53	throws MalformedInputException, ConversionBufferFullException
				54	{
				55	if (highHalfZoneCode != 0) {
				56	highHalfZoneCode = 0;
				57	badInputLength = 0;
				58	throw new MalformedInputException();
				59	}
				60
				61	if (!flushed && (currentMode != ASCII)) {
				62	if (outEnd - outStart < 3) {
				63	throw new ConversionBufferFullException();
				64	}
				65	output[outStart] = (byte)0x1b;
				66	output[outStart + 1] = (byte)0x28;
				67	output[outStart + 2] = (byte)0x42;
				68	byteOff += 3;
				69	byteOff = charOff = 0;
				70	flushed = true;
				71	currentMode = ASCII;
				72	return 3;
				73	}
				74	return 0;
				75	}
				76
				77	public int convert(char[] input, int inOff, int inEnd,
				78	byte[] output, int outOff, int outEnd)
				79	throws MalformedInputException, UnknownCharacterException,
				80	ConversionBufferFullException
				81
				82	{
				83	char inputChar; // Input character to be converted
				84	int inputSize; // Size of the input
				85	int outputSize; // Size of the output
				86
				87	// Buffer for output bytes
				88	byte[] tmpArray = new byte[6];
				89	byte[] outputByte;
				90
				91	flushed = false;
				92
				93	// Make copies of input and output indexes
				94	charOff = inOff;
				95	byteOff = outOff;
				96
				97	if (highHalfZoneCode != 0) {
				98	inputChar = highHalfZoneCode;
				99	highHalfZoneCode = 0;
				100	if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
				101	// This is legal UTF16 sequence.
				102	badInputLength = 1;
				103	throw new UnknownCharacterException();
				104	} else {
				105	// This is illegal UTF16 sequence.
				106	badInputLength = 0;
				107	throw new MalformedInputException();
				108	}
				109	}
				110
				111	// Loop until we run out of input
				112	while(charOff < inEnd) {
				113	outputByte = tmpArray;
				114	int newMode = currentMode; // Trace character mode changing
				115
				116	// Get the input character
				117	inputChar = input[charOff];
				118	inputSize = 1;
				119	outputSize = 1;
				120
				121	// Is this a high surrogate?
				122	if(inputChar >= '\uD800' && inputChar <= '\uDBFF') {
				123	// Is this the last character of the input?
				124	if (charOff + 1 >= inEnd) {
				125	highHalfZoneCode = inputChar;
				126	break;
				127	}
				128
				129	// Is there a low surrogate following?
				130	inputChar = input[charOff + 1];
				131	if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
				132	// We have a valid surrogate pair. Too bad we don't do
				133	// surrogates. Is substitution enabled?
				134	if (subMode) {
				135	if (currentMode != subBytesMode) {
				136	System.arraycopy(subBytesEscape, 0, outputByte, 0,
				137	subBytesEscape.length);
				138	outputSize = subBytesEscape.length;
				139	System.arraycopy(subBytes, 0, outputByte,
				140	outputSize, subBytes.length);
				141	outputSize += subBytes.length;
				142	newMode = subBytesMode;
				143	} else {
				144	outputByte = subBytes;
				145	outputSize = subBytes.length;
				146	}
				147	inputSize = 2;
				148	} else {
				149	badInputLength = 2;
				150	throw new UnknownCharacterException();
				151	}
				152	} else {
				153	// We have a malformed surrogate pair
				154	badInputLength = 1;
				155	throw new MalformedInputException();
				156	}
				157	}
				158
				159	// Is this an unaccompanied low surrogate?
				160	else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
				161	badInputLength = 1;
				162	throw new MalformedInputException();
				163	} else {
				164	// Not part of a surrogate
				165
				166	// Does this map to the Roman range?
				167	if (inputChar <= '\u007F') {
				168	if (currentMode != ASCII) {
				169	outputByte[0] = (byte)0x1b;
				170	outputByte[1] = (byte)0x28;
				171	outputByte[2] = (byte)0x42;
				172	outputByte[3] = (byte)inputChar;
				173	outputSize = 4;
				174	newMode = ASCII;
				175	} else {
				176	outputByte[0] = (byte)inputChar;
				177	outputSize = 1;
				178	}
				179	}
				180	// Is it a single byte kana?
				181	else if (inputChar >= 0xFF61 && inputChar <= 0xFF9F) {
				182	if (currentMode != JISX0201_1976_KANA) {
				183	outputByte[0] = (byte)0x1b;
				184	outputByte[1] = (byte)0x28;
				185	outputByte[2] = (byte)0x49;
				186	outputByte[3] = (byte)(inputChar - 0xff40);
				187	outputSize = 4;
				188	newMode = JISX0201_1976_KANA;
				189	} else {
				190	outputByte[0] = (byte)(inputChar - 0xff40);
				191	outputSize = 1;
				192	}
				193	}
				194	// Is it a yen sign?
				195	else if (inputChar == '\u00A5') {
				196	if (currentMode != JISX0201_1976) {
				197	outputByte[0] = (byte)0x1b;
				198	outputByte[1] = (byte)0x28;
				199	outputByte[2] = (byte)0x4a;
				200	outputByte[3] = (byte)0x5c;
				201	outputSize = 4;
				202	newMode = JISX0201_1976;
				203	} else {
				204	outputByte[0] = (byte)0x5C;
				205	outputSize = 1;
				206	}
				207	}
				208	// Is it a tilde?
				209	else if (inputChar == '\u203E')
				210	{
				211	if (currentMode != JISX0201_1976) {
				212	outputByte[0] = (byte)0x1b;
				213	outputByte[1] = (byte)0x28;
				214	outputByte[2] = (byte)0x4a;
				215	outputByte[3] = (byte)0x7e;
				216	outputSize = 4;
				217	newMode = JISX0201_1976;
				218	} else {
				219	outputByte[0] = (byte)0x7e;
				220	outputSize = 1;
				221	}
				222	}
				223	// Is it a JIS-X-0208 character?
				224	else {
				225	int index = getNative(inputChar);
				226	if (index != 0) {
				227	if (currentMode != JISX0208_1983) {
				228	outputByte[0] = (byte)0x1b;
				229	outputByte[1] = (byte)0x24;
				230	outputByte[2] = (byte)0x42;
				231	outputByte[3] = (byte)(index >> 8);
				232	outputByte[4] = (byte)(index & 0xff);
				233	outputSize = 5;
				234	newMode = JISX0208_1983;
				235	} else {
				236	outputByte[0] = (byte)(index >> 8);
				237	outputByte[1] = (byte)(index & 0xff);
				238	outputSize = 2;
				239	}
				240	}
				241	// It doesn't map to JIS-0208!
				242	else {
				243	if (subMode) {
				244	if (currentMode != subBytesMode) {
				245	System.arraycopy(subBytesEscape, 0, outputByte, 0,
				246	subBytesEscape.length);
				247	outputSize = subBytesEscape.length;
				248	System.arraycopy(subBytes, 0, outputByte,
				249	outputSize, subBytes.length);
				250	outputSize += subBytes.length;
				251	newMode = subBytesMode;
				252	} else {
				253	outputByte = subBytes;
				254	outputSize = subBytes.length;
				255	}
				256	} else {
				257	badInputLength = 1;
				258	throw new UnknownCharacterException();
				259	}
				260	}
				261	}
				262	}
				263
				264	// Is there room in the output buffer?
				265	// XXX: The code assumes output buffer can hold at least 5 bytes,
				266	// in this coverter case. However, there is no way for apps to
				267	// see how many bytes will be necessary for next call.
				268	// getMaxBytesPerChar() should be overriden in every subclass of
				269	// CharToByteConverter and reflect real value (5 for this).
				270	if (byteOff + outputSize > outEnd)
				271	throw new ConversionBufferFullException();
				272
				273	// Put the output into the buffer
				274	for ( int i = 0 ; i < outputSize ; i++ )
				275	output[byteOff++] = outputByte[i];
				276
				277	// Advance the input pointer
				278	charOff += inputSize;
				279
				280	// We can successfuly output the characters, changes
				281	// current mode. Fix for 4251646.
				282	currentMode = newMode;
				283	}
				284
				285	// return mode ASCII at the end
				286	if (currentMode != ASCII){
				287	if (byteOff + 3 > outEnd)
				288	throw new ConversionBufferFullException();
				289
				290	output[byteOff++] = 0x1b;
				291	output[byteOff++] = 0x28;
				292	output[byteOff++] = 0x42;
				293	currentMode = ASCII;
				294	}
				295
				296	// Return the length written to the output buffer
				297	return byteOff-outOff;
				298	}
				299
				300	// Reset
				301	public void reset() {
				302	highHalfZoneCode = 0;
				303	byteOff = charOff = 0;
				304	currentMode = ASCII;
				305	}
				306
				307	/**
				308	* returns the maximum number of bytes needed to convert a char
				309	*/
				310	public int getMaxBytesPerChar() {
				311	return 8;
				312	}
				313
				314	// Return the character set ID
				315	public String getCharacterEncoding() {
				316	return "ISO2022JP";
				317	}
				318
				319	}