Blame - jdk/src/solaris/classes/sun/nio/cs/ext/COMPOUND_TEXT_Encoder.java - platform/libcore

blob: c0ffd5fdbd5159cde7b13c96c704100e24e00944 [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25	package sun.nio.cs.ext;
				26
				27	import java.io.ByteArrayOutputStream;
				28	import java.nio.ByteBuffer;
				29	import java.nio.CharBuffer;
				30	import java.nio.charset.*;
				31
				32	import java.util.Collections;
				33	import java.util.HashMap;
				34	import java.util.Iterator;
				35	import java.util.List;
				36	import java.util.Map;
				37
				38	public class COMPOUND_TEXT_Encoder extends CharsetEncoder {
				39
				40	/**
				41	* NOTE: The following four static variables should be used only for
				42	* testing whether a encoder can encode a specific character. They
				43	* cannot be used for actual encoding because they are shared across all
				44	* COMPOUND_TEXT encoders and may be stateful.
				45	*/
				46	private static final Map encodingToEncoderMap =
				47	Collections.synchronizedMap(new HashMap(21, 1.0f));
				48	private static final CharsetEncoder latin1Encoder;
				49	private static final CharsetEncoder defaultEncoder;
				50	private static final boolean defaultEncodingSupported;
				51
				52	static {
				53	CharsetEncoder encoder = Charset.defaultCharset().newEncoder();
				54	String encoding = encoder.charset().name();
				55	if ("ISO8859_1".equals(encoding)) {
				56	latin1Encoder = encoder;
				57	defaultEncoder = encoder;
				58	defaultEncodingSupported = true;
				59	} else {
				60	try {
				61	latin1Encoder =
				62	Charset.forName("ISO8859_1").newEncoder();
				63	} catch (IllegalArgumentException e) {
				64	throw new ExceptionInInitializerError
				65	("ISO8859_1 unsupported");
				66	}
				67	defaultEncoder = encoder;
				68	defaultEncodingSupported = CompoundTextSupport.getEncodings().
				69	contains(defaultEncoder.charset().name());
				70	}
				71	}
				72
				73	private CharsetEncoder encoder;
				74	private char[] charBuf = new char[1];
				75	private CharBuffer charbuf = CharBuffer.wrap(charBuf);
				76	private ByteArrayOutputStream nonStandardCharsetBuffer;
				77	private byte[] byteBuf;
				78	private ByteBuffer bytebuf;
				79	private int numNonStandardChars, nonStandardEncodingLen;
				80
				81	public COMPOUND_TEXT_Encoder(Charset cs) {
				82	super(cs,
				83	(float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2),
				84	(float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2));
				85	try {
				86	encoder = Charset.forName("ISO8859_1").newEncoder();
				87	} catch (IllegalArgumentException cannotHappen) {}
				88	initEncoder(encoder);
				89	}
				90
				91	protected CoderResult encodeLoop(CharBuffer src, ByteBuffer des) {
				92	CoderResult cr = CoderResult.UNDERFLOW;
				93	char[] input = src.array();
				94	int inOff = src.arrayOffset() + src.position();
				95	int inEnd = src.arrayOffset() + src.limit();
				96
				97	try {
				98	while (inOff < inEnd && cr.isUnderflow()) {
				99	charBuf[0] = input[inOff];
				100	if (charBuf[0] <= '\u0008' \|\|
				101	(charBuf[0] >= '\u000B' && charBuf[0] <= '\u001F') \|\|
				102	(charBuf[0] >= '\u0080' && charBuf[0] <= '\u009F')) {
				103	// The compound text specification only permits the octets
				104	// 0x09, 0x0A, 0x1B, and 0x9B in C0 and C1. Of these, 1B and
				105	// 9B must also be removed because they initiate control
				106	// sequences.
				107	charBuf[0] = '?';
				108	}
				109
				110	CharsetEncoder enc = getEncoder(charBuf[0]);
				111	//System.out.println("char=" + charBuf[0] + ", enc=" + enc);
				112	if (enc == null) {
				113	if (unmappableCharacterAction()
				114	== CodingErrorAction.REPORT) {
				115	charBuf[0] = '?';
				116	enc = latin1Encoder;
				117	} else {
				118	return CoderResult.unmappableForLength(1);
				119	}
				120	}
				121	if (enc != encoder) {
				122	if (nonStandardCharsetBuffer != null) {
				123	cr = flushNonStandardCharsetBuffer(des);
				124	} else {
				125	//cr= encoder.flush(des);
				126	flushEncoder(encoder, des);
				127	}
				128	if (!cr.isUnderflow())
				129	return cr;
				130	byte[] escSequence = CompoundTextSupport.
				131	getEscapeSequence(enc.charset().name());
				132	if (escSequence == null) {
				133	throw new InternalError("Unknown encoding: " +
				134	enc.charset().name());
				135	} else if (escSequence[1] == (byte)0x25 &&
				136	escSequence[2] == (byte)0x2F) {
				137	initNonStandardCharsetBuffer(enc, escSequence);
				138	} else if (des.remaining() >= escSequence.length) {
				139	des.put(escSequence, 0, escSequence.length);
				140	} else {
				141	return CoderResult.OVERFLOW;
				142	}
				143	encoder = enc;
				144	continue;
				145	}
				146	charbuf.rewind();
				147	if (nonStandardCharsetBuffer == null) {
				148	cr = encoder.encode(charbuf, des, false);
				149	} else {
				150	bytebuf.clear();
				151	cr = encoder.encode(charbuf, bytebuf, false);
				152	bytebuf.flip();
				153	nonStandardCharsetBuffer.write(byteBuf,
				154	0, bytebuf.limit());
				155	numNonStandardChars++;
				156	}
				157	inOff++;
				158	}
				159	return cr;
				160	} finally {
				161	src.position(inOff - src.arrayOffset());
				162	}
				163	}
				164
				165	protected CoderResult implFlush(ByteBuffer out) {
				166	CoderResult cr = (nonStandardCharsetBuffer != null)
				167	? flushNonStandardCharsetBuffer(out)
				168	//: encoder.flush(out);
				169	: flushEncoder(encoder, out);
				170	reset();
				171	return cr;
				172	}
				173
				174	private void initNonStandardCharsetBuffer(CharsetEncoder c,
				175	byte[] escSequence)
				176	{
				177	nonStandardCharsetBuffer = new ByteArrayOutputStream();
				178	byteBuf = new byte[(int)c.maxBytesPerChar()];
				179	bytebuf = ByteBuffer.wrap(byteBuf);
				180	nonStandardCharsetBuffer.write(escSequence, 0, escSequence.length);
				181	nonStandardCharsetBuffer.write(0); // M placeholder
				182	nonStandardCharsetBuffer.write(0); // L placeholder
				183	byte[] encoding = CompoundTextSupport.
				184	getEncoding(c.charset().name());
				185	if (encoding == null) {
				186	throw new InternalError
				187	("Unknown encoding: " + encoder.charset().name());
				188	}
				189	nonStandardCharsetBuffer.write(encoding, 0, encoding.length);
				190	nonStandardCharsetBuffer.write(0x02); // divider
				191	nonStandardEncodingLen = encoding.length + 1;
				192	}
				193
				194	private CoderResult flushNonStandardCharsetBuffer(ByteBuffer out) {
				195	if (numNonStandardChars > 0) {
				196	byte[] flushBuf = new byte[(int)encoder.maxBytesPerChar() *
				197	numNonStandardChars];
				198	ByteBuffer bb = ByteBuffer.wrap(flushBuf);
				199	flushEncoder(encoder, bb);
				200	bb.flip();
				201	nonStandardCharsetBuffer.write(flushBuf, 0, bb.limit());
				202	numNonStandardChars = 0;
				203	}
				204
				205	int numBytes = nonStandardCharsetBuffer.size();
				206	int nonStandardBytesOff = 6 + nonStandardEncodingLen;
				207
				208	if (out.remaining() < (numBytes - nonStandardBytesOff) +
				209	nonStandardBytesOff * (((numBytes - nonStandardBytesOff) /
				210	((1 << 14) - 1)) + 1))
				211	{
				212	return CoderResult.OVERFLOW;
				213	}
				214
				215	byte[] nonStandardBytes =
				216	nonStandardCharsetBuffer.toByteArray();
				217
				218	// The non-standard charset header only supports 2^14-1 bytes of data.
				219	// If we have more than that, we have to repeat the header.
				220	do {
				221	out.put((byte)0x1B);
				222	out.put((byte)0x25);
				223	out.put((byte)0x2F);
				224	out.put((byte)nonStandardBytes[3]);
				225
				226	int toWrite = Math.min(numBytes - nonStandardBytesOff,
				227	(1 << 14) - 1 - nonStandardEncodingLen);
				228
				229	out.put((byte)
				230	(((toWrite + nonStandardEncodingLen) / 0x80) \| 0x80)); // M
				231	out.put((byte)
				232	(((toWrite + nonStandardEncodingLen) % 0x80) \| 0x80)); // L
				233	out.put(nonStandardBytes, 6, nonStandardEncodingLen);
				234	out.put(nonStandardBytes, nonStandardBytesOff, toWrite);
				235	nonStandardBytesOff += toWrite;
				236	} while (nonStandardBytesOff < numBytes);
				237
				238	nonStandardCharsetBuffer = null;
				239	byteBuf = null;
				240	nonStandardEncodingLen = 0;
				241	return CoderResult.UNDERFLOW;
				242	}
				243
				244	/**
				245	* Resets the encoder.
				246	* Call this method to reset the encoder to its initial state
				247	*/
				248	protected void implReset() {
				249	numNonStandardChars = nonStandardEncodingLen = 0;
				250	nonStandardCharsetBuffer = null;
				251	byteBuf = null;
				252	try {
				253	encoder = Charset.forName("ISO8859_1").newEncoder();
				254	} catch (IllegalArgumentException cannotHappen) {
				255	}
				256	initEncoder(encoder);
				257	}
				258
				259	/**
				260	* Return whether a character is mappable or not
				261	* @return true if a character is mappable
				262	*/
				263	public boolean canEncode(char ch) {
				264	return getEncoder(ch) != null;
				265	}
				266
				267	protected void implOnMalformedInput(CodingErrorAction newAction) {
				268	encoder.onUnmappableCharacter(newAction);
				269	}
				270
				271	protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
				272	encoder.onUnmappableCharacter(newAction);
				273	}
				274
				275	protected void implReplaceWith(byte[] newReplacement) {
				276	if (encoder != null)
				277	encoder.replaceWith(newReplacement);
				278	}
				279
				280	/**
				281	* Try to figure out which CharsetEncoder to use for conversion
				282	* of the specified Unicode character. The target character encoding
				283	* of the returned encoder is approved to be used with Compound Text.
				284	*
				285	* @param ch Unicode character
				286	* @return CharsetEncoder to convert the given character
				287	*/
				288	private CharsetEncoder getEncoder(char ch) {
				289	// 1. Try the current encoder.
				290	if (encoder.canEncode(ch)) {
				291	return encoder;
				292	}
				293
				294	// 2. Try the default encoder.
				295	if (defaultEncodingSupported && defaultEncoder.canEncode(ch)) {
				296	CharsetEncoder retval = null;
				297	try {
				298	retval = defaultEncoder.charset().newEncoder();
				299	} catch (UnsupportedOperationException cannotHappen) {
				300	}
				301	initEncoder(retval);
				302	return retval;
				303	}
				304
				305	// 3. Try ISO8859-1.
				306	if (latin1Encoder.canEncode(ch)) {
				307	CharsetEncoder retval = null;
				308	try {
				309	retval = latin1Encoder.charset().newEncoder();
				310	} catch (UnsupportedOperationException cannotHappen) {}
				311	initEncoder(retval);
				312	return retval;
				313	}
				314
				315	// 4. Brute force search of all supported encodings.
				316	for (Iterator iter = CompoundTextSupport.getEncodings().iterator();
				317	iter.hasNext();)
				318	{
				319	String encoding = (String)iter.next();
				320	CharsetEncoder enc =
				321	(CharsetEncoder)encodingToEncoderMap.get(encoding);
				322	if (enc == null) {
				323	enc = CompoundTextSupport.getEncoder(encoding);
				324	if (enc == null) {
				325	throw new InternalError("Unsupported encoding: " +
				326	encoding);
				327	}
				328	encodingToEncoderMap.put(encoding, enc);
				329	}
				330	if (enc.canEncode(ch)) {
				331	CharsetEncoder retval = CompoundTextSupport.getEncoder(encoding);
				332	initEncoder(retval);
				333	return retval;
				334	}
				335	}
				336
				337	return null;
				338	}
				339
				340	private void initEncoder(CharsetEncoder enc) {
				341	try {
				342	enc.onUnmappableCharacter(CodingErrorAction.REPLACE)
				343	.replaceWith(replacement());
				344	} catch (IllegalArgumentException x) {}
				345	}
				346
				347	private CharBuffer fcb= CharBuffer.allocate(0);
				348	private CoderResult flushEncoder(CharsetEncoder enc, ByteBuffer bb) {
				349	enc.encode(fcb, bb, true);
				350	return enc.flush(bb);
				351	}
				352	}