blob: c0ffd5fdbd5159cde7b13c96c704100e24e00944 [file] [log] [blame]
/*
 * Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
25package sun.nio.cs.ext;
26
27import java.io.ByteArrayOutputStream;
28import java.nio.ByteBuffer;
29import java.nio.CharBuffer;
30import java.nio.charset.*;
31
32import java.util.Collections;
33import java.util.HashMap;
34import java.util.Iterator;
35import java.util.List;
36import java.util.Map;
37
38public class COMPOUND_TEXT_Encoder extends CharsetEncoder {
39
40 /**
41 * NOTE: The following four static variables should be used *only* for
42 * testing whether a encoder can encode a specific character. They
43 * cannot be used for actual encoding because they are shared across all
44 * COMPOUND_TEXT encoders and may be stateful.
45 */
46 private static final Map encodingToEncoderMap =
47 Collections.synchronizedMap(new HashMap(21, 1.0f));
48 private static final CharsetEncoder latin1Encoder;
49 private static final CharsetEncoder defaultEncoder;
50 private static final boolean defaultEncodingSupported;
51
52 static {
53 CharsetEncoder encoder = Charset.defaultCharset().newEncoder();
54 String encoding = encoder.charset().name();
55 if ("ISO8859_1".equals(encoding)) {
56 latin1Encoder = encoder;
57 defaultEncoder = encoder;
58 defaultEncodingSupported = true;
59 } else {
60 try {
61 latin1Encoder =
62 Charset.forName("ISO8859_1").newEncoder();
63 } catch (IllegalArgumentException e) {
64 throw new ExceptionInInitializerError
65 ("ISO8859_1 unsupported");
66 }
67 defaultEncoder = encoder;
68 defaultEncodingSupported = CompoundTextSupport.getEncodings().
69 contains(defaultEncoder.charset().name());
70 }
71 }
72
73 private CharsetEncoder encoder;
74 private char[] charBuf = new char[1];
75 private CharBuffer charbuf = CharBuffer.wrap(charBuf);
76 private ByteArrayOutputStream nonStandardCharsetBuffer;
77 private byte[] byteBuf;
78 private ByteBuffer bytebuf;
79 private int numNonStandardChars, nonStandardEncodingLen;
80
81 public COMPOUND_TEXT_Encoder(Charset cs) {
82 super(cs,
83 (float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2),
84 (float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2));
85 try {
86 encoder = Charset.forName("ISO8859_1").newEncoder();
87 } catch (IllegalArgumentException cannotHappen) {}
88 initEncoder(encoder);
89 }
90
91 protected CoderResult encodeLoop(CharBuffer src, ByteBuffer des) {
92 CoderResult cr = CoderResult.UNDERFLOW;
93 char[] input = src.array();
94 int inOff = src.arrayOffset() + src.position();
95 int inEnd = src.arrayOffset() + src.limit();
96
97 try {
98 while (inOff < inEnd && cr.isUnderflow()) {
99 charBuf[0] = input[inOff];
100 if (charBuf[0] <= '\u0008' ||
101 (charBuf[0] >= '\u000B' && charBuf[0] <= '\u001F') ||
102 (charBuf[0] >= '\u0080' && charBuf[0] <= '\u009F')) {
103 // The compound text specification only permits the octets
104 // 0x09, 0x0A, 0x1B, and 0x9B in C0 and C1. Of these, 1B and
105 // 9B must also be removed because they initiate control
106 // sequences.
107 charBuf[0] = '?';
108 }
109
110 CharsetEncoder enc = getEncoder(charBuf[0]);
111 //System.out.println("char=" + charBuf[0] + ", enc=" + enc);
112 if (enc == null) {
113 if (unmappableCharacterAction()
114 == CodingErrorAction.REPORT) {
115 charBuf[0] = '?';
116 enc = latin1Encoder;
117 } else {
118 return CoderResult.unmappableForLength(1);
119 }
120 }
121 if (enc != encoder) {
122 if (nonStandardCharsetBuffer != null) {
123 cr = flushNonStandardCharsetBuffer(des);
124 } else {
125 //cr= encoder.flush(des);
126 flushEncoder(encoder, des);
127 }
128 if (!cr.isUnderflow())
129 return cr;
130 byte[] escSequence = CompoundTextSupport.
131 getEscapeSequence(enc.charset().name());
132 if (escSequence == null) {
133 throw new InternalError("Unknown encoding: " +
134 enc.charset().name());
135 } else if (escSequence[1] == (byte)0x25 &&
136 escSequence[2] == (byte)0x2F) {
137 initNonStandardCharsetBuffer(enc, escSequence);
138 } else if (des.remaining() >= escSequence.length) {
139 des.put(escSequence, 0, escSequence.length);
140 } else {
141 return CoderResult.OVERFLOW;
142 }
143 encoder = enc;
144 continue;
145 }
146 charbuf.rewind();
147 if (nonStandardCharsetBuffer == null) {
148 cr = encoder.encode(charbuf, des, false);
149 } else {
150 bytebuf.clear();
151 cr = encoder.encode(charbuf, bytebuf, false);
152 bytebuf.flip();
153 nonStandardCharsetBuffer.write(byteBuf,
154 0, bytebuf.limit());
155 numNonStandardChars++;
156 }
157 inOff++;
158 }
159 return cr;
160 } finally {
161 src.position(inOff - src.arrayOffset());
162 }
163 }
164
165 protected CoderResult implFlush(ByteBuffer out) {
166 CoderResult cr = (nonStandardCharsetBuffer != null)
167 ? flushNonStandardCharsetBuffer(out)
168 //: encoder.flush(out);
169 : flushEncoder(encoder, out);
170 reset();
171 return cr;
172 }
173
174 private void initNonStandardCharsetBuffer(CharsetEncoder c,
175 byte[] escSequence)
176 {
177 nonStandardCharsetBuffer = new ByteArrayOutputStream();
178 byteBuf = new byte[(int)c.maxBytesPerChar()];
179 bytebuf = ByteBuffer.wrap(byteBuf);
180 nonStandardCharsetBuffer.write(escSequence, 0, escSequence.length);
181 nonStandardCharsetBuffer.write(0); // M placeholder
182 nonStandardCharsetBuffer.write(0); // L placeholder
183 byte[] encoding = CompoundTextSupport.
184 getEncoding(c.charset().name());
185 if (encoding == null) {
186 throw new InternalError
187 ("Unknown encoding: " + encoder.charset().name());
188 }
189 nonStandardCharsetBuffer.write(encoding, 0, encoding.length);
190 nonStandardCharsetBuffer.write(0x02); // divider
191 nonStandardEncodingLen = encoding.length + 1;
192 }
193
194 private CoderResult flushNonStandardCharsetBuffer(ByteBuffer out) {
195 if (numNonStandardChars > 0) {
196 byte[] flushBuf = new byte[(int)encoder.maxBytesPerChar() *
197 numNonStandardChars];
198 ByteBuffer bb = ByteBuffer.wrap(flushBuf);
199 flushEncoder(encoder, bb);
200 bb.flip();
201 nonStandardCharsetBuffer.write(flushBuf, 0, bb.limit());
202 numNonStandardChars = 0;
203 }
204
205 int numBytes = nonStandardCharsetBuffer.size();
206 int nonStandardBytesOff = 6 + nonStandardEncodingLen;
207
208 if (out.remaining() < (numBytes - nonStandardBytesOff) +
209 nonStandardBytesOff * (((numBytes - nonStandardBytesOff) /
210 ((1 << 14) - 1)) + 1))
211 {
212 return CoderResult.OVERFLOW;
213 }
214
215 byte[] nonStandardBytes =
216 nonStandardCharsetBuffer.toByteArray();
217
218 // The non-standard charset header only supports 2^14-1 bytes of data.
219 // If we have more than that, we have to repeat the header.
220 do {
221 out.put((byte)0x1B);
222 out.put((byte)0x25);
223 out.put((byte)0x2F);
224 out.put((byte)nonStandardBytes[3]);
225
226 int toWrite = Math.min(numBytes - nonStandardBytesOff,
227 (1 << 14) - 1 - nonStandardEncodingLen);
228
229 out.put((byte)
230 (((toWrite + nonStandardEncodingLen) / 0x80) | 0x80)); // M
231 out.put((byte)
232 (((toWrite + nonStandardEncodingLen) % 0x80) | 0x80)); // L
233 out.put(nonStandardBytes, 6, nonStandardEncodingLen);
234 out.put(nonStandardBytes, nonStandardBytesOff, toWrite);
235 nonStandardBytesOff += toWrite;
236 } while (nonStandardBytesOff < numBytes);
237
238 nonStandardCharsetBuffer = null;
239 byteBuf = null;
240 nonStandardEncodingLen = 0;
241 return CoderResult.UNDERFLOW;
242 }
243
244 /**
245 * Resets the encoder.
246 * Call this method to reset the encoder to its initial state
247 */
248 protected void implReset() {
249 numNonStandardChars = nonStandardEncodingLen = 0;
250 nonStandardCharsetBuffer = null;
251 byteBuf = null;
252 try {
253 encoder = Charset.forName("ISO8859_1").newEncoder();
254 } catch (IllegalArgumentException cannotHappen) {
255 }
256 initEncoder(encoder);
257 }
258
259 /**
260 * Return whether a character is mappable or not
261 * @return true if a character is mappable
262 */
263 public boolean canEncode(char ch) {
264 return getEncoder(ch) != null;
265 }
266
267 protected void implOnMalformedInput(CodingErrorAction newAction) {
268 encoder.onUnmappableCharacter(newAction);
269 }
270
271 protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
272 encoder.onUnmappableCharacter(newAction);
273 }
274
275 protected void implReplaceWith(byte[] newReplacement) {
276 if (encoder != null)
277 encoder.replaceWith(newReplacement);
278 }
279
280 /**
281 * Try to figure out which CharsetEncoder to use for conversion
282 * of the specified Unicode character. The target character encoding
283 * of the returned encoder is approved to be used with Compound Text.
284 *
285 * @param ch Unicode character
286 * @return CharsetEncoder to convert the given character
287 */
288 private CharsetEncoder getEncoder(char ch) {
289 // 1. Try the current encoder.
290 if (encoder.canEncode(ch)) {
291 return encoder;
292 }
293
294 // 2. Try the default encoder.
295 if (defaultEncodingSupported && defaultEncoder.canEncode(ch)) {
296 CharsetEncoder retval = null;
297 try {
298 retval = defaultEncoder.charset().newEncoder();
299 } catch (UnsupportedOperationException cannotHappen) {
300 }
301 initEncoder(retval);
302 return retval;
303 }
304
305 // 3. Try ISO8859-1.
306 if (latin1Encoder.canEncode(ch)) {
307 CharsetEncoder retval = null;
308 try {
309 retval = latin1Encoder.charset().newEncoder();
310 } catch (UnsupportedOperationException cannotHappen) {}
311 initEncoder(retval);
312 return retval;
313 }
314
315 // 4. Brute force search of all supported encodings.
316 for (Iterator iter = CompoundTextSupport.getEncodings().iterator();
317 iter.hasNext();)
318 {
319 String encoding = (String)iter.next();
320 CharsetEncoder enc =
321 (CharsetEncoder)encodingToEncoderMap.get(encoding);
322 if (enc == null) {
323 enc = CompoundTextSupport.getEncoder(encoding);
324 if (enc == null) {
325 throw new InternalError("Unsupported encoding: " +
326 encoding);
327 }
328 encodingToEncoderMap.put(encoding, enc);
329 }
330 if (enc.canEncode(ch)) {
331 CharsetEncoder retval = CompoundTextSupport.getEncoder(encoding);
332 initEncoder(retval);
333 return retval;
334 }
335 }
336
337 return null;
338 }
339
340 private void initEncoder(CharsetEncoder enc) {
341 try {
342 enc.onUnmappableCharacter(CodingErrorAction.REPLACE)
343 .replaceWith(replacement());
344 } catch (IllegalArgumentException x) {}
345 }
346
347 private CharBuffer fcb= CharBuffer.allocate(0);
348 private CoderResult flushEncoder(CharsetEncoder enc, ByteBuffer bb) {
349 enc.encode(fcb, bb, true);
350 return enc.flush(bb);
351 }
352}