blob: 161159c4d1ad8c6c33699ee8777ad99c0d34a0a4 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 1996-2004 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26package sun.io;
27
28import java.io.*;
29
30
31/**
32 * An abstract base class for subclasses which convert character data
33 * in an external encoding into Unicode characters.
34 *
35 * @author Asmus Freytag
36 * @author Lloyd Honomichl
37 *
38 * @deprecated Replaced by {@link java.nio.charset}. THIS API WILL BE
39 * REMOVED IN J2SE 1.6.
40 */
41@Deprecated
42public abstract class ByteToCharConverter {
43
44 /*
45 * Substitution mode flag.
46 */
47 protected boolean subMode = true;
48
49 /*
50 * Characters to use for automatic substitution.
51 */
52 protected char[] subChars = { '\uFFFD' };
53
54 /*
55 * Offset of next character to be output
56 */
57 protected int charOff;
58
59 /*
60 * Offset of next byte to be converted
61 */
62 protected int byteOff;
63
64 /*
65 * Length of bad input that caused a MalformedInputException.
66 */
67 protected int badInputLength;
68
69 /**
70 * Create an instance of the default ByteToCharConverter subclass.
71 */
72 public static ByteToCharConverter getDefault() {
73 Object cvt;
74 cvt = Converters.newDefaultConverter(Converters.BYTE_TO_CHAR);
75 return (ByteToCharConverter)cvt;
76 }
77
78 /**
79 * Returns appropriate ByteToCharConverter subclass instance.
80 * @param string represents encoding
81 */
82 public static ByteToCharConverter getConverter(String encoding)
83 throws UnsupportedEncodingException
84 {
85 Object cvt;
86 cvt = Converters.newConverter(Converters.BYTE_TO_CHAR, encoding);
87 return (ByteToCharConverter)cvt;
88 }
89
90 /**
91 * Returns the character set id for the conversion
92 */
93 public abstract String getCharacterEncoding();
94
95 /**
96 * Converts an array of bytes containing characters in an external
97 * encoding into an array of Unicode characters. This method allows
98 * a buffer by buffer conversion of a data stream. The state of the
99 * conversion is saved between calls to convert. Among other things,
100 * this means multibyte input sequences can be split between calls.
101 * If a call to convert results in an exception, the conversion may be
102 * continued by calling convert again with suitably modified parameters.
103 * All conversions should be finished with a call to the flush method.
104 *
105 * @return the number of bytes written to output.
106 * @param input byte array containing text to be converted.
107 * @param inStart begin conversion at this offset in input array.
108 * @param inEnd stop conversion at this offset in input array (exclusive).
109 * @param output character array to receive conversion result.
110 * @param outStart start writing to output array at this offset.
111 * @param outEnd stop writing to output array at this offset (exclusive).
112 * @exception MalformedInputException if the input buffer contains any
113 * sequence of bytes that is illegal for the input character set.
114 * @exception UnknownCharacterException for any character that
115 * that cannot be converted to Unicode. Thrown only when converter
116 * is not in substitution mode.
117 * @exception ConversionBufferFullException if output array is filled prior
118 * to converting all the input.
119 */
120 public abstract int convert(byte[] input, int inStart, int inEnd,
121 char[] output, int outStart, int outEnd)
122 throws MalformedInputException,
123 UnknownCharacterException,
124 ConversionBufferFullException;
125
126 /**
127 * Converts an array of bytes containing characters in an external
128 * encoding into an array of Unicode characters. Unlike convert,
129 * this method does not do incremental conversion. It assumes that
130 * the given input array contains all the characters to be
131 * converted. The state of the converter is reset at the beginning
132 * of this method and is left in the reset state on successful
133 * termination. The converter is not reset if an exception is
134 * thrown. This allows the caller to determine where the bad input
135 * was encountered by calling nextByteIndex.
136 * <p>
137 * This method uses substitution mode when performing the
138 * conversion. The method setSubstitutionChars may be used to
139 * determine what characters are substituted. Even though substitution
140 * mode is used, the state of the converter's substitution mode is
141 * not changed at the end of this method.
142 *
143 * @return an array of chars containing the converted characters.
144 * @param input array containing Unicode characters to be converted.
145 * @exception MalformedInputException if the input buffer contains any
146 * sequence of chars that is illegal in the input character encoding.
147 * After this exception is thrown,
148 * the method nextByteIndex can be called to obtain the index of the
149 * first invalid input byte and getBadInputLength can be called
150 * to determine the length of the invalid input.
151 *
152 * @see #nextByteIndex
153 * @see #setSubstitutionMode
154 * @see sun.io.CharToByteConverter#setSubstitutionBytes(byte[])
155 * @see #getBadInputLength
156 */
157 public char[] convertAll( byte input[] ) throws MalformedInputException {
158 reset();
159 boolean savedSubMode = subMode;
160 subMode = true;
161
162 char[] output = new char[ getMaxCharsPerByte() * input.length ];
163
164 try {
165 int outputLength = convert( input, 0, input.length,
166 output, 0, output.length );
167 outputLength += flush( output, outputLength, output.length );
168
169 char [] returnedOutput = new char[ outputLength ];
170 System.arraycopy( output, 0, returnedOutput, 0, outputLength );
171 return returnedOutput;
172 }
173 catch( ConversionBufferFullException e ) {
174 //Not supposed to happen. If it does, getMaxCharsPerByte() lied.
175 throw new
176 InternalError("this.getMaxCharsBerByte returned bad value");
177 }
178 catch( UnknownCharacterException e ) {
179 // Not supposed to happen since we're in substitution mode.
180 throw new InternalError();
181 }
182 finally {
183 subMode = savedSubMode;
184 }
185 }
186
187 /**
188 * Writes any remaining output to the output buffer and resets the
189 * converter to its initial state.
190 *
191 * @param output char array to receive flushed output.
192 * @param outStart start writing to output array at this offset.
193 * @param outEnd stop writing to output array at this offset (exclusive).
194 * @exception MalformedInputException if the output to be flushed contained
195 * a partial or invalid multibyte character sequence. flush will
196 * write what it can to the output buffer and reset the converter before
197 * throwing this exception. An additional call to flush is not required.
198 * @exception ConversionBufferFullException if output array is filled
199 * before all the output can be flushed. flush will write what it can
200 * to the output buffer and remember its state. An additional call to
201 * flush with a new output buffer will conclude the operation.
202 */
203 public abstract int flush( char[] output, int outStart, int outEnd )
204 throws MalformedInputException, ConversionBufferFullException;
205
206 /**
207 * Resets converter to its initial state.
208 */
209 public abstract void reset();
210
211 /**
212 * Returns the maximum number of characters needed to convert a byte. Useful
213 * for calculating the maximum output buffer size needed for a particular
214 * input buffer.
215 */
216 public int getMaxCharsPerByte() {
217 // Until UTF-16, this will do for every encoding
218 return 1;
219 }
220
221 /**
222 * Returns the length, in bytes, of the input which caused a
223 * MalformedInputException. Always refers to the last
224 * MalformedInputException thrown by the converter. If none have
225 * ever been thrown, returns 0.
226 */
227 public int getBadInputLength() {
228 return badInputLength;
229 }
230
231 /**
232 * Returns the index of the character just past the last character
233 * written by the previous call to convert.
234 */
235 public int nextCharIndex() {
236 return charOff;
237 }
238
239 /**
240 * Returns the index of the byte just past the last byte successfully
241 * converted by the previous call to convert.
242 */
243 public int nextByteIndex() {
244 return byteOff;
245 }
246
247 /**
248 * Sets converter into substitution mode. In substitution mode,
249 * the converter will replace untranslatable characters in the source
250 * encoding with the substitution character set by setSubstitionChars.
251 * When not in substitution mode, the converter will throw an
252 * UnknownCharacterException when it encounters untranslatable input.
253 *
254 * @param doSub if true, enable substitution mode.
255 * @see #setSubstitutionChars
256 */
257 public void setSubstitutionMode(boolean doSub) {
258 subMode = doSub;
259 }
260
261 /**
262 * Sets the substitution characters to use when the converter is in
263 * substitution mode. The given chars must not be
264 * longer than the value returned by getMaxCharsPerByte for this
265 * converter.
266 *
267 * @param newSubBytes the substitution bytes
268 * @exception IllegalArgumentException if given byte array is longer than
269 * the value returned by the method getMaxBytesPerChar.
270 * @see #setSubstitutionMode
271 * @see #getMaxBytesPerChar
272 */
273 /**
274 * sets the substitution character to use
275 * @param c the substitution character
276 */
277 public void setSubstitutionChars(char[] c)
278 throws IllegalArgumentException
279 {
280 if( c.length > getMaxCharsPerByte() ) {
281 throw new IllegalArgumentException();
282 }
283
284 subChars = new char[ c.length ];
285 System.arraycopy( c, 0, subChars, 0, c.length );
286 }
287
288 /**
289 * returns a string representation of the character conversion
290 */
291 public String toString() {
292 return "ByteToCharConverter: " + getCharacterEncoding();
293 }
294}