/*
 * Copyright 1996-2004 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
| 25 | |
| 26 | package sun.io; |
| 27 | |
| 28 | import java.io.*; |
| 29 | |
| 30 | |
| 31 | /** |
| 32 | * An abstract base class for subclasses which convert character data |
| 33 | * in an external encoding into Unicode characters. |
| 34 | * |
| 35 | * @author Asmus Freytag |
| 36 | * @author Lloyd Honomichl |
| 37 | * |
| 38 | * @deprecated Replaced by {@link java.nio.charset}. THIS API WILL BE |
| 39 | * REMOVED IN J2SE 1.6. |
| 40 | */ |
| 41 | @Deprecated |
| 42 | public abstract class ByteToCharConverter { |
| 43 | |
| 44 | /* |
| 45 | * Substitution mode flag. |
| 46 | */ |
| 47 | protected boolean subMode = true; |
| 48 | |
| 49 | /* |
| 50 | * Characters to use for automatic substitution. |
| 51 | */ |
| 52 | protected char[] subChars = { '\uFFFD' }; |
| 53 | |
| 54 | /* |
| 55 | * Offset of next character to be output |
| 56 | */ |
| 57 | protected int charOff; |
| 58 | |
| 59 | /* |
| 60 | * Offset of next byte to be converted |
| 61 | */ |
| 62 | protected int byteOff; |
| 63 | |
| 64 | /* |
| 65 | * Length of bad input that caused a MalformedInputException. |
| 66 | */ |
| 67 | protected int badInputLength; |
| 68 | |
| 69 | /** |
| 70 | * Create an instance of the default ByteToCharConverter subclass. |
| 71 | */ |
| 72 | public static ByteToCharConverter getDefault() { |
| 73 | Object cvt; |
| 74 | cvt = Converters.newDefaultConverter(Converters.BYTE_TO_CHAR); |
| 75 | return (ByteToCharConverter)cvt; |
| 76 | } |
| 77 | |
| 78 | /** |
| 79 | * Returns appropriate ByteToCharConverter subclass instance. |
| 80 | * @param string represents encoding |
| 81 | */ |
| 82 | public static ByteToCharConverter getConverter(String encoding) |
| 83 | throws UnsupportedEncodingException |
| 84 | { |
| 85 | Object cvt; |
| 86 | cvt = Converters.newConverter(Converters.BYTE_TO_CHAR, encoding); |
| 87 | return (ByteToCharConverter)cvt; |
| 88 | } |
| 89 | |
| 90 | /** |
| 91 | * Returns the character set id for the conversion |
| 92 | */ |
| 93 | public abstract String getCharacterEncoding(); |
| 94 | |
| 95 | /** |
| 96 | * Converts an array of bytes containing characters in an external |
| 97 | * encoding into an array of Unicode characters. This method allows |
| 98 | * a buffer by buffer conversion of a data stream. The state of the |
| 99 | * conversion is saved between calls to convert. Among other things, |
| 100 | * this means multibyte input sequences can be split between calls. |
| 101 | * If a call to convert results in an exception, the conversion may be |
| 102 | * continued by calling convert again with suitably modified parameters. |
| 103 | * All conversions should be finished with a call to the flush method. |
| 104 | * |
| 105 | * @return the number of bytes written to output. |
| 106 | * @param input byte array containing text to be converted. |
| 107 | * @param inStart begin conversion at this offset in input array. |
| 108 | * @param inEnd stop conversion at this offset in input array (exclusive). |
| 109 | * @param output character array to receive conversion result. |
| 110 | * @param outStart start writing to output array at this offset. |
| 111 | * @param outEnd stop writing to output array at this offset (exclusive). |
| 112 | * @exception MalformedInputException if the input buffer contains any |
| 113 | * sequence of bytes that is illegal for the input character set. |
| 114 | * @exception UnknownCharacterException for any character that |
| 115 | * that cannot be converted to Unicode. Thrown only when converter |
| 116 | * is not in substitution mode. |
| 117 | * @exception ConversionBufferFullException if output array is filled prior |
| 118 | * to converting all the input. |
| 119 | */ |
| 120 | public abstract int convert(byte[] input, int inStart, int inEnd, |
| 121 | char[] output, int outStart, int outEnd) |
| 122 | throws MalformedInputException, |
| 123 | UnknownCharacterException, |
| 124 | ConversionBufferFullException; |
| 125 | |
| 126 | /** |
| 127 | * Converts an array of bytes containing characters in an external |
| 128 | * encoding into an array of Unicode characters. Unlike convert, |
| 129 | * this method does not do incremental conversion. It assumes that |
| 130 | * the given input array contains all the characters to be |
| 131 | * converted. The state of the converter is reset at the beginning |
| 132 | * of this method and is left in the reset state on successful |
| 133 | * termination. The converter is not reset if an exception is |
| 134 | * thrown. This allows the caller to determine where the bad input |
| 135 | * was encountered by calling nextByteIndex. |
| 136 | * <p> |
| 137 | * This method uses substitution mode when performing the |
| 138 | * conversion. The method setSubstitutionChars may be used to |
| 139 | * determine what characters are substituted. Even though substitution |
| 140 | * mode is used, the state of the converter's substitution mode is |
| 141 | * not changed at the end of this method. |
| 142 | * |
| 143 | * @return an array of chars containing the converted characters. |
| 144 | * @param input array containing Unicode characters to be converted. |
| 145 | * @exception MalformedInputException if the input buffer contains any |
| 146 | * sequence of chars that is illegal in the input character encoding. |
| 147 | * After this exception is thrown, |
| 148 | * the method nextByteIndex can be called to obtain the index of the |
| 149 | * first invalid input byte and getBadInputLength can be called |
| 150 | * to determine the length of the invalid input. |
| 151 | * |
| 152 | * @see #nextByteIndex |
| 153 | * @see #setSubstitutionMode |
| 154 | * @see sun.io.CharToByteConverter#setSubstitutionBytes(byte[]) |
| 155 | * @see #getBadInputLength |
| 156 | */ |
| 157 | public char[] convertAll( byte input[] ) throws MalformedInputException { |
| 158 | reset(); |
| 159 | boolean savedSubMode = subMode; |
| 160 | subMode = true; |
| 161 | |
| 162 | char[] output = new char[ getMaxCharsPerByte() * input.length ]; |
| 163 | |
| 164 | try { |
| 165 | int outputLength = convert( input, 0, input.length, |
| 166 | output, 0, output.length ); |
| 167 | outputLength += flush( output, outputLength, output.length ); |
| 168 | |
| 169 | char [] returnedOutput = new char[ outputLength ]; |
| 170 | System.arraycopy( output, 0, returnedOutput, 0, outputLength ); |
| 171 | return returnedOutput; |
| 172 | } |
| 173 | catch( ConversionBufferFullException e ) { |
| 174 | //Not supposed to happen. If it does, getMaxCharsPerByte() lied. |
| 175 | throw new |
| 176 | InternalError("this.getMaxCharsBerByte returned bad value"); |
| 177 | } |
| 178 | catch( UnknownCharacterException e ) { |
| 179 | // Not supposed to happen since we're in substitution mode. |
| 180 | throw new InternalError(); |
| 181 | } |
| 182 | finally { |
| 183 | subMode = savedSubMode; |
| 184 | } |
| 185 | } |
| 186 | |
| 187 | /** |
| 188 | * Writes any remaining output to the output buffer and resets the |
| 189 | * converter to its initial state. |
| 190 | * |
| 191 | * @param output char array to receive flushed output. |
| 192 | * @param outStart start writing to output array at this offset. |
| 193 | * @param outEnd stop writing to output array at this offset (exclusive). |
| 194 | * @exception MalformedInputException if the output to be flushed contained |
| 195 | * a partial or invalid multibyte character sequence. flush will |
| 196 | * write what it can to the output buffer and reset the converter before |
| 197 | * throwing this exception. An additional call to flush is not required. |
| 198 | * @exception ConversionBufferFullException if output array is filled |
| 199 | * before all the output can be flushed. flush will write what it can |
| 200 | * to the output buffer and remember its state. An additional call to |
| 201 | * flush with a new output buffer will conclude the operation. |
| 202 | */ |
| 203 | public abstract int flush( char[] output, int outStart, int outEnd ) |
| 204 | throws MalformedInputException, ConversionBufferFullException; |
| 205 | |
| 206 | /** |
| 207 | * Resets converter to its initial state. |
| 208 | */ |
| 209 | public abstract void reset(); |
| 210 | |
| 211 | /** |
| 212 | * Returns the maximum number of characters needed to convert a byte. Useful |
| 213 | * for calculating the maximum output buffer size needed for a particular |
| 214 | * input buffer. |
| 215 | */ |
| 216 | public int getMaxCharsPerByte() { |
| 217 | // Until UTF-16, this will do for every encoding |
| 218 | return 1; |
| 219 | } |
| 220 | |
| 221 | /** |
| 222 | * Returns the length, in bytes, of the input which caused a |
| 223 | * MalformedInputException. Always refers to the last |
| 224 | * MalformedInputException thrown by the converter. If none have |
| 225 | * ever been thrown, returns 0. |
| 226 | */ |
| 227 | public int getBadInputLength() { |
| 228 | return badInputLength; |
| 229 | } |
| 230 | |
| 231 | /** |
| 232 | * Returns the index of the character just past the last character |
| 233 | * written by the previous call to convert. |
| 234 | */ |
| 235 | public int nextCharIndex() { |
| 236 | return charOff; |
| 237 | } |
| 238 | |
| 239 | /** |
| 240 | * Returns the index of the byte just past the last byte successfully |
| 241 | * converted by the previous call to convert. |
| 242 | */ |
| 243 | public int nextByteIndex() { |
| 244 | return byteOff; |
| 245 | } |
| 246 | |
| 247 | /** |
| 248 | * Sets converter into substitution mode. In substitution mode, |
| 249 | * the converter will replace untranslatable characters in the source |
| 250 | * encoding with the substitution character set by setSubstitionChars. |
| 251 | * When not in substitution mode, the converter will throw an |
| 252 | * UnknownCharacterException when it encounters untranslatable input. |
| 253 | * |
| 254 | * @param doSub if true, enable substitution mode. |
| 255 | * @see #setSubstitutionChars |
| 256 | */ |
| 257 | public void setSubstitutionMode(boolean doSub) { |
| 258 | subMode = doSub; |
| 259 | } |
| 260 | |
| 261 | /** |
| 262 | * Sets the substitution characters to use when the converter is in |
| 263 | * substitution mode. The given chars must not be |
| 264 | * longer than the value returned by getMaxCharsPerByte for this |
| 265 | * converter. |
| 266 | * |
| 267 | * @param newSubBytes the substitution bytes |
| 268 | * @exception IllegalArgumentException if given byte array is longer than |
| 269 | * the value returned by the method getMaxBytesPerChar. |
| 270 | * @see #setSubstitutionMode |
| 271 | * @see #getMaxBytesPerChar |
| 272 | */ |
| 273 | /** |
| 274 | * sets the substitution character to use |
| 275 | * @param c the substitution character |
| 276 | */ |
| 277 | public void setSubstitutionChars(char[] c) |
| 278 | throws IllegalArgumentException |
| 279 | { |
| 280 | if( c.length > getMaxCharsPerByte() ) { |
| 281 | throw new IllegalArgumentException(); |
| 282 | } |
| 283 | |
| 284 | subChars = new char[ c.length ]; |
| 285 | System.arraycopy( c, 0, subChars, 0, c.length ); |
| 286 | } |
| 287 | |
| 288 | /** |
| 289 | * returns a string representation of the character conversion |
| 290 | */ |
| 291 | public String toString() { |
| 292 | return "ByteToCharConverter: " + getCharacterEncoding(); |
| 293 | } |
| 294 | } |