J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | #warn This file is preprocessed before being compiled |
| 27 | |
| 28 | package java.nio.charset; |
| 29 | |
| 30 | import java.nio.Buffer; |
| 31 | import java.nio.ByteBuffer; |
| 32 | import java.nio.CharBuffer; |
| 33 | import java.nio.BufferOverflowException; |
| 34 | import java.nio.BufferUnderflowException; |
| 35 | import java.lang.ref.WeakReference; |
| 36 | import java.nio.charset.CoderMalfunctionError; // javadoc |
| 37 | |
| 38 | |
| 39 | /** |
| 40 | * An engine that can transform a sequence of $itypesPhrase$ into a sequence of |
| 41 | * $otypesPhrase$. |
| 42 | * |
| 43 | * <a name="steps"></a> |
| 44 | * |
| 45 | * <p> The input $itype$ sequence is provided in a $itype$ buffer or a series |
| 46 | * of such buffers. The output $otype$ sequence is written to a $otype$ buffer |
| 47 | * or a series of such buffers. $A$ $coder$ should always be used by making |
| 48 | * the following sequence of method invocations, hereinafter referred to as $a$ |
| 49 | * <i>$coding$ operation</i>: |
| 50 | * |
| 51 | * <ol> |
| 52 | * |
| 53 | * <li><p> Reset the $coder$ via the {@link #reset reset} method, unless it |
| 54 | * has not been used before; </p></li> |
| 55 | * |
| 56 | * <li><p> Invoke the {@link #$code$ $code$} method zero or more times, as |
| 57 | * long as additional input may be available, passing <tt>false</tt> for the |
| 58 | * <tt>endOfInput</tt> argument and filling the input buffer and flushing the |
| 59 | * output buffer between invocations; </p></li> |
| 60 | * |
| 61 | * <li><p> Invoke the {@link #$code$ $code$} method one final time, passing |
| 62 | * <tt>true</tt> for the <tt>endOfInput</tt> argument; and then </p></li> |
| 63 | * |
| 64 | * <li><p> Invoke the {@link #flush flush} method so that the $coder$ can |
| 65 | * flush any internal state to the output buffer. </p></li> |
| 66 | * |
| 67 | * </ol> |
| 68 | * |
| 69 | * Each invocation of the {@link #$code$ $code$} method will $code$ as many |
| 70 | * $itype$s as possible from the input buffer, writing the resulting $otype$s |
| 71 | * to the output buffer. The {@link #$code$ $code$} method returns when more |
| 72 | * input is required, when there is not enough room in the output buffer, or |
| 73 | * when $a$ $coding$ error has occurred. In each case a {@link CoderResult} |
| 74 | * object is returned to describe the reason for termination. An invoker can |
| 75 | * examine this object and fill the input buffer, flush the output buffer, or |
| 76 | * attempt to recover from $a$ $coding$ error, as appropriate, and try again. |
| 77 | * |
| 78 | * <a name="ce"></a> |
| 79 | * |
| 80 | * <p> There are two general types of $coding$ errors. If the input $itype$ |
| 81 | * sequence is $notLegal$ then the input is considered <i>malformed</i>. If |
| 82 | * the input $itype$ sequence is legal but cannot be mapped to a valid |
| 83 | * $outSequence$ then an <i>unmappable character</i> has been encountered. |
| 84 | * |
| 85 | * <a name="cae"></a> |
| 86 | * |
| 87 | * <p> How $a$ $coding$ error is handled depends upon the action requested for |
| 88 | * that type of error, which is described by an instance of the {@link |
| 89 | * CodingErrorAction} class. The possible error actions are to {@link |
| 90 | * CodingErrorAction#IGNORE </code>ignore<code>} the erroneous input, {@link |
| 91 | * CodingErrorAction#REPORT </code>report<code>} the error to the invoker via |
| 92 | * the returned {@link CoderResult} object, or {@link CodingErrorAction#REPLACE |
| 93 | * </code>replace<code>} the erroneous input with the current value of the |
| 94 | * replacement $replTypeName$. The replacement |
| 95 | * |
| 96 | #if[encoder] |
| 97 | * is initially set to the $coder$'s default replacement, which often |
| 98 | * (but not always) has the initial value $defaultReplName$; |
| 99 | #end[encoder] |
| 100 | #if[decoder] |
| 101 | * has the initial value $defaultReplName$; |
| 102 | #end[decoder] |
| 103 | * |
| 104 | * its value may be changed via the {@link #replaceWith($replFQType$) |
| 105 | * replaceWith} method. |
| 106 | * |
| 107 | * <p> The default action for malformed-input and unmappable-character errors |
| 108 | * is to {@link CodingErrorAction#REPORT </code>report<code>} them. The |
| 109 | * malformed-input error action may be changed via the {@link |
| 110 | * #onMalformedInput(CodingErrorAction) onMalformedInput} method; the |
| 111 | * unmappable-character action may be changed via the {@link |
| 112 | * #onUnmappableCharacter(CodingErrorAction) onUnmappableCharacter} method. |
| 113 | * |
| 114 | * <p> This class is designed to handle many of the details of the $coding$ |
| 115 | * process, including the implementation of error actions. $A$ $coder$ for a |
| 116 | * specific charset, which is a concrete subclass of this class, need only |
| 117 | * implement the abstract {@link #$code$Loop $code$Loop} method, which |
| 118 | * encapsulates the basic $coding$ loop. A subclass that maintains internal |
| 119 | * state should, additionally, override the {@link #implFlush implFlush} and |
| 120 | * {@link #implReset implReset} methods. |
| 121 | * |
| 122 | * <p> Instances of this class are not safe for use by multiple concurrent |
| 123 | * threads. </p> |
| 124 | * |
| 125 | * |
| 126 | * @author Mark Reinhold |
| 127 | * @author JSR-51 Expert Group |
| 128 | * @since 1.4 |
| 129 | * |
| 130 | * @see ByteBuffer |
| 131 | * @see CharBuffer |
| 132 | * @see Charset |
| 133 | * @see Charset$OtherCoder$ |
| 134 | */ |
| 135 | |
| 136 | public abstract class Charset$Coder$ { |
| 137 | |
| 138 | private final Charset charset; |
| 139 | private final float average$ItypesPerOtype$; |
| 140 | private final float max$ItypesPerOtype$; |
| 141 | |
| 142 | private $replType$ replacement; |
| 143 | private CodingErrorAction malformedInputAction |
| 144 | = CodingErrorAction.REPORT; |
| 145 | private CodingErrorAction unmappableCharacterAction |
| 146 | = CodingErrorAction.REPORT; |
| 147 | |
| 148 | // Internal states |
| 149 | // |
| 150 | private static final int ST_RESET = 0; |
| 151 | private static final int ST_CODING = 1; |
| 152 | private static final int ST_END = 2; |
| 153 | private static final int ST_FLUSHED = 3; |
| 154 | |
| 155 | private int state = ST_RESET; |
| 156 | |
| 157 | private static String stateNames[] |
| 158 | = { "RESET", "CODING", "CODING_END", "FLUSHED" }; |
| 159 | |
| 160 | |
/**
 * Initializes a new $coder$ with the given $otypes-per-itype$ and
 * replacement values.
 *
 * @param  cs
 *         The charset that created this $coder$
 *
 * @param  average$ItypesPerOtype$
 *         A positive float value indicating the expected number of
 *         $otype$s that will be produced for each input $itype$
 *
 * @param  max$ItypesPerOtype$
 *         A positive float value indicating the maximum number of
 *         $otype$s that will be produced for each input $itype$
 *
 * @param  replacement
 *         The initial replacement; must not be <tt>null</tt>, must have
 *         non-zero length, must not be longer than max$ItypesPerOtype$,
 *         and must be {@link #isLegalReplacement </code>legal<code>}
 *
 * @throws  IllegalArgumentException
 *          If the preconditions on the parameters do not hold
 */
{#if[encoder]?protected:private}
Charset$Coder$(Charset cs,
               float average$ItypesPerOtype$,
               float max$ItypesPerOtype$,
               $replType$ replacement)
{
    this.charset = cs;

    if (average$ItypesPerOtype$ <= 0.0f) {
        throw new IllegalArgumentException("Non-positive "
                                           + "average$ItypesPerOtype$");
    }
    if (max$ItypesPerOtype$ <= 0.0f) {
        throw new IllegalArgumentException("Non-positive "
                                           + "max$ItypesPerOtype$");
    }
    // The average-versus-maximum consistency check is skipped when the
    // charset package runs at the "1.4" bug-compatibility level.
    if (!Charset.atBugLevel("1.4")
        && (average$ItypesPerOtype$ > max$ItypesPerOtype$)) {
        throw new IllegalArgumentException("average$ItypesPerOtype$"
                                           + " exceeds "
                                           + "max$ItypesPerOtype$");
    }

    this.average$ItypesPerOtype$ = average$ItypesPerOtype$;
    this.max$ItypesPerOtype$ = max$ItypesPerOtype$;
    this.replacement = replacement;
    // Route the initial value through the regular setter so it is checked
    // against the limits assigned just above.
    replaceWith(replacement);
}
| 205 | |
/**
 * Initializes a new $coder$ with the given $otypes-per-itype$ values,
 * using the $replTypeName$ $defaultReplName$ as its replacement.
 *
 * @param  cs
 *         The charset that created this $coder$
 *
 * @param  average$ItypesPerOtype$
 *         A positive float value indicating the expected number of
 *         $otype$s that will be produced for each input $itype$
 *
 * @param  max$ItypesPerOtype$
 *         A positive float value indicating the maximum number of
 *         $otype$s that will be produced for each input $itype$
 *
 * @throws  IllegalArgumentException
 *          If the preconditions on the parameters do not hold
 */
protected Charset$Coder$(Charset cs,
                         float average$ItypesPerOtype$,
                         float max$ItypesPerOtype$)
{
    // Delegate to the full constructor, supplying the default replacement
    this(cs, average$ItypesPerOtype$, max$ItypesPerOtype$, $defaultRepl$);
}
| 230 | |
| 231 | /** |
| 232 | * Returns the charset that created this $coder$. </p> |
| 233 | * |
| 234 | * @return This $coder$'s charset |
| 235 | */ |
| 236 | public final Charset charset() { |
| 237 | return charset; |
| 238 | } |
| 239 | |
| 240 | /** |
| 241 | * Returns this $coder$'s replacement value. </p> |
| 242 | * |
| 243 | * @return This $coder$'s current replacement, |
| 244 | * which is never <tt>null</tt> and is never empty |
| 245 | */ |
| 246 | public final $replType$ replacement() { |
| 247 | return replacement; |
| 248 | } |
| 249 | |
/**
 * Changes this $coder$'s replacement value.
 *
 * <p> This method invokes the {@link #implReplaceWith implReplaceWith}
 * method, passing the new replacement, after checking that the new
 * replacement is acceptable. </p>
 *
 * @param  newReplacement
 *
#if[decoder]
 *         The new replacement; must not be <tt>null</tt>
 *         and must have non-zero length
#end[decoder]
#if[encoder]
 *         The new replacement; must not be <tt>null</tt>, must have
 *         non-zero length, must not be longer than the value returned by
 *         the {@link #max$ItypesPerOtype$() max$ItypesPerOtype$} method, and
 *         must be {@link #isLegalReplacement </code>legal<code>}.  The
 *         array is copied, so later changes to it do not affect this
 *         $coder$
#end[encoder]
 *
 * @return  This $coder$
 *
 * @throws  IllegalArgumentException
 *          If the preconditions on the parameter do not hold
 */
public final Charset$Coder$ replaceWith($replType$ newReplacement) {
    if (newReplacement == null)
        throw new IllegalArgumentException("Null replacement");
    int len = newReplacement.$replLength$;
    if (len == 0)
        throw new IllegalArgumentException("Empty replacement");
    if (len > max$ItypesPerOtype$)
        throw new IllegalArgumentException("Replacement too long");
#if[decoder]
    this.replacement = newReplacement;
#end[decoder]
#if[encoder]
    // Copy first, then validate and store the copy: the caller keeps a
    // reference to its own array and could otherwise modify the contents
    // after (or during) the legality check.
    $replType$ copy = newReplacement.clone();
    if (!isLegalReplacement(copy))
        throw new IllegalArgumentException("Illegal replacement");
    this.replacement = copy;
#end[encoder]
    // Notify the subclass with the value that was actually stored
    implReplaceWith(this.replacement);
    return this;
}
| 291 | |
| 292 | /** |
| 293 | * Reports a change to this $coder$'s replacement value. |
| 294 | * |
| 295 | * <p> The default implementation of this method does nothing. This method |
| 296 | * should be overridden by $coder$s that require notification of changes to |
| 297 | * the replacement. </p> |
| 298 | * |
| 299 | * @param newReplacement |
| 300 | */ |
| 301 | protected void implReplaceWith($replType$ newReplacement) { |
| 302 | } |
| 303 | |
| 304 | #if[encoder] |
| 305 | |
| 306 | private WeakReference cachedDecoder = null; |
| 307 | |
| 308 | /** |
| 309 | * Tells whether or not the given byte array is a legal replacement value |
| 310 | * for this encoder. |
| 311 | * |
| 312 | * <p> A replacement is legal if, and only if, it is a legal sequence of |
| 313 | * bytes in this encoder's charset; that is, it must be possible to decode |
| 314 | * the replacement into one or more sixteen-bit Unicode characters. |
| 315 | * |
| 316 | * <p> The default implementation of this method is not very efficient; it |
| 317 | * should generally be overridden to improve performance. </p> |
| 318 | * |
| 319 | * @param repl The byte array to be tested |
| 320 | * |
| 321 | * @return <tt>true</tt> if, and only if, the given byte array |
| 322 | * is a legal replacement value for this encoder |
| 323 | */ |
| 324 | public boolean isLegalReplacement(byte[] repl) { |
| 325 | WeakReference wr = cachedDecoder; |
| 326 | CharsetDecoder dec = null; |
| 327 | if ((wr == null) || ((dec = (CharsetDecoder)wr.get()) == null)) { |
| 328 | dec = charset().newDecoder(); |
| 329 | dec.onMalformedInput(CodingErrorAction.REPORT); |
| 330 | dec.onUnmappableCharacter(CodingErrorAction.REPORT); |
| 331 | cachedDecoder = new WeakReference(dec); |
| 332 | } else { |
| 333 | dec.reset(); |
| 334 | } |
| 335 | ByteBuffer bb = ByteBuffer.wrap(repl); |
| 336 | CharBuffer cb = CharBuffer.allocate((int)(bb.remaining() |
| 337 | * dec.maxCharsPerByte())); |
| 338 | CoderResult cr = dec.decode(bb, cb, true); |
| 339 | return !cr.isError(); |
| 340 | } |
| 341 | |
| 342 | #end[encoder] |
| 343 | |
| 344 | /** |
| 345 | * Returns this $coder$'s current action for malformed-input errors. </p> |
| 346 | * |
| 347 | * @return The current malformed-input action, which is never <tt>null</tt> |
| 348 | */ |
| 349 | public CodingErrorAction malformedInputAction() { |
| 350 | return malformedInputAction; |
| 351 | } |
| 352 | |
| 353 | /** |
| 354 | * Changes this $coder$'s action for malformed-input errors. </p> |
| 355 | * |
| 356 | * <p> This method invokes the {@link #implOnMalformedInput |
| 357 | * implOnMalformedInput} method, passing the new action. </p> |
| 358 | * |
| 359 | * @param newAction The new action; must not be <tt>null</tt> |
| 360 | * |
| 361 | * @return This $coder$ |
| 362 | * |
| 363 | * @throws IllegalArgumentException |
| 364 | * If the precondition on the parameter does not hold |
| 365 | */ |
| 366 | public final Charset$Coder$ onMalformedInput(CodingErrorAction newAction) { |
| 367 | if (newAction == null) |
| 368 | throw new IllegalArgumentException("Null action"); |
| 369 | malformedInputAction = newAction; |
| 370 | implOnMalformedInput(newAction); |
| 371 | return this; |
| 372 | } |
| 373 | |
| 374 | /** |
| 375 | * Reports a change to this $coder$'s malformed-input action. |
| 376 | * |
| 377 | * <p> The default implementation of this method does nothing. This method |
| 378 | * should be overridden by $coder$s that require notification of changes to |
| 379 | * the malformed-input action. </p> |
| 380 | */ |
| 381 | protected void implOnMalformedInput(CodingErrorAction newAction) { } |
| 382 | |
| 383 | /** |
| 384 | * Returns this $coder$'s current action for unmappable-character errors. |
| 385 | * </p> |
| 386 | * |
| 387 | * @return The current unmappable-character action, which is never |
| 388 | * <tt>null</tt> |
| 389 | */ |
| 390 | public CodingErrorAction unmappableCharacterAction() { |
| 391 | return unmappableCharacterAction; |
| 392 | } |
| 393 | |
| 394 | /** |
| 395 | * Changes this $coder$'s action for unmappable-character errors. |
| 396 | * |
| 397 | * <p> This method invokes the {@link #implOnUnmappableCharacter |
| 398 | * implOnUnmappableCharacter} method, passing the new action. </p> |
| 399 | * |
| 400 | * @param newAction The new action; must not be <tt>null</tt> |
| 401 | * |
| 402 | * @return This $coder$ |
| 403 | * |
| 404 | * @throws IllegalArgumentException |
| 405 | * If the precondition on the parameter does not hold |
| 406 | */ |
| 407 | public final Charset$Coder$ onUnmappableCharacter(CodingErrorAction |
| 408 | newAction) |
| 409 | { |
| 410 | if (newAction == null) |
| 411 | throw new IllegalArgumentException("Null action"); |
| 412 | unmappableCharacterAction = newAction; |
| 413 | implOnUnmappableCharacter(newAction); |
| 414 | return this; |
| 415 | } |
| 416 | |
| 417 | /** |
| 418 | * Reports a change to this $coder$'s unmappable-character action. |
| 419 | * |
| 420 | * <p> The default implementation of this method does nothing. This method |
| 421 | * should be overridden by $coder$s that require notification of changes to |
| 422 | * the unmappable-character action. </p> |
| 423 | */ |
| 424 | protected void implOnUnmappableCharacter(CodingErrorAction newAction) { } |
| 425 | |
| 426 | /** |
| 427 | * Returns the average number of $otype$s that will be produced for each |
| 428 | * $itype$ of input. This heuristic value may be used to estimate the size |
| 429 | * of the output buffer required for a given input sequence. </p> |
| 430 | * |
| 431 | * @return The average number of $otype$s produced |
| 432 | * per $itype$ of input |
| 433 | */ |
| 434 | public final float average$ItypesPerOtype$() { |
| 435 | return average$ItypesPerOtype$; |
| 436 | } |
| 437 | |
| 438 | /** |
| 439 | * Returns the maximum number of $otype$s that will be produced for each |
| 440 | * $itype$ of input. This value may be used to compute the worst-case size |
| 441 | * of the output buffer required for a given input sequence. </p> |
| 442 | * |
| 443 | * @return The maximum number of $otype$s that will be produced per |
| 444 | * $itype$ of input |
| 445 | */ |
| 446 | public final float max$ItypesPerOtype$() { |
| 447 | return max$ItypesPerOtype$; |
| 448 | } |
| 449 | |
| 450 | /** |
| 451 | * $Code$s as many $itype$s as possible from the given input buffer, |
| 452 | * writing the results to the given output buffer. |
| 453 | * |
| 454 | * <p> The buffers are read from, and written to, starting at their current |
| 455 | * positions. At most {@link Buffer#remaining in.remaining()} $itype$s |
| 456 | * will be read and at most {@link Buffer#remaining out.remaining()} |
| 457 | * $otype$s will be written. The buffers' positions will be advanced to |
| 458 | * reflect the $itype$s read and the $otype$s written, but their marks and |
| 459 | * limits will not be modified. |
| 460 | * |
| 461 | * <p> In addition to reading $itype$s from the input buffer and writing |
| 462 | * $otype$s to the output buffer, this method returns a {@link CoderResult} |
| 463 | * object to describe its reason for termination: |
| 464 | * |
| 465 | * <ul> |
| 466 | * |
| 467 | * <li><p> {@link CoderResult#UNDERFLOW} indicates that as much of the |
| 468 | * input buffer as possible has been $code$d. If there is no further |
| 469 | * input then the invoker can proceed to the next step of the |
| 470 | * <a href="#steps">$coding$ operation</a>. Otherwise this method |
| 471 | * should be invoked again with further input. </p></li> |
| 472 | * |
| 473 | * <li><p> {@link CoderResult#OVERFLOW} indicates that there is |
| 474 | * insufficient space in the output buffer to $code$ any more $itype$s. |
| 475 | * This method should be invoked again with an output buffer that has |
| 476 | * more {@linkplain Buffer#remaining remaining} $otype$s. This is |
| 477 | * typically done by draining any $code$d $otype$s from the output |
| 478 | * buffer. </p></li> |
| 479 | * |
| 480 | * <li><p> A {@link CoderResult#malformedForLength |
| 481 | * </code>malformed-input<code>} result indicates that a malformed-input |
| 482 | * error has been detected. The malformed $itype$s begin at the input |
| 483 | * buffer's (possibly incremented) position; the number of malformed |
| 484 | * $itype$s may be determined by invoking the result object's {@link |
| 485 | * CoderResult#length() length} method. This case applies only if the |
| 486 | * {@link #onMalformedInput </code>malformed action<code>} of this $coder$ |
| 487 | * is {@link CodingErrorAction#REPORT}; otherwise the malformed input |
| 488 | * will be ignored or replaced, as requested. </p></li> |
| 489 | * |
| 490 | * <li><p> An {@link CoderResult#unmappableForLength |
| 491 | * </code>unmappable-character<code>} result indicates that an |
| 492 | * unmappable-character error has been detected. The $itype$s that |
| 493 | * $code$ the unmappable character begin at the input buffer's (possibly |
| 494 | * incremented) position; the number of such $itype$s may be determined |
| 495 | * by invoking the result object's {@link CoderResult#length() length} |
| 496 | * method. This case applies only if the {@link #onUnmappableCharacter |
| 497 | * </code>unmappable action<code>} of this $coder$ is {@link |
| 498 | * CodingErrorAction#REPORT}; otherwise the unmappable character will be |
| 499 | * ignored or replaced, as requested. </p></li> |
| 500 | * |
| 501 | * </ul> |
| 502 | * |
| 503 | * In any case, if this method is to be reinvoked in the same $coding$ |
| 504 | * operation then care should be taken to preserve any $itype$s remaining |
| 505 | * in the input buffer so that they are available to the next invocation. |
| 506 | * |
| 507 | * <p> The <tt>endOfInput</tt> parameter advises this method as to whether |
| 508 | * the invoker can provide further input beyond that contained in the given |
| 509 | * input buffer. If there is a possibility of providing additional input |
| 510 | * then the invoker should pass <tt>false</tt> for this parameter; if there |
| 511 | * is no possibility of providing further input then the invoker should |
| 512 | * pass <tt>true</tt>. It is not erroneous, and in fact it is quite |
| 513 | * common, to pass <tt>false</tt> in one invocation and later discover that |
| 514 | * no further input was actually available. It is critical, however, that |
| 515 | * the final invocation of this method in a sequence of invocations always |
| 516 | * pass <tt>true</tt> so that any remaining un$code$d input will be treated |
| 517 | * as being malformed. |
| 518 | * |
| 519 | * <p> This method works by invoking the {@link #$code$Loop $code$Loop} |
| 520 | * method, interpreting its results, handling error conditions, and |
| 521 | * reinvoking it as necessary. </p> |
| 522 | * |
| 523 | * |
| 524 | * @param in |
| 525 | * The input $itype$ buffer |
| 526 | * |
| 527 | * @param out |
| 528 | * The output $otype$ buffer |
| 529 | * |
| 530 | * @param endOfInput |
| 531 | * <tt>true</tt> if, and only if, the invoker can provide no |
| 532 | * additional input $itype$s beyond those in the given buffer |
| 533 | * |
| 534 | * @return A coder-result object describing the reason for termination |
| 535 | * |
| 536 | * @throws IllegalStateException |
| 537 | * If $a$ $coding$ operation is already in progress and the previous |
| 538 | * step was an invocation neither of the {@link #reset reset} |
| 539 | * method, nor of this method with a value of <tt>false</tt> for |
| 540 | * the <tt>endOfInput</tt> parameter, nor of this method with a |
| 541 | * value of <tt>true</tt> for the <tt>endOfInput</tt> parameter |
| 542 | * but a return value indicating an incomplete $coding$ operation |
| 543 | * |
| 544 | * @throws CoderMalfunctionError |
| 545 | * If an invocation of the $code$Loop method threw |
| 546 | * an unexpected exception |
| 547 | */ |
public final CoderResult $code$($Itype$Buffer in, $Otype$Buffer out,
                                boolean endOfInput)
{
    // Drives $code$Loop and applies the configured error actions to the
    // results it reports.

    // This call is permitted after reset() or a previous call with
    // endOfInput == false; a call with endOfInput == true may also follow
    // an earlier endOfInput == true call. Any other sequence is reported
    // through throwIllegalStateException.
    int newState = endOfInput ? ST_END : ST_CODING;
    if ((state != ST_RESET) && (state != ST_CODING)
        && !(endOfInput && (state == ST_END)))
        throwIllegalStateException(state, newState);
    state = newState;

    for (;;) {

        CoderResult cr;
        try {
            cr = $code$Loop(in, out);
        } catch (BufferUnderflowException x) {
            // A buffer exception escaping the loop is rethrown as a
            // CoderMalfunctionError
            throw new CoderMalfunctionError(x);
        } catch (BufferOverflowException x) {
            throw new CoderMalfunctionError(x);
        }

        // Output buffer is full: the invoker must drain it and call again
        if (cr.isOverflow())
            return cr;

        if (cr.isUnderflow()) {
            if (endOfInput && in.hasRemaining()) {
                // No more input will ever arrive, yet some is left over:
                // treat everything that remains as one malformed sequence
                cr = CoderResult.malformedForLength(in.remaining());
                // Fall through to malformed-input case
            } else {
                return cr;
            }
        }

        // From here on cr describes a coding error; pick the matching action
        CodingErrorAction action = null;
        if (cr.isMalformed())
            action = malformedInputAction;
        else if (cr.isUnmappable())
            action = unmappableCharacterAction;
        else
            assert false : cr.toString();

        if (action == CodingErrorAction.REPORT)
            return cr;

        if (action == CodingErrorAction.REPLACE) {
            // Returns before the input position is advanced below, so the
            // erroneous input has not been skipped when OVERFLOW is reported
            if (out.remaining() < replacement.$replLength$)
                return CoderResult.OVERFLOW;
            out.put(replacement);
        }

        if ((action == CodingErrorAction.IGNORE)
            || (action == CodingErrorAction.REPLACE)) {
            // Skip erroneous input either way
            in.position(in.position() + cr.length());
            continue;
        }

        // NOTE(review): reached only when action is none of REPORT, REPLACE
        // or IGNORE; with assertions disabled the loop simply runs again.
        assert false;
    }

}
| 608 | |
| 609 | /** |
| 610 | * Flushes this $coder$. |
| 611 | * |
| 612 | * <p> Some $coder$s maintain internal state and may need to write some |
| 613 | * final $otype$s to the output buffer once the overall input sequence has |
| 614 | * been read. |
| 615 | * |
| 616 | * <p> Any additional output is written to the output buffer beginning at |
| 617 | * its current position. At most {@link Buffer#remaining out.remaining()} |
| 618 | * $otype$s will be written. The buffer's position will be advanced |
| 619 | * appropriately, but its mark and limit will not be modified. |
| 620 | * |
| 621 | * <p> If this method completes successfully then it returns {@link |
| 622 | * CoderResult#UNDERFLOW}. If there is insufficient room in the output |
| 623 | * buffer then it returns {@link CoderResult#OVERFLOW}. If this happens |
| 624 | * then this method must be invoked again, with an output buffer that has |
| 625 | * more room, in order to complete the current <a href="#steps">$coding$ |
| 626 | * operation</a>. |
| 627 | * |
| 628 | * <p> If this $coder$ has already been flushed then invoking this method |
| 629 | * has no effect. |
| 630 | * |
| 631 | * <p> This method invokes the {@link #implFlush implFlush} method to |
| 632 | * perform the actual flushing operation. </p> |
| 633 | * |
| 634 | * @param out |
| 635 | * The output $otype$ buffer |
| 636 | * |
| 637 | * @return A coder-result object, either {@link CoderResult#UNDERFLOW} or |
| 638 | * {@link CoderResult#OVERFLOW} |
| 639 | * |
| 640 | * @throws IllegalStateException |
| 641 | * If the previous step of the current $coding$ operation was an |
| 642 | * invocation neither of the {@link #flush flush} method nor of |
| 643 | * the three-argument {@link |
| 644 | * #$code$($Itype$Buffer,$Otype$Buffer,boolean) $code$} method |
| 645 | * with a value of <tt>true</tt> for the <tt>endOfInput</tt> |
| 646 | * parameter |
| 647 | */ |
| 648 | public final CoderResult flush($Otype$Buffer out) { |
| 649 | if (state == ST_END) { |
| 650 | CoderResult cr = implFlush(out); |
| 651 | if (cr.isUnderflow()) |
| 652 | state = ST_FLUSHED; |
| 653 | return cr; |
| 654 | } |
| 655 | |
| 656 | if (state != ST_FLUSHED) |
| 657 | throwIllegalStateException(state, ST_FLUSHED); |
| 658 | |
| 659 | return CoderResult.UNDERFLOW; // Already flushed |
| 660 | } |
| 661 | |
| 662 | /** |
| 663 | * Flushes this $coder$. |
| 664 | * |
| 665 | * <p> The default implementation of this method does nothing, and always |
| 666 | * returns {@link CoderResult#UNDERFLOW}. This method should be overridden |
| 667 | * by $coder$s that may need to write final $otype$s to the output buffer |
| 668 | * once the entire input sequence has been read. </p> |
| 669 | * |
| 670 | * @param out |
| 671 | * The output $otype$ buffer |
| 672 | * |
| 673 | * @return A coder-result object, either {@link CoderResult#UNDERFLOW} or |
| 674 | * {@link CoderResult#OVERFLOW} |
| 675 | */ |
| 676 | protected CoderResult implFlush($Otype$Buffer out) { |
| 677 | return CoderResult.UNDERFLOW; |
| 678 | } |
| 679 | |
| 680 | /** |
| 681 | * Resets this $coder$, clearing any internal state. |
| 682 | * |
| 683 | * <p> This method resets charset-independent state and also invokes the |
| 684 | * {@link #implReset() implReset} method in order to perform any |
| 685 | * charset-specific reset actions. </p> |
| 686 | * |
| 687 | * @return This $coder$ |
| 688 | * |
| 689 | */ |
| 690 | public final Charset$Coder$ reset() { |
| 691 | implReset(); |
| 692 | state = ST_RESET; |
| 693 | return this; |
| 694 | } |
| 695 | |
| 696 | /** |
| 697 | * Resets this $coder$, clearing any charset-specific internal state. |
| 698 | * |
| 699 | * <p> The default implementation of this method does nothing. This method |
| 700 | * should be overridden by $coder$s that maintain internal state. </p> |
| 701 | */ |
| 702 | protected void implReset() { } |
| 703 | |
| 704 | /** |
| 705 | * $Code$s one or more $itype$s into one or more $otype$s. |
| 706 | * |
| 707 | * <p> This method encapsulates the basic $coding$ loop, $coding$ as many |
| 708 | * $itype$s as possible until it either runs out of input, runs out of room |
| 709 | * in the output buffer, or encounters $a$ $coding$ error. This method is |
| 710 | * invoked by the {@link #$code$ $code$} method, which handles result |
| 711 | * interpretation and error recovery. |
| 712 | * |
| 713 | * <p> The buffers are read from, and written to, starting at their current |
| 714 | * positions. At most {@link Buffer#remaining in.remaining()} $itype$s |
| 715 | * will be read, and at most {@link Buffer#remaining out.remaining()} |
| 716 | * $otype$s will be written. The buffers' positions will be advanced to |
| 717 | * reflect the $itype$s read and the $otype$s written, but their marks and |
| 718 | * limits will not be modified. |
| 719 | * |
| 720 | * <p> This method returns a {@link CoderResult} object to describe its |
| 721 | * reason for termination, in the same manner as the {@link #$code$ $code$} |
| 722 | * method. Most implementations of this method will handle $coding$ errors |
| 723 | * by returning an appropriate result object for interpretation by the |
| 724 | * {@link #$code$ $code$} method. An optimized implementation may instead |
| 725 | * examine the relevant error action and implement that action itself. |
| 726 | * |
| 727 | * <p> An implementation of this method may perform arbitrary lookahead by |
| 728 | * returning {@link CoderResult#UNDERFLOW} until it receives sufficient |
| 729 | * input. </p> |
| 730 | * |
| 731 | * @param in |
| 732 | * The input $itype$ buffer |
| 733 | * |
| 734 | * @param out |
| 735 | * The output $otype$ buffer |
| 736 | * |
| 737 | * @return A coder-result object describing the reason for termination |
| 738 | */ |
| 739 | protected abstract CoderResult $code$Loop($Itype$Buffer in, |
| 740 | $Otype$Buffer out); |
| 741 | |
| 742 | /** |
| 743 | * Convenience method that $code$s the remaining content of a single input |
| 744 | * $itype$ buffer into a newly-allocated $otype$ buffer. |
| 745 | * |
| 746 | * <p> This method implements an entire <a href="#steps">$coding$ |
| 747 | * operation</a>; that is, it resets this $coder$, then it $code$s the |
| 748 | * $itype$s in the given $itype$ buffer, and finally it flushes this |
| 749 | * $coder$. This method should therefore not be invoked if $a$ $coding$ |
| 750 | * operation is already in progress. </p> |
| 751 | * |
| 752 | * @param in |
| 753 | * The input $itype$ buffer |
| 754 | * |
| 755 | * @return A newly-allocated $otype$ buffer containing the result of the |
| 756 | * $coding$ operation. The buffer's position will be zero and its |
| 757 | * limit will follow the last $otype$ written. |
| 758 | * |
| 759 | * @throws IllegalStateException |
| 760 | * If $a$ $coding$ operation is already in progress |
| 761 | * |
| 762 | * @throws MalformedInputException |
| 763 | * If the $itype$ sequence starting at the input buffer's current |
| 764 | * position is $notLegal$ and the current malformed-input action |
| 765 | * is {@link CodingErrorAction#REPORT} |
| 766 | * |
| 767 | * @throws UnmappableCharacterException |
| 768 | * If the $itype$ sequence starting at the input buffer's current |
| 769 | * position cannot be mapped to an equivalent $otype$ sequence and |
| 770 | * the current unmappable-character action is {@link |
| 771 | * CodingErrorAction#REPORT} |
| 772 | */ |
| 773 | public final $Otype$Buffer $code$($Itype$Buffer in) |
| 774 | throws CharacterCodingException |
| 775 | { |
| 776 | int n = (int)(in.remaining() * average$ItypesPerOtype$()); |
| 777 | $Otype$Buffer out = $Otype$Buffer.allocate(n); |
| 778 | |
| 779 | if ((n == 0) && (in.remaining() == 0)) |
| 780 | return out; |
| 781 | reset(); |
| 782 | for (;;) { |
| 783 | CoderResult cr = in.hasRemaining() ? |
| 784 | $code$(in, out, true) : CoderResult.UNDERFLOW; |
| 785 | if (cr.isUnderflow()) |
| 786 | cr = flush(out); |
| 787 | |
| 788 | if (cr.isUnderflow()) |
| 789 | break; |
| 790 | if (cr.isOverflow()) { |
| 791 | n = 2*n + 1; // Ensure progress; n might be 0! |
| 792 | $Otype$Buffer o = $Otype$Buffer.allocate(n); |
| 793 | out.flip(); |
| 794 | o.put(out); |
| 795 | out = o; |
| 796 | continue; |
| 797 | } |
| 798 | cr.throwException(); |
| 799 | } |
| 800 | out.flip(); |
| 801 | return out; |
| 802 | } |
| 803 | |
| 804 | #if[decoder] |
| 805 | |
| 806 | /** |
| 807 | * Tells whether or not this decoder implements an auto-detecting charset. |
| 808 | * |
| 809 | * <p> The default implementation of this method always returns |
| 810 | * <tt>false</tt>; it should be overridden by auto-detecting decoders to |
| 811 | * return <tt>true</tt>. </p> |
| 812 | * |
| 813 | * @return <tt>true</tt> if, and only if, this decoder implements an |
| 814 | * auto-detecting charset |
| 815 | */ |
| 816 | public boolean isAutoDetecting() { |
| 817 | return false; |
| 818 | } |
| 819 | |
| 820 | /** |
| 821 | * Tells whether or not this decoder has yet detected a |
| 822 | * charset <i>(optional operation)</i>. |
| 823 | * |
| 824 | * <p> If this decoder implements an auto-detecting charset then at a |
| 825 | * single point during a decoding operation this method may start returning |
| 826 | * <tt>true</tt> to indicate that a specific charset has been detected in |
| 827 | * the input byte sequence. Once this occurs, the {@link #detectedCharset |
| 828 | * detectedCharset} method may be invoked to retrieve the detected charset. |
| 829 | * |
| 830 | * <p> That this method returns <tt>false</tt> does not imply that no bytes |
| 831 | * have yet been decoded. Some auto-detecting decoders are capable of |
| 832 | * decoding some, or even all, of an input byte sequence without fixing on |
| 833 | * a particular charset. |
| 834 | * |
| 835 | * <p> The default implementation of this method always throws an {@link |
| 836 | * UnsupportedOperationException}; it should be overridden by |
| 837 | * auto-detecting decoders to return <tt>true</tt> once the input charset |
| 838 | * has been determined. </p> |
| 839 | * |
| 840 | * @return <tt>true</tt> if, and only if, this decoder has detected a |
| 841 | * specific charset |
| 842 | * |
| 843 | * @throws UnsupportedOperationException |
| 844 | * If this decoder does not implement an auto-detecting charset |
| 845 | */ |
| 846 | public boolean isCharsetDetected() { |
| 847 | throw new UnsupportedOperationException(); |
| 848 | } |
| 849 | |
| 850 | /** |
| 851 | * Retrieves the charset that was detected by this |
| 852 | * decoder <i>(optional operation)</i>. |
| 853 | * |
| 854 | * <p> If this decoder implements an auto-detecting charset then this |
| 855 | * method returns the actual charset once it has been detected. After that |
| 856 | * point, this method returns the same value for the duration of the |
| 857 | * current decoding operation. If not enough input bytes have yet been |
| 858 | * read to determine the actual charset then this method throws an {@link |
| 859 | * IllegalStateException}. |
| 860 | * |
| 861 | * <p> The default implementation of this method always throws an {@link |
| 862 | * UnsupportedOperationException}; it should be overridden by |
| 863 | * auto-detecting decoders to return the appropriate value. </p> |
| 864 | * |
| 865 | * @return The charset detected by this auto-detecting decoder, |
| 866 | * or <tt>null</tt> if the charset has not yet been determined |
| 867 | * |
| 868 | * @throws IllegalStateException |
| 869 | * If insufficient bytes have been read to determine a charset |
| 870 | * |
| 871 | * @throws UnsupportedOperationException |
| 872 | * If this decoder does not implement an auto-detecting charset |
| 873 | */ |
| 874 | public Charset detectedCharset() { |
| 875 | throw new UnsupportedOperationException(); |
| 876 | } |
| 877 | |
| 878 | #end[decoder] |
| 879 | |
| 880 | #if[encoder] |
| 881 | |
| 882 | private boolean canEncode(CharBuffer cb) { |
| 883 | if (state == ST_FLUSHED) |
| 884 | reset(); |
| 885 | else if (state != ST_RESET) |
| 886 | throwIllegalStateException(state, ST_CODING); |
| 887 | CodingErrorAction ma = malformedInputAction(); |
| 888 | CodingErrorAction ua = unmappableCharacterAction(); |
| 889 | try { |
| 890 | onMalformedInput(CodingErrorAction.REPORT); |
| 891 | onUnmappableCharacter(CodingErrorAction.REPORT); |
| 892 | encode(cb); |
| 893 | } catch (CharacterCodingException x) { |
| 894 | return false; |
| 895 | } finally { |
| 896 | onMalformedInput(ma); |
| 897 | onUnmappableCharacter(ua); |
| 898 | reset(); |
| 899 | } |
| 900 | return true; |
| 901 | } |
| 902 | |
| 903 | /** |
| 904 | * Tells whether or not this encoder can encode the given character. |
| 905 | * |
| 906 | * <p> This method returns <tt>false</tt> if the given character is a |
| 907 | * surrogate character; such characters can be interpreted only when they |
| 908 | * are members of a pair consisting of a high surrogate followed by a low |
| 909 | * surrogate. The {@link #canEncode(java.lang.CharSequence) |
| 910 | * canEncode(CharSequence)} method may be used to test whether or not a |
| 911 | * character sequence can be encoded. |
| 912 | * |
| 913 | * <p> This method may modify this encoder's state; it should therefore not |
| 914 | * be invoked if an <a href="#steps">encoding operation</a> is already in |
| 915 | * progress. |
| 916 | * |
| 917 | * <p> The default implementation of this method is not very efficient; it |
| 918 | * should generally be overridden to improve performance. </p> |
| 919 | * |
| 920 | * @return <tt>true</tt> if, and only if, this encoder can encode |
| 921 | * the given character |
| 922 | * |
| 923 | * @throws IllegalStateException |
| 924 | * If $a$ $coding$ operation is already in progress |
| 925 | */ |
| 926 | public boolean canEncode(char c) { |
| 927 | CharBuffer cb = CharBuffer.allocate(1); |
| 928 | cb.put(c); |
| 929 | cb.flip(); |
| 930 | return canEncode(cb); |
| 931 | } |
| 932 | |
| 933 | /** |
| 934 | * Tells whether or not this encoder can encode the given character |
| 935 | * sequence. |
| 936 | * |
| 937 | * <p> If this method returns <tt>false</tt> for a particular character |
| 938 | * sequence then more information about why the sequence cannot be encoded |
| 939 | * may be obtained by performing a full <a href="#steps">encoding |
| 940 | * operation</a>. |
| 941 | * |
| 942 | * <p> This method may modify this encoder's state; it should therefore not |
| 943 | * be invoked if an encoding operation is already in progress. |
| 944 | * |
| 945 | * <p> The default implementation of this method is not very efficient; it |
| 946 | * should generally be overridden to improve performance. </p> |
| 947 | * |
| 948 | * @return <tt>true</tt> if, and only if, this encoder can encode |
| 949 | * the given character without throwing any exceptions and without |
| 950 | * performing any replacements |
| 951 | * |
| 952 | * @throws IllegalStateException |
| 953 | * If $a$ $coding$ operation is already in progress |
| 954 | */ |
| 955 | public boolean canEncode(CharSequence cs) { |
| 956 | CharBuffer cb; |
| 957 | if (cs instanceof CharBuffer) |
| 958 | cb = ((CharBuffer)cs).duplicate(); |
| 959 | else |
| 960 | cb = CharBuffer.wrap(cs.toString()); |
| 961 | return canEncode(cb); |
| 962 | } |
| 963 | |
| 964 | #end[encoder] |
| 965 | |
| 966 | |
| 967 | private void throwIllegalStateException(int from, int to) { |
| 968 | throw new IllegalStateException("Current state = " + stateNames[from] |
| 969 | + ", new state = " + stateNames[to]); |
| 970 | } |
| 971 | |
| 972 | } |