blob: 4b7c83f8084e8c9a24056133fa7c223e870da67d [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26#warn This file is preprocessed before being compiled
27
28package java.nio.charset;
29
30import java.nio.Buffer;
31import java.nio.ByteBuffer;
32import java.nio.CharBuffer;
33import java.nio.BufferOverflowException;
34import java.nio.BufferUnderflowException;
35import java.lang.ref.WeakReference;
36import java.nio.charset.CoderMalfunctionError; // javadoc
37
38
39/**
40 * An engine that can transform a sequence of $itypesPhrase$ into a sequence of
41 * $otypesPhrase$.
42 *
43 * <a name="steps"></a>
44 *
45 * <p> The input $itype$ sequence is provided in a $itype$ buffer or a series
46 * of such buffers. The output $otype$ sequence is written to a $otype$ buffer
47 * or a series of such buffers. $A$ $coder$ should always be used by making
48 * the following sequence of method invocations, hereinafter referred to as $a$
49 * <i>$coding$ operation</i>:
50 *
51 * <ol>
52 *
53 * <li><p> Reset the $coder$ via the {@link #reset reset} method, unless it
54 * has not been used before; </p></li>
55 *
56 * <li><p> Invoke the {@link #$code$ $code$} method zero or more times, as
57 * long as additional input may be available, passing <tt>false</tt> for the
58 * <tt>endOfInput</tt> argument and filling the input buffer and flushing the
59 * output buffer between invocations; </p></li>
60 *
61 * <li><p> Invoke the {@link #$code$ $code$} method one final time, passing
62 * <tt>true</tt> for the <tt>endOfInput</tt> argument; and then </p></li>
63 *
64 * <li><p> Invoke the {@link #flush flush} method so that the $coder$ can
65 * flush any internal state to the output buffer. </p></li>
66 *
67 * </ol>
68 *
69 * Each invocation of the {@link #$code$ $code$} method will $code$ as many
70 * $itype$s as possible from the input buffer, writing the resulting $otype$s
71 * to the output buffer. The {@link #$code$ $code$} method returns when more
72 * input is required, when there is not enough room in the output buffer, or
73 * when $a$ $coding$ error has occurred. In each case a {@link CoderResult}
74 * object is returned to describe the reason for termination. An invoker can
75 * examine this object and fill the input buffer, flush the output buffer, or
76 * attempt to recover from $a$ $coding$ error, as appropriate, and try again.
77 *
78 * <a name="ce"></a>
79 *
80 * <p> There are two general types of $coding$ errors. If the input $itype$
81 * sequence is $notLegal$ then the input is considered <i>malformed</i>. If
82 * the input $itype$ sequence is legal but cannot be mapped to a valid
83 * $outSequence$ then an <i>unmappable character</i> has been encountered.
84 *
85 * <a name="cae"></a>
86 *
87 * <p> How $a$ $coding$ error is handled depends upon the action requested for
88 * that type of error, which is described by an instance of the {@link
89 * CodingErrorAction} class. The possible error actions are to {@link
90 * CodingErrorAction#IGNORE </code>ignore<code>} the erroneous input, {@link
91 * CodingErrorAction#REPORT </code>report<code>} the error to the invoker via
92 * the returned {@link CoderResult} object, or {@link CodingErrorAction#REPLACE
93 * </code>replace<code>} the erroneous input with the current value of the
94 * replacement $replTypeName$. The replacement
95 *
96#if[encoder]
97 * is initially set to the $coder$'s default replacement, which often
98 * (but not always) has the initial value&nbsp;$defaultReplName$;
99#end[encoder]
100#if[decoder]
101 * has the initial value $defaultReplName$;
102#end[decoder]
103 *
104 * its value may be changed via the {@link #replaceWith($replFQType$)
105 * replaceWith} method.
106 *
107 * <p> The default action for malformed-input and unmappable-character errors
108 * is to {@link CodingErrorAction#REPORT </code>report<code>} them. The
109 * malformed-input error action may be changed via the {@link
110 * #onMalformedInput(CodingErrorAction) onMalformedInput} method; the
111 * unmappable-character action may be changed via the {@link
112 * #onUnmappableCharacter(CodingErrorAction) onUnmappableCharacter} method.
113 *
114 * <p> This class is designed to handle many of the details of the $coding$
115 * process, including the implementation of error actions. $A$ $coder$ for a
116 * specific charset, which is a concrete subclass of this class, need only
117 * implement the abstract {@link #$code$Loop $code$Loop} method, which
118 * encapsulates the basic $coding$ loop. A subclass that maintains internal
119 * state should, additionally, override the {@link #implFlush implFlush} and
120 * {@link #implReset implReset} methods.
121 *
122 * <p> Instances of this class are not safe for use by multiple concurrent
123 * threads. </p>
124 *
125 *
126 * @author Mark Reinhold
127 * @author JSR-51 Expert Group
128 * @since 1.4
129 *
130 * @see ByteBuffer
131 * @see CharBuffer
132 * @see Charset
133 * @see Charset$OtherCoder$
134 */
135
136public abstract class Charset$Coder$ {
137
138 private final Charset charset;
139 private final float average$ItypesPerOtype$;
140 private final float max$ItypesPerOtype$;
141
142 private $replType$ replacement;
143 private CodingErrorAction malformedInputAction
144 = CodingErrorAction.REPORT;
145 private CodingErrorAction unmappableCharacterAction
146 = CodingErrorAction.REPORT;
147
148 // Internal states
149 //
150 private static final int ST_RESET = 0;
151 private static final int ST_CODING = 1;
152 private static final int ST_END = 2;
153 private static final int ST_FLUSHED = 3;
154
155 private int state = ST_RESET;
156
157 private static String stateNames[]
158 = { "RESET", "CODING", "CODING_END", "FLUSHED" };
159
160
161 /**
162 * Initializes a new $coder$. The new $coder$ will have the given
163 * $otypes-per-itype$ and replacement values. </p>
164 *
165 * @param average$ItypesPerOtype$
166 * A positive float value indicating the expected number of
167 * $otype$s that will be produced for each input $itype$
168 *
169 * @param max$ItypesPerOtype$
170 * A positive float value indicating the maximum number of
171 * $otype$s that will be produced for each input $itype$
172 *
173 * @param replacement
174 * The initial replacement; must not be <tt>null</tt>, must have
175 * non-zero length, must not be longer than max$ItypesPerOtype$,
176 * and must be {@link #isLegalReplacement </code>legal<code>}
177 *
178 * @throws IllegalArgumentException
179 * If the preconditions on the parameters do not hold
180 */
{#if[encoder]?protected:private}
Charset$Coder$(Charset cs,
               float average$ItypesPerOtype$,
               float max$ItypesPerOtype$,
               $replType$ replacement)
{
    this.charset = cs;

    // Both sizing hints must be strictly positive
    if (average$ItypesPerOtype$ <= 0.0f) {
        throw new IllegalArgumentException("Non-positive "
                                           + "average$ItypesPerOtype$");
    }
    if (max$ItypesPerOtype$ <= 0.0f) {
        throw new IllegalArgumentException("Non-positive "
                                           + "max$ItypesPerOtype$");
    }

    // The average-versus-maximum consistency check is skipped when the
    // charset package is running at the "1.4" bug level
    if (!Charset.atBugLevel("1.4")
        && (average$ItypesPerOtype$ > max$ItypesPerOtype$)) {
        throw new IllegalArgumentException("average$ItypesPerOtype$"
                                           + " exceeds "
                                           + "max$ItypesPerOtype$");
    }

    this.replacement = replacement;
    this.average$ItypesPerOtype$ = average$ItypesPerOtype$;
    this.max$ItypesPerOtype$ = max$ItypesPerOtype$;

    // Must come after the maximum is assigned: replaceWith validates the
    // replacement's length against it
    replaceWith(replacement);
}
205
206 /**
207 * Initializes a new $coder$. The new $coder$ will have the given
208 * $otypes-per-itype$ values and its replacement will be the
209 * $replTypeName$ $defaultReplName$. </p>
210 *
211 * @param average$ItypesPerOtype$
212 * A positive float value indicating the expected number of
213 * $otype$s that will be produced for each input $itype$
214 *
215 * @param max$ItypesPerOtype$
216 * A positive float value indicating the maximum number of
217 * $otype$s that will be produced for each input $itype$
218 *
219 * @throws IllegalArgumentException
220 * If the preconditions on the parameters do not hold
221 */
protected Charset$Coder$(Charset cs,
                         float average$ItypesPerOtype$,
                         float max$ItypesPerOtype$)
{
    // Delegate to the full constructor, supplying the default replacement
    this(cs, average$ItypesPerOtype$, max$ItypesPerOtype$, $defaultRepl$);
}
230
231 /**
232 * Returns the charset that created this $coder$. </p>
233 *
234 * @return This $coder$'s charset
235 */
236 public final Charset charset() {
237 return charset;
238 }
239
240 /**
241 * Returns this $coder$'s replacement value. </p>
242 *
243 * @return This $coder$'s current replacement,
244 * which is never <tt>null</tt> and is never empty
245 */
246 public final $replType$ replacement() {
247 return replacement;
248 }
249
250 /**
251 * Changes this $coder$'s replacement value.
252 *
253 * <p> This method invokes the {@link #implReplaceWith implReplaceWith}
254 * method, passing the new replacement, after checking that the new
255 * replacement is acceptable. </p>
256 *
257 * @param newReplacement
258 *
259#if[decoder]
260 * The new replacement; must not be <tt>null</tt>
261 * and must have non-zero length
262#end[decoder]
263#if[encoder]
264 * The new replacement; must not be <tt>null</tt>, must have
265 * non-zero length, must not be longer than the value returned by
266 * the {@link #max$ItypesPerOtype$() max$ItypesPerOtype$} method, and
267 * must be {@link #isLegalReplacement </code>legal<code>}
268#end[encoder]
269 *
270 * @return This $coder$
271 *
272 * @throws IllegalArgumentException
273 * If the preconditions on the parameter do not hold
274 */
public final Charset$Coder$ replaceWith($replType$ newReplacement) {
    if (newReplacement == null)
        throw new IllegalArgumentException("Null replacement");
    int len = newReplacement.$replLength$;
    if (len == 0)
        throw new IllegalArgumentException("Empty replacement");
    if (len > max$ItypesPerOtype$)
        throw new IllegalArgumentException("Replacement too long");
#if[decoder]
    this.replacement = newReplacement;
#end[decoder]
#if[encoder]
    if (!isLegalReplacement(newReplacement))
        throw new IllegalArgumentException("Illegal replacement");
    // Keep a private copy of the byte array: the caller retains a
    // reference to the one it passed in and could otherwise modify the
    // replacement after it has been validated.
    this.replacement = newReplacement.clone();
#end[encoder]
    // Notify the subclass hook with the value that was actually stored
    implReplaceWith(this.replacement);
    return this;
}
291
292 /**
293 * Reports a change to this $coder$'s replacement value.
294 *
295 * <p> The default implementation of this method does nothing. This method
296 * should be overridden by $coder$s that require notification of changes to
297 * the replacement. </p>
298 *
299 * @param newReplacement
300 */
301 protected void implReplaceWith($replType$ newReplacement) {
302 }
303
304#if[encoder]
305
306 private WeakReference cachedDecoder = null;
307
308 /**
309 * Tells whether or not the given byte array is a legal replacement value
310 * for this encoder.
311 *
312 * <p> A replacement is legal if, and only if, it is a legal sequence of
313 * bytes in this encoder's charset; that is, it must be possible to decode
314 * the replacement into one or more sixteen-bit Unicode characters.
315 *
316 * <p> The default implementation of this method is not very efficient; it
317 * should generally be overridden to improve performance. </p>
318 *
319 * @param repl The byte array to be tested
320 *
321 * @return <tt>true</tt> if, and only if, the given byte array
322 * is a legal replacement value for this encoder
323 */
324 public boolean isLegalReplacement(byte[] repl) {
325 WeakReference wr = cachedDecoder;
326 CharsetDecoder dec = null;
327 if ((wr == null) || ((dec = (CharsetDecoder)wr.get()) == null)) {
328 dec = charset().newDecoder();
329 dec.onMalformedInput(CodingErrorAction.REPORT);
330 dec.onUnmappableCharacter(CodingErrorAction.REPORT);
331 cachedDecoder = new WeakReference(dec);
332 } else {
333 dec.reset();
334 }
335 ByteBuffer bb = ByteBuffer.wrap(repl);
336 CharBuffer cb = CharBuffer.allocate((int)(bb.remaining()
337 * dec.maxCharsPerByte()));
338 CoderResult cr = dec.decode(bb, cb, true);
339 return !cr.isError();
340 }
341
342#end[encoder]
343
344 /**
345 * Returns this $coder$'s current action for malformed-input errors. </p>
346 *
347 * @return The current malformed-input action, which is never <tt>null</tt>
348 */
349 public CodingErrorAction malformedInputAction() {
350 return malformedInputAction;
351 }
352
353 /**
354 * Changes this $coder$'s action for malformed-input errors. </p>
355 *
356 * <p> This method invokes the {@link #implOnMalformedInput
357 * implOnMalformedInput} method, passing the new action. </p>
358 *
359 * @param newAction The new action; must not be <tt>null</tt>
360 *
361 * @return This $coder$
362 *
363 * @throws IllegalArgumentException
364 * If the precondition on the parameter does not hold
365 */
366 public final Charset$Coder$ onMalformedInput(CodingErrorAction newAction) {
367 if (newAction == null)
368 throw new IllegalArgumentException("Null action");
369 malformedInputAction = newAction;
370 implOnMalformedInput(newAction);
371 return this;
372 }
373
374 /**
375 * Reports a change to this $coder$'s malformed-input action.
376 *
377 * <p> The default implementation of this method does nothing. This method
378 * should be overridden by $coder$s that require notification of changes to
379 * the malformed-input action. </p>
380 */
381 protected void implOnMalformedInput(CodingErrorAction newAction) { }
382
383 /**
384 * Returns this $coder$'s current action for unmappable-character errors.
385 * </p>
386 *
387 * @return The current unmappable-character action, which is never
388 * <tt>null</tt>
389 */
390 public CodingErrorAction unmappableCharacterAction() {
391 return unmappableCharacterAction;
392 }
393
394 /**
395 * Changes this $coder$'s action for unmappable-character errors.
396 *
397 * <p> This method invokes the {@link #implOnUnmappableCharacter
398 * implOnUnmappableCharacter} method, passing the new action. </p>
399 *
400 * @param newAction The new action; must not be <tt>null</tt>
401 *
402 * @return This $coder$
403 *
404 * @throws IllegalArgumentException
405 * If the precondition on the parameter does not hold
406 */
407 public final Charset$Coder$ onUnmappableCharacter(CodingErrorAction
408 newAction)
409 {
410 if (newAction == null)
411 throw new IllegalArgumentException("Null action");
412 unmappableCharacterAction = newAction;
413 implOnUnmappableCharacter(newAction);
414 return this;
415 }
416
417 /**
418 * Reports a change to this $coder$'s unmappable-character action.
419 *
420 * <p> The default implementation of this method does nothing. This method
421 * should be overridden by $coder$s that require notification of changes to
422 * the unmappable-character action. </p>
423 */
424 protected void implOnUnmappableCharacter(CodingErrorAction newAction) { }
425
426 /**
427 * Returns the average number of $otype$s that will be produced for each
428 * $itype$ of input. This heuristic value may be used to estimate the size
429 * of the output buffer required for a given input sequence. </p>
430 *
431 * @return The average number of $otype$s produced
432 * per $itype$ of input
433 */
434 public final float average$ItypesPerOtype$() {
435 return average$ItypesPerOtype$;
436 }
437
438 /**
439 * Returns the maximum number of $otype$s that will be produced for each
440 * $itype$ of input. This value may be used to compute the worst-case size
441 * of the output buffer required for a given input sequence. </p>
442 *
443 * @return The maximum number of $otype$s that will be produced per
444 * $itype$ of input
445 */
446 public final float max$ItypesPerOtype$() {
447 return max$ItypesPerOtype$;
448 }
449
450 /**
451 * $Code$s as many $itype$s as possible from the given input buffer,
452 * writing the results to the given output buffer.
453 *
454 * <p> The buffers are read from, and written to, starting at their current
455 * positions. At most {@link Buffer#remaining in.remaining()} $itype$s
456 * will be read and at most {@link Buffer#remaining out.remaining()}
457 * $otype$s will be written. The buffers' positions will be advanced to
458 * reflect the $itype$s read and the $otype$s written, but their marks and
459 * limits will not be modified.
460 *
461 * <p> In addition to reading $itype$s from the input buffer and writing
462 * $otype$s to the output buffer, this method returns a {@link CoderResult}
463 * object to describe its reason for termination:
464 *
465 * <ul>
466 *
467 * <li><p> {@link CoderResult#UNDERFLOW} indicates that as much of the
468 * input buffer as possible has been $code$d. If there is no further
469 * input then the invoker can proceed to the next step of the
470 * <a href="#steps">$coding$ operation</a>. Otherwise this method
471 * should be invoked again with further input. </p></li>
472 *
473 * <li><p> {@link CoderResult#OVERFLOW} indicates that there is
474 * insufficient space in the output buffer to $code$ any more $itype$s.
475 * This method should be invoked again with an output buffer that has
476 * more {@linkplain Buffer#remaining remaining} $otype$s. This is
477 * typically done by draining any $code$d $otype$s from the output
478 * buffer. </p></li>
479 *
480 * <li><p> A {@link CoderResult#malformedForLength
481 * </code>malformed-input<code>} result indicates that a malformed-input
482 * error has been detected. The malformed $itype$s begin at the input
483 * buffer's (possibly incremented) position; the number of malformed
484 * $itype$s may be determined by invoking the result object's {@link
485 * CoderResult#length() length} method. This case applies only if the
486 * {@link #onMalformedInput </code>malformed action<code>} of this $coder$
487 * is {@link CodingErrorAction#REPORT}; otherwise the malformed input
488 * will be ignored or replaced, as requested. </p></li>
489 *
490 * <li><p> An {@link CoderResult#unmappableForLength
491 * </code>unmappable-character<code>} result indicates that an
492 * unmappable-character error has been detected. The $itype$s that
493 * $code$ the unmappable character begin at the input buffer's (possibly
494 * incremented) position; the number of such $itype$s may be determined
495 * by invoking the result object's {@link CoderResult#length() length}
496 * method. This case applies only if the {@link #onUnmappableCharacter
497 * </code>unmappable action<code>} of this $coder$ is {@link
498 * CodingErrorAction#REPORT}; otherwise the unmappable character will be
499 * ignored or replaced, as requested. </p></li>
500 *
501 * </ul>
502 *
503 * In any case, if this method is to be reinvoked in the same $coding$
504 * operation then care should be taken to preserve any $itype$s remaining
505 * in the input buffer so that they are available to the next invocation.
506 *
507 * <p> The <tt>endOfInput</tt> parameter advises this method as to whether
508 * the invoker can provide further input beyond that contained in the given
509 * input buffer. If there is a possibility of providing additional input
510 * then the invoker should pass <tt>false</tt> for this parameter; if there
511 * is no possibility of providing further input then the invoker should
512 * pass <tt>true</tt>. It is not erroneous, and in fact it is quite
513 * common, to pass <tt>false</tt> in one invocation and later discover that
514 * no further input was actually available. It is critical, however, that
515 * the final invocation of this method in a sequence of invocations always
516 * pass <tt>true</tt> so that any remaining un$code$d input will be treated
517 * as being malformed.
518 *
519 * <p> This method works by invoking the {@link #$code$Loop $code$Loop}
520 * method, interpreting its results, handling error conditions, and
521 * reinvoking it as necessary. </p>
522 *
523 *
524 * @param in
525 * The input $itype$ buffer
526 *
527 * @param out
528 * The output $otype$ buffer
529 *
530 * @param endOfInput
531 * <tt>true</tt> if, and only if, the invoker can provide no
532 * additional input $itype$s beyond those in the given buffer
533 *
534 * @return A coder-result object describing the reason for termination
535 *
536 * @throws IllegalStateException
537 * If $a$ $coding$ operation is already in progress and the previous
538 * step was an invocation neither of the {@link #reset reset}
539 * method, nor of this method with a value of <tt>false</tt> for
540 * the <tt>endOfInput</tt> parameter, nor of this method with a
541 * value of <tt>true</tt> for the <tt>endOfInput</tt> parameter
542 * but a return value indicating an incomplete $coding$ operation
543 *
544 * @throws CoderMalfunctionError
545 * If an invocation of the $code$Loop method threw
546 * an unexpected exception
547 */
548 public final CoderResult $code$($Itype$Buffer in, $Otype$Buffer out,
549 boolean endOfInput)
550 {
551 int newState = endOfInput ? ST_END : ST_CODING;
552 if ((state != ST_RESET) && (state != ST_CODING)
553 && !(endOfInput && (state == ST_END)))
554 throwIllegalStateException(state, newState);
555 state = newState;
556
557 for (;;) {
558
559 CoderResult cr;
560 try {
561 cr = $code$Loop(in, out);
562 } catch (BufferUnderflowException x) {
563 throw new CoderMalfunctionError(x);
564 } catch (BufferOverflowException x) {
565 throw new CoderMalfunctionError(x);
566 }
567
568 if (cr.isOverflow())
569 return cr;
570
571 if (cr.isUnderflow()) {
572 if (endOfInput && in.hasRemaining()) {
573 cr = CoderResult.malformedForLength(in.remaining());
574 // Fall through to malformed-input case
575 } else {
576 return cr;
577 }
578 }
579
580 CodingErrorAction action = null;
581 if (cr.isMalformed())
582 action = malformedInputAction;
583 else if (cr.isUnmappable())
584 action = unmappableCharacterAction;
585 else
586 assert false : cr.toString();
587
588 if (action == CodingErrorAction.REPORT)
589 return cr;
590
591 if (action == CodingErrorAction.REPLACE) {
592 if (out.remaining() < replacement.$replLength$)
593 return CoderResult.OVERFLOW;
594 out.put(replacement);
595 }
596
597 if ((action == CodingErrorAction.IGNORE)
598 || (action == CodingErrorAction.REPLACE)) {
599 // Skip erroneous input either way
600 in.position(in.position() + cr.length());
601 continue;
602 }
603
604 assert false;
605 }
606
607 }
608
609 /**
610 * Flushes this $coder$.
611 *
612 * <p> Some $coder$s maintain internal state and may need to write some
613 * final $otype$s to the output buffer once the overall input sequence has
614 * been read.
615 *
616 * <p> Any additional output is written to the output buffer beginning at
617 * its current position. At most {@link Buffer#remaining out.remaining()}
618 * $otype$s will be written. The buffer's position will be advanced
619 * appropriately, but its mark and limit will not be modified.
620 *
621 * <p> If this method completes successfully then it returns {@link
622 * CoderResult#UNDERFLOW}. If there is insufficient room in the output
623 * buffer then it returns {@link CoderResult#OVERFLOW}. If this happens
624 * then this method must be invoked again, with an output buffer that has
625 * more room, in order to complete the current <a href="#steps">$coding$
626 * operation</a>.
627 *
628 * <p> If this $coder$ has already been flushed then invoking this method
629 * has no effect.
630 *
631 * <p> This method invokes the {@link #implFlush implFlush} method to
632 * perform the actual flushing operation. </p>
633 *
634 * @param out
635 * The output $otype$ buffer
636 *
637 * @return A coder-result object, either {@link CoderResult#UNDERFLOW} or
638 * {@link CoderResult#OVERFLOW}
639 *
640 * @throws IllegalStateException
641 * If the previous step of the current $coding$ operation was an
642 * invocation neither of the {@link #flush flush} method nor of
643 * the three-argument {@link
644 * #$code$($Itype$Buffer,$Otype$Buffer,boolean) $code$} method
645 * with a value of <tt>true</tt> for the <tt>endOfInput</tt>
646 * parameter
647 */
648 public final CoderResult flush($Otype$Buffer out) {
649 if (state == ST_END) {
650 CoderResult cr = implFlush(out);
651 if (cr.isUnderflow())
652 state = ST_FLUSHED;
653 return cr;
654 }
655
656 if (state != ST_FLUSHED)
657 throwIllegalStateException(state, ST_FLUSHED);
658
659 return CoderResult.UNDERFLOW; // Already flushed
660 }
661
662 /**
663 * Flushes this $coder$.
664 *
665 * <p> The default implementation of this method does nothing, and always
666 * returns {@link CoderResult#UNDERFLOW}. This method should be overridden
667 * by $coder$s that may need to write final $otype$s to the output buffer
668 * once the entire input sequence has been read. </p>
669 *
670 * @param out
671 * The output $otype$ buffer
672 *
673 * @return A coder-result object, either {@link CoderResult#UNDERFLOW} or
674 * {@link CoderResult#OVERFLOW}
675 */
676 protected CoderResult implFlush($Otype$Buffer out) {
677 return CoderResult.UNDERFLOW;
678 }
679
680 /**
681 * Resets this $coder$, clearing any internal state.
682 *
683 * <p> This method resets charset-independent state and also invokes the
684 * {@link #implReset() implReset} method in order to perform any
685 * charset-specific reset actions. </p>
686 *
687 * @return This $coder$
688 *
689 */
690 public final Charset$Coder$ reset() {
691 implReset();
692 state = ST_RESET;
693 return this;
694 }
695
696 /**
697 * Resets this $coder$, clearing any charset-specific internal state.
698 *
699 * <p> The default implementation of this method does nothing. This method
700 * should be overridden by $coder$s that maintain internal state. </p>
701 */
702 protected void implReset() { }
703
704 /**
705 * $Code$s one or more $itype$s into one or more $otype$s.
706 *
707 * <p> This method encapsulates the basic $coding$ loop, $coding$ as many
708 * $itype$s as possible until it either runs out of input, runs out of room
709 * in the output buffer, or encounters $a$ $coding$ error. This method is
710 * invoked by the {@link #$code$ $code$} method, which handles result
711 * interpretation and error recovery.
712 *
713 * <p> The buffers are read from, and written to, starting at their current
714 * positions. At most {@link Buffer#remaining in.remaining()} $itype$s
715 * will be read, and at most {@link Buffer#remaining out.remaining()}
716 * $otype$s will be written. The buffers' positions will be advanced to
717 * reflect the $itype$s read and the $otype$s written, but their marks and
718 * limits will not be modified.
719 *
720 * <p> This method returns a {@link CoderResult} object to describe its
721 * reason for termination, in the same manner as the {@link #$code$ $code$}
722 * method. Most implementations of this method will handle $coding$ errors
723 * by returning an appropriate result object for interpretation by the
724 * {@link #$code$ $code$} method. An optimized implementation may instead
725 * examine the relevant error action and implement that action itself.
726 *
727 * <p> An implementation of this method may perform arbitrary lookahead by
728 * returning {@link CoderResult#UNDERFLOW} until it receives sufficient
729 * input. </p>
730 *
731 * @param in
732 * The input $itype$ buffer
733 *
734 * @param out
735 * The output $otype$ buffer
736 *
737 * @return A coder-result object describing the reason for termination
738 */
739 protected abstract CoderResult $code$Loop($Itype$Buffer in,
740 $Otype$Buffer out);
741
742 /**
743 * Convenience method that $code$s the remaining content of a single input
744 * $itype$ buffer into a newly-allocated $otype$ buffer.
745 *
746 * <p> This method implements an entire <a href="#steps">$coding$
747 * operation</a>; that is, it resets this $coder$, then it $code$s the
748 * $itype$s in the given $itype$ buffer, and finally it flushes this
749 * $coder$. This method should therefore not be invoked if $a$ $coding$
750 * operation is already in progress. </p>
751 *
752 * @param in
753 * The input $itype$ buffer
754 *
755 * @return A newly-allocated $otype$ buffer containing the result of the
756 * $coding$ operation. The buffer's position will be zero and its
757 * limit will follow the last $otype$ written.
758 *
759 * @throws IllegalStateException
760 * If $a$ $coding$ operation is already in progress
761 *
762 * @throws MalformedInputException
763 * If the $itype$ sequence starting at the input buffer's current
764 * position is $notLegal$ and the current malformed-input action
765 * is {@link CodingErrorAction#REPORT}
766 *
767 * @throws UnmappableCharacterException
768 * If the $itype$ sequence starting at the input buffer's current
769 * position cannot be mapped to an equivalent $otype$ sequence and
770 * the current unmappable-character action is {@link
771 * CodingErrorAction#REPORT}
772 */
773 public final $Otype$Buffer $code$($Itype$Buffer in)
774 throws CharacterCodingException
775 {
776 int n = (int)(in.remaining() * average$ItypesPerOtype$());
777 $Otype$Buffer out = $Otype$Buffer.allocate(n);
778
779 if ((n == 0) && (in.remaining() == 0))
780 return out;
781 reset();
782 for (;;) {
783 CoderResult cr = in.hasRemaining() ?
784 $code$(in, out, true) : CoderResult.UNDERFLOW;
785 if (cr.isUnderflow())
786 cr = flush(out);
787
788 if (cr.isUnderflow())
789 break;
790 if (cr.isOverflow()) {
791 n = 2*n + 1; // Ensure progress; n might be 0!
792 $Otype$Buffer o = $Otype$Buffer.allocate(n);
793 out.flip();
794 o.put(out);
795 out = o;
796 continue;
797 }
798 cr.throwException();
799 }
800 out.flip();
801 return out;
802 }
803
804#if[decoder]
805
806 /**
807 * Tells whether or not this decoder implements an auto-detecting charset.
808 *
809 * <p> The default implementation of this method always returns
810 * <tt>false</tt>; it should be overridden by auto-detecting decoders to
811 * return <tt>true</tt>. </p>
812 *
813 * @return <tt>true</tt> if, and only if, this decoder implements an
814 * auto-detecting charset
815 */
816 public boolean isAutoDetecting() {
817 return false;
818 }
819
820 /**
821 * Tells whether or not this decoder has yet detected a
822 * charset&nbsp;&nbsp;<i>(optional operation)</i>.
823 *
824 * <p> If this decoder implements an auto-detecting charset then at a
825 * single point during a decoding operation this method may start returning
826 * <tt>true</tt> to indicate that a specific charset has been detected in
827 * the input byte sequence. Once this occurs, the {@link #detectedCharset
828 * detectedCharset} method may be invoked to retrieve the detected charset.
829 *
830 * <p> That this method returns <tt>false</tt> does not imply that no bytes
831 * have yet been decoded. Some auto-detecting decoders are capable of
832 * decoding some, or even all, of an input byte sequence without fixing on
833 * a particular charset.
834 *
835 * <p> The default implementation of this method always throws an {@link
836 * UnsupportedOperationException}; it should be overridden by
837 * auto-detecting decoders to return <tt>true</tt> once the input charset
838 * has been determined. </p>
839 *
840 * @return <tt>true</tt> if, and only if, this decoder has detected a
841 * specific charset
842 *
843 * @throws UnsupportedOperationException
844 * If this decoder does not implement an auto-detecting charset
845 */
846 public boolean isCharsetDetected() {
847 throw new UnsupportedOperationException();
848 }
849
850 /**
851 * Retrieves the charset that was detected by this
852 * decoder&nbsp;&nbsp;<i>(optional operation)</i>.
853 *
854 * <p> If this decoder implements an auto-detecting charset then this
855 * method returns the actual charset once it has been detected. After that
856 * point, this method returns the same value for the duration of the
857 * current decoding operation. If not enough input bytes have yet been
858 * read to determine the actual charset then this method throws an {@link
859 * IllegalStateException}.
860 *
861 * <p> The default implementation of this method always throws an {@link
862 * UnsupportedOperationException}; it should be overridden by
863 * auto-detecting decoders to return the appropriate value. </p>
864 *
865 * @return The charset detected by this auto-detecting decoder,
866 * or <tt>null</tt> if the charset has not yet been determined
867 *
868 * @throws IllegalStateException
869 * If insufficient bytes have been read to determine a charset
870 *
871 * @throws UnsupportedOperationException
872 * If this decoder does not implement an auto-detecting charset
873 */
874 public Charset detectedCharset() {
875 throw new UnsupportedOperationException();
876 }
877
878#end[decoder]
879
880#if[encoder]
881
882 private boolean canEncode(CharBuffer cb) {
883 if (state == ST_FLUSHED)
884 reset();
885 else if (state != ST_RESET)
886 throwIllegalStateException(state, ST_CODING);
887 CodingErrorAction ma = malformedInputAction();
888 CodingErrorAction ua = unmappableCharacterAction();
889 try {
890 onMalformedInput(CodingErrorAction.REPORT);
891 onUnmappableCharacter(CodingErrorAction.REPORT);
892 encode(cb);
893 } catch (CharacterCodingException x) {
894 return false;
895 } finally {
896 onMalformedInput(ma);
897 onUnmappableCharacter(ua);
898 reset();
899 }
900 return true;
901 }
902
903 /**
904 * Tells whether or not this encoder can encode the given character.
905 *
906 * <p> This method returns <tt>false</tt> if the given character is a
907 * surrogate character; such characters can be interpreted only when they
908 * are members of a pair consisting of a high surrogate followed by a low
909 * surrogate. The {@link #canEncode(java.lang.CharSequence)
910 * canEncode(CharSequence)} method may be used to test whether or not a
911 * character sequence can be encoded.
912 *
913 * <p> This method may modify this encoder's state; it should therefore not
914 * be invoked if an <a href="#steps">encoding operation</a> is already in
915 * progress.
916 *
917 * <p> The default implementation of this method is not very efficient; it
918 * should generally be overridden to improve performance. </p>
919 *
920 * @return <tt>true</tt> if, and only if, this encoder can encode
921 * the given character
922 *
923 * @throws IllegalStateException
924 * If $a$ $coding$ operation is already in progress
925 */
926 public boolean canEncode(char c) {
927 CharBuffer cb = CharBuffer.allocate(1);
928 cb.put(c);
929 cb.flip();
930 return canEncode(cb);
931 }
932
933 /**
934 * Tells whether or not this encoder can encode the given character
935 * sequence.
936 *
937 * <p> If this method returns <tt>false</tt> for a particular character
938 * sequence then more information about why the sequence cannot be encoded
939 * may be obtained by performing a full <a href="#steps">encoding
940 * operation</a>.
941 *
942 * <p> This method may modify this encoder's state; it should therefore not
943 * be invoked if an encoding operation is already in progress.
944 *
945 * <p> The default implementation of this method is not very efficient; it
946 * should generally be overridden to improve performance. </p>
947 *
 * @return  <tt>true</tt> if, and only if, this encoder can encode
 *          the given character sequence without throwing any exceptions
 *          and without performing any replacements
951 *
952 * @throws IllegalStateException
953 * If $a$ $coding$ operation is already in progress
954 */
955 public boolean canEncode(CharSequence cs) {
956 CharBuffer cb;
957 if (cs instanceof CharBuffer)
958 cb = ((CharBuffer)cs).duplicate();
959 else
960 cb = CharBuffer.wrap(cs.toString());
961 return canEncode(cb);
962 }
963
964#end[encoder]
965
966
967 private void throwIllegalStateException(int from, int to) {
968 throw new IllegalStateException("Current state = " + stateNames[from]
969 + ", new state = " + stateNames[to]);
970 }
971
972}