/*
 * Copyright 1995-2005 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
25
26package sun.misc;
27
28import java.io.InputStream;
29import java.io.ByteArrayInputStream;
30import java.io.OutputStream;
31import java.io.ByteArrayOutputStream;
32import java.io.PrintStream;
33import java.io.IOException;
34import java.nio.ByteBuffer;
35
36
/**
 * This class defines the encoding half of character encoders.
 * A character encoder is an algorithm for transforming 8 bit binary
 * data into text (generally 7 bit ASCII or 8 bit ISO-Latin-1 text)
 * for transmission over text channels such as e-mail and network news.
 *
 * The character encoders have been structured around a central theme
 * that, in general, the encoded text has the form:
 *
 * <pre>
 *      [Buffer Prefix]
 *      [Line Prefix][encoded data atoms][Line Suffix]
 *      [Buffer Suffix]
 * </pre>
 *
 * In the CharacterEncoder and CharacterDecoder classes, one complete
 * chunk of data is referred to as a <i>buffer</i>. Encoded buffers
 * are all text, and decoded buffers (sometimes just referred to as
 * buffers) are binary octets.
 *
 * To create a custom encoder, you must, at a minimum, override three
 * abstract methods in this class.
 * <DL>
 * <DD>bytesPerAtom which tells the encoder how many bytes to
 * send to encodeAtom
 * <DD>encodeAtom which encodes the bytes sent to it as text.
 * <DD>bytesPerLine which tells the encoder the maximum number of
 * bytes per line.
 * </DL>
 *
 * Several useful encoders have already been written and are
 * referenced in the See Also list below.
 *
 * @author      Chuck McManis
 * @see         CharacterDecoder
 * @see         UCEncoder
 * @see         UUEncoder
 * @see         BASE64Encoder
 */
public abstract class CharacterEncoder {

    /**
     * Charset name used by the 'streamless' methods to turn the encoded
     * bytes into a String. It maps every byte to exactly one char, so the
     * result does not depend on the platform default charset.
     */
    private static final String OUTPUT_CHARSET = "8859_1";

    /** Stream that understands "printing" */
    protected PrintStream pStream;

    /** Return the number of bytes per atom of encoding */
    abstract protected int bytesPerAtom();

    /** Return the number of bytes that can be encoded per line */
    abstract protected int bytesPerLine();

    /**
     * Encode the prefix for the entire buffer. By default it simply
     * opens the PrintStream for use by the other functions.
     *
     * @param aStream the stream the encoded text is written to
     * @throws IOException declared for the benefit of overriding encoders
     */
    protected void encodeBufferPrefix(OutputStream aStream) throws IOException {
        pStream = new PrintStream(aStream);
    }

    /**
     * Encode the suffix for the entire buffer. The default implementation
     * writes nothing.
     *
     * @param aStream the stream the encoded text is written to
     * @throws IOException declared for the benefit of overriding encoders
     */
    protected void encodeBufferSuffix(OutputStream aStream) throws IOException {
    }

    /**
     * Encode the prefix that starts every output line. The default
     * implementation writes nothing.
     *
     * @param aStream the stream the encoded text is written to
     * @param aLength the number of input bytes that will be encoded on
     *                this line
     * @throws IOException declared for the benefit of overriding encoders
     */
    protected void encodeLinePrefix(OutputStream aStream, int aLength)
    throws IOException {
    }

    /**
     * Encode the suffix that ends every output line. By default
     * this method just prints a &lt;newline&gt; into the output stream.
     * Note that the default writes through <code>pStream</code>, which is
     * set up by the default <code>encodeBufferPrefix</code>, and not through
     * <i>aStream</i> directly.
     *
     * @param aStream the stream the encoded text is written to
     * @throws IOException declared for the benefit of overriding encoders
     */
    protected void encodeLineSuffix(OutputStream aStream) throws IOException {
        pStream.println();
    }

    /**
     * Encode one "atom" of information into characters.
     *
     * @param aStream   the stream the encoded text is written to
     * @param someBytes the buffer holding the bytes of the current line
     * @param anOffset  index in <i>someBytes</i> of the first byte of the atom
     * @param aLength   number of bytes in the atom; equal to
     *                  <code>bytesPerAtom()</code> except possibly for the
     *                  last atom of a line, which may be shorter
     * @throws IOException if the encoded text cannot be written
     */
    abstract protected void encodeAtom(OutputStream aStream, byte someBytes[],
                int anOffset, int aLength) throws IOException;

    /**
     * Fill <i>buffer</i> from <i>in</i>, reading repeatedly until the
     * buffer is full or the end of the stream is reached. A single
     * <code>read</code> call is allowed to return fewer bytes than were
     * asked for, so one call is not enough to know a line is complete.
     *
     * @param in     the stream to read from
     * @param buffer the buffer to fill, starting at index 0
     * @return the number of bytes stored in <i>buffer</i>; less than
     *         <code>buffer.length</code> only if the stream ended
     * @throws java.io.IOException if the underlying read fails
     */
    protected int readFully(InputStream in, byte buffer[])
        throws java.io.IOException {
        int total = 0;
        while (total < buffer.length) {
            int count = in.read(buffer, total, buffer.length - total);
            if (count < 0) {
                // End of stream: report the partial fill.
                break;
            }
            total += count;
        }
        return total;
    }

    /**
     * Encode bytes from the input stream, and write them as text characters
     * to the output stream. This method will run until it exhausts the
     * input stream, but does not print the line suffix for a final
     * line that is shorter than bytesPerLine().
     *
     * @param inStream  source of the binary data
     * @param outStream destination of the encoded text
     * @throws IOException if reading or writing fails
     */
    public void encode(InputStream inStream, OutputStream outStream)
        throws IOException {
        encodeStream(inStream, outStream, false);
    }

    /**
     * Encode the buffer in <i>aBuffer</i> and write the encoded
     * result to the OutputStream <i>aStream</i>.
     *
     * @param aBuffer the bytes to encode
     * @param aStream destination of the encoded text
     * @throws IOException if writing fails
     */
    public void encode(byte aBuffer[], OutputStream aStream)
    throws IOException {
        ByteArrayInputStream inStream = new ByteArrayInputStream(aBuffer);
        encode(inStream, aStream);
    }

    /**
     * A 'streamless' version of encode that simply takes a buffer of
     * bytes and returns a string containing the encoded buffer.
     *
     * @param aBuffer the bytes to encode
     * @return the encoded text, one char per encoded byte
     * @throws Error if encoding into the in-memory stream fails; the
     *         original exception is attached as the cause
     */
    public String encode(byte aBuffer[]) {
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        ByteArrayInputStream inStream = new ByteArrayInputStream(aBuffer);
        try {
            encode(inStream, outStream);
            // explicit ascii->unicode conversion
            return outStream.toString(OUTPUT_CHARSET);
        } catch (Exception e) {
            // This should never happen with in-memory streams. Every
            // Exception is still wrapped (as before) so existing callers
            // observe the same Error, now with its cause preserved.
            throw new Error("CharacterEncoder.encode internal error", e);
        }
    }

    /**
     * Return a byte array from the remaining bytes in this ByteBuffer.
     * <P>
     * The ByteBuffer's position will be advanced to ByteBuffer's limit.
     * <P>
     * To avoid an extra copy, the implementation will attempt to return the
     * byte array backing the ByteBuffer.  If this is not possible, a
     * new byte array will be created.
     *
     * @param bb the buffer whose remaining bytes are wanted
     * @return the remaining bytes; this may be the buffer's own backing
     *         array, so it must be treated as read-only
     */
    private byte [] getBytes(ByteBuffer bb) {
        /*
         * If it has a usable backing byte buffer, use it.  Use only
         * if the array exactly represents the current ByteBuffer.
         */
        if (bb.hasArray()) {
            byte [] backing = bb.array();
            if ((backing.length == bb.capacity()) &&
                    (backing.length == bb.remaining())) {
                bb.position(bb.limit());
                return backing;
            }
        }

        /*
         * This class doesn't have a concept of encode(buf, len, off),
         * so if we have a partial buffer, we must reallocate
         * space. The array is sized to remaining(), so the bulk get
         * cannot underflow; it also advances position() to the limit.
         */
        byte [] copy = new byte[bb.remaining()];
        bb.get(copy);
        return copy;
    }

    /**
     * Encode the <i>aBuffer</i> ByteBuffer and write the encoded
     * result to the OutputStream <i>aStream</i>.
     * <P>
     * The ByteBuffer's position will be advanced to ByteBuffer's limit.
     *
     * @param aBuffer the buffer whose remaining bytes are encoded
     * @param aStream destination of the encoded text
     * @throws IOException if writing fails
     */
    public void encode(ByteBuffer aBuffer, OutputStream aStream)
        throws IOException {
        byte [] buf = getBytes(aBuffer);
        encode(buf, aStream);
    }

    /**
     * A 'streamless' version of encode that simply takes a ByteBuffer
     * and returns a string containing the encoded buffer.
     * <P>
     * The ByteBuffer's position will be advanced to ByteBuffer's limit.
     *
     * @param aBuffer the buffer whose remaining bytes are encoded
     * @return the encoded text
     */
    public String encode(ByteBuffer aBuffer) {
        byte [] buf = getBytes(aBuffer);
        return encode(buf);
    }

    /**
     * Encode bytes from the input stream, and write them as text characters
     * to the output stream. This method will run until it exhausts the
     * input stream. It differs from encode in that it will add the
     * line suffix at the end of a final line that is shorter than
     * bytesPerLine().
     *
     * @param inStream  source of the binary data
     * @param outStream destination of the encoded text
     * @throws IOException if reading or writing fails
     */
    public void encodeBuffer(InputStream inStream, OutputStream outStream)
        throws IOException {
        encodeStream(inStream, outStream, true);
    }

    /**
     * Encode the buffer in <i>aBuffer</i> and write the encoded
     * result to the OutputStream <i>aStream</i>.
     *
     * @param aBuffer the bytes to encode
     * @param aStream destination of the encoded text
     * @throws IOException if writing fails
     */
    public void encodeBuffer(byte aBuffer[], OutputStream aStream)
    throws IOException {
        ByteArrayInputStream inStream = new ByteArrayInputStream(aBuffer);
        encodeBuffer(inStream, aStream);
    }

    /**
     * A 'streamless' version of encodeBuffer that simply takes a buffer of
     * bytes and returns a string containing the encoded buffer.
     *
     * @param aBuffer the bytes to encode
     * @return the encoded text, one char per encoded byte
     * @throws Error if encoding into the in-memory stream fails; the
     *         original exception is attached as the cause
     */
    public String encodeBuffer(byte aBuffer[]) {
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        ByteArrayInputStream inStream = new ByteArrayInputStream(aBuffer);
        try {
            encodeBuffer(inStream, outStream);
            // Same explicit conversion as encode(byte[]); relying on the
            // platform default charset would corrupt 8 bit output when
            // that default is a multi-byte charset.
            return outStream.toString(OUTPUT_CHARSET);
        } catch (Exception e) {
            // This should never happen with in-memory streams. Every
            // Exception is still wrapped (as before) so existing callers
            // observe the same Error, now with its cause preserved.
            throw new Error("CharacterEncoder.encodeBuffer internal error", e);
        }
    }

    /**
     * Encode the <i>aBuffer</i> ByteBuffer and write the encoded
     * result to the OutputStream <i>aStream</i>.
     * <P>
     * The ByteBuffer's position will be advanced to ByteBuffer's limit.
     *
     * @param aBuffer the buffer whose remaining bytes are encoded
     * @param aStream destination of the encoded text
     * @throws IOException if writing fails
     */
    public void encodeBuffer(ByteBuffer aBuffer, OutputStream aStream)
        throws IOException {
        byte [] buf = getBytes(aBuffer);
        encodeBuffer(buf, aStream);
    }

    /**
     * A 'streamless' version of encodeBuffer that simply takes a ByteBuffer
     * and returns a string containing the encoded buffer.
     * <P>
     * The ByteBuffer's position will be advanced to ByteBuffer's limit.
     *
     * @param aBuffer the buffer whose remaining bytes are encoded
     * @return the encoded text
     */
    public String encodeBuffer(ByteBuffer aBuffer) {
        byte [] buf = getBytes(aBuffer);
        return encodeBuffer(buf);
    }

    /**
     * Shared driver for encode and encodeBuffer: emits the buffer prefix,
     * then one encoded line per bytesPerLine() input bytes, then the buffer
     * suffix.
     *
     * @param inStream           source of the binary data
     * @param outStream          destination of the encoded text
     * @param terminateShortLine whether a final line shorter than a full
     *                           line also gets a line suffix
     * @throws IOException if reading or writing fails
     */
    private void encodeStream(InputStream inStream, OutputStream outStream,
                boolean terminateShortLine) throws IOException {
        byte [] line = new byte[bytesPerLine()];

        encodeBufferPrefix(outStream);

        while (true) {
            int numBytes = readFully(inStream, line);
            if (numBytes == 0) {
                // Input ended exactly on a line boundary (or was empty).
                break;
            }
            encodeLinePrefix(outStream, numBytes);
            encodeAtoms(outStream, line, numBytes);

            // A short read means the stream is exhausted.
            boolean lastLine = numBytes < line.length;
            if (!lastLine || terminateShortLine) {
                encodeLineSuffix(outStream);
            }
            if (lastLine) {
                break;
            }
        }
        encodeBufferSuffix(outStream);
    }

    /**
     * Encode the first <i>numBytes</i> bytes of <i>line</i> one atom at a
     * time; the last atom is shortened when the line does not divide evenly.
     */
    private void encodeAtoms(OutputStream outStream, byte [] line,
                int numBytes) throws IOException {
        int atomSize = bytesPerAtom();
        for (int offset = 0; offset < numBytes; offset += atomSize) {
            encodeAtom(outStream, line, offset,
                       Math.min(atomSize, numBytes - offset));
        }
    }

}