blob: 1f2a5a7ca9601821daba51beac3e2c655925b985 [file] [log] [blame]
Doug Zongkerd2affae2010-02-12 15:50:01 -08001/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Doug Zongkerab69e292010-03-29 13:23:15 -070017package android.util;
Doug Zongkerd2affae2010-02-12 15:50:01 -080018
Doug Zongker9df2ffd2010-02-14 13:48:49 -080019import java.io.UnsupportedEncodingException;
20
Doug Zongkerd2affae2010-02-12 15:50:01 -080021/**
Doug Zongker9df2ffd2010-02-14 13:48:49 -080022 * Utilities for encoding and decoding the Base64 representation of
23 * binary data. See RFCs <a
24 * href="http://www.ietf.org/rfc/rfc2045.txt">2045</a> and <a
25 * href="http://www.ietf.org/rfc/rfc3548.txt">3548</a>.
Doug Zongkerd2affae2010-02-12 15:50:01 -080026 */
27public class Base64 {
28 /**
29 * Default values for encoder/decoder flags.
30 */
31 public static final int DEFAULT = 0;
32
33 /**
Doug Zongker9df2ffd2010-02-14 13:48:49 -080034 * Encoder flag bit to omit the padding '=' characters at the end
35 * of the output (if any).
Doug Zongkerd2affae2010-02-12 15:50:01 -080036 */
37 public static final int NO_PADDING = 1;
38
39 /**
Doug Zongker9df2ffd2010-02-14 13:48:49 -080040 * Encoder flag bit to omit all line terminators (i.e., the output
41 * will be on one long line).
Doug Zongkerd2affae2010-02-12 15:50:01 -080042 */
43 public static final int NO_WRAP = 2;
44
45 /**
Doug Zongker9df2ffd2010-02-14 13:48:49 -080046 * Encoder flag bit to indicate lines should be terminated with a
47 * CRLF pair instead of just an LF. Has no effect if {@code
48 * NO_WRAP} is specified as well.
Doug Zongkerd2affae2010-02-12 15:50:01 -080049 */
50 public static final int CRLF = 4;
51
52 /**
Doug Zongker9df2ffd2010-02-14 13:48:49 -080053 * Encoder/decoder flag bit to indicate using the "URL and
54 * filename safe" variant of Base64 (see RFC 3548 section 4) where
55 * {@code -} and {@code _} are used in place of {@code +} and
56 * {@code /}.
Doug Zongkerd2affae2010-02-12 15:50:01 -080057 */
Doug Zongker9df2ffd2010-02-14 13:48:49 -080058 public static final int URL_SAFE = 8;
Doug Zongkerd2affae2010-02-12 15:50:01 -080059
60 /**
Doug Zongker9df2ffd2010-02-14 13:48:49 -080061 * Flag to pass to {@link Base64OutputStream} to indicate that it
62 * should not close the output stream it is wrapping when it
63 * itself is closed.
Doug Zongkerd2affae2010-02-12 15:50:01 -080064 */
65 public static final int NO_CLOSE = 16;
66
67 // --------------------------------------------------------
Doug Zongker9df2ffd2010-02-14 13:48:49 -080068 // shared code
69 // --------------------------------------------------------
70
71 /* package */ static abstract class Coder {
72 public byte[] output;
73 public int op;
74
75 /**
76 * Encode/decode another block of input data. this.output is
77 * provided by the caller, and must be big enough to hold all
78 * the coded data. On exit, this.opwill be set to the length
79 * of the coded data.
80 *
81 * @param finish true if this is the final call to process for
82 * this object. Will finalize the coder state and
83 * include any final bytes in the output.
84 *
85 * @return true if the input so far is good; false if some
86 * error has been detected in the input stream..
87 */
88 public abstract boolean process(byte[] input, int offset, int len, boolean finish);
89
90 /**
91 * @return the maximum number of bytes a call to process()
92 * could produce for the given number of input bytes. This may
93 * be an overestimate.
94 */
95 public abstract int maxOutputSize(int len);
96 }
97
98 // --------------------------------------------------------
Doug Zongkerd2affae2010-02-12 15:50:01 -080099 // decoding
100 // --------------------------------------------------------
101
102 /**
Doug Zongkerd2affae2010-02-12 15:50:01 -0800103 * Decode the Base64-encoded data in input and return the data in
104 * a new byte array.
105 *
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800106 * <p>The padding '=' characters at the end are considered optional, but
Doug Zongkerd2affae2010-02-12 15:50:01 -0800107 * if any are present, there must be the correct number of them.
108 *
Doug Zongker8fe55712010-02-12 18:00:30 -0800109 * @param str the input String to decode, which is converted to
Doug Zongkerd2affae2010-02-12 15:50:01 -0800110 * bytes using the default charset
111 * @param flags controls certain features of the decoded output.
112 * Pass {@code DEFAULT} to decode standard Base64.
113 *
114 * @throws IllegalArgumentException if the input contains
115 * incorrect padding
116 */
117 public static byte[] decode(String str, int flags) {
118 return decode(str.getBytes(), flags);
119 }
120
121 /**
122 * Decode the Base64-encoded data in input and return the data in
123 * a new byte array.
124 *
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800125 * <p>The padding '=' characters at the end are considered optional, but
Doug Zongkerd2affae2010-02-12 15:50:01 -0800126 * if any are present, there must be the correct number of them.
127 *
128 * @param input the input array to decode
129 * @param flags controls certain features of the decoded output.
130 * Pass {@code DEFAULT} to decode standard Base64.
131 *
132 * @throws IllegalArgumentException if the input contains
133 * incorrect padding
134 */
135 public static byte[] decode(byte[] input, int flags) {
136 return decode(input, 0, input.length, flags);
137 }
138
139 /**
140 * Decode the Base64-encoded data in input and return the data in
141 * a new byte array.
142 *
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800143 * <p>The padding '=' characters at the end are considered optional, but
Doug Zongkerd2affae2010-02-12 15:50:01 -0800144 * if any are present, there must be the correct number of them.
145 *
146 * @param input the data to decode
147 * @param offset the position within the input array at which to start
148 * @param len the number of bytes of input to decode
149 * @param flags controls certain features of the decoded output.
150 * Pass {@code DEFAULT} to decode standard Base64.
151 *
152 * @throws IllegalArgumentException if the input contains
153 * incorrect padding
154 */
155 public static byte[] decode(byte[] input, int offset, int len, int flags) {
156 // Allocate space for the most data the input could represent.
157 // (It could contain less if it contains whitespace, etc.)
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800158 Decoder decoder = new Decoder(flags, new byte[len*3/4]);
Doug Zongkerd2affae2010-02-12 15:50:01 -0800159
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800160 if (!decoder.process(input, offset, len, true)) {
Doug Zongkerd2affae2010-02-12 15:50:01 -0800161 throw new IllegalArgumentException("bad base-64");
162 }
163
164 // Maybe we got lucky and allocated exactly enough output space.
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800165 if (decoder.op == decoder.output.length) {
166 return decoder.output;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800167 }
168
169 // Need to shorten the array, so allocate a new one of the
170 // right size and copy.
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800171 byte[] temp = new byte[decoder.op];
172 System.arraycopy(decoder.output, 0, temp, 0, decoder.op);
Doug Zongkerd2affae2010-02-12 15:50:01 -0800173 return temp;
174 }
175
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800176 /* package */ static class Decoder extends Coder {
177 /**
178 * Lookup table for turning bytes into their position in the
179 * Base64 alphabet.
180 */
181 private static final int DECODE[] = {
182 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
183 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
184 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
185 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
186 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
187 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
188 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
189 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
190 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
191 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
192 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
193 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
194 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
195 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
196 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
197 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
198 };
Doug Zongkerd2affae2010-02-12 15:50:01 -0800199
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800200 /**
201 * Decode lookup table for the "web safe" variant (RFC 3548
202 * sec. 4) where - and _ replace + and /.
203 */
204 private static final int DECODE_WEBSAFE[] = {
205 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
206 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
207 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1,
208 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
209 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
210 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
211 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
212 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
213 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
214 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
215 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
216 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
217 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
218 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
219 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
220 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
221 };
Doug Zongkerd2affae2010-02-12 15:50:01 -0800222
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800223 /** Non-data values in the DECODE arrays. */
224 private static final int SKIP = -1;
225 private static final int EQUALS = -2;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800226
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800227 /**
228 * States 0-3 are reading through the next input tuple.
229 * State 4 is having read one '=' and expecting exactly
230 * one more.
231 * State 5 is expecting no more data or padding characters
232 * in the input.
233 * State 6 is the error state; an error has been detected
234 * in the input and no future input can "fix" it.
235 */
236 private int state; // state number (0 to 6)
237 private int value;
238
239 final private int[] alphabet;
240
241 public Decoder(int flags, byte[] output) {
Doug Zongkerd2affae2010-02-12 15:50:01 -0800242 this.output = output;
243
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800244 alphabet = ((flags & URL_SAFE) == 0) ? DECODE : DECODE_WEBSAFE;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800245 state = 0;
246 value = 0;
247 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800248
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800249 /**
250 * @return an overestimate for the number of bytes {@code
251 * len} bytes could decode to.
252 */
253 public int maxOutputSize(int len) {
254 return len * 3/4 + 10;
255 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800256
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800257 /**
258 * Decode another block of input data.
259 *
260 * @return true if the state machine is still healthy. false if
261 * bad base-64 data has been detected in the input stream.
262 */
263 public boolean process(byte[] input, int offset, int len, boolean finish) {
264 if (this.state == 6) return false;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800265
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800266 int p = offset;
267 len += offset;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800268
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800269 // Using local variables makes the decoder about 12%
270 // faster than if we manipulate the member variables in
271 // the loop. (Even alphabet makes a measurable
272 // difference, which is somewhat surprising to me since
273 // the member variable is final.)
274 int state = this.state;
275 int value = this.value;
276 int op = 0;
277 final byte[] output = this.output;
278 final int[] alphabet = this.alphabet;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800279
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800280 while (p < len) {
281 // Try the fast path: we're starting a new tuple and the
282 // next four bytes of the input stream are all data
283 // bytes. This corresponds to going through states
284 // 0-1-2-3-0. We expect to use this method for most of
285 // the data.
286 //
287 // If any of the next four bytes of input are non-data
288 // (whitespace, etc.), value will end up negative. (All
289 // the non-data values in decode are small negative
290 // numbers, so shifting any of them up and or'ing them
291 // together will result in a value with its top bit set.)
292 //
293 // You can remove this whole block and the output should
294 // be the same, just slower.
295 if (state == 0) {
296 while (p+4 <= len &&
297 (value = ((alphabet[input[p] & 0xff] << 18) |
298 (alphabet[input[p+1] & 0xff] << 12) |
299 (alphabet[input[p+2] & 0xff] << 6) |
300 (alphabet[input[p+3] & 0xff]))) >= 0) {
301 output[op+2] = (byte) value;
302 output[op+1] = (byte) (value >> 8);
303 output[op] = (byte) (value >> 16);
304 op += 3;
305 p += 4;
306 }
307 if (p >= len) break;
308 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800309
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800310 // The fast path isn't available -- either we've read a
311 // partial tuple, or the next four input bytes aren't all
312 // data, or whatever. Fall back to the slower state
313 // machine implementation.
Doug Zongkerd2affae2010-02-12 15:50:01 -0800314
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800315 int d = alphabet[input[p++] & 0xff];
Doug Zongkerd2affae2010-02-12 15:50:01 -0800316
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800317 switch (state) {
Doug Zongkerd2affae2010-02-12 15:50:01 -0800318 case 0:
319 if (d >= 0) {
320 value = d;
321 ++state;
322 } else if (d != SKIP) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800323 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800324 return false;
325 }
326 break;
327
328 case 1:
329 if (d >= 0) {
330 value = (value << 6) | d;
331 ++state;
332 } else if (d != SKIP) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800333 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800334 return false;
335 }
336 break;
337
338 case 2:
339 if (d >= 0) {
340 value = (value << 6) | d;
341 ++state;
342 } else if (d == EQUALS) {
343 // Emit the last (partial) output tuple;
344 // expect exactly one more padding character.
345 output[op++] = (byte) (value >> 4);
346 state = 4;
347 } else if (d != SKIP) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800348 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800349 return false;
350 }
351 break;
352
353 case 3:
354 if (d >= 0) {
355 // Emit the output triple and return to state 0.
356 value = (value << 6) | d;
357 output[op+2] = (byte) value;
358 output[op+1] = (byte) (value >> 8);
359 output[op] = (byte) (value >> 16);
360 op += 3;
361 state = 0;
362 } else if (d == EQUALS) {
363 // Emit the last (partial) output tuple;
364 // expect no further data or padding characters.
365 output[op+1] = (byte) (value >> 2);
366 output[op] = (byte) (value >> 10);
367 op += 2;
368 state = 5;
369 } else if (d != SKIP) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800370 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800371 return false;
372 }
373 break;
374
375 case 4:
376 if (d == EQUALS) {
377 ++state;
378 } else if (d != SKIP) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800379 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800380 return false;
381 }
382 break;
383
384 case 5:
385 if (d != SKIP) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800386 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800387 return false;
388 }
389 break;
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800390 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800391 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800392
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800393 if (!finish) {
394 // We're out of input, but a future call could provide
395 // more.
396 this.state = state;
397 this.value = value;
398 this.op = op;
399 return true;
400 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800401
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800402 // Done reading input. Now figure out where we are left in
403 // the state machine and finish up.
Doug Zongkerd2affae2010-02-12 15:50:01 -0800404
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800405 switch (state) {
Doug Zongkerd2affae2010-02-12 15:50:01 -0800406 case 0:
407 // Output length is a multiple of three. Fine.
408 break;
409 case 1:
410 // Read one extra input byte, which isn't enough to
411 // make another output byte. Illegal.
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800412 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800413 return false;
414 case 2:
415 // Read two extra input bytes, enough to emit 1 more
416 // output byte. Fine.
417 output[op++] = (byte) (value >> 4);
418 break;
419 case 3:
420 // Read three extra input bytes, enough to emit 2 more
421 // output bytes. Fine.
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800422 output[op++] = (byte) (value >> 10);
423 output[op++] = (byte) (value >> 2);
Doug Zongkerd2affae2010-02-12 15:50:01 -0800424 break;
425 case 4:
426 // Read one padding '=' when we expected 2. Illegal.
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800427 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800428 return false;
429 case 5:
430 // Read all the padding '='s we expected and no more.
431 // Fine.
432 break;
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800433 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800434
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800435 this.state = state;
436 this.op = op;
437 return true;
438 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800439 }
440
441 // --------------------------------------------------------
442 // encoding
443 // --------------------------------------------------------
444
445 /**
Doug Zongkerd2affae2010-02-12 15:50:01 -0800446 * Base64-encode the given data and return a newly allocated
447 * String with the result.
448 *
449 * @param input the data to encode
450 * @param flags controls certain features of the encoded output.
451 * Passing {@code DEFAULT} results in output that
452 * adheres to RFC 2045.
453 */
454 public static String encodeToString(byte[] input, int flags) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800455 try {
456 return new String(encode(input, flags), "US-ASCII");
457 } catch (UnsupportedEncodingException e) {
458 // US-ASCII is guaranteed to be available.
459 throw new AssertionError(e);
460 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800461 }
462
463 /**
464 * Base64-encode the given data and return a newly allocated
465 * String with the result.
466 *
467 * @param input the data to encode
468 * @param offset the position within the input array at which to
469 * start
470 * @param len the number of bytes of input to encode
471 * @param flags controls certain features of the encoded output.
472 * Passing {@code DEFAULT} results in output that
473 * adheres to RFC 2045.
474 */
475 public static String encodeToString(byte[] input, int offset, int len, int flags) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800476 try {
477 return new String(encode(input, offset, len, flags), "US-ASCII");
478 } catch (UnsupportedEncodingException e) {
479 // US-ASCII is guaranteed to be available.
480 throw new AssertionError(e);
481 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800482 }
483
484 /**
485 * Base64-encode the given data and return a newly allocated
486 * byte[] with the result.
487 *
488 * @param input the data to encode
489 * @param flags controls certain features of the encoded output.
490 * Passing {@code DEFAULT} results in output that
491 * adheres to RFC 2045.
492 */
493 public static byte[] encode(byte[] input, int flags) {
494 return encode(input, 0, input.length, flags);
495 }
496
497 /**
498 * Base64-encode the given data and return a newly allocated
499 * byte[] with the result.
500 *
501 * @param input the data to encode
502 * @param offset the position within the input array at which to
503 * start
504 * @param len the number of bytes of input to encode
505 * @param flags controls certain features of the encoded output.
506 * Passing {@code DEFAULT} results in output that
507 * adheres to RFC 2045.
508 */
509 public static byte[] encode(byte[] input, int offset, int len, int flags) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800510 Encoder encoder = new Encoder(flags, null);
Doug Zongkerd2affae2010-02-12 15:50:01 -0800511
512 // Compute the exact length of the array we will produce.
513 int output_len = len / 3 * 4;
514
515 // Account for the tail of the data and the padding bytes, if any.
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800516 if (encoder.do_padding) {
Doug Zongkerd2affae2010-02-12 15:50:01 -0800517 if (len % 3 > 0) {
518 output_len += 4;
519 }
520 } else {
521 switch (len % 3) {
522 case 0: break;
523 case 1: output_len += 2; break;
524 case 2: output_len += 3; break;
525 }
526 }
527
528 // Account for the newlines, if any.
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800529 if (encoder.do_newline && len > 0) {
530 output_len += (((len-1) / (3 * Encoder.LINE_GROUPS)) + 1) *
531 (encoder.do_cr ? 2 : 1);
Doug Zongkerd2affae2010-02-12 15:50:01 -0800532 }
533
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800534 encoder.output = new byte[output_len];
535 encoder.process(input, offset, len, true);
Doug Zongkerd2affae2010-02-12 15:50:01 -0800536
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800537 assert encoder.op == output_len;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800538
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800539 return encoder.output;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800540 }
541
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800542 /* package */ static class Encoder extends Coder {
543 /**
544 * Emit a new line every this many output tuples. Corresponds to
545 * a 76-character line length (the maximum allowable according to
546 * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>).
547 */
548 public static final int LINE_GROUPS = 19;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800549
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800550 /**
551 * Lookup table for turning Base64 alphabet positions (6 bits)
552 * into output bytes.
553 */
554 private static final byte ENCODE[] = {
555 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
556 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
557 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
558 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/',
559 };
560
561 /**
562 * Lookup table for turning Base64 alphabet positions (6 bits)
563 * into output bytes.
564 */
565 private static final byte ENCODE_WEBSAFE[] = {
566 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
567 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
568 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
569 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_',
570 };
571
572 final private byte[] tail;
573 /* package */ int tailLen;
574 private int count;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800575
576 final public boolean do_padding;
577 final public boolean do_newline;
578 final public boolean do_cr;
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800579 final private byte[] alphabet;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800580
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800581 public Encoder(int flags, byte[] output) {
Doug Zongkerd2affae2010-02-12 15:50:01 -0800582 this.output = output;
583
584 do_padding = (flags & NO_PADDING) == 0;
585 do_newline = (flags & NO_WRAP) == 0;
586 do_cr = (flags & CRLF) != 0;
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800587 alphabet = ((flags & URL_SAFE) == 0) ? ENCODE : ENCODE_WEBSAFE;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800588
589 tail = new byte[2];
590 tailLen = 0;
591
592 count = do_newline ? LINE_GROUPS : -1;
593 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800594
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800595 /**
596 * @return an overestimate for the number of bytes {@code
597 * len} bytes could encode to.
598 */
599 public int maxOutputSize(int len) {
600 return len * 8/5 + 10;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800601 }
602
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800603 public boolean process(byte[] input, int offset, int len, boolean finish) {
604 // Using local variables makes the encoder about 9% faster.
605 final byte[] alphabet = this.alphabet;
606 final byte[] output = this.output;
607 int op = 0;
608 int count = this.count;
609
610 int p = offset;
611 len += offset;
612 int v = -1;
613
614 // First we need to concatenate the tail of the previous call
615 // with any input bytes available now and see if we can empty
616 // the tail.
617
618 switch (tailLen) {
619 case 0:
620 // There was no tail.
621 break;
622
623 case 1:
624 if (p+2 <= len) {
625 // A 1-byte tail with at least 2 bytes of
626 // input available now.
627 v = ((tail[0] & 0xff) << 16) |
628 ((input[p++] & 0xff) << 8) |
629 (input[p++] & 0xff);
630 tailLen = 0;
631 };
632 break;
633
634 case 2:
635 if (p+1 <= len) {
636 // A 2-byte tail with at least 1 byte of input.
637 v = ((tail[0] & 0xff) << 16) |
638 ((tail[1] & 0xff) << 8) |
639 (input[p++] & 0xff);
640 tailLen = 0;
641 }
642 break;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800643 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800644
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800645 if (v != -1) {
646 output[op++] = alphabet[(v >> 18) & 0x3f];
647 output[op++] = alphabet[(v >> 12) & 0x3f];
648 output[op++] = alphabet[(v >> 6) & 0x3f];
649 output[op++] = alphabet[v & 0x3f];
650 if (--count == 0) {
651 if (do_cr) output[op++] = '\r';
652 output[op++] = '\n';
653 count = LINE_GROUPS;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800654 }
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800655 }
656
657 // At this point either there is no tail, or there are fewer
658 // than 3 bytes of input available.
659
660 // The main loop, turning 3 input bytes into 4 output bytes on
661 // each iteration.
662 while (p+3 <= len) {
663 v = ((input[p] & 0xff) << 16) |
664 ((input[p+1] & 0xff) << 8) |
665 (input[p+2] & 0xff);
666 output[op] = alphabet[(v >> 18) & 0x3f];
667 output[op+1] = alphabet[(v >> 12) & 0x3f];
668 output[op+2] = alphabet[(v >> 6) & 0x3f];
669 output[op+3] = alphabet[v & 0x3f];
670 p += 3;
671 op += 4;
672 if (--count == 0) {
673 if (do_cr) output[op++] = '\r';
674 output[op++] = '\n';
675 count = LINE_GROUPS;
676 }
677 }
678
679 if (finish) {
680 // Finish up the tail of the input. Note that we need to
681 // consume any bytes in tail before any bytes
682 // remaining in input; there should be at most two bytes
683 // total.
684
685 if (p-tailLen == len-1) {
686 int t = 0;
687 v = ((tailLen > 0 ? tail[t++] : input[p++]) & 0xff) << 4;
688 tailLen -= t;
689 output[op++] = alphabet[(v >> 6) & 0x3f];
690 output[op++] = alphabet[v & 0x3f];
691 if (do_padding) {
692 output[op++] = '=';
693 output[op++] = '=';
694 }
695 if (do_newline) {
696 if (do_cr) output[op++] = '\r';
697 output[op++] = '\n';
698 }
699 } else if (p-tailLen == len-2) {
700 int t = 0;
701 v = (((tailLen > 1 ? tail[t++] : input[p++]) & 0xff) << 10) |
702 (((tailLen > 0 ? tail[t++] : input[p++]) & 0xff) << 2);
703 tailLen -= t;
704 output[op++] = alphabet[(v >> 12) & 0x3f];
705 output[op++] = alphabet[(v >> 6) & 0x3f];
706 output[op++] = alphabet[v & 0x3f];
707 if (do_padding) {
708 output[op++] = '=';
709 }
710 if (do_newline) {
711 if (do_cr) output[op++] = '\r';
712 output[op++] = '\n';
713 }
714 } else if (do_newline && op > 0 && count != LINE_GROUPS) {
Doug Zongkerd2affae2010-02-12 15:50:01 -0800715 if (do_cr) output[op++] = '\r';
716 output[op++] = '\n';
717 }
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800718
719 assert tailLen == 0;
720 assert p == len;
721 } else {
722 // Save the leftovers in tail to be consumed on the next
723 // call to encodeInternal.
724
725 if (p == len-1) {
726 tail[tailLen++] = input[p];
727 } else if (p == len-2) {
728 tail[tailLen++] = input[p];
729 tail[tailLen++] = input[p+1];
Doug Zongkerd2affae2010-02-12 15:50:01 -0800730 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800731 }
732
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800733 this.op = op;
734 this.count = count;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800735
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800736 return true;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800737 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800738 }
739
740 private Base64() { } // don't instantiate
741}