blob: ecc0c9cc9de36e40c8d1d51cc030da26609eb406 [file] [log] [blame]
Doug Zongkerd2affae2010-02-12 15:50:01 -08001/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Doug Zongkerab69e292010-03-29 13:23:15 -070017package android.util;
Doug Zongkerd2affae2010-02-12 15:50:01 -080018
Mathew Inwood4eb56ab2018-08-14 17:24:32 +010019import android.annotation.UnsupportedAppUsage;
Doug Zongker9df2ffd2010-02-14 13:48:49 -080020import java.io.UnsupportedEncodingException;
21
Doug Zongkerd2affae2010-02-12 15:50:01 -080022/**
Doug Zongker9df2ffd2010-02-14 13:48:49 -080023 * Utilities for encoding and decoding the Base64 representation of
24 * binary data. See RFCs <a
25 * href="http://www.ietf.org/rfc/rfc2045.txt">2045</a> and <a
26 * href="http://www.ietf.org/rfc/rfc3548.txt">3548</a>.
Doug Zongkerd2affae2010-02-12 15:50:01 -080027 */
28public class Base64 {
29 /**
30 * Default values for encoder/decoder flags.
31 */
32 public static final int DEFAULT = 0;
33
34 /**
Doug Zongker9df2ffd2010-02-14 13:48:49 -080035 * Encoder flag bit to omit the padding '=' characters at the end
36 * of the output (if any).
Doug Zongkerd2affae2010-02-12 15:50:01 -080037 */
38 public static final int NO_PADDING = 1;
39
40 /**
Doug Zongker9df2ffd2010-02-14 13:48:49 -080041 * Encoder flag bit to omit all line terminators (i.e., the output
42 * will be on one long line).
Doug Zongkerd2affae2010-02-12 15:50:01 -080043 */
44 public static final int NO_WRAP = 2;
45
46 /**
Doug Zongker9df2ffd2010-02-14 13:48:49 -080047 * Encoder flag bit to indicate lines should be terminated with a
48 * CRLF pair instead of just an LF. Has no effect if {@code
49 * NO_WRAP} is specified as well.
Doug Zongkerd2affae2010-02-12 15:50:01 -080050 */
51 public static final int CRLF = 4;
52
53 /**
Doug Zongker9df2ffd2010-02-14 13:48:49 -080054 * Encoder/decoder flag bit to indicate using the "URL and
55 * filename safe" variant of Base64 (see RFC 3548 section 4) where
56 * {@code -} and {@code _} are used in place of {@code +} and
57 * {@code /}.
Doug Zongkerd2affae2010-02-12 15:50:01 -080058 */
Doug Zongker9df2ffd2010-02-14 13:48:49 -080059 public static final int URL_SAFE = 8;
Doug Zongkerd2affae2010-02-12 15:50:01 -080060
61 /**
Doug Zongker9df2ffd2010-02-14 13:48:49 -080062 * Flag to pass to {@link Base64OutputStream} to indicate that it
63 * should not close the output stream it is wrapping when it
64 * itself is closed.
Doug Zongkerd2affae2010-02-12 15:50:01 -080065 */
66 public static final int NO_CLOSE = 16;
67
68 // --------------------------------------------------------
Doug Zongker9df2ffd2010-02-14 13:48:49 -080069 // shared code
70 // --------------------------------------------------------
71
72 /* package */ static abstract class Coder {
73 public byte[] output;
74 public int op;
75
76 /**
77 * Encode/decode another block of input data. this.output is
78 * provided by the caller, and must be big enough to hold all
79 * the coded data. On exit, this.opwill be set to the length
80 * of the coded data.
81 *
82 * @param finish true if this is the final call to process for
83 * this object. Will finalize the coder state and
84 * include any final bytes in the output.
85 *
86 * @return true if the input so far is good; false if some
87 * error has been detected in the input stream..
88 */
89 public abstract boolean process(byte[] input, int offset, int len, boolean finish);
90
91 /**
92 * @return the maximum number of bytes a call to process()
93 * could produce for the given number of input bytes. This may
94 * be an overestimate.
95 */
96 public abstract int maxOutputSize(int len);
97 }
98
99 // --------------------------------------------------------
Doug Zongkerd2affae2010-02-12 15:50:01 -0800100 // decoding
101 // --------------------------------------------------------
102
103 /**
Doug Zongkerd2affae2010-02-12 15:50:01 -0800104 * Decode the Base64-encoded data in input and return the data in
105 * a new byte array.
106 *
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800107 * <p>The padding '=' characters at the end are considered optional, but
Doug Zongkerd2affae2010-02-12 15:50:01 -0800108 * if any are present, there must be the correct number of them.
109 *
Doug Zongker8fe55712010-02-12 18:00:30 -0800110 * @param str the input String to decode, which is converted to
Doug Zongkerd2affae2010-02-12 15:50:01 -0800111 * bytes using the default charset
112 * @param flags controls certain features of the decoded output.
113 * Pass {@code DEFAULT} to decode standard Base64.
114 *
115 * @throws IllegalArgumentException if the input contains
116 * incorrect padding
117 */
118 public static byte[] decode(String str, int flags) {
119 return decode(str.getBytes(), flags);
120 }
121
122 /**
123 * Decode the Base64-encoded data in input and return the data in
124 * a new byte array.
125 *
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800126 * <p>The padding '=' characters at the end are considered optional, but
Doug Zongkerd2affae2010-02-12 15:50:01 -0800127 * if any are present, there must be the correct number of them.
128 *
129 * @param input the input array to decode
130 * @param flags controls certain features of the decoded output.
131 * Pass {@code DEFAULT} to decode standard Base64.
132 *
133 * @throws IllegalArgumentException if the input contains
134 * incorrect padding
135 */
136 public static byte[] decode(byte[] input, int flags) {
137 return decode(input, 0, input.length, flags);
138 }
139
140 /**
141 * Decode the Base64-encoded data in input and return the data in
142 * a new byte array.
143 *
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800144 * <p>The padding '=' characters at the end are considered optional, but
Doug Zongkerd2affae2010-02-12 15:50:01 -0800145 * if any are present, there must be the correct number of them.
146 *
147 * @param input the data to decode
148 * @param offset the position within the input array at which to start
149 * @param len the number of bytes of input to decode
150 * @param flags controls certain features of the decoded output.
151 * Pass {@code DEFAULT} to decode standard Base64.
152 *
153 * @throws IllegalArgumentException if the input contains
154 * incorrect padding
155 */
156 public static byte[] decode(byte[] input, int offset, int len, int flags) {
157 // Allocate space for the most data the input could represent.
158 // (It could contain less if it contains whitespace, etc.)
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800159 Decoder decoder = new Decoder(flags, new byte[len*3/4]);
Doug Zongkerd2affae2010-02-12 15:50:01 -0800160
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800161 if (!decoder.process(input, offset, len, true)) {
Doug Zongkerd2affae2010-02-12 15:50:01 -0800162 throw new IllegalArgumentException("bad base-64");
163 }
164
165 // Maybe we got lucky and allocated exactly enough output space.
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800166 if (decoder.op == decoder.output.length) {
167 return decoder.output;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800168 }
169
170 // Need to shorten the array, so allocate a new one of the
171 // right size and copy.
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800172 byte[] temp = new byte[decoder.op];
173 System.arraycopy(decoder.output, 0, temp, 0, decoder.op);
Doug Zongkerd2affae2010-02-12 15:50:01 -0800174 return temp;
175 }
176
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800177 /* package */ static class Decoder extends Coder {
178 /**
179 * Lookup table for turning bytes into their position in the
180 * Base64 alphabet.
181 */
182 private static final int DECODE[] = {
183 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
184 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
185 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
186 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
187 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
188 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
189 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
190 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
191 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
192 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
193 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
194 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
195 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
196 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
197 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
198 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
199 };
Doug Zongkerd2affae2010-02-12 15:50:01 -0800200
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800201 /**
202 * Decode lookup table for the "web safe" variant (RFC 3548
203 * sec. 4) where - and _ replace + and /.
204 */
205 private static final int DECODE_WEBSAFE[] = {
206 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
207 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
208 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1,
209 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
210 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
211 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
212 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
213 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
214 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
215 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
216 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
217 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
218 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
219 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
220 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
221 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
222 };
Doug Zongkerd2affae2010-02-12 15:50:01 -0800223
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800224 /** Non-data values in the DECODE arrays. */
225 private static final int SKIP = -1;
226 private static final int EQUALS = -2;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800227
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800228 /**
229 * States 0-3 are reading through the next input tuple.
230 * State 4 is having read one '=' and expecting exactly
231 * one more.
232 * State 5 is expecting no more data or padding characters
233 * in the input.
234 * State 6 is the error state; an error has been detected
235 * in the input and no future input can "fix" it.
236 */
237 private int state; // state number (0 to 6)
238 private int value;
239
240 final private int[] alphabet;
241
242 public Decoder(int flags, byte[] output) {
Doug Zongkerd2affae2010-02-12 15:50:01 -0800243 this.output = output;
244
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800245 alphabet = ((flags & URL_SAFE) == 0) ? DECODE : DECODE_WEBSAFE;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800246 state = 0;
247 value = 0;
248 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800249
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800250 /**
251 * @return an overestimate for the number of bytes {@code
252 * len} bytes could decode to.
253 */
254 public int maxOutputSize(int len) {
255 return len * 3/4 + 10;
256 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800257
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800258 /**
259 * Decode another block of input data.
260 *
261 * @return true if the state machine is still healthy. false if
262 * bad base-64 data has been detected in the input stream.
263 */
264 public boolean process(byte[] input, int offset, int len, boolean finish) {
265 if (this.state == 6) return false;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800266
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800267 int p = offset;
268 len += offset;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800269
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800270 // Using local variables makes the decoder about 12%
271 // faster than if we manipulate the member variables in
272 // the loop. (Even alphabet makes a measurable
273 // difference, which is somewhat surprising to me since
274 // the member variable is final.)
275 int state = this.state;
276 int value = this.value;
277 int op = 0;
278 final byte[] output = this.output;
279 final int[] alphabet = this.alphabet;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800280
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800281 while (p < len) {
282 // Try the fast path: we're starting a new tuple and the
283 // next four bytes of the input stream are all data
284 // bytes. This corresponds to going through states
285 // 0-1-2-3-0. We expect to use this method for most of
286 // the data.
287 //
288 // If any of the next four bytes of input are non-data
289 // (whitespace, etc.), value will end up negative. (All
290 // the non-data values in decode are small negative
291 // numbers, so shifting any of them up and or'ing them
292 // together will result in a value with its top bit set.)
293 //
294 // You can remove this whole block and the output should
295 // be the same, just slower.
296 if (state == 0) {
297 while (p+4 <= len &&
298 (value = ((alphabet[input[p] & 0xff] << 18) |
299 (alphabet[input[p+1] & 0xff] << 12) |
300 (alphabet[input[p+2] & 0xff] << 6) |
301 (alphabet[input[p+3] & 0xff]))) >= 0) {
302 output[op+2] = (byte) value;
303 output[op+1] = (byte) (value >> 8);
304 output[op] = (byte) (value >> 16);
305 op += 3;
306 p += 4;
307 }
308 if (p >= len) break;
309 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800310
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800311 // The fast path isn't available -- either we've read a
312 // partial tuple, or the next four input bytes aren't all
313 // data, or whatever. Fall back to the slower state
314 // machine implementation.
Doug Zongkerd2affae2010-02-12 15:50:01 -0800315
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800316 int d = alphabet[input[p++] & 0xff];
Doug Zongkerd2affae2010-02-12 15:50:01 -0800317
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800318 switch (state) {
Doug Zongkerd2affae2010-02-12 15:50:01 -0800319 case 0:
320 if (d >= 0) {
321 value = d;
322 ++state;
323 } else if (d != SKIP) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800324 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800325 return false;
326 }
327 break;
328
329 case 1:
330 if (d >= 0) {
331 value = (value << 6) | d;
332 ++state;
333 } else if (d != SKIP) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800334 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800335 return false;
336 }
337 break;
338
339 case 2:
340 if (d >= 0) {
341 value = (value << 6) | d;
342 ++state;
343 } else if (d == EQUALS) {
344 // Emit the last (partial) output tuple;
345 // expect exactly one more padding character.
346 output[op++] = (byte) (value >> 4);
347 state = 4;
348 } else if (d != SKIP) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800349 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800350 return false;
351 }
352 break;
353
354 case 3:
355 if (d >= 0) {
356 // Emit the output triple and return to state 0.
357 value = (value << 6) | d;
358 output[op+2] = (byte) value;
359 output[op+1] = (byte) (value >> 8);
360 output[op] = (byte) (value >> 16);
361 op += 3;
362 state = 0;
363 } else if (d == EQUALS) {
364 // Emit the last (partial) output tuple;
365 // expect no further data or padding characters.
366 output[op+1] = (byte) (value >> 2);
367 output[op] = (byte) (value >> 10);
368 op += 2;
369 state = 5;
370 } else if (d != SKIP) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800371 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800372 return false;
373 }
374 break;
375
376 case 4:
377 if (d == EQUALS) {
378 ++state;
379 } else if (d != SKIP) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800380 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800381 return false;
382 }
383 break;
384
385 case 5:
386 if (d != SKIP) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800387 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800388 return false;
389 }
390 break;
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800391 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800392 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800393
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800394 if (!finish) {
395 // We're out of input, but a future call could provide
396 // more.
397 this.state = state;
398 this.value = value;
399 this.op = op;
400 return true;
401 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800402
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800403 // Done reading input. Now figure out where we are left in
404 // the state machine and finish up.
Doug Zongkerd2affae2010-02-12 15:50:01 -0800405
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800406 switch (state) {
Doug Zongkerd2affae2010-02-12 15:50:01 -0800407 case 0:
408 // Output length is a multiple of three. Fine.
409 break;
410 case 1:
411 // Read one extra input byte, which isn't enough to
412 // make another output byte. Illegal.
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800413 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800414 return false;
415 case 2:
416 // Read two extra input bytes, enough to emit 1 more
417 // output byte. Fine.
418 output[op++] = (byte) (value >> 4);
419 break;
420 case 3:
421 // Read three extra input bytes, enough to emit 2 more
422 // output bytes. Fine.
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800423 output[op++] = (byte) (value >> 10);
424 output[op++] = (byte) (value >> 2);
Doug Zongkerd2affae2010-02-12 15:50:01 -0800425 break;
426 case 4:
427 // Read one padding '=' when we expected 2. Illegal.
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800428 this.state = 6;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800429 return false;
430 case 5:
431 // Read all the padding '='s we expected and no more.
432 // Fine.
433 break;
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800434 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800435
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800436 this.state = state;
437 this.op = op;
438 return true;
439 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800440 }
441
442 // --------------------------------------------------------
443 // encoding
444 // --------------------------------------------------------
445
446 /**
Doug Zongkerd2affae2010-02-12 15:50:01 -0800447 * Base64-encode the given data and return a newly allocated
448 * String with the result.
449 *
450 * @param input the data to encode
451 * @param flags controls certain features of the encoded output.
452 * Passing {@code DEFAULT} results in output that
453 * adheres to RFC 2045.
454 */
455 public static String encodeToString(byte[] input, int flags) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800456 try {
457 return new String(encode(input, flags), "US-ASCII");
458 } catch (UnsupportedEncodingException e) {
459 // US-ASCII is guaranteed to be available.
460 throw new AssertionError(e);
461 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800462 }
463
464 /**
465 * Base64-encode the given data and return a newly allocated
466 * String with the result.
467 *
468 * @param input the data to encode
469 * @param offset the position within the input array at which to
470 * start
471 * @param len the number of bytes of input to encode
472 * @param flags controls certain features of the encoded output.
473 * Passing {@code DEFAULT} results in output that
474 * adheres to RFC 2045.
475 */
476 public static String encodeToString(byte[] input, int offset, int len, int flags) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800477 try {
478 return new String(encode(input, offset, len, flags), "US-ASCII");
479 } catch (UnsupportedEncodingException e) {
480 // US-ASCII is guaranteed to be available.
481 throw new AssertionError(e);
482 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800483 }
484
485 /**
486 * Base64-encode the given data and return a newly allocated
487 * byte[] with the result.
488 *
489 * @param input the data to encode
490 * @param flags controls certain features of the encoded output.
491 * Passing {@code DEFAULT} results in output that
492 * adheres to RFC 2045.
493 */
494 public static byte[] encode(byte[] input, int flags) {
495 return encode(input, 0, input.length, flags);
496 }
497
498 /**
499 * Base64-encode the given data and return a newly allocated
500 * byte[] with the result.
501 *
502 * @param input the data to encode
503 * @param offset the position within the input array at which to
504 * start
505 * @param len the number of bytes of input to encode
506 * @param flags controls certain features of the encoded output.
507 * Passing {@code DEFAULT} results in output that
508 * adheres to RFC 2045.
509 */
510 public static byte[] encode(byte[] input, int offset, int len, int flags) {
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800511 Encoder encoder = new Encoder(flags, null);
Doug Zongkerd2affae2010-02-12 15:50:01 -0800512
513 // Compute the exact length of the array we will produce.
514 int output_len = len / 3 * 4;
515
516 // Account for the tail of the data and the padding bytes, if any.
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800517 if (encoder.do_padding) {
Doug Zongkerd2affae2010-02-12 15:50:01 -0800518 if (len % 3 > 0) {
519 output_len += 4;
520 }
521 } else {
522 switch (len % 3) {
523 case 0: break;
524 case 1: output_len += 2; break;
525 case 2: output_len += 3; break;
526 }
527 }
528
529 // Account for the newlines, if any.
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800530 if (encoder.do_newline && len > 0) {
531 output_len += (((len-1) / (3 * Encoder.LINE_GROUPS)) + 1) *
532 (encoder.do_cr ? 2 : 1);
Doug Zongkerd2affae2010-02-12 15:50:01 -0800533 }
534
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800535 encoder.output = new byte[output_len];
536 encoder.process(input, offset, len, true);
Doug Zongkerd2affae2010-02-12 15:50:01 -0800537
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800538 assert encoder.op == output_len;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800539
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800540 return encoder.output;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800541 }
542
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800543 /* package */ static class Encoder extends Coder {
544 /**
545 * Emit a new line every this many output tuples. Corresponds to
546 * a 76-character line length (the maximum allowable according to
547 * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>).
548 */
549 public static final int LINE_GROUPS = 19;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800550
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800551 /**
552 * Lookup table for turning Base64 alphabet positions (6 bits)
553 * into output bytes.
554 */
555 private static final byte ENCODE[] = {
556 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
557 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
558 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
559 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/',
560 };
561
562 /**
563 * Lookup table for turning Base64 alphabet positions (6 bits)
564 * into output bytes.
565 */
566 private static final byte ENCODE_WEBSAFE[] = {
567 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
568 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
569 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
570 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_',
571 };
572
573 final private byte[] tail;
574 /* package */ int tailLen;
575 private int count;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800576
577 final public boolean do_padding;
578 final public boolean do_newline;
579 final public boolean do_cr;
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800580 final private byte[] alphabet;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800581
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800582 public Encoder(int flags, byte[] output) {
Doug Zongkerd2affae2010-02-12 15:50:01 -0800583 this.output = output;
584
585 do_padding = (flags & NO_PADDING) == 0;
586 do_newline = (flags & NO_WRAP) == 0;
587 do_cr = (flags & CRLF) != 0;
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800588 alphabet = ((flags & URL_SAFE) == 0) ? ENCODE : ENCODE_WEBSAFE;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800589
590 tail = new byte[2];
591 tailLen = 0;
592
593 count = do_newline ? LINE_GROUPS : -1;
594 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800595
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800596 /**
597 * @return an overestimate for the number of bytes {@code
598 * len} bytes could encode to.
599 */
600 public int maxOutputSize(int len) {
601 return len * 8/5 + 10;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800602 }
603
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800604 public boolean process(byte[] input, int offset, int len, boolean finish) {
605 // Using local variables makes the encoder about 9% faster.
606 final byte[] alphabet = this.alphabet;
607 final byte[] output = this.output;
608 int op = 0;
609 int count = this.count;
610
611 int p = offset;
612 len += offset;
613 int v = -1;
614
615 // First we need to concatenate the tail of the previous call
616 // with any input bytes available now and see if we can empty
617 // the tail.
618
619 switch (tailLen) {
620 case 0:
621 // There was no tail.
622 break;
623
624 case 1:
625 if (p+2 <= len) {
626 // A 1-byte tail with at least 2 bytes of
627 // input available now.
628 v = ((tail[0] & 0xff) << 16) |
629 ((input[p++] & 0xff) << 8) |
630 (input[p++] & 0xff);
631 tailLen = 0;
632 };
633 break;
634
635 case 2:
636 if (p+1 <= len) {
637 // A 2-byte tail with at least 1 byte of input.
638 v = ((tail[0] & 0xff) << 16) |
639 ((tail[1] & 0xff) << 8) |
640 (input[p++] & 0xff);
641 tailLen = 0;
642 }
643 break;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800644 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800645
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800646 if (v != -1) {
647 output[op++] = alphabet[(v >> 18) & 0x3f];
648 output[op++] = alphabet[(v >> 12) & 0x3f];
649 output[op++] = alphabet[(v >> 6) & 0x3f];
650 output[op++] = alphabet[v & 0x3f];
651 if (--count == 0) {
652 if (do_cr) output[op++] = '\r';
653 output[op++] = '\n';
654 count = LINE_GROUPS;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800655 }
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800656 }
657
658 // At this point either there is no tail, or there are fewer
659 // than 3 bytes of input available.
660
661 // The main loop, turning 3 input bytes into 4 output bytes on
662 // each iteration.
663 while (p+3 <= len) {
664 v = ((input[p] & 0xff) << 16) |
665 ((input[p+1] & 0xff) << 8) |
666 (input[p+2] & 0xff);
667 output[op] = alphabet[(v >> 18) & 0x3f];
668 output[op+1] = alphabet[(v >> 12) & 0x3f];
669 output[op+2] = alphabet[(v >> 6) & 0x3f];
670 output[op+3] = alphabet[v & 0x3f];
671 p += 3;
672 op += 4;
673 if (--count == 0) {
674 if (do_cr) output[op++] = '\r';
675 output[op++] = '\n';
676 count = LINE_GROUPS;
677 }
678 }
679
680 if (finish) {
681 // Finish up the tail of the input. Note that we need to
682 // consume any bytes in tail before any bytes
683 // remaining in input; there should be at most two bytes
684 // total.
685
686 if (p-tailLen == len-1) {
687 int t = 0;
688 v = ((tailLen > 0 ? tail[t++] : input[p++]) & 0xff) << 4;
689 tailLen -= t;
690 output[op++] = alphabet[(v >> 6) & 0x3f];
691 output[op++] = alphabet[v & 0x3f];
692 if (do_padding) {
693 output[op++] = '=';
694 output[op++] = '=';
695 }
696 if (do_newline) {
697 if (do_cr) output[op++] = '\r';
698 output[op++] = '\n';
699 }
700 } else if (p-tailLen == len-2) {
701 int t = 0;
702 v = (((tailLen > 1 ? tail[t++] : input[p++]) & 0xff) << 10) |
703 (((tailLen > 0 ? tail[t++] : input[p++]) & 0xff) << 2);
704 tailLen -= t;
705 output[op++] = alphabet[(v >> 12) & 0x3f];
706 output[op++] = alphabet[(v >> 6) & 0x3f];
707 output[op++] = alphabet[v & 0x3f];
708 if (do_padding) {
709 output[op++] = '=';
710 }
711 if (do_newline) {
712 if (do_cr) output[op++] = '\r';
713 output[op++] = '\n';
714 }
715 } else if (do_newline && op > 0 && count != LINE_GROUPS) {
Doug Zongkerd2affae2010-02-12 15:50:01 -0800716 if (do_cr) output[op++] = '\r';
717 output[op++] = '\n';
718 }
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800719
720 assert tailLen == 0;
721 assert p == len;
722 } else {
723 // Save the leftovers in tail to be consumed on the next
724 // call to encodeInternal.
725
726 if (p == len-1) {
727 tail[tailLen++] = input[p];
728 } else if (p == len-2) {
729 tail[tailLen++] = input[p];
730 tail[tailLen++] = input[p+1];
Doug Zongkerd2affae2010-02-12 15:50:01 -0800731 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800732 }
733
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800734 this.op = op;
735 this.count = count;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800736
Doug Zongker9df2ffd2010-02-14 13:48:49 -0800737 return true;
Doug Zongkerd2affae2010-02-12 15:50:01 -0800738 }
Doug Zongkerd2affae2010-02-12 15:50:01 -0800739 }
740
Mathew Inwood4eb56ab2018-08-14 17:24:32 +0100741 @UnsupportedAppUsage
Doug Zongkerd2affae2010-02-12 15:50:01 -0800742 private Base64() { } // don't instantiate
743}