blob: 5cddea5108b36d2c946a550742702c1eed0239ce [file] [log] [blame]
/* Jackson JSON-processor.
 *
 * Copyright (c) 2007- Tatu Saloranta, tatu.saloranta@iki.fi
 */
5package com.fasterxml.jackson.core;
6
7import java.util.Arrays;
8
Tatu Saloranta26894d02013-07-02 17:03:25 -07009import com.fasterxml.jackson.core.util.ByteArrayBuilder;
10
Tatu Salorantaf15531c2011-12-22 23:00:40 -080011/**
12 * Abstract base class used to define specific details of which
13 * variant of Base64 encoding/decoding is to be used. Although there is
14 * somewhat standard basic version (so-called "MIME Base64"), other variants
15 * exists, see <a href="http://en.wikipedia.org/wiki/Base64">Base64 Wikipedia entry</a> for details.
16 *
17 * @author Tatu Saloranta
18 */
19public final class Base64Variant
Tatu Saloranta41bd0eb2012-10-05 15:01:05 -070020 implements java.io.Serializable
Tatu Salorantaf15531c2011-12-22 23:00:40 -080021{
Tatu Saloranta26894d02013-07-02 17:03:25 -070022 private final static int INT_SPACE = 0x20;
23
Tatu Saloranta41bd0eb2012-10-05 15:01:05 -070024 // We'll only serialize name
25 private static final long serialVersionUID = 1L;
26
Tatu Salorantaf15531c2011-12-22 23:00:40 -080027 /**
28 * Placeholder used by "no padding" variant, to be used when a character
29 * value is needed.
30 */
31 final static char PADDING_CHAR_NONE = '\0';
32
33 /**
34 * Marker used to denote ascii characters that do not correspond
35 * to a 6-bit value (in this variant), and is not used as a padding
36 * character.
37 */
38 public final static int BASE64_VALUE_INVALID = -1;
39
40 /**
41 * Marker used to denote ascii character (in decoding table) that
42 * is the padding character using this variant (if any).
43 */
44 public final static int BASE64_VALUE_PADDING = -2;
45
46 /*
47 /**********************************************************
48 /* Encoding/decoding tables
49 /**********************************************************
50 */
51
52 /**
53 * Decoding table used for base 64 decoding.
54 */
Tatu Saloranta41bd0eb2012-10-05 15:01:05 -070055 private final transient int[] _asciiToBase64 = new int[128];
Tatu Salorantaf15531c2011-12-22 23:00:40 -080056
57 /**
58 * Encoding table used for base 64 decoding when output is done
59 * as characters.
60 */
Tatu Saloranta41bd0eb2012-10-05 15:01:05 -070061 private final transient char[] _base64ToAsciiC = new char[64];
Tatu Salorantaf15531c2011-12-22 23:00:40 -080062
63 /**
64 * Alternative encoding table used for base 64 decoding when output is done
65 * as ascii bytes.
66 */
Tatu Saloranta41bd0eb2012-10-05 15:01:05 -070067 private final transient byte[] _base64ToAsciiB = new byte[64];
Tatu Salorantaf15531c2011-12-22 23:00:40 -080068
69 /*
70 /**********************************************************
71 /* Other configuration
72 /**********************************************************
73 */
74
75 /**
76 * Symbolic name of variant; used for diagnostics/debugging.
Tatu Saloranta41bd0eb2012-10-05 15:01:05 -070077 *<p>
78 * Note that this is the only non-transient field; used when reading
Tatu Salorantae962fb92016-07-21 14:14:26 -070079 * back from serialized state.
80 *<p>
81 * Also: must not be private, accessed from `BaseVariants`
Tatu Salorantaf15531c2011-12-22 23:00:40 -080082 */
Tatu Salorantae962fb92016-07-21 14:14:26 -070083 final String _name;
Tatu Salorantaf15531c2011-12-22 23:00:40 -080084
85 /**
86 * Whether this variant uses padding or not.
87 */
Tatu Salorantae962fb92016-07-21 14:14:26 -070088 private final transient boolean _usesPadding;
Tatu Salorantaf15531c2011-12-22 23:00:40 -080089
90 /**
91 * Characted used for padding, if any ({@link #PADDING_CHAR_NONE} if not).
92 */
Tatu Salorantae962fb92016-07-21 14:14:26 -070093 private final transient char _paddingChar;
Tatu Salorantaf15531c2011-12-22 23:00:40 -080094
95 /**
96 * Maximum number of encoded base64 characters to output during encoding
97 * before adding a linefeed, if line length is to be limited
98 * ({@link java.lang.Integer#MAX_VALUE} if not limited).
99 *<p>
100 * Note: for some output modes (when writing attributes) linefeeds may
101 * need to be avoided, and this value ignored.
102 */
Tatu Salorantae962fb92016-07-21 14:14:26 -0700103 private final transient int _maxLineLength;
Tatu Salorantaf15531c2011-12-22 23:00:40 -0800104
105 /*
106 /**********************************************************
107 /* Life-cycle
108 /**********************************************************
109 */
110
111 public Base64Variant(String name, String base64Alphabet, boolean usesPadding, char paddingChar, int maxLineLength)
112 {
113 _name = name;
114 _usesPadding = usesPadding;
115 _paddingChar = paddingChar;
116 _maxLineLength = maxLineLength;
117
118 // Ok and then we need to create codec tables.
119
120 // First the main encoding table:
121 int alphaLen = base64Alphabet.length();
122 if (alphaLen != 64) {
123 throw new IllegalArgumentException("Base64Alphabet length must be exactly 64 (was "+alphaLen+")");
124 }
125
126 // And then secondary encoding table and decoding table:
127 base64Alphabet.getChars(0, alphaLen, _base64ToAsciiC, 0);
128 Arrays.fill(_asciiToBase64, BASE64_VALUE_INVALID);
129 for (int i = 0; i < alphaLen; ++i) {
130 char alpha = _base64ToAsciiC[i];
131 _base64ToAsciiB[i] = (byte) alpha;
132 _asciiToBase64[alpha] = i;
133 }
134
135 // Plus if we use padding, add that in too
136 if (usesPadding) {
137 _asciiToBase64[(int) paddingChar] = BASE64_VALUE_PADDING;
138 }
139 }
140
141 /**
142 * "Copy constructor" that can be used when the base alphabet is identical
143 * to one used by another variant except for the maximum line length
144 * (and obviously, name).
145 */
146 public Base64Variant(Base64Variant base, String name, int maxLineLength)
147 {
148 this(base, name, base._usesPadding, base._paddingChar, maxLineLength);
149 }
150
151 /**
152 * "Copy constructor" that can be used when the base alphabet is identical
153 * to one used by another variant, but other details (padding, maximum
154 * line length) differ
155 */
156 public Base64Variant(Base64Variant base, String name, boolean usesPadding, char paddingChar, int maxLineLength)
157 {
158 _name = name;
159 byte[] srcB = base._base64ToAsciiB;
160 System.arraycopy(srcB, 0, this._base64ToAsciiB, 0, srcB.length);
161 char[] srcC = base._base64ToAsciiC;
162 System.arraycopy(srcC, 0, this._base64ToAsciiC, 0, srcC.length);
163 int[] srcV = base._asciiToBase64;
164 System.arraycopy(srcV, 0, this._asciiToBase64, 0, srcV.length);
165
166 _usesPadding = usesPadding;
167 _paddingChar = paddingChar;
168 _maxLineLength = maxLineLength;
169 }
170
171 /*
172 /**********************************************************
Tatu Saloranta41bd0eb2012-10-05 15:01:05 -0700173 /* Serializable overrides
174 /**********************************************************
175 */
176
177 /**
178 * Method used to "demote" deserialized instances back to
179 * canonical ones
180 */
181 protected Object readResolve() {
182 return Base64Variants.valueOf(_name);
183 }
184
185 /*
186 /**********************************************************
Tatu Salorantaf15531c2011-12-22 23:00:40 -0800187 /* Public accessors
188 /**********************************************************
189 */
190
191 public String getName() { return _name; }
192
193 public boolean usesPadding() { return _usesPadding; }
194 public boolean usesPaddingChar(char c) { return c == _paddingChar; }
195 public boolean usesPaddingChar(int ch) { return ch == (int) _paddingChar; }
196 public char getPaddingChar() { return _paddingChar; }
197 public byte getPaddingByte() { return (byte)_paddingChar; }
198
199 public int getMaxLineLength() { return _maxLineLength; }
200
201 /*
202 /**********************************************************
203 /* Decoding support
204 /**********************************************************
205 */
206
207 /**
208 * @return 6-bit decoded value, if valid character;
209 */
210 public int decodeBase64Char(char c)
211 {
212 int ch = (int) c;
213 return (ch <= 127) ? _asciiToBase64[ch] : BASE64_VALUE_INVALID;
214 }
215
216 public int decodeBase64Char(int ch)
217 {
218 return (ch <= 127) ? _asciiToBase64[ch] : BASE64_VALUE_INVALID;
219 }
220
221 public int decodeBase64Byte(byte b)
222 {
223 int ch = (int) b;
Tatu Salorantae962fb92016-07-21 14:14:26 -0700224 // note: cast retains sign, so it's from -128 to +127
225 if (ch < 0) {
226 return BASE64_VALUE_INVALID;
227 }
228 return _asciiToBase64[ch];
Tatu Salorantaf15531c2011-12-22 23:00:40 -0800229 }
230
231 /*
232 /**********************************************************
233 /* Encoding support
234 /**********************************************************
235 */
236
237 public char encodeBase64BitsAsChar(int value)
238 {
239 /* Let's assume caller has done necessary checks; this
240 * method must be fast and inlinable
241 */
242 return _base64ToAsciiC[value];
243 }
244
245 /**
246 * Method that encodes given right-aligned (LSB) 24-bit value
247 * into 4 base64 characters, stored in given result buffer.
248 */
249 public int encodeBase64Chunk(int b24, char[] buffer, int ptr)
250 {
251 buffer[ptr++] = _base64ToAsciiC[(b24 >> 18) & 0x3F];
252 buffer[ptr++] = _base64ToAsciiC[(b24 >> 12) & 0x3F];
253 buffer[ptr++] = _base64ToAsciiC[(b24 >> 6) & 0x3F];
254 buffer[ptr++] = _base64ToAsciiC[b24 & 0x3F];
255 return ptr;
256 }
257
258 public void encodeBase64Chunk(StringBuilder sb, int b24)
259 {
260 sb.append(_base64ToAsciiC[(b24 >> 18) & 0x3F]);
261 sb.append(_base64ToAsciiC[(b24 >> 12) & 0x3F]);
262 sb.append(_base64ToAsciiC[(b24 >> 6) & 0x3F]);
263 sb.append(_base64ToAsciiC[b24 & 0x3F]);
264 }
265
266 /**
267 * Method that outputs partial chunk (which only encodes one
268 * or two bytes of data). Data given is still aligned same as if
269 * it as full data; that is, missing data is at the "right end"
270 * (LSB) of int.
271 *
272 * @param outputBytes Number of encoded bytes included (either 1 or 2)
273 */
274 public int encodeBase64Partial(int bits, int outputBytes, char[] buffer, int outPtr)
275 {
276 buffer[outPtr++] = _base64ToAsciiC[(bits >> 18) & 0x3F];
277 buffer[outPtr++] = _base64ToAsciiC[(bits >> 12) & 0x3F];
278 if (_usesPadding) {
279 buffer[outPtr++] = (outputBytes == 2) ?
280 _base64ToAsciiC[(bits >> 6) & 0x3F] : _paddingChar;
281 buffer[outPtr++] = _paddingChar;
282 } else {
283 if (outputBytes == 2) {
284 buffer[outPtr++] = _base64ToAsciiC[(bits >> 6) & 0x3F];
285 }
286 }
287 return outPtr;
288 }
289
290 public void encodeBase64Partial(StringBuilder sb, int bits, int outputBytes)
291 {
292 sb.append(_base64ToAsciiC[(bits >> 18) & 0x3F]);
293 sb.append(_base64ToAsciiC[(bits >> 12) & 0x3F]);
294 if (_usesPadding) {
295 sb.append((outputBytes == 2) ?
296 _base64ToAsciiC[(bits >> 6) & 0x3F] : _paddingChar);
297 sb.append(_paddingChar);
298 } else {
299 if (outputBytes == 2) {
300 sb.append(_base64ToAsciiC[(bits >> 6) & 0x3F]);
301 }
302 }
303 }
304
305 public byte encodeBase64BitsAsByte(int value)
306 {
307 // As with above, assuming it is 6-bit value
308 return _base64ToAsciiB[value];
309 }
310
311 /**
312 * Method that encodes given right-aligned (LSB) 24-bit value
313 * into 4 base64 bytes (ascii), stored in given result buffer.
314 */
315 public int encodeBase64Chunk(int b24, byte[] buffer, int ptr)
316 {
317 buffer[ptr++] = _base64ToAsciiB[(b24 >> 18) & 0x3F];
318 buffer[ptr++] = _base64ToAsciiB[(b24 >> 12) & 0x3F];
319 buffer[ptr++] = _base64ToAsciiB[(b24 >> 6) & 0x3F];
320 buffer[ptr++] = _base64ToAsciiB[b24 & 0x3F];
321 return ptr;
322 }
323
324 /**
325 * Method that outputs partial chunk (which only encodes one
326 * or two bytes of data). Data given is still aligned same as if
327 * it as full data; that is, missing data is at the "right end"
328 * (LSB) of int.
329 *
330 * @param outputBytes Number of encoded bytes included (either 1 or 2)
331 */
332 public int encodeBase64Partial(int bits, int outputBytes, byte[] buffer, int outPtr)
333 {
334 buffer[outPtr++] = _base64ToAsciiB[(bits >> 18) & 0x3F];
335 buffer[outPtr++] = _base64ToAsciiB[(bits >> 12) & 0x3F];
336 if (_usesPadding) {
337 byte pb = (byte) _paddingChar;
338 buffer[outPtr++] = (outputBytes == 2) ?
339 _base64ToAsciiB[(bits >> 6) & 0x3F] : pb;
340 buffer[outPtr++] = pb;
341 } else {
342 if (outputBytes == 2) {
343 buffer[outPtr++] = _base64ToAsciiB[(bits >> 6) & 0x3F];
344 }
345 }
346 return outPtr;
347 }
348
Tatu Saloranta26894d02013-07-02 17:03:25 -0700349 /*
350 /**********************************************************
351 /* Convenience conversion methods for String to/from bytes
352 /* use case.
353 /**********************************************************
354 */
355
Tatu Salorantaf15531c2011-12-22 23:00:40 -0800356 /**
357 * Convenience method for converting given byte array as base64 encoded
358 * String using this variant's settings.
359 * Resulting value is "raw", that is, not enclosed in double-quotes.
360 *
361 * @param input Byte array to encode
362 */
363 public String encode(byte[] input)
364 {
365 return encode(input, false);
366 }
367
368 /**
Tatu Saloranta26894d02013-07-02 17:03:25 -0700369 * Convenience method for converting given byte array as base64 encoded String
370 * using this variant's settings,
371 * optionally enclosed in double-quotes.
Tatu Salorantaf15531c2011-12-22 23:00:40 -0800372 *
373 * @param input Byte array to encode
374 * @param addQuotes Whether to surround resulting value in double quotes or not
375 */
376 public String encode(byte[] input, boolean addQuotes)
377 {
378 int inputEnd = input.length;
379 StringBuilder sb;
380 {
381 // let's approximate... 33% overhead, ~= 3/8 (0.375)
382 int outputLen = inputEnd + (inputEnd >> 2) + (inputEnd >> 3);
383 sb = new StringBuilder(outputLen);
384 }
385 if (addQuotes) {
386 sb.append('"');
387 }
388
389 int chunksBeforeLF = getMaxLineLength() >> 2;
390
391 // Ok, first we loop through all full triplets of data:
392 int inputPtr = 0;
393 int safeInputEnd = inputEnd-3; // to get only full triplets
394
395 while (inputPtr <= safeInputEnd) {
396 // First, mash 3 bytes into lsb of 32-bit int
397 int b24 = ((int) input[inputPtr++]) << 8;
398 b24 |= ((int) input[inputPtr++]) & 0xFF;
399 b24 = (b24 << 8) | (((int) input[inputPtr++]) & 0xFF);
400 encodeBase64Chunk(sb, b24);
401 if (--chunksBeforeLF <= 0) {
402 // note: must quote in JSON value, so not really useful...
403 sb.append('\\');
404 sb.append('n');
405 chunksBeforeLF = getMaxLineLength() >> 2;
406 }
407 }
408
409 // And then we may have 1 or 2 leftover bytes to encode
410 int inputLeft = inputEnd - inputPtr; // 0, 1 or 2
411 if (inputLeft > 0) { // yes, but do we have room for output?
412 int b24 = ((int) input[inputPtr++]) << 16;
413 if (inputLeft == 2) {
414 b24 |= (((int) input[inputPtr++]) & 0xFF) << 8;
415 }
416 encodeBase64Partial(sb, b24, inputLeft);
417 }
418
419 if (addQuotes) {
420 sb.append('"');
421 }
422 return sb.toString();
423 }
Tatu Saloranta26894d02013-07-02 17:03:25 -0700424
425 /**
426 * Convenience method for decoding contents of a Base64-encoded String,
427 * using this variant's settings.
428 *
429 * @param input
430 *
431 * @since 2.2.3
432 *
433 * @throws IllegalArgumentException if input is not valid base64 encoded data
434 */
Tatu Saloranta5963bc42013-08-03 20:07:13 -0700435 @SuppressWarnings("resource")
Tatu Saloranta26894d02013-07-02 17:03:25 -0700436 public byte[] decode(String input) throws IllegalArgumentException
437 {
438 ByteArrayBuilder b = new ByteArrayBuilder();
439 decode(input, b);
440 return b.toByteArray();
441 }
442
443 /**
444 * Convenience method for decoding contents of a Base64-encoded String,
445 * using this variant's settings
446 * and appending decoded binary data using provided {@link ByteArrayBuilder}.
447 *<p>
448 * NOTE: builder will NOT be reset before decoding (nor cleared afterwards);
449 * assumption is that caller will ensure it is given in proper state, and
450 * used as appropriate afterwards.
451 *
Tatu Saloranta7c3e56b2016-11-25 18:35:27 -0800452 * @since 2.3
Tatu Saloranta26894d02013-07-02 17:03:25 -0700453 *
454 * @throws IllegalArgumentException if input is not valid base64 encoded data
455 */
456 public void decode(String str, ByteArrayBuilder builder) throws IllegalArgumentException
457 {
458 int ptr = 0;
459 int len = str.length();
460
Tatu Saloranta26894d02013-07-02 17:03:25 -0700461 while (ptr < len) {
462 // first, we'll skip preceding white space, if any
463 char ch;
464 do {
465 ch = str.charAt(ptr++);
Tatu Saloranta7c3e56b2016-11-25 18:35:27 -0800466 } while ((ptr < len) && (ch <= INT_SPACE));
Tatu Saloranta26894d02013-07-02 17:03:25 -0700467 int bits = decodeBase64Char(ch);
468 if (bits < 0) {
469 _reportInvalidBase64(ch, 0, null);
470 }
471 int decodedData = bits;
472 // then second base64 char; can't get padding yet, nor ws
473 if (ptr >= len) {
474 _reportBase64EOF();
475 }
476 ch = str.charAt(ptr++);
477 bits = decodeBase64Char(ch);
478 if (bits < 0) {
479 _reportInvalidBase64(ch, 1, null);
480 }
481 decodedData = (decodedData << 6) | bits;
482 // third base64 char; can be padding, but not ws
483 if (ptr >= len) {
484 // but as per [JACKSON-631] can be end-of-input, iff not using padding
485 if (!usesPadding()) {
486 decodedData >>= 4;
487 builder.append(decodedData);
488 break;
489 }
490 _reportBase64EOF();
491 }
492 ch = str.charAt(ptr++);
493 bits = decodeBase64Char(ch);
494
495 // First branch: can get padding (-> 1 byte)
496 if (bits < 0) {
497 if (bits != Base64Variant.BASE64_VALUE_PADDING) {
498 _reportInvalidBase64(ch, 2, null);
499 }
500 // Ok, must get padding
501 if (ptr >= len) {
502 _reportBase64EOF();
503 }
504 ch = str.charAt(ptr++);
505 if (!usesPaddingChar(ch)) {
506 _reportInvalidBase64(ch, 3, "expected padding character '"+getPaddingChar()+"'");
507 }
508 // Got 12 bits, only need 8, need to shift
509 decodedData >>= 4;
510 builder.append(decodedData);
511 continue;
512 }
513 // Nope, 2 or 3 bytes
514 decodedData = (decodedData << 6) | bits;
515 // fourth and last base64 char; can be padding, but not ws
516 if (ptr >= len) {
517 // but as per [JACKSON-631] can be end-of-input, iff not using padding
518 if (!usesPadding()) {
519 decodedData >>= 2;
520 builder.appendTwoBytes(decodedData);
521 break;
522 }
523 _reportBase64EOF();
524 }
525 ch = str.charAt(ptr++);
526 bits = decodeBase64Char(ch);
527 if (bits < 0) {
528 if (bits != Base64Variant.BASE64_VALUE_PADDING) {
529 _reportInvalidBase64(ch, 3, null);
530 }
531 decodedData >>= 2;
532 builder.appendTwoBytes(decodedData);
533 } else {
534 // otherwise, our triple is now complete
535 decodedData = (decodedData << 6) | bits;
536 builder.appendThreeBytes(decodedData);
537 }
538 }
539 }
540
Tatu Salorantaf15531c2011-12-22 23:00:40 -0800541 /*
542 /**********************************************************
Tatu Saloranta26894d02013-07-02 17:03:25 -0700543 /* Overridden standard methods
Tatu Salorantaf15531c2011-12-22 23:00:40 -0800544 /**********************************************************
545 */
546
547 @Override
548 public String toString() { return _name; }
Tatu Saloranta26894d02013-07-02 17:03:25 -0700549
550 @Override
551 public boolean equals(Object o) {
552 // identity comparison should be dine
553 return (o == this);
554 }
555
556 @Override
557 public int hashCode() {
558 return _name.hashCode();
559 }
560
561 /*
562 /**********************************************************
563 /* Internal helper methods
564 /**********************************************************
565 */
566
567 /**
568 * @param bindex Relative index within base64 character unit; between 0
569 * and 3 (as unit has exactly 4 characters)
570 */
571 protected void _reportInvalidBase64(char ch, int bindex, String msg)
572 throws IllegalArgumentException
573 {
574 String base;
575 if (ch <= INT_SPACE) {
576 base = "Illegal white space character (code 0x"+Integer.toHexString(ch)+") as character #"+(bindex+1)+" of 4-char base64 unit: can only used between units";
577 } else if (usesPaddingChar(ch)) {
578 base = "Unexpected padding character ('"+getPaddingChar()+"') as character #"+(bindex+1)+" of 4-char base64 unit: padding only legal as 3rd or 4th character";
579 } else if (!Character.isDefined(ch) || Character.isISOControl(ch)) {
580 // Not sure if we can really get here... ? (most illegal xml chars are caught at lower level)
581 base = "Illegal character (code 0x"+Integer.toHexString(ch)+") in base64 content";
582 } else {
583 base = "Illegal character '"+ch+"' (code 0x"+Integer.toHexString(ch)+") in base64 content";
584 }
585 if (msg != null) {
586 base = base + ": " + msg;
587 }
588 throw new IllegalArgumentException(base);
589 }
590
591 protected void _reportBase64EOF() throws IllegalArgumentException {
592 throw new IllegalArgumentException("Unexpected end-of-String in base64 content");
593 }
Tatu Salorantaf15531c2011-12-22 23:00:40 -0800594}
595