#! /usr/bin/env python3

"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""

# Modified 04-Oct-1995 by Jack Jansen to use binascii module
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere

import re
import struct
import binascii
import itertools


# Names exported by "from base64 import *".
__all__ = [
    # Legacy interface exports traditional RFC 1521 Base64 encodings
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b16encode', 'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode', 'b85decode', 'a85encode', 'a85decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]


bytes_types = (bytes, bytearray)  # Types acceptable as binary data


def _bytes_from_decode_data(s):
    """Normalize the argument of a decoding function to binary data.

    An ASCII-only str is encoded to bytes.  bytes and bytearray objects are
    returned unchanged (the same object).  Any other object is passed to
    memoryview() and copied into a new bytes object.

    ValueError is raised for a str containing non-ASCII characters, and
    TypeError for an object that memoryview() does not accept.
    """
    if isinstance(s, str):
        try:
            return s.encode('ascii')
        except UnicodeEncodeError:
            raise ValueError('string argument should contain only ASCII '
                             'characters')
    if isinstance(s, bytes_types):
        return s
    try:
        return memoryview(s).tobytes()
    except TypeError:
        # Hide the low-level memoryview() error; the message below is the
        # one meaningful to callers of the decoding functions.
        raise TypeError("argument should be a bytes-like object or ASCII "
                        "string, not %r" % s.__class__.__name__) from None


# Base64 encoding/decoding uses binascii

def b64encode(s, altchars=None):
    """Encode a byte string using Base64.

    s is the byte string to encode.  Optional altchars must be a byte
    string of length 2 which specifies an alternative alphabet for the
    '+' and '/' characters.  This allows an application to
    e.g. generate url or filesystem safe Base64 strings.

    The encoded byte string is returned.
    """
    # Strip off the trailing newline
    encoded = binascii.b2a_base64(s)[:-1]
    if altchars is not None:
        # NOTE: this assert is stripped under "python -O"; bytes.maketrans()
        # below then rejects a wrong-length altchars with ValueError instead.
        assert len(altchars) == 2, repr(altchars)
        return encoded.translate(bytes.maketrans(b'+/', altchars))
    return encoded


def b64decode(s, altchars=None, validate=False):
    """Decode a Base64 encoded byte string.

    s is the byte string (or ASCII str) to decode.  Optional altchars must
    be a string of length 2 which specifies the alternative alphabet used
    instead of the '+' and '/' characters.

    The decoded string is returned.  A binascii.Error is raised if s is
    incorrectly padded.

    If validate is False (the default), non-base64-alphabet characters are
    discarded prior to the padding check.  If validate is True,
    non-base64-alphabet characters in the input (including a trailing
    newline) result in a binascii.Error.
    """
    s = _bytes_from_decode_data(s)
    if altchars is not None:
        altchars = _bytes_from_decode_data(altchars)
        # NOTE: stripped under "python -O"; bytes.maketrans() below then
        # rejects a wrong-length altchars with ValueError instead.
        assert len(altchars) == 2, repr(altchars)
        s = s.translate(bytes.maketrans(altchars, b'+/'))
    # fullmatch() rather than match() with '$': '$' also matches just before
    # a trailing newline, which would let b'YQ==\n' through strict mode.
    if validate and not re.fullmatch(b'[A-Za-z0-9+/]*={0,2}', s):
        raise binascii.Error('Non-base64 digit found')
    return binascii.a2b_base64(s)


def standard_b64encode(s):
    """Encode a byte string using the standard Base64 alphabet.

    s is the byte string to encode.  The encoded byte string is returned.
    This is b64encode() called without an alternative alphabet.
    """
    return b64encode(s)


def standard_b64decode(s):
    """Decode a byte string encoded with the standard Base64 alphabet.

    s is the byte string to decode.  The decoded byte string is
    returned.  binascii.Error is raised if the input is incorrectly
    padded.  Because b64decode() is called with its default
    validate=False, characters outside the alphabet are discarded
    rather than reported.
    """
    return b64decode(s)


# Translation tables between the standard alphabet ('+', '/') and the
# url-safe alphabet ('-', '_'); built once at import time.
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')

def urlsafe_b64encode(s):
    """Encode a byte string using a url-safe Base64 alphabet.

    s is the byte string to encode.  The encoded byte string is
    returned.  The alphabet uses '-' instead of '+' and '_' instead of
    '/'.
    """
    return b64encode(s).translate(_urlsafe_encode_translation)


def urlsafe_b64decode(s):
    """Decode a byte string encoded with the url-safe Base64 alphabet.

    s is the byte string to decode.  The decoded byte string is
    returned.  binascii.Error is raised if the input is incorrectly
    padded.  Because b64decode() is called with its default
    validate=False, characters outside the alphabet are discarded
    rather than reported.

    The alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
    s = _bytes_from_decode_data(s)
    # Map the url-safe characters back to the standard ones first.
    s = s.translate(_urlsafe_decode_translation)
    return b64decode(s)



# Base32 encoding/decoding must be done in Python
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
# One-character bytes object for each 5-bit value.
_b32tab = [bytes([i]) for i in _b32alphabet]
# Two-character bytes object for each 10-bit value (1024 entries).
_b32tab2 = [a + b for a in _b32tab for b in _b32tab]
# Reverse mapping: byte value of an alphabet character -> its 5-bit value.
_b32rev = {v: k for k, v in enumerate(_b32alphabet)}

def b32encode(s):
    """Encode a byte string using Base32.

    s is the byte string to encode.  The encoded byte string is returned.
    """
    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    leftover = len(s) % 5
    # Pad the last quantum with zero bits if necessary
    if leftover:
        # Don't use += !  s may be the caller's bytearray, which an
        # in-place concatenation would modify.
        s = s + bytes(5 - leftover)
    encoded = bytearray()
    from_bytes = int.from_bytes
    b32tab2 = _b32tab2
    # Each 5-byte quantum is a 40-bit integer emitted as four 10-bit
    # lookups, i.e. eight output characters.
    for i in range(0, len(s), 5):
        c = from_bytes(s[i: i + 5], 'big')
        encoded += (b32tab2[c >> 30] +           # bits 1 - 10
                    b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20
                    b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30
                    b32tab2[c & 0x3ff]           # bits 31 - 40
                   )
    # Adjust for any leftover partial quanta: overwrite the characters that
    # were produced purely from the zero padding with '='.
    if leftover == 1:
        encoded[-6:] = b'======'
    elif leftover == 2:
        encoded[-4:] = b'===='
    elif leftover == 3:
        encoded[-3:] = b'==='
    elif leftover == 4:
        encoded[-1:] = b'='
    return bytes(encoded)


def b32decode(s, casefold=False, map01=None):
    """Decode a Base32 encoded byte string.

    s is the byte string to decode.  Optional casefold is a flag
    specifying whether a lowercase alphabet is acceptable as input.
    For security purposes, the default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
    letter O (oh), and for optional mapping of the digit 1 (one) to
    either the letter I (eye) or letter L (el).  The optional argument
    map01 when not None, specifies which letter the digit 1 should be
    mapped to (when map01 is not None, the digit 0 is always mapped to
    the letter O).  For security purposes the default is None, so that
    0 and 1 are not allowed in the input.

    The decoded byte string is returned.  binascii.Error is raised if
    the input is incorrectly padded or if there are non-alphabet
    characters present in the input.
    """
    s = _bytes_from_decode_data(s)
    if len(s) % 8:
        raise binascii.Error('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  map01 is either None, or
    # the character to map the digit 1 (one) to.  It should be either
    # L (el) or I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    padded_len = len(s)
    s = s.rstrip(b'=')
    padchars = padded_len - len(s)
    # Now decode the full quanta
    decoded = bytearray()
    b32rev = _b32rev
    for i in range(0, len(s), 8):
        quanta = s[i: i + 8]
        acc = 0
        try:
            for c in quanta:
                acc = (acc << 5) + b32rev[c]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        decoded += acc.to_bytes(5, 'big')
    # Process the last, partial quanta
    if padchars:
        # Number of real data bytes in the final quantum for each legal
        # amount of padding.  Any other amount is rejected *before* acc is
        # touched: all-padding input never enters the loop above (acc would
        # be unbound), and over-long padding would overflow to_bytes().
        tail_len = {1: 4, 3: 3, 4: 2, 6: 1}.get(padchars)
        if tail_len is None:
            raise binascii.Error('Incorrect padding')
        # Left-align the bits of the short final quantum in the 40-bit word.
        acc <<= 5 * padchars
        last = acc.to_bytes(5, 'big')
        decoded[-5:] = last[:tail_len]
    return bytes(decoded)



# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
# lowercase.  The RFC also recommends against accepting input case
# insensitively.
def b16encode(s):
    """Encode a byte string using Base16.

    s is the byte string to encode.  The encoded byte string is returned;
    it uses the uppercase hexadecimal digits.
    """
    return binascii.hexlify(s).upper()


def b16decode(s, casefold=False):
    """Decode a Base16 encoded byte string.

    s is the byte string to decode.  Optional casefold is a flag
    specifying whether a lowercase alphabet is acceptable as input.
    For security purposes, the default is False.

    The decoded byte string is returned.  binascii.Error is raised if
    s were incorrectly padded or if there are non-alphabet characters
    present in the string.
    """
    s = _bytes_from_decode_data(s)
    if casefold:
        s = s.upper()
    # unhexlify() itself accepts lowercase digits, so the uppercase-only
    # alphabet has to be enforced here.
    if re.search(b'[^0-9A-F]', s):
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(s)

#
# Ascii85 encoding/decoding
#

def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Helper function for a85encode and b85encode.

    b is the data to encode.  chars is a list of the 85 one-character
    digits and chars2 the list of all 85*85 two-character digit pairs of
    the alphabet in use.  pad keeps the characters produced by the zero
    padding of a short final group; foldnuls and foldspaces enable the
    'z' (four zero bytes) and 'y' (four spaces) short forms.

    The encoded byte string is returned.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    # Zero-pad to a whole number of 4-byte big-endian words.
    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding
    words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)

    # Each word becomes five base-85 digits: two pairs and a single digit
    # (614125 == 85**3, 7225 == 85**2).
    chunks = [b'z' if foldnuls and not word else
              b'y' if foldspaces and word == 0x20202020 else
              (chars2[word // 614125] +
               chars2[word // 85 % 7225] +
               chars[word % 85])
              for word in words]

    if padding and not pad:
        # A folded 'z' has to be expanded again before the characters that
        # stem from the padding can be cut off.
        if chunks[-1] == b'z':
            chunks[-1] = chars[0] * 5
        chunks[-1] = chunks[-1][:-padding]

    return b''.join(chunks)

# Framing markers used by the Adobe variant of Ascii85.
_A85START = b"<~"
_A85END = b"~>"
# The 85 Ascii85 digits '!' (33) .. 'u' (117) as one-character bytes
# objects, and every two-digit combination of them.
_a85chars = [bytes([i]) for i in range(33, 118)]
_a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]

def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Encode a byte string using Ascii85.

    b is the byte string to encode. The encoded byte string is returned.

    foldspaces is an optional flag that uses the special short sequence 'y'
    instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
    feature is not supported by the "standard" Adobe encoding.

    wrapcol controls whether the output should have newline ('\\n') characters
    added to it. If this is non-zero, each output line will be at most this
    many characters long.

    pad controls whether the input string is padded to a multiple of 4 before
    encoding. Note that the btoa implementation always pads.

    adobe controls whether the encoded byte sequence is framed with <~ and ~>,
    which is used by the Adobe implementation.
    """
    # Ascii85 always folds four zero bytes into 'z' (foldnuls=True).
    result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)

    if adobe:
        result = _A85START + result
    if wrapcol:
        # A line must be able to hold at least one character, or the
        # two-character end marker in Adobe mode.
        wrapcol = max(2 if adobe else 1, wrapcol)
        chunks = [result[i: i + wrapcol]
                  for i in range(0, len(result), wrapcol)]
        if adobe:
            # Put the end marker on a line of its own if it does not fit on
            # the last one.
            if len(chunks[-1]) + 2 > wrapcol:
                chunks.append(b'')
        result = b'\n'.join(chunks)
    if adobe:
        result += _A85END

    return result

def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode an Ascii85 encoded byte string.

    b is the byte string to decode.

    foldspaces is a flag that specifies whether the 'y' short sequence should be
    accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is
    not supported by the "standard" Adobe encoding.

    adobe controls whether the input sequence is in Adobe Ascii85 format (i.e.
    is framed with <~ and ~>).

    ignorechars should be a byte string containing characters to ignore from the
    input. This should only contain whitespace characters, and by default
    contains all whitespace characters in ASCII.

    The decoded byte string is returned.  ValueError is raised for missing
    Adobe framing, for a character that is neither an Ascii85 digit, an
    accepted short sequence nor in ignorechars, for 'z' or 'y' inside a
    5-character group, and for a group whose value does not fit in 32 bits.
    """
    b = _bytes_from_decode_data(b)
    if adobe:
        if not (b.startswith(_A85START) and b.endswith(_A85END)):
            raise ValueError("Ascii85 encoded byte sequences must be bracketed "
                             "by {} and {}".format(_A85START, _A85END))
        b = b[2:-2]  # Strip off start/end markers
    #
    # We have to go through this stepwise, so as to ignore spaces and handle
    # special short sequences
    #
    packI = struct.Struct('!I').pack
    decoded = []
    decoded_append = decoded.append
    curr = []
    curr_append = curr.append
    curr_clear = curr.clear
    # Four trailing 'u' (the highest digit) complete a short final group;
    # the bytes they contribute are dropped again below.
    for x in b + b'u' * 4:
        if b'!'[0] <= x <= b'u'[0]:
            curr_append(x)
            if len(curr) == 5:
                acc = 0
                for digit in curr:
                    acc = 85 * acc + (digit - 33)
                try:
                    decoded_append(packI(acc))
                except struct.error:
                    raise ValueError('Ascii85 overflow') from None
                curr_clear()
        elif x == b'z'[0]:
            if curr:
                raise ValueError('z inside Ascii85 5-tuple')
            decoded_append(b'\0\0\0\0')
        elif foldspaces and x == b'y'[0]:
            if curr:
                raise ValueError('y inside Ascii85 5-tuple')
            decoded_append(b'\x20\x20\x20\x20')
        elif x in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % x)

    result = b''.join(decoded)
    # curr now holds the filler digits left over after the final group was
    # completed; the rest of the four fillers went into that group.
    padding = 4 - len(curr)
    if padding:
        # Throw away the extra padding
        result = result[:-padding]
    return result

# The following code is originally taken (with permission) from Mercurial

_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
# The 85 digits as one-character bytes objects, and every two-digit
# combination of them.
_b85chars = [bytes([i]) for i in _b85alphabet]
_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
# Decoding table (byte value -> digit value); built lazily by b85decode().
_b85dec = None

def b85encode(b, pad=False):
    """Encode a byte string in base85 format.

    b is the byte string to encode.  The encoded byte string is returned.

    If pad is true, the input is padded with "\\0" so its length is a multiple of
    4 characters before encoding.
    """
    return _85encode(b, _b85chars, _b85chars2, pad)

def b85decode(b):
    """Decode a base85-encoded byte string.

    b is the byte string (or ASCII str) to decode.  The decoded byte string
    is returned.  ValueError is raised if the input contains a character
    outside the base85 alphabet or a 5-character group whose value does not
    fit in 32 bits.
    """
    global _b85dec
    b = _bytes_from_decode_data(b)
    if _b85dec is None:
        # Build the decoding table on first use.
        _b85dec = [None] * 256
        for i, c in enumerate(_b85chars):
            _b85dec[c[0]] = i

    # Complete a short final group with '~', the highest digit; the bytes
    # this contributes are dropped again below.
    padding = (-len(b)) % 5
    b = b + b'~' * padding
    out = []
    packI = struct.Struct('!I').pack
    for i in range(0, len(b), 5):
        chunk = b[i:i + 5]
        acc = 0
        try:
            for c in chunk:
                acc = acc * 85 + _b85dec[c]
        except TypeError:
            # A None table entry made the arithmetic fail; find the culprit
            # so its position can be reported.
            for j, c in enumerate(chunk):
                if _b85dec[c] is None:
                    raise ValueError('bad base85 character at position %d'
                                     % (i + j)) from None
            raise
        try:
            out.append(packI(acc))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % i) from None

    result = b''.join(out)
    if padding:
        result = result[:-padding]
    return result

# Legacy interface.  This code could be cleaned up since I don't believe
# binascii has any line length limitations.  It just doesn't seem worth it
# though.  The files should be opened in binary mode.

MAXLINESIZE = 76  # Excluding the CRLF
# Number of input bytes that encode to one full output line.
MAXBINSIZE = (MAXLINESIZE//4)*3

def encode(input, output):
    """Encode a file; input and output are binary files.

    The data is read in pieces of MAXBINSIZE bytes; each piece is written
    to output as one line of Base64 text.
    """
    while True:
        s = input.read(MAXBINSIZE)
        if not s:
            break
        # read() may return less than requested; keep reading until the
        # piece is full or the input is exhausted.
        while len(s) < MAXBINSIZE:
            ns = input.read(MAXBINSIZE-len(s))
            if not ns:
                break
            s += ns
        line = binascii.b2a_base64(s)
        output.write(line)


def decode(input, output):
    """Decode a file; input and output are binary files.

    input is read line by line; each line is decoded on its own and the
    resulting bytes are written to output.
    """
    while True:
        line = input.readline()
        if not line:
            break
        s = binascii.a2b_base64(line)
        output.write(s)

def _input_type_check(s):
    """Check that s is one-dimensional data made of single-byte elements.

    Returns None.  TypeError is raised if memoryview() does not accept s,
    if the element format is not one of 'c', 'b' or 'B', or if the data
    has more than one dimension.
    """
    try:
        m = memoryview(s)
    except TypeError as err:
        msg = "expected bytes-like object, not %s" % s.__class__.__name__
        raise TypeError(msg) from err
    if m.format not in ('c', 'b', 'B'):
        msg = ("expected single byte elements, not %r from %s" %
                                          (m.format, s.__class__.__name__))
        raise TypeError(msg)
    if m.ndim != 1:
        msg = ("expected 1-D data, not %d-D data from %s" %
                                          (m.ndim, s.__class__.__name__))
        raise TypeError(msg)


def encodebytes(s):
    """Encode a bytestring into a bytestring containing multiple lines
    of base-64 data.

    The input is encoded in pieces of MAXBINSIZE bytes, one output line
    per piece.  TypeError is raised by _input_type_check() for unsuitable
    input.
    """
    _input_type_check(s)
    pieces = []
    for i in range(0, len(s), MAXBINSIZE):
        chunk = s[i : i + MAXBINSIZE]
        pieces.append(binascii.b2a_base64(chunk))
    return b"".join(pieces)

def encodestring(s):
    """Legacy alias of encodebytes().

    Emits a DeprecationWarning attributed to the caller (stacklevel 2).
    """
    import warnings
    warnings.warn("encodestring() is a deprecated alias, use encodebytes()",
                  DeprecationWarning, 2)
    return encodebytes(s)


def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytestring.

    TypeError is raised by _input_type_check() for unsuitable input.
    """
    _input_type_check(s)
    return binascii.a2b_base64(s)

def decodestring(s):
    """Legacy alias of decodebytes().

    Emits a DeprecationWarning attributed to the caller (stacklevel 2).
    """
    import warnings
    warnings.warn("decodestring() is a deprecated alias, use decodebytes()",
                  DeprecationWarning, 2)
    return decodebytes(s)


# Usable as a script...
def main():
    """Small main program.

    Options: -e encodes (the default), -d and -u decode, -t runs the
    self-test and returns.  The data is read from the file named on the
    command line, or from standard input if there is none or it is '-',
    and the result is written to standard output.  Exits with status 2 on
    a command line error.
    """
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Report on stderr without rebinding sys.stdout.
        print(msg, file=sys.stderr)
        print("""usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0],
              file=sys.stderr)
        sys.exit(2)
    func = encode
    for o, a in opts:
        if o == '-e': func = encode
        if o == '-d': func = decode
        if o == '-u': func = decode
        if o == '-t': test(); return
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
    else:
        func(sys.stdin.buffer, sys.stdout.buffer)


def test():
    """Round-trip a sample string through encodebytes() and decodebytes(),
    printing the original, the encoded and the decoded value."""
    s0 = b"Aladdin:open sesame"
    print(repr(s0))
    s1 = encodebytes(s0)
    print(repr(s1))
    s2 = decodebytes(s1)
    print(repr(s2))
    assert s0 == s2


# Run the command line interface when executed as a script.
if __name__ == '__main__':
    main()