#! /usr/bin/env python3

"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""

# Modified 04-Oct-1995 by Jack Jansen to use binascii module
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
Jack Jansen951213e1995-10-04 16:39:20 +00008
Barry Warsaw4c904d12004-01-04 01:12:26 +00009import re
10import struct
Jack Jansen951213e1995-10-04 16:39:20 +000011import binascii
12
Barry Warsaw4c904d12004-01-04 01:12:26 +000013
# Public names exported by "from base64 import *".
__all__ = [
    # Legacy interface exports traditional RFC 1521 Base64 encodings
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b16encode', 'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode', 'b85decode', 'a85encode', 'a85decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]
30
Barry Warsaw4c904d12004-01-04 01:12:26 +000031
Guido van Rossum254348e2007-11-21 19:29:53 +000032bytes_types = (bytes, bytearray) # Types acceptable as binary data
Guido van Rossum98297ee2007-11-06 21:34:58 +000033
Antoine Pitrouea6b4d52012-02-20 19:30:23 +010034def _bytes_from_decode_data(s):
35 if isinstance(s, str):
36 try:
37 return s.encode('ascii')
38 except UnicodeEncodeError:
39 raise ValueError('string argument should contain only ASCII characters')
Nick Coghlanfdf239a2013-10-03 00:43:22 +100040 if isinstance(s, bytes_types):
Antoine Pitrouea6b4d52012-02-20 19:30:23 +010041 return s
Nick Coghlanfdf239a2013-10-03 00:43:22 +100042 try:
43 return memoryview(s).tobytes()
44 except TypeError:
45 raise TypeError("argument should be a bytes-like object or ASCII "
46 "string, not %r" % s.__class__.__name__) from None
Barry Warsaw4c904d12004-01-04 01:12:26 +000047
Antoine Pitroufd036452008-08-19 17:56:33 +000048
# Base64 encoding/decoding uses binascii

def b64encode(s, altchars=None):
    """Return the Base64 encoding of the byte string s.

    altchars, when given, must be a byte string of length 2 whose two
    characters replace '+' and '/' in the output.  This lets an
    application produce e.g. url or filesystem safe Base64 strings.
    """
    # binascii.b2a_base64() appends a newline; drop that last byte.
    line = binascii.b2a_base64(s)
    encoded = line[:-1]
    if altchars is None:
        return encoded
    assert len(altchars) == 2, repr(altchars)
    table = bytes.maketrans(b'+/', altchars)
    return encoded.translate(table)
67
68
def b64decode(s, altchars=None, validate=False):
    """Decode a Base64 encoded byte string.

    s is the byte string (or ASCII str) to decode.  Optional altchars must
    be a string of length 2 which specifies the alternative alphabet used
    instead of the '+' and '/' characters.

    The decoded bytes are returned.  A binascii.Error is raised if s is
    incorrectly padded.

    If validate is False (the default), non-base64-alphabet characters are
    discarded prior to the padding check.  If validate is True,
    non-base64-alphabet characters in the input result in a binascii.Error.
    """
    s = _bytes_from_decode_data(s)
    if altchars is not None:
        altchars = _bytes_from_decode_data(altchars)
        assert len(altchars) == 2, repr(altchars)
        # Map the alternative alphabet back onto the standard one.
        s = s.translate(bytes.maketrans(altchars, b'+/'))
    # fullmatch() is required here: with match() and a trailing '$' the
    # pattern also matches just before a final newline, so input such as
    # b'YQ==\n' would wrongly pass validation.
    if validate and not re.fullmatch(b'[A-Za-z0-9+/]*={0,2}', s):
        raise binascii.Error('Non-base64 digit found')
    return binascii.a2b_base64(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +000091
92
def standard_b64encode(s):
    """Encode a byte string using the standard Base64 alphabet.

    s is the byte string to encode.  The encoded byte string is returned.
    This simply calls b64encode() without alternative characters.
    """
    return b64encode(s)
99
def standard_b64decode(s):
    """Decode a byte string encoded with the standard Base64 alphabet.

    s is the byte string to decode.  The decoded byte string is
    returned.  binascii.Error is raised if the input is incorrectly
    padded.  b64decode() is called with its default arguments, so
    characters outside the Base64 alphabet are discarded rather than
    reported as an error.
    """
    return b64decode(s)
109
Guido van Rossum95c1c482012-06-22 15:16:09 -0700110
# Translation tables between the standard alphabet ('+', '/') and the
# url-safe alphabet ('-', '_'); built once at import time.
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')
113
def urlsafe_b64encode(s):
    """Encode a byte string using a url-safe Base64 alphabet.

    s is the byte string to encode.  The encoded byte string is
    returned.  The alphabet uses '-' instead of '+' and '_' instead of
    '/'.
    """
    # Encode with the standard alphabet, then swap the two unsafe characters.
    return b64encode(s).translate(_urlsafe_encode_translation)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000122
def urlsafe_b64decode(s):
    """Decode a byte string encoded with the url-safe Base64 alphabet.

    s is the byte string to decode.  The decoded byte string is
    returned.  binascii.Error is raised if the input is incorrectly
    padded.  b64decode() is called with its default arguments, so
    characters outside the alphabet are discarded rather than reported
    as an error.

    The alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
    s = _bytes_from_decode_data(s)
    # Map '-' and '_' back to '+' and '/' before the standard decode.
    s = s.translate(_urlsafe_decode_translation)
    return b64decode(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000136
137
Antoine Pitroufd036452008-08-19 17:56:33 +0000138
# Base32 encoding/decoding must be done in Python
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
# One single-character bytes object per 5-bit value.
_b32tab = [bytes([i]) for i in _b32alphabet]
# One two-character bytes object per 10-bit value (1024 entries).
_b32tab2 = [a + b for a in _b32tab for b in _b32tab]
# Reverse lookup: byte value of an alphabet character -> its 5-bit value.
_b32rev = {v: k for k, v in enumerate(_b32alphabet)}
Barry Warsaw4c904d12004-01-04 01:12:26 +0000144
def b32encode(s):
    """Return the Base32 encoding of the byte string s.

    The input is processed in 5-byte quanta, each producing 8 output
    characters; a short final quantum is completed with '=' padding.
    """
    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    remainder = len(s) % 5
    if remainder:
        # Zero-fill the last quantum.  A new object is built on purpose so
        # that a caller's bytearray is never modified in place.
        s = s + bytes(5 - remainder)
    pairs = _b32tab2
    out = bytearray()
    for start in range(0, len(s), 5):
        word = int.from_bytes(s[start: start + 5], 'big')
        # Four 10-bit groups, each looked up as a two-character pair.
        out += (pairs[word >> 30] +
                pairs[(word >> 20) & 0x3ff] +
                pairs[(word >> 10) & 0x3ff] +
                pairs[word & 0x3ff])
    # Number of trailing characters to overwrite with '=' for each
    # possible count of leftover input bytes.
    pad_len = {1: 6, 2: 4, 3: 3, 4: 1}.get(remainder)
    if pad_len:
        out[-pad_len:] = b'=' * pad_len
    return bytes(out)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000176
def b32decode(s, casefold=False, map01=None):
    """Decode a Base32 encoded byte string.

    s is the byte string (or ASCII str) to decode.  Optional casefold is a
    flag specifying whether a lowercase alphabet is acceptable as input.
    For security purposes, the default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
    letter O (oh), and for optional mapping of the digit 1 (one) to
    either the letter I (eye) or letter L (el).  The optional argument
    map01 when not None, specifies which letter the digit 1 should be
    mapped to (when map01 is not None, the digit 0 is always mapped to
    the letter O).  For security purposes the default is None, so that
    0 and 1 are not allowed in the input.

    The decoded bytes are returned.  binascii.Error is raised if
    the input is incorrectly padded or if there are non-alphabet
    characters present in the input.
    """
    s = _bytes_from_decode_data(s)
    if len(s) % 8:
        raise binascii.Error('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  The flag map01 will be either
    # None, or the character to map the digit 1 (one) to.  It should be
    # either L (el) or I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    l = len(s)
    s = s.rstrip(b'=')
    padchars = l - len(s)
    # Now decode the full quanta
    decoded = bytearray()
    b32rev = _b32rev
    for i in range(0, len(s), 8):
        quanta = s[i: i + 8]
        acc = 0
        try:
            for c in quanta:
                acc = (acc << 5) + b32rev[c]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        decoded += acc.to_bytes(5, 'big')
    # Process the last, partial quanta
    if padchars:
        # Map each legal pad count to the number of decoded bytes that the
        # final quantum really carries.  The count is checked *before* acc is
        # touched: input made only of '=' never binds acc, and eight or more
        # pad characters would shift acc beyond the 5 bytes to_bytes() allows.
        keep = {1: 4, 3: 3, 4: 2, 6: 1}.get(padchars)
        if keep is None:
            raise binascii.Error('Incorrect padding')
        # A legal pad count is below 8 while the input length is a multiple
        # of 8, so at least one quantum was decoded and acc is bound.
        acc <<= 5 * padchars
        last = acc.to_bytes(5, 'big')
        decoded[-5:] = last[:keep]
    return bytes(decoded)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000241
242
Antoine Pitroufd036452008-08-19 17:56:33 +0000243
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
# lowercase.  The RFC also recommends against accepting input case
# insensitively.
def b16encode(s):
    """Return the Base16 (uppercase hexadecimal) encoding of the byte string s."""
    hex_digits = binascii.hexlify(s)
    return hex_digits.upper()
Barry Warsaw4c904d12004-01-04 01:12:26 +0000253
254
def b16decode(s, casefold=False):
    """Return the bytes represented by the Base16 encoded byte string s.

    When casefold is true, lowercase hexadecimal digits are accepted as
    well; for security purposes the default is False.

    binascii.Error is raised if s contains characters outside the Base16
    alphabet; binascii.unhexlify() performs the actual conversion.
    """
    data = _bytes_from_decode_data(s)
    if casefold:
        data = data.upper()
    # Reject anything that is not an uppercase hexadecimal digit.
    if re.search(b'[^0-9A-F]', data) is not None:
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(data)
272
#
# Ascii85 encoding/decoding
#

def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Helper function for a85encode and b85encode.

    b is the binary data to encode.  chars is the list of the 85
    single-character digits and chars2 the list of all 85 * 85
    two-character digit pairs of the alphabet in use.

    If pad is true the zero bytes added to complete the last 4-byte word
    are kept in the output; otherwise the matching number of trailing
    output characters is removed.  foldnuls emits b'z' for an all-zero
    word and foldspaces emits b'y' for a word of four spaces.

    Returns the encoded data as bytes.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    # Complete the input to a whole number of 32-bit big-endian words.
    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding
    words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)

    # Each word becomes five base-85 digits, looked up as pair + pair + single
    # (614125 == 85 ** 3, 7225 == 85 ** 2).
    chunks = [b'z' if foldnuls and not word else
              b'y' if foldspaces and word == 0x20202020 else
              (chars2[word // 614125] +
               chars2[word // 85 % 7225] +
               chars[word % 85])
              for word in words]

    if padding and not pad:
        # A folded final word must be expanded again before it is truncated.
        if chunks[-1] == b'z':
            chunks[-1] = chars[0] * 5
        chunks[-1] = chunks[-1][:-padding]

    return b''.join(chunks)
301 return b''.join(chunks)
302
# Framing markers used by the Adobe flavour of Ascii85.
_A85START = b"<~"
_A85END = b"~>"
# The 85 Ascii85 digits (byte values 33..117) and every two-digit pair.
_a85chars = [bytes([i]) for i in range(33, 118)]
_a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]
307
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Return the Ascii85 encoding of the byte string b.

    foldspaces: use the short sequence 'y' for 4 consecutive spaces (ASCII
    0x20), as supported by 'btoa'.  The "standard" Adobe encoding does not
    support this.

    wrapcol: when non-zero, newline characters are inserted so that each
    output line is at most this many characters long.

    pad: pad the input to a multiple of 4 bytes before encoding (the btoa
    implementation always pads).

    adobe: frame the encoded data with <~ and ~>, as the Adobe
    implementation does.
    """
    encoded = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)

    if adobe:
        encoded = _A85START + encoded
    if wrapcol:
        # A line must be able to hold at least one character, or the
        # two-character end marker when framing.
        width = max(2 if adobe else 1, wrapcol)
        lines = [encoded[pos: pos + width]
                 for pos in range(0, len(encoded), width)]
        # Start a fresh line when the end marker would overflow the last one.
        if adobe and len(lines[-1]) + 2 > width:
            lines.append(b'')
        encoded = b'\n'.join(lines)
    if adobe:
        encoded += _A85END

    return encoded
343
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode an Ascii85 encoded byte string.

    b is the byte string (or ASCII str) to decode.

    foldspaces is a flag that specifies whether the 'y' short sequence should be
    accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is
    not supported by the "standard" Adobe encoding.

    adobe controls whether the input sequence is in Adobe Ascii85 format (i.e.
    is framed with <~ and ~>).

    ignorechars should be a byte string containing characters to ignore from the
    input. This should only contain whitespace characters, and by default
    contains all whitespace characters in ASCII.

    The decoded bytes are returned.  ValueError is raised for missing Adobe
    framing, for a character that is neither an Ascii85 digit nor ignorable,
    for 'z' or 'y' inside a 5-character group, and for a group whose value
    does not fit in 32 bits.
    """
    b = _bytes_from_decode_data(b)
    if adobe:
        if not (b.startswith(_A85START) and b.endswith(_A85END)):
            raise ValueError("Ascii85 encoded byte sequences must be bracketed "
                             "by {!r} and {!r}".format(_A85START, _A85END))
        b = b[2:-2] # Strip off start/end markers
    #
    # We have to go through this stepwise, so as to ignore spaces and handle
    # special short sequences
    #
    packI = struct.Struct('!I').pack
    decoded = []
    decoded_append = decoded.append
    # Digits (as ints) of the 5-character group currently being collected.
    curr = []
    curr_append = curr.append
    curr_clear = curr.clear
    # Four trailing 'u' digits (the highest digit value) complete any partial
    # final group; the surplus bytes they produce are cut off after the loop.
    for x in b + b'u' * 4:
        if b'!'[0] <= x <= b'u'[0]:
            curr_append(x)
            if len(curr) == 5:
                acc = 0
                for x in curr:
                    acc = 85 * acc + (x - 33)
                try:
                    decoded_append(packI(acc))
                except struct.error:
                    # The group encodes a value too large for 32 bits.
                    raise ValueError('Ascii85 overflow') from None
                curr_clear()
        elif x == b'z'[0]:
            if curr:
                raise ValueError('z inside Ascii85 5-tuple')
            decoded_append(b'\0\0\0\0')
        elif foldspaces and x == b'y'[0]:
            if curr:
                raise ValueError('y inside Ascii85 5-tuple')
            decoded_append(b'\x20\x20\x20\x20')
        elif x in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % x)

    result = b''.join(decoded)
    # curr now holds only leftover 'u' fillers; 4 - len(curr) of them were
    # consumed into the last group and contributed that many surplus bytes.
    padding = 4 - len(curr)
    if padding:
        # Throw away the extra padding
        result = result[:-padding]
    return result
408
# The following code is originally taken (with permission) from Mercurial

_b85chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
            b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"
# Rebound from the alphabet bytes to a list of single-character digits.
_b85chars = [bytes([i]) for i in _b85chars]
# Every two-digit pair of the alphabet.
_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
# Decoding table; built lazily on the first call of b85decode().
_b85dec = None
416
def b85encode(b, pad=False):
    """Encode the binary data b in base85 format and return bytes.

    If pad is true, the input is padded with b'\\0' so its length is a
    multiple of 4 bytes before encoding.
    """
    return _85encode(b, _b85chars, _b85chars2, pad)
424
def b85decode(b):
    """Decode the base85-encoded data b and return bytes.

    ValueError is raised if the input contains a character outside the
    base85 alphabet or a 5-character group whose value does not fit in
    32 bits.
    """
    global _b85dec
    b = _bytes_from_decode_data(b)
    if _b85dec is None:
        # First use: build the byte value -> digit value table.
        _b85dec = [None] * 256
        for value, digit in enumerate(_b85chars):
            _b85dec[digit[0]] = value

    # Complete the last group with the highest digit; the surplus bytes this
    # yields are removed from the result below.
    pad_len = (-len(b)) % 5
    b = b + b'~' * pad_len
    pack_word = struct.Struct('!I').pack
    pieces = []
    for start in range(0, len(b), 5):
        group = b[start:start + 5]
        total = 0
        try:
            for code in group:
                total = total * 85 + _b85dec[code]
        except TypeError:
            # A None table entry was hit: report where the bad character is.
            for offset, code in enumerate(group):
                if _b85dec[code] is None:
                    raise ValueError('bad base85 character at position %d'
                                    % (start + offset)) from None
            raise
        try:
            pieces.append(pack_word(total))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % start) from None

    decoded = b''.join(pieces)
    if pad_len:
        decoded = decoded[:-pad_len]
    return decoded
Antoine Pitroufd036452008-08-19 17:56:33 +0000460
# Legacy interface.  This code could be cleaned up since I don't believe
# binascii has any line length limitations.  It just doesn't seem worth it
# though.  The files should be opened in binary mode.

MAXLINESIZE = 76 # Excluding the CRLF
# Number of input bytes that encode to one full output line (57).
MAXBINSIZE = (MAXLINESIZE//4)*3
Guido van Rossumf1945461995-06-14 23:43:44 +0000467
def encode(input, output):
    """Base64-encode the binary file input into the binary file output.

    The data is read in pieces of MAXBINSIZE bytes and every piece is
    written as one line produced by binascii.b2a_base64().
    """
    while True:
        piece = input.read(MAXBINSIZE)
        if not piece:
            break
        # A read may return less than requested; keep reading until the
        # piece is full or the input is exhausted.
        while len(piece) < MAXBINSIZE:
            more = input.read(MAXBINSIZE - len(piece))
            if not more:
                break
            piece += more
        output.write(binascii.b2a_base64(piece))
Guido van Rossumf1945461995-06-14 23:43:44 +0000481
Barry Warsaw4c904d12004-01-04 01:12:26 +0000482
def decode(input, output):
    """Base64-decode the binary file input, line by line, into the binary file output."""
    line = input.readline()
    while line:
        output.write(binascii.a2b_base64(line))
        line = input.readline()
Guido van Rossumf1945461995-06-14 23:43:44 +0000491
Nick Coghlanfdf239a2013-10-03 00:43:22 +1000492def _input_type_check(s):
493 try:
494 m = memoryview(s)
495 except TypeError as err:
496 msg = "expected bytes-like object, not %s" % s.__class__.__name__
497 raise TypeError(msg) from err
498 if m.format not in ('c', 'b', 'B'):
499 msg = ("expected single byte elements, not %r from %s" %
500 (m.format, s.__class__.__name__))
501 raise TypeError(msg)
502 if m.ndim != 1:
503 msg = ("expected 1-D data, not %d-D data from %s" %
504 (m.ndim, s.__class__.__name__))
505 raise TypeError(msg)
506
Barry Warsaw4c904d12004-01-04 01:12:26 +0000507
def encodebytes(s):
    """Encode a bytestring into a bytestring containing multiple lines
    of base-64 data.

    The input is split into pieces of MAXBINSIZE bytes and each piece is
    encoded to its own line with binascii.b2a_base64().
    """
    _input_type_check(s)
    lines = [binascii.b2a_base64(s[start : start + MAXBINSIZE])
             for start in range(0, len(s), MAXBINSIZE)]
    return b"".join(lines)
Guido van Rossumf1945461995-06-14 23:43:44 +0000517
def encodestring(s):
    """Legacy alias of encodebytes().

    Emits a DeprecationWarning and returns encodebytes(s).
    """
    import warnings
    # stacklevel 2 attributes the warning to the caller of encodestring().
    warnings.warn("encodestring() is a deprecated alias, use encodebytes()",
                  DeprecationWarning, 2)
    return encodebytes(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000524
Guido van Rossum54a40cb2007-08-27 22:27:41 +0000525
def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytestring.

    The input is validated with _input_type_check() and then decoded by
    binascii.a2b_base64().
    """
    _input_type_check(s)
    return binascii.a2b_base64(s)
Guido van Rossumf1945461995-06-14 23:43:44 +0000530
def decodestring(s):
    """Legacy alias of decodebytes().

    Emits a DeprecationWarning and returns decodebytes(s).
    """
    import warnings
    # stacklevel 2 attributes the warning to the caller of decodestring().
    warnings.warn("decodestring() is a deprecated alias, use decodebytes()",
                  DeprecationWarning, 2)
    return decodebytes(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000537
Antoine Pitroufd036452008-08-19 17:56:33 +0000538
# Usable as a script...
def main():
    """Small main program.

    Command line: [-d|-e|-u|-t] [file|-]

    -e encodes (the default), -d and -u decode, and -t runs the built-in
    round-trip test and returns.  Data is read from the named file, or from
    standard input when no file or '-' is given, and the result is written
    to standard output.  An invalid option prints the error and a usage
    message to standard error and exits with status 2.
    """
    import getopt
    import sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Print to stderr explicitly.  Rebinding sys.stdout to sys.stderr
        # would leave the process with a clobbered stdout whenever a caller
        # catches the SystemExit raised below.
        print(msg, file=sys.stderr)
        print("""usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0],
              file=sys.stderr)
        sys.exit(2)
    func = encode
    # Later options override earlier ones; -t short-circuits everything.
    for o, a in opts:
        if o == '-e': func = encode
        if o == '-d': func = decode
        if o == '-u': func = decode
        if o == '-t': test(); return
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
    else:
        func(sys.stdin.buffer, sys.stdout.buffer)
Guido van Rossumf1945461995-06-14 23:43:44 +0000564
Barry Warsaw4c904d12004-01-04 01:12:26 +0000565
def test():
    """Round-trip a sample byte string through encodebytes() and
    decodebytes(), printing the repr of each stage."""
    s0 = b"Aladdin:open sesame"
    print(repr(s0))
    s1 = encodebytes(s0)
    print(repr(s1))
    s2 = decodebytes(s1)
    print(repr(s2))
    # Decoding must give back exactly what was encoded.
    assert s0 == s2
Guido van Rossumf1945461995-06-14 23:43:44 +0000574
Barry Warsaw4c904d12004-01-04 01:12:26 +0000575
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()