blob: 640f787c73165c2d405f1ad457a2260836107d20 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Guido van Rossumaa925a51997-04-02 05:47:39 +00002
Antoine Pitrou6dd0d462013-11-17 23:52:25 +01003"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""
Guido van Rossum4acc25b2000-02-02 15:10:15 +00004
Barry Warsaw4c904d12004-01-04 01:12:26 +00005# Modified 04-Oct-1995 by Jack Jansen to use binascii module
6# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
Guido van Rossum4581ae52007-05-22 21:56:47 +00007# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
Jack Jansen951213e1995-10-04 16:39:20 +00008
Barry Warsaw4c904d12004-01-04 01:12:26 +00009import re
10import struct
Jack Jansen951213e1995-10-04 16:39:20 +000011import binascii
12
Barry Warsaw4c904d12004-01-04 01:12:26 +000013
14__all__ = [
15 # Legacy interface exports traditional RFC 1521 Base64 encodings
Georg Brandlb54d8012009-06-04 09:11:51 +000016 'encode', 'decode', 'encodebytes', 'decodebytes',
Barry Warsaw4c904d12004-01-04 01:12:26 +000017 # Generalized interface for other encodings
18 'b64encode', 'b64decode', 'b32encode', 'b32decode',
19 'b16encode', 'b16decode',
Antoine Pitrou6dd0d462013-11-17 23:52:25 +010020 # Base85 and Ascii85 encodings
21 'b85encode', 'b85decode', 'a85encode', 'a85decode',
Barry Warsaw4c904d12004-01-04 01:12:26 +000022 # Standard Base64 encoding
23 'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
25 # starting at:
26 #
27 # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
Barry Warsaw4c904d12004-01-04 01:12:26 +000028 'urlsafe_b64encode', 'urlsafe_b64decode',
29 ]
30
Barry Warsaw4c904d12004-01-04 01:12:26 +000031
bytes_types = (bytes, bytearray)  # Types acceptable as binary data


def _bytes_from_decode_data(s):
    """Normalise the argument of a decoding function to binary data.

    ASCII-only str objects are encoded to bytes, bytes and bytearray
    objects are returned unchanged, and any other object is copied to
    bytes through memoryview().

    Raises ValueError for a str containing non-ASCII characters and
    TypeError for an object that memoryview() rejects.
    """
    if isinstance(s, bytes_types):
        # Already binary data: hand it back untouched (no copy).
        return s
    if not isinstance(s, str):
        try:
            return memoryview(s).tobytes()
        except TypeError:
            raise TypeError("argument should be a bytes-like object or ASCII "
                            "string, not %r" % s.__class__.__name__) from None
    try:
        return s.encode('ascii')
    except UnicodeEncodeError:
        raise ValueError('string argument should contain only ASCII characters')
Barry Warsaw4c904d12004-01-04 01:12:26 +000047
Antoine Pitroufd036452008-08-19 17:56:33 +000048
Barry Warsaw4c904d12004-01-04 01:12:26 +000049# Base64 encoding/decoding uses binascii
50
def b64encode(s, altchars=None):
    """Encode a byte string using Base64 and return the encoded bytes.

    s is the byte string to encode.  altchars, when given, must be a byte
    string of length 2 whose characters replace '+' and '/' in the output;
    this lets an application produce e.g. url or filesystem safe Base64
    strings.
    """
    # b2a_base64() always terminates its output with a newline; drop it.
    encoded = binascii.b2a_base64(s)[:-1]
    if altchars is None:
        return encoded
    assert len(altchars) == 2, repr(altchars)
    substitution = bytes.maketrans(b'+/', altchars)
    return encoded.translate(substitution)
67
68
def b64decode(s, altchars=None, validate=False):
    """Decode a Base64 encoded byte string.

    s is the byte string (or ASCII str) to decode.  Optional altchars must
    be a string of length 2 which specifies the alternative alphabet used
    instead of the '+' and '/' characters.

    The decoded bytes are returned.  A binascii.Error is raised if s is
    incorrectly padded.

    If validate is False (the default), non-base64-alphabet characters are
    discarded prior to the padding check.  If validate is True, any
    non-base64-alphabet character in the input -- including a trailing
    newline -- results in a binascii.Error.
    """
    s = _bytes_from_decode_data(s)
    if altchars is not None:
        altchars = _bytes_from_decode_data(altchars)
        assert len(altchars) == 2, repr(altchars)
        # Map the alternative alphabet back onto the standard one.
        s = s.translate(bytes.maketrans(altchars, b'+/'))
    # fullmatch() is required here: with match() and a '$' anchor the
    # pattern also matches just before a trailing newline, so b'YWJj\n'
    # would wrongly pass validation.
    if validate and not re.fullmatch(b'[A-Za-z0-9+/]*={0,2}', s):
        raise binascii.Error('Non-base64 digit found')
    return binascii.a2b_base64(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +000091
92
def standard_b64encode(s):
    """Encode a byte string with the standard Base64 alphabet.

    s is the byte string to encode.  This simply calls b64encode() without
    altchars and returns the encoded byte string.
    """
    encoded = b64encode(s)
    return encoded
99
def standard_b64decode(s):
    """Decode a byte string encoded with the standard Base64 alphabet.

    s is the byte string to decode; the decoded byte string is returned.
    This calls b64decode() with its defaults, so characters outside the
    alphabet are discarded rather than rejected, and binascii.Error is
    raised if the input is incorrectly padded.
    """
    decoded = b64decode(s)
    return decoded
109
Guido van Rossum95c1c482012-06-22 15:16:09 -0700110
# Translation tables between the standard ('+', '/') and the url-safe
# ('-', '_') Base64 alphabets; built once at import time.
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')


def urlsafe_b64encode(s):
    """Encode a byte string using the url-safe Base64 alphabet.

    s is the byte string to encode.  It is encoded with b64encode() and
    then '+' is replaced by '-' and '/' by '_'.  The encoded byte string
    is returned.
    """
    standard = b64encode(s)
    return standard.translate(_urlsafe_encode_translation)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000122
def urlsafe_b64decode(s):
    """Decode a byte string encoded with the url-safe Base64 alphabet.

    s is the byte string (or ASCII str) to decode.  '-' is mapped back to
    '+' and '_' back to '/', then the result is passed to b64decode() with
    its defaults: other non-alphabet characters are discarded, and
    binascii.Error is raised if the input is incorrectly padded.

    The decoded byte string is returned.
    """
    data = _bytes_from_decode_data(s)
    standard = data.translate(_urlsafe_decode_translation)
    return b64decode(standard)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000136
137
Antoine Pitroufd036452008-08-19 17:56:33 +0000138
Barry Warsaw4c904d12004-01-04 01:12:26 +0000139# Base32 encoding/decoding must be done in Python
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
_b32tab2 = None     # lazily built by b32encode(): 1024 two-symbol strings
_b32rev = None      # lazily built by b32decode(): symbol -> 5-bit value


def b32encode(s):
    """Encode a byte string using Base32.

    s is the byte string to encode.  The encoded byte string is returned,
    padded with '=' so that its length is a multiple of 8.
    """
    global _b32tab2
    # Build the lookup table on first use so that importing the module
    # does not pay for it when the function is never called.
    if _b32tab2 is None:
        symbols = [bytes((code,)) for code in _b32alphabet]
        _b32tab2 = [first + second for first in symbols for second in symbols]

    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    leftover = len(s) % 5
    if leftover:
        # Zero-fill the last quantum.  A new object is created on purpose:
        # an in-place += would modify a bytearray owned by the caller.
        s = s + bytes(5 - leftover)

    pairs = _b32tab2
    encoded = bytearray()
    for start in range(0, len(s), 5):
        quantum = int.from_bytes(s[start: start + 5], 'big')
        # Each table entry covers 10 bits, i.e. two output symbols.
        encoded += pairs[quantum >> 30]              # bits 1 - 10
        encoded += pairs[(quantum >> 20) & 0x3ff]    # bits 11 - 20
        encoded += pairs[(quantum >> 10) & 0x3ff]    # bits 21 - 30
        encoded += pairs[quantum & 0x3ff]            # bits 31 - 40

    if leftover:
        # Symbols that only encode the zero fill are replaced by padding:
        # 1, 2, 3 or 4 leftover bytes give 6, 4, 3 or 1 pad characters.
        pad_count = (6, 4, 3, 1)[leftover - 1]
        encoded[-pad_count:] = b'=' * pad_count
    return bytes(encoded)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000183
def b32decode(s, casefold=False, map01=None):
    """Decode a Base32 encoded byte string.

    s is the byte string (or ASCII str) to decode.  Optional casefold is a
    flag specifying whether a lowercase alphabet is acceptable as input.
    For security purposes, the default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
    letter O (oh), and for optional mapping of the digit 1 (one) to
    either the letter I (eye) or letter L (el).  The optional argument
    map01 when not None, specifies which letter the digit 1 should be
    mapped to (when map01 is not None, the digit 0 is always mapped to
    the letter O).  For security purposes the default is None, so that
    0 and 1 are not allowed in the input.

    The decoded byte string is returned.  binascii.Error is raised if
    the input is incorrectly padded or if there are non-alphabet
    characters present in the input.
    """
    global _b32rev
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    if _b32rev is None:
        _b32rev = {v: k for k, v in enumerate(_b32alphabet)}
    s = _bytes_from_decode_data(s)
    if len(s) % 8:
        raise binascii.Error('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  map01 is either None, or
    # the character to map the digit 1 (one) to.  It should be either
    # L (el) or I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    padded_length = len(s)
    s = s.rstrip(b'=')
    padchars = padded_length - len(s)
    # Now decode the full quanta
    decoded = bytearray()
    b32rev = _b32rev
    for i in range(0, len(s), 8):
        quanta = s[i: i + 8]
        acc = 0
        try:
            for c in quanta:
                acc = (acc << 5) + b32rev[c]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        decoded += acc.to_bytes(5, 'big')
    # Process the last, partial quanta
    if padchars:
        # Number of trailing bytes that stem from padding, per pad count.
        # The pad count is validated *before* acc is touched: an input made
        # only of '=' never enters the loop above (acc would be unbound),
        # and 8 or more pad characters would shift acc beyond 40 bits and
        # make to_bytes() raise OverflowError instead of binascii.Error.
        surplus = {1: 1, 3: 2, 4: 3, 6: 4}.get(padchars)
        if surplus is None:
            raise binascii.Error('Incorrect padding')
        acc <<= 5 * padchars
        last = acc.to_bytes(5, 'big')
        decoded[-5:] = last[:-surplus]
    return bytes(decoded)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000253
254
Antoine Pitroufd036452008-08-19 17:56:33 +0000255
Barry Warsaw4c904d12004-01-04 01:12:26 +0000256# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
257# lowercase. The RFC also recommends against accepting input case
258# insensitively.
def b16encode(s):
    """Encode a byte string using Base16.

    s is the byte string to encode.  The encoded byte string, using
    uppercase hexadecimal digits, is returned.
    """
    # hexlify() produces lowercase digits; Base16 output is uppercase.
    hexdigits = binascii.hexlify(s)
    return hexdigits.upper()
Barry Warsaw4c904d12004-01-04 01:12:26 +0000265
266
def b16decode(s, casefold=False):
    """Decode a Base16 encoded byte string.

    s is the byte string (or ASCII str) to decode.  Optional casefold is a
    flag specifying whether a lowercase alphabet is acceptable as input.
    For security purposes, the default is False.

    The decoded byte string is returned.  binascii.Error is raised if s
    is incorrectly padded or if there are non-alphabet characters present
    in the string.
    """
    data = _bytes_from_decode_data(s)
    if casefold:
        data = data.upper()
    # Reject anything outside 0-9A-F before handing over to unhexlify().
    offending = re.search(b'[^0-9A-F]', data)
    if offending:
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(data)
284
Antoine Pitrou6dd0d462013-11-17 23:52:25 +0100285#
286# Ascii85 encoding/decoding
287#
Barry Warsaw4c904d12004-01-04 01:12:26 +0000288
_a85chars = None    # lazily built by a85encode(): the 85 one-symbol strings
_a85chars2 = None   # lazily built by a85encode(): all two-symbol strings
_A85START = b"<~"
_A85END = b"~>"


def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Shared encoder behind a85encode() and b85encode().

    b is the data to encode, chars the list of 85 single-symbol byte
    strings and chars2 the list of all two-symbol combinations.  The input
    is zero-filled to a multiple of 4 bytes; unless pad is true the
    symbols produced by that fill are cut off again.  foldnuls emits 'z'
    for an all-zero word and foldspaces emits 'y' for a word of four
    spaces.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding
    word_count = len(b) // 4
    words = struct.unpack('!%dI' % word_count, b)

    chunks = []
    for word in words:
        if foldnuls and not word:
            chunks.append(b'z')
        elif foldspaces and word == 0x20202020:
            chunks.append(b'y')
        else:
            # Split the word into base-85 digits: two leading digit pairs
            # (looked up in chars2) followed by one final digit.
            upper, last_digit = divmod(word, 85)
            first_pair, second_pair = divmod(upper, 7225)
            chunks.append(chars2[first_pair] + chars2[second_pair] +
                          chars[last_digit])

    if padding and not pad:
        final = chunks[-1]
        if final == b'z':
            # Expand the folded zero word so that it can be truncated.
            final = chars[0] * 5
        chunks[-1] = final[:-padding]

    return b''.join(chunks)
316 return b''.join(chunks)
317
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Encode a byte string using Ascii85.

    b is the byte string to encode.  The encoded byte string is returned.

    foldspaces is an optional flag that uses the special short sequence 'y'
    instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'.
    This feature is not supported by the "standard" Adobe encoding.

    wrapcol controls whether the output should have newline ('\\n')
    characters added to it.  If this is non-zero, each output line will be
    at most this many characters long.

    pad controls whether the input string is padded to a multiple of 4
    before encoding.  Note that the btoa implementation always pads.

    adobe controls whether the encoded byte sequence is framed with <~ and
    ~>, which is used by the Adobe implementation.
    """
    global _a85chars, _a85chars2
    # The symbol tables are built on first use so that the memory is not
    # spent when the function is never called.
    if _a85chars is None:
        _a85chars = [bytes((code,)) for code in range(33, 118)]
        _a85chars2 = [(first + second)
                      for first in _a85chars for second in _a85chars]

    # All-zero words are always folded to 'z' for Ascii85.
    result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)

    if adobe:
        result = _A85START + result
    if wrapcol:
        # A line must be able to hold a whole frame marker in Adobe mode.
        minimum = 2 if adobe else 1
        wrapcol = max(minimum, wrapcol)
        lines = [result[start: start + wrapcol]
                 for start in range(0, len(result), wrapcol)]
        if adobe and len(lines[-1]) + 2 > wrapcol:
            # No room left for the end marker: give it a line of its own.
            lines.append(b'')
        result = b'\n'.join(lines)
    if adobe:
        result += _A85END

    return result
360
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode an Ascii85 encoded byte string.

    b is the byte string (or ASCII str) to decode.

    foldspaces is a flag that specifies whether the 'y' short sequence
    should be accepted as shorthand for 4 consecutive spaces (ASCII 0x20).
    This feature is not supported by the "standard" Adobe encoding.

    adobe controls whether the input sequence is in Adobe Ascii85 format
    (i.e. is framed with <~ and ~>).

    ignorechars should be a byte string containing characters to ignore
    from the input.  This should only contain whitespace characters, and by
    default contains all whitespace characters in ASCII.

    ValueError is raised for a missing Adobe frame, for a character that is
    neither an Ascii85 digit nor ignorable, for 'z'/'y' inside a 5-tuple
    and for a 5-tuple whose value does not fit in 32 bits.
    """
    b = _bytes_from_decode_data(b)
    if adobe:
        if not b.startswith(_A85START) or not b.endswith(_A85END):
            raise ValueError("Ascii85 encoded byte sequences must be bracketed "
                             "by {!r} and {!r}".format(_A85START, _A85END))
        b = b[2:-2]  # Strip off start/end markers

    # The input is walked one byte at a time so that ignorable characters
    # can be skipped and the 'z' / 'y' short sequences recognised.
    pack_word = struct.Struct('!I').pack
    lowest_digit = b'!'[0]
    highest_digit = b'u'[0]
    zero_fold = b'z'[0]
    space_fold = b'y'[0]
    decoded = []
    group = []
    # Four extra 'u' digits (the highest digit) complete a trailing partial
    # 5-tuple; the bytes they contribute are cut off at the end.
    for code in b + b'u' * 4:
        if lowest_digit <= code <= highest_digit:
            group.append(code)
            if len(group) == 5:
                acc = 0
                for digit in group:
                    acc = 85 * acc + (digit - 33)
                try:
                    decoded.append(pack_word(acc))
                except struct.error:
                    raise ValueError('Ascii85 overflow') from None
                group.clear()
        elif code == zero_fold:
            if group:
                raise ValueError('z inside Ascii85 5-tuple')
            decoded.append(b'\0\0\0\0')
        elif foldspaces and code == space_fold:
            if group:
                raise ValueError('y inside Ascii85 5-tuple')
            decoded.append(b'\x20\x20\x20\x20')
        elif code in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % code)

    result = b''.join(decoded)
    padding = 4 - len(group)
    if padding:
        # Throw away the extra padding
        result = result[:-padding]
    return result
425
426# The following code is originally taken (with permission) from Mercurial
427
_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
_b85chars = None    # lazily built by b85encode(): one-symbol strings
_b85chars2 = None   # lazily built by b85encode(): two-symbol strings
_b85dec = None      # lazily built by b85decode(): byte value -> digit


def b85encode(b, pad=False):
    """Encode the byte string b in base85 format and return the result.

    If pad is true, the input is padded with "\\0" so its length is a
    multiple of 4 bytes before encoding.
    """
    global _b85chars, _b85chars2
    # The symbol tables are built on first use so that the memory is not
    # spent when the function is never called.
    if _b85chars is None:
        _b85chars = [bytes((code,)) for code in _b85alphabet]
        _b85chars2 = [(first + second)
                      for first in _b85chars for second in _b85chars]
    encoded = _85encode(b, _b85chars, _b85chars2, pad)
    return encoded
447
def b85decode(b):
    """Decode the base85-encoded byte string (or ASCII str) b.

    The decoded bytes are returned.  ValueError is raised if b contains a
    character outside the base85 alphabet or if a group of five characters
    encodes a value that does not fit in 32 bits.
    """
    global _b85dec
    # The reverse table is built on first use so that the memory is not
    # spent when the function is never called.
    if _b85dec is None:
        table = [None] * 256
        for value, code in enumerate(_b85alphabet):
            table[code] = value
        _b85dec = table

    b = _bytes_from_decode_data(b)
    # Complete the last group with '~', the highest digit of the alphabet;
    # the bytes this adds are cut off again below.
    padding = (-len(b)) % 5
    b = b + b'~' * padding
    pieces = []
    pack_word = struct.Struct('!I').pack
    for start in range(0, len(b), 5):
        group = b[start:start + 5]
        acc = 0
        try:
            for code in group:
                acc = acc * 85 + _b85dec[code]
        except TypeError:
            # A None table entry made the arithmetic fail: find the
            # offending character to report its position.
            for offset, code in enumerate(group):
                if _b85dec[code] is None:
                    raise ValueError('bad base85 character at position %d'
                                     % (start + offset)) from None
            raise
        try:
            pieces.append(pack_word(acc))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % start) from None

    result = b''.join(pieces)
    if padding:
        result = result[:-padding]
    return result
Antoine Pitroufd036452008-08-19 17:56:33 +0000485
Barry Warsaw4c904d12004-01-04 01:12:26 +0000486# Legacy interface. This code could be cleaned up since I don't believe
487# binascii has any line length limitations. It just doesn't seem worth it
Guido van Rossum4581ae52007-05-22 21:56:47 +0000488# though. The files should be opened in binary mode.
Skip Montanaroe99d5ea2001-01-20 19:54:20 +0000489
MAXLINESIZE = 76  # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3  # input bytes encoded per output line


def encode(input, output):
    """Encode a file; input and output are binary files.

    The input is consumed in pieces of MAXBINSIZE bytes and every piece is
    written to output as one newline-terminated line of Base64.
    """
    chunk = input.read(MAXBINSIZE)
    while chunk:
        # A read may return fewer bytes than requested; keep reading until
        # the piece is complete or the input is exhausted.
        missing = MAXBINSIZE - len(chunk)
        while missing > 0:
            more = input.read(missing)
            if not more:
                break
            chunk += more
            missing = MAXBINSIZE - len(chunk)
        output.write(binascii.b2a_base64(chunk))
        chunk = input.read(MAXBINSIZE)
Guido van Rossumf1945461995-06-14 23:43:44 +0000506
Barry Warsaw4c904d12004-01-04 01:12:26 +0000507
def decode(input, output):
    """Decode a file; input and output are binary files.

    The input is read line by line and the decoded bytes of every line are
    written to output.
    """
    line = input.readline()
    while line:
        output.write(binascii.a2b_base64(line))
        line = input.readline()
Guido van Rossumf1945461995-06-14 23:43:44 +0000516
def _input_type_check(s):
    """Verify that s is one-dimensional, byte-oriented binary data.

    Raises TypeError if s does not support memoryview(), if its elements
    are not single bytes (format 'c', 'b' or 'B') or if it has more than
    one dimension.  Returns None otherwise.
    """
    try:
        view = memoryview(s)
    except TypeError as err:
        raise TypeError("expected bytes-like object, not %s"
                        % s.__class__.__name__) from err
    if view.format not in ('c', 'b', 'B'):
        raise TypeError("expected single byte elements, not %r from %s" %
                        (view.format, s.__class__.__name__))
    if view.ndim != 1:
        raise TypeError("expected 1-D data, not %d-D data from %s" %
                        (view.ndim, s.__class__.__name__))
531
Barry Warsaw4c904d12004-01-04 01:12:26 +0000532
def encodebytes(s):
    """Encode a bytestring into a bytestring containing multiple lines
    of base-64 data.

    Every MAXBINSIZE bytes of input become one newline-terminated line of
    output.  TypeError is raised if s fails _input_type_check().
    """
    _input_type_check(s)
    return b"".join(binascii.b2a_base64(s[start: start + MAXBINSIZE])
                    for start in range(0, len(s), MAXBINSIZE))
Guido van Rossumf1945461995-06-14 23:43:44 +0000542
def encodestring(s):
    """Legacy alias of encodebytes().

    Emits a DeprecationWarning attributed to the caller and then returns
    encodebytes(s).
    """
    import warnings
    warnings.warn("encodestring() is a deprecated alias, use encodebytes()",
                  category=DeprecationWarning, stacklevel=2)
    return encodebytes(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000549
Guido van Rossum54a40cb2007-08-27 22:27:41 +0000550
def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytestring.

    TypeError is raised if s fails _input_type_check().
    """
    _input_type_check(s)
    decoded = binascii.a2b_base64(s)
    return decoded
Guido van Rossumf1945461995-06-14 23:43:44 +0000555
def decodestring(s):
    """Legacy alias of decodebytes().

    Emits a DeprecationWarning attributed to the caller and then returns
    decodebytes(s).
    """
    import warnings
    warnings.warn("decodestring() is a deprecated alias, use decodebytes()",
                  category=DeprecationWarning, stacklevel=2)
    return decodebytes(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000562
Antoine Pitroufd036452008-08-19 17:56:33 +0000563
Guido van Rossum4581ae52007-05-22 21:56:47 +0000564# Usable as a script...
def main():
    """Small main program: encode or decode a file or standard input.

    Options are taken from sys.argv: -e encodes (the default), -d and -u
    decode, -t runs the built-in round-trip test.  The optional argument
    names the input file; without it, or with '-', standard input is read.
    Output goes to standard output.  An invalid option prints the error
    and a usage message to standard error and exits with status 2.
    """
    import sys, getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Write to stderr explicitly instead of rebinding sys.stdout, which
        # would leave the process-wide stream replaced for any caller that
        # catches the SystemExit raised below.
        print(msg, file=sys.stderr)
        # NOTE(review): the indentation of the continuation lines of this
        # message could not be recovered from the flattened source; eight
        # spaces are assumed -- confirm against the original file.
        print("""usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0],
              file=sys.stderr)
        sys.exit(2)
    func = encode
    for o, a in opts:
        if o == '-e':
            func = encode
        elif o in ('-d', '-u'):
            func = decode
        elif o == '-t':
            test()
            return
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
    else:
        func(sys.stdin.buffer, sys.stdout.buffer)
Guido van Rossumf1945461995-06-14 23:43:44 +0000589
Barry Warsaw4c904d12004-01-04 01:12:26 +0000590
def test():
    """Round-trip a sample string through encodebytes()/decodebytes().

    The original, encoded and decoded values are printed, and the decoded
    value is asserted to equal the original.
    """
    original = b"Aladdin:open sesame"
    print(repr(original))
    encoded = encodebytes(original)
    print(repr(encoded))
    roundtrip = decodebytes(encoded)
    print(repr(roundtrip))
    assert original == roundtrip
Guido van Rossumf1945461995-06-14 23:43:44 +0000599
Barry Warsaw4c904d12004-01-04 01:12:26 +0000600
Guido van Rossumf1945461995-06-14 23:43:44 +0000601if __name__ == '__main__':
Guido van Rossum4581ae52007-05-22 21:56:47 +0000602 main()