blob: adaec1de61bf423f332c8c5df714ee06ded596b3 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Guido van Rossumaa925a51997-04-02 05:47:39 +00002
Antoine Pitrou6dd0d462013-11-17 23:52:25 +01003"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""
Guido van Rossum4acc25b2000-02-02 15:10:15 +00004
Barry Warsaw4c904d12004-01-04 01:12:26 +00005# Modified 04-Oct-1995 by Jack Jansen to use binascii module
6# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
Guido van Rossum4581ae52007-05-22 21:56:47 +00007# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
Jack Jansen951213e1995-10-04 16:39:20 +00008
Barry Warsaw4c904d12004-01-04 01:12:26 +00009import re
10import struct
Jack Jansen951213e1995-10-04 16:39:20 +000011import binascii
12
Barry Warsaw4c904d12004-01-04 01:12:26 +000013
# Names exported by a star-import of this module.
__all__ = [
    # Legacy interface exports traditional RFC 2045 Base64 encodings
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b16encode', 'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode', 'b85decode', 'a85encode', 'a85decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]
30
Barry Warsaw4c904d12004-01-04 01:12:26 +000031
Guido van Rossum254348e2007-11-21 19:29:53 +000032bytes_types = (bytes, bytearray) # Types acceptable as binary data
Guido van Rossum98297ee2007-11-06 21:34:58 +000033
Antoine Pitrouea6b4d52012-02-20 19:30:23 +010034def _bytes_from_decode_data(s):
35 if isinstance(s, str):
36 try:
37 return s.encode('ascii')
38 except UnicodeEncodeError:
39 raise ValueError('string argument should contain only ASCII characters')
Nick Coghlanfdf239a2013-10-03 00:43:22 +100040 if isinstance(s, bytes_types):
Antoine Pitrouea6b4d52012-02-20 19:30:23 +010041 return s
Nick Coghlanfdf239a2013-10-03 00:43:22 +100042 try:
43 return memoryview(s).tobytes()
44 except TypeError:
45 raise TypeError("argument should be a bytes-like object or ASCII "
46 "string, not %r" % s.__class__.__name__) from None
Barry Warsaw4c904d12004-01-04 01:12:26 +000047
Antoine Pitroufd036452008-08-19 17:56:33 +000048
Barry Warsaw4c904d12004-01-04 01:12:26 +000049# Base64 encoding/decoding uses binascii
50
def b64encode(s, altchars=None):
    """Return the Base64 encoding of the bytes-like object s as bytes.

    altchars, when given, must be a byte string of length 2 whose two
    characters replace '+' and '/' in the output, e.g. to produce URL or
    filesystem safe Base64 strings.
    """
    # binascii appends a newline to its output; drop that last byte.
    with_newline = binascii.b2a_base64(s)
    result = with_newline[:-1]
    if altchars is None:
        return result
    assert len(altchars) == 2, repr(altchars)
    substitution = bytes.maketrans(b'+/', altchars)
    return result.translate(substitution)
64
65
def b64decode(s, altchars=None, validate=False):
    """Decode the Base64 encoded bytes-like object or ASCII string s.

    Optional altchars must be a bytes-like object or ASCII string of length 2
    which specifies the alternative alphabet used instead of the '+' and '/'
    characters.

    The result is returned as a bytes object.  A binascii.Error is raised if
    s is incorrectly padded.

    If validate is False (the default), characters that are neither in the
    normal base-64 alphabet nor the alternative alphabet are discarded prior
    to the padding check.  If validate is True, any such non-alphabet
    character in the input, including a trailing newline, results in a
    binascii.Error.
    """
    s = _bytes_from_decode_data(s)
    if altchars is not None:
        altchars = _bytes_from_decode_data(altchars)
        assert len(altchars) == 2, repr(altchars)
        # Map the alternative characters back onto the standard alphabet.
        s = s.translate(bytes.maketrans(altchars, b'+/'))
    # fullmatch() is used instead of match() with a '$' anchor because '$'
    # also matches just before a trailing newline, which would let input
    # such as b'YQ==\n' pass strict validation.
    if validate and not re.fullmatch(b'[A-Za-z0-9+/]*={0,2}', s):
        raise binascii.Error('Non-base64 digit found')
    return binascii.a2b_base64(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +000089
90
def standard_b64encode(s):
    """Return the bytes-like object s encoded with the standard Base64
    alphabet.

    The encoded data is returned as a bytes object.
    """
    encoded = b64encode(s)
    return encoded
97
def standard_b64decode(s):
    """Decode data that was encoded with the standard Base64 alphabet.

    s is a bytes-like object or ASCII string.  The decoded data is returned
    as a bytes object.  A binascii.Error is raised if the input is
    incorrectly padded.  Characters outside the standard alphabet are
    discarded before the padding check.
    """
    decoded = b64decode(s)
    return decoded
107
Guido van Rossum95c1c482012-06-22 15:16:09 -0700108
# Byte translation tables that swap the standard Base64 characters '+' and
# '/' with '-' and '_' (encode direction) and back again (decode direction).
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')
111
def urlsafe_b64encode(s):
    """Return the bytes-like object s encoded with the URL- and
    filesystem-safe Base64 alphabet.

    The result is a bytes object.  This alphabet uses '-' in place of '+'
    and '_' in place of '/'.
    """
    standard = b64encode(s)
    return standard.translate(_urlsafe_encode_translation)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000120
def urlsafe_b64decode(s):
    """Decode data encoded with the URL- and filesystem-safe Base64 alphabet.

    s is a bytes-like object or ASCII string.  The result is a bytes object.
    A binascii.Error is raised if the input is incorrectly padded.
    Characters that are neither in the URL-safe base-64 alphabet nor a
    plus '+' or slash '/' are discarded prior to the padding check.

    The alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
    data = _bytes_from_decode_data(s)
    # Convert to the standard alphabet, then decode as ordinary Base64.
    standard = data.translate(_urlsafe_decode_translation)
    return b64decode(standard)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000135
136
Antoine Pitroufd036452008-08-19 17:56:33 +0000137
Barry Warsaw4c904d12004-01-04 01:12:26 +0000138# Base32 encoding/decoding must be done in Python
# The 32 characters of the Base32 alphabet; a character's index is its value.
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
# Lookup tables derived from the alphabet.  They start out as None and are
# built on first use by b32encode() (_b32tab2) and b32decode() (_b32rev).
_b32tab2 = None
_b32rev = None
Barry Warsaw4c904d12004-01-04 01:12:26 +0000142
def b32encode(s):
    """Return the Base32 encoding of the bytes-like object s as bytes."""
    global _b32tab2
    # The table of all two-character combinations is only built the first
    # time the function is called, so that no memory is spent on it when
    # Base32 encoding is never used.
    if _b32tab2 is None:
        singles = [bytes((code,)) for code in _b32alphabet]
        _b32tab2 = [first + second for first in singles for second in singles]

    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    remainder = len(s) % 5
    if remainder:
        # Zero-fill the final quantum.  A new object is created on purpose:
        # an in-place += could modify a bytearray owned by the caller.
        s = s + bytes(5 - remainder)

    pairs = _b32tab2
    out = bytearray()
    for start in range(0, len(s), 5):
        # Each 5-byte quantum is a 40-bit number emitted as four 10-bit
        # slices, every slice selecting a two-character table entry.
        quantum = int.from_bytes(s[start:start + 5], 'big')
        out += pairs[quantum >> 30]
        out += pairs[(quantum >> 20) & 0x3ff]
        out += pairs[(quantum >> 10) & 0x3ff]
        out += pairs[quantum & 0x3ff]

    # Overwrite the characters that only encode fill bytes with '=' padding.
    pad_len = {1: 6, 2: 4, 3: 3, 4: 1}.get(remainder, 0)
    if pad_len:
        out[-pad_len:] = b'=' * pad_len
    return bytes(out)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000180
def b32decode(s, casefold=False, map01=None):
    """Decode the Base32 encoded bytes-like object or ASCII string s.

    Optional casefold is a flag specifying whether a lowercase alphabet is
    acceptable as input.  For security purposes, the default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
    letter O (oh), and for optional mapping of the digit 1 (one) to
    either the letter I (eye) or letter L (el).  The optional argument
    map01 when not None, specifies which letter the digit 1 should be
    mapped to (when map01 is not None, the digit 0 is always mapped to
    the letter O).  For security purposes the default is None, so that
    0 and 1 are not allowed in the input.

    The result is returned as a bytes object.  A binascii.Error is raised if
    the input is incorrectly padded (including input whose number of
    trailing '=' characters is not 0, 1, 3, 4 or 6) or if there are
    non-alphabet characters present in the input.
    """
    global _b32rev
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    if _b32rev is None:
        _b32rev = {v: k for k, v in enumerate(_b32alphabet)}
    s = _bytes_from_decode_data(s)
    if len(s) % 8:
        raise binascii.Error('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  map01 is either None, or the
    # character to map the digit 1 (one) to.  It should be either L (el) or
    # I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    length = len(s)
    s = s.rstrip(b'=')
    padchars = length - len(s)
    # Now decode the full quanta
    decoded = bytearray()
    b32rev = _b32rev
    for i in range(0, len(s), 8):
        quanta = s[i: i + 8]
        acc = 0
        try:
            for c in quanta:
                acc = (acc << 5) + b32rev[c]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        decoded += acc.to_bytes(5, 'big')
    # Process the last, partial quanta
    if padchars:
        # Number of data bytes carried by a final quantum with the given
        # amount of padding.  The count is validated *before* acc is used:
        # with 8 or more pad characters the loop above may not have run at
        # all (acc unbound) or the shift below would overflow 5 bytes.
        keep = {1: 4, 3: 3, 4: 2, 6: 1}.get(padchars)
        if keep is None:
            raise binascii.Error('Incorrect padding')
        # The last quantum was accumulated from fewer than 8 characters;
        # shift it into position and replace the bytes appended for it.
        acc <<= 5 * padchars
        last = acc.to_bytes(5, 'big')
        decoded[-5:] = last[:keep]
    return bytes(decoded)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000249
250
Antoine Pitroufd036452008-08-19 17:56:33 +0000251
Barry Warsaw4c904d12004-01-04 01:12:26 +0000252# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
253# lowercase. The RFC also recommends against accepting input case
254# insensitively.
def b16encode(s):
    """Return the Base16 encoding of the bytes-like object s as bytes."""
    # hexlify() yields lowercase digits; convert them to uppercase.
    lowercase_hex = binascii.hexlify(s)
    return lowercase_hex.upper()
Barry Warsaw4c904d12004-01-04 01:12:26 +0000259
260
def b16decode(s, casefold=False):
    """Decode the Base16 encoded bytes-like object or ASCII string s.

    casefold is an optional flag; when true, lowercase hexadecimal digits
    are accepted as well.  For security purposes, the default is False.

    The decoded data is returned as a bytes object.  A binascii.Error is
    raised if s is incorrectly padded or contains characters outside the
    Base16 alphabet.
    """
    data = _bytes_from_decode_data(s)
    if casefold:
        data = data.upper()
    # Anything other than 0-9 and uppercase A-F is rejected at this point.
    if re.search(b'[^0-9A-F]', data) is not None:
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(data)
277
Antoine Pitrou6dd0d462013-11-17 23:52:25 +0100278#
279# Ascii85 encoding/decoding
280#
Barry Warsaw4c904d12004-01-04 01:12:26 +0000281
# Ascii85 encoding tables.  They start out as None and are built on first
# use by a85encode().
_a85chars = None
_a85chars2 = None
# Markers that frame data in the Adobe flavour of Ascii85.
_A85START = b"<~"
_A85END = b"~>"
286
def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Shared implementation of a85encode() and b85encode().

    chars is a sequence of the 85 single-character byte strings and chars2
    the sequence of all their two-character combinations.  The input is
    zero-padded to a multiple of 4 bytes; unless pad is true, the characters
    produced by that padding are removed from the result again.  foldnuls
    and foldspaces enable the short forms b'z' (all-zero word) and b'y'
    (word of four spaces).
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding
    word_count = len(b) // 4
    words = struct.unpack('!%dI' % word_count, b)

    chunks = []
    for word in words:
        if foldnuls and not word:
            chunks.append(b'z')
        elif foldspaces and word == 0x20202020:
            chunks.append(b'y')
        else:
            # Split the 32-bit word into base-85 digits: two digit pairs
            # (looked up in chars2) followed by the final single digit.
            upper, last_digit = divmod(word, 85)
            first_pair, second_pair = divmod(upper, 7225)
            chunks.append(chars2[first_pair] + chars2[second_pair] +
                          chars[last_digit])

    if padding and not pad:
        # A folded 'z' has to be expanded before it can be truncated.
        if chunks[-1] == b'z':
            chunks[-1] = chars[0] * 5
        chunks[-1] = chunks[-1][:-padding]

    return b''.join(chunks)
310
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Return the Ascii85 encoding of the bytes-like object b as bytes.

    foldspaces is an optional flag; when true, 4 consecutive spaces
    (ASCII 0x20) are written as the short sequence 'y', as supported by
    'btoa'.  The "standard" Adobe encoding does not support this feature.

    wrapcol controls whether newline (b'\\n') characters are inserted into
    the output.  If it is non-zero, each output line is at most this many
    characters long (but never shorter than 1 character, or 2 when adobe
    is true).

    pad controls whether the input is padded to a multiple of 4 before
    encoding.  Note that the btoa implementation always pads.

    adobe controls whether the encoded byte sequence is framed with <~ and
    ~>, which is used by the Adobe implementation.
    """
    global _a85chars, _a85chars2
    # The lookup tables are only built on the first call, so that no memory
    # is spent on them when Ascii85 encoding is never used.
    if _a85chars is None:
        _a85chars = [bytes([code]) for code in range(33, 118)]
        _a85chars2 = [(left + right)
                      for left in _a85chars for right in _a85chars]

    encoded = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)

    if adobe:
        encoded = _A85START + encoded
    if wrapcol:
        min_width = 2 if adobe else 1
        width = max(min_width, wrapcol)
        lines = []
        for start in range(0, len(encoded), width):
            lines.append(encoded[start:start + width])
        # The end marker goes on a line of its own when appending it to the
        # last line would exceed the line width.
        if adobe and len(lines[-1]) + 2 > width:
            lines.append(b'')
        encoded = b'\n'.join(lines)
    if adobe:
        encoded += _A85END

    return encoded
351
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode the Ascii85 encoded bytes-like object or ASCII string b.

    foldspaces is a flag that specifies whether the 'y' short sequence is
    accepted as shorthand for 4 consecutive spaces (ASCII 0x20).  The
    "standard" Adobe encoding does not support this feature.

    adobe controls whether the input sequence is in Adobe Ascii85 format
    (i.e. is framed with <~ and ~>).

    ignorechars should be a byte string containing characters to ignore
    from the input.  This should only contain whitespace characters, and by
    default contains all whitespace characters in ASCII.

    The result is returned as a bytes object.  ValueError is raised for
    malformed input.
    """
    data = _bytes_from_decode_data(b)
    if adobe:
        if not data.endswith(_A85END):
            raise ValueError(
                "Ascii85 encoded byte sequences must end "
                "with {!r}".format(_A85END)
                )
        # The end marker is mandatory, the start marker is optional.
        start = 2 if data.startswith(_A85START) else 0
        data = data[start:-2]

    # The input is processed one character at a time so that ignorable
    # characters can be skipped and the short sequences handled.
    lowest, highest = b'!'[0], b'u'[0]
    z_code, y_code = b'z'[0], b'y'[0]
    pack_word = struct.Struct('!I').pack
    pieces = []
    group = []
    # Four extra 'u' characters complete a trailing partial group.
    for code in data + b'u' * 4:
        if lowest <= code <= highest:
            group.append(code)
            if len(group) == 5:
                value = 0
                for digit in group:
                    value = 85 * value + (digit - 33)
                try:
                    pieces.append(pack_word(value))
                except struct.error:
                    raise ValueError('Ascii85 overflow') from None
                group.clear()
        elif code == z_code:
            if group:
                raise ValueError('z inside Ascii85 5-tuple')
            pieces.append(b'\0\0\0\0')
        elif foldspaces and code == y_code:
            if group:
                raise ValueError('y inside Ascii85 5-tuple')
            pieces.append(b'\x20\x20\x20\x20')
        elif code in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % code)

    result = b''.join(pieces)
    padding = 4 - len(group)
    if padding:
        # Drop the bytes that were produced by the extra 'u' characters.
        result = result[:-padding]
    return result
421
422# The following code is originally taken (with permission) from Mercurial
423
# The 85 characters of the base85 alphabet; a character's index is its value.
_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
# Lookup tables derived from the alphabet.  They start out as None and are
# built on first use: the encoding tables by b85encode(), the decoding
# table by b85decode().
_b85chars = None
_b85chars2 = None
_b85dec = None
429
def b85encode(b, pad=False):
    """Return the base85 encoding of the bytes-like object b as bytes.

    If pad is true, the input is padded with b'\\0' so its length is a
    multiple of 4 bytes before encoding.
    """
    global _b85chars, _b85chars2
    # The lookup tables are only built on the first call, so that no memory
    # is spent on them when base85 encoding is never used.
    if _b85chars is None:
        _b85chars = [bytes([code]) for code in _b85alphabet]
        _b85chars2 = [(left + right)
                      for left in _b85chars for right in _b85chars]
    return _85encode(b, _b85chars, _b85chars2, pad)
443
def b85decode(b):
    """Decode the base85-encoded bytes-like object or ASCII string b.

    The result is returned as a bytes object.  ValueError is raised if the
    input contains a character outside the base85 alphabet or if a group
    of 5 characters encodes a value that does not fit into 4 bytes.
    """
    global _b85dec
    # The reverse lookup table is only built on the first call, so that no
    # memory is spent on it when base85 decoding is never used.
    if _b85dec is None:
        _b85dec = [None] * 256
        for value, code in enumerate(_b85alphabet):
            _b85dec[code] = value

    data = _bytes_from_decode_data(b)
    padding = (-len(data)) % 5
    # Complete the last group with '~', the final character of the alphabet.
    data = data + b'~' * padding
    table = _b85dec
    pack_word = struct.Struct('!I').pack
    words = []
    for offset in range(0, len(data), 5):
        group = data[offset:offset + 5]
        value = 0
        try:
            for code in group:
                value = value * 85 + table[code]
        except TypeError:
            # Adding None failed: find the offending character to report
            # its position in the input.
            for index, code in enumerate(group):
                if table[code] is None:
                    raise ValueError('bad base85 character at position %d'
                                     % (offset + index)) from None
            raise
        try:
            words.append(pack_word(value))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % offset) from None

    result = b''.join(words)
    if padding:
        # Drop the bytes that were produced by the '~' fill characters.
        result = result[:-padding]
    return result
Antoine Pitroufd036452008-08-19 17:56:33 +0000484
Barry Warsaw4c904d12004-01-04 01:12:26 +0000485# Legacy interface. This code could be cleaned up since I don't believe
486# binascii has any line length limitations. It just doesn't seem worth it
Guido van Rossum4581ae52007-05-22 21:56:47 +0000487# though. The files should be opened in binary mode.
Skip Montanaroe99d5ea2001-01-20 19:54:20 +0000488
MAXLINESIZE = 76  # Excluding the CRLF
# Number of input bytes that encode to exactly one full output line.
MAXBINSIZE = (MAXLINESIZE // 4) * 3


def encode(input, output):
    """Encode a file; input and output are binary files."""
    block = input.read(MAXBINSIZE)
    while block:
        # A read may return fewer bytes than requested; keep reading until
        # the block is full or the input is exhausted.
        missing = MAXBINSIZE - len(block)
        while missing > 0:
            extra = input.read(missing)
            if not extra:
                break
            block += extra
            missing = MAXBINSIZE - len(block)
        output.write(binascii.b2a_base64(block))
        block = input.read(MAXBINSIZE)
Guido van Rossumf1945461995-06-14 23:43:44 +0000505
Barry Warsaw4c904d12004-01-04 01:12:26 +0000506
def decode(input, output):
    """Decode a file; input and output are binary files."""
    # The input is decoded one line at a time until readline() returns an
    # empty result.
    line = input.readline()
    while line:
        output.write(binascii.a2b_base64(line))
        line = input.readline()
Guido van Rossumf1945461995-06-14 23:43:44 +0000515
Nick Coghlanfdf239a2013-10-03 00:43:22 +1000516def _input_type_check(s):
517 try:
518 m = memoryview(s)
519 except TypeError as err:
520 msg = "expected bytes-like object, not %s" % s.__class__.__name__
521 raise TypeError(msg) from err
522 if m.format not in ('c', 'b', 'B'):
523 msg = ("expected single byte elements, not %r from %s" %
524 (m.format, s.__class__.__name__))
525 raise TypeError(msg)
526 if m.ndim != 1:
527 msg = ("expected 1-D data, not %d-D data from %s" %
528 (m.ndim, s.__class__.__name__))
529 raise TypeError(msg)
530
Barry Warsaw4c904d12004-01-04 01:12:26 +0000531
def encodebytes(s):
    """Encode a bytestring into a bytes object containing multiple lines
    of base-64 data."""
    _input_type_check(s)
    # Every slice of up to MAXBINSIZE input bytes becomes one output line.
    lines = [binascii.b2a_base64(s[start:start + MAXBINSIZE])
             for start in range(0, len(s), MAXBINSIZE)]
    return b"".join(lines)
Guido van Rossumf1945461995-06-14 23:43:44 +0000541
def encodestring(s):
    """Legacy alias of encodebytes()."""
    from warnings import warn
    # A stack level of 2 attributes the warning to the caller.
    warn("encodestring() is a deprecated alias, use encodebytes()",
         DeprecationWarning, stacklevel=2)
    return encodebytes(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000548
Guido van Rossum54a40cb2007-08-27 22:27:41 +0000549
def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytes object."""
    _input_type_check(s)
    decoded = binascii.a2b_base64(s)
    return decoded
Guido van Rossumf1945461995-06-14 23:43:44 +0000554
def decodestring(s):
    """Legacy alias of decodebytes()."""
    from warnings import warn
    # A stack level of 2 attributes the warning to the caller.
    warn("decodestring() is a deprecated alias, use decodebytes()",
         DeprecationWarning, stacklevel=2)
    return decodebytes(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000561
Antoine Pitroufd036452008-08-19 17:56:33 +0000562
Guido van Rossum4581ae52007-05-22 21:56:47 +0000563# Usable as a script...
def main():
    """Small main program.

    Parses the options -d/-u (decode), -e (encode, the default) and -t (run
    the self test) from sys.argv.  The data is read from the file named by
    the first argument, or from standard input when no file or '-' is
    given, and the result is written to standard output.  On an invalid
    option a usage message is printed to standard error and the program
    exits with status 2.
    """
    import getopt
    import sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Write to stderr explicitly instead of rebinding sys.stdout, so the
        # process-wide stream is left untouched for code that catches the
        # SystemExit raised below.
        print(msg, file=sys.stderr)
        print("""usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0],
              file=sys.stderr)
        sys.exit(2)
    func = encode
    for o, _ in opts:
        if o == '-e': func = encode
        if o == '-d': func = decode
        if o == '-u': func = decode
        if o == '-t': test(); return
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
    else:
        func(sys.stdin.buffer, sys.stdout.buffer)
Guido van Rossumf1945461995-06-14 23:43:44 +0000588
Barry Warsaw4c904d12004-01-04 01:12:26 +0000589
def test():
    """Round-trip a sample string through encodebytes() and decodebytes(),
    printing the repr of each stage."""
    original = b"Aladdin:open sesame"
    print(repr(original))
    encoded = encodebytes(original)
    print(repr(encoded))
    roundtrip = decodebytes(encoded)
    print(repr(roundtrip))
    assert original == roundtrip
Guido van Rossumf1945461995-06-14 23:43:44 +0000598
Barry Warsaw4c904d12004-01-04 01:12:26 +0000599
# Run the command-line interface when the file is executed as a script.
if __name__ == '__main__':
    main()