blob: eb8f258a2d1977408fc23a4cd3eccdb4937b0ba8 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Guido van Rossumaa925a51997-04-02 05:47:39 +00002
Antoine Pitrou6dd0d462013-11-17 23:52:25 +01003"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""
Guido van Rossum4acc25b2000-02-02 15:10:15 +00004
Barry Warsaw4c904d12004-01-04 01:12:26 +00005# Modified 04-Oct-1995 by Jack Jansen to use binascii module
6# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
Guido van Rossum4581ae52007-05-22 21:56:47 +00007# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
Jack Jansen951213e1995-10-04 16:39:20 +00008
Barry Warsaw4c904d12004-01-04 01:12:26 +00009import re
10import struct
Jack Jansen951213e1995-10-04 16:39:20 +000011import binascii
12
Barry Warsaw4c904d12004-01-04 01:12:26 +000013
__all__ = [
    # Legacy interface: exports the traditional RFC 2045 Base64 encodings
    'encode', 'decode',
    'encodebytes', 'decodebytes',
    # Generalized interface for the other encodings
    'b64encode', 'b64decode',
    'b32encode', 'b32decode',
    'b16encode', 'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode', 'b85decode',
    'a85encode', 'a85decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see the
    # thread starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
]
30
Barry Warsaw4c904d12004-01-04 01:12:26 +000031
Guido van Rossum254348e2007-11-21 19:29:53 +000032bytes_types = (bytes, bytearray) # Types acceptable as binary data
Guido van Rossum98297ee2007-11-06 21:34:58 +000033
Antoine Pitrouea6b4d52012-02-20 19:30:23 +010034def _bytes_from_decode_data(s):
35 if isinstance(s, str):
36 try:
37 return s.encode('ascii')
38 except UnicodeEncodeError:
39 raise ValueError('string argument should contain only ASCII characters')
Nick Coghlanfdf239a2013-10-03 00:43:22 +100040 if isinstance(s, bytes_types):
Antoine Pitrouea6b4d52012-02-20 19:30:23 +010041 return s
Nick Coghlanfdf239a2013-10-03 00:43:22 +100042 try:
43 return memoryview(s).tobytes()
44 except TypeError:
45 raise TypeError("argument should be a bytes-like object or ASCII "
46 "string, not %r" % s.__class__.__name__) from None
Barry Warsaw4c904d12004-01-04 01:12:26 +000047
Antoine Pitroufd036452008-08-19 17:56:33 +000048
Barry Warsaw4c904d12004-01-04 01:12:26 +000049# Base64 encoding/decoding uses binascii
50
def b64encode(s, altchars=None):
    """Return the Base64 encoding of the bytes-like object s as bytes.

    altchars, when given, must be a byte string of length 2; its two bytes
    replace '+' and '/' in the output.  This lets an application produce
    e.g. URL- or filesystem-safe Base64 strings.
    """
    encoded = binascii.b2a_base64(s, newline=False)
    if altchars is None:
        return encoded
    assert len(altchars) == 2, repr(altchars)
    substitution = bytes.maketrans(b'+/', altchars)
    return encoded.translate(substitution)
63
64
def b64decode(s, altchars=None, validate=False):
    """Decode the Base64 encoded bytes-like object or ASCII string s.

    Optional altchars must be a bytes-like object or ASCII string of length 2
    which specifies the alternative alphabet used instead of the '+' and '/'
    characters.

    The result is returned as a bytes object.  A binascii.Error is raised if
    s is incorrectly padded.

    If validate is False (the default), characters that are neither in the
    normal base-64 alphabet nor the alternative alphabet are discarded prior
    to the padding check.  If validate is True, the whole input must consist
    of alphabet characters followed by at most two '=' pad characters; any
    other character (including a trailing newline) results in a
    binascii.Error.
    """
    s = _bytes_from_decode_data(s)
    if altchars is not None:
        altchars = _bytes_from_decode_data(altchars)
        assert len(altchars) == 2, repr(altchars)
        # Map the alternative alphabet back onto the standard one.
        s = s.translate(bytes.maketrans(altchars, b'+/'))
    # fullmatch() rather than match() with '$': '$' also matches just before
    # a trailing newline, which would let b'....\n' slip through validation.
    if validate and not re.fullmatch(b'[A-Za-z0-9+/]*={0,2}', s):
        raise binascii.Error('Non-base64 digit found')
    return binascii.a2b_base64(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +000088
89
def standard_b64encode(s):
    """Return bytes-like object s encoded with the standard Base64 alphabet.

    This is b64encode() without an alternative alphabet; the result is a
    bytes object.
    """
    return b64encode(s, altchars=None)
96
def standard_b64decode(s):
    """Decode data that was encoded with the standard Base64 alphabet.

    s is a bytes-like object or ASCII string.  This is b64decode() with its
    default arguments: the result is a bytes object, a binascii.Error is
    raised for incorrectly padded input, and characters outside the standard
    alphabet are discarded before the padding check.
    """
    return b64decode(s, altchars=None, validate=False)
106
Guido van Rossum95c1c482012-06-22 15:16:09 -0700107
# Translation tables between the standard ('+', '/') and the URL-safe
# ('-', '_') Base64 alphabets, built once at import time.
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')


def urlsafe_b64encode(s):
    """Encode bytes-like object s with the URL- and filesystem-safe alphabet.

    The data is Base64 encoded and then '+' is replaced by '-' and '/' by
    '_'.  The result is returned as a bytes object.
    """
    standard = b64encode(s)
    return standard.translate(_urlsafe_encode_translation)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000119
def urlsafe_b64decode(s):
    """Decode data encoded with the URL- and filesystem-safe Base64 alphabet.

    s is a bytes-like object or ASCII string.  '-' and '_' are mapped back
    to '+' and '/' and the result is handed to b64decode(), so the result is
    a bytes object and a binascii.Error is raised for incorrectly padded
    input.  Characters that are neither in the URL-safe alphabet nor a plus
    '+' or slash '/' are discarded prior to the padding check.
    """
    raw = _bytes_from_decode_data(s)
    standard = raw.translate(_urlsafe_decode_translation)
    return b64decode(standard)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000134
135
Antoine Pitroufd036452008-08-19 17:56:33 +0000136
Barry Warsaw4c904d12004-01-04 01:12:26 +0000137# Base32 encoding/decoding must be done in Python
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
_b32tab2 = None   # lazily built by b32encode(): 10-bit value -> 2 digits
_b32rev = None    # lazily built by b32decode(): digit -> 5-bit value


def b32encode(s):
    """Return the Base32 encoding of the bytes-like object s as bytes."""
    global _b32tab2
    # The 1024-entry pair table is only built on first use so that programs
    # which never call this function do not pay for the memory.
    if _b32tab2 is None:
        digits = [bytes((code,)) for code in _b32alphabet]
        _b32tab2 = [first + second for first in digits for second in digits]

    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    leftover = len(s) % 5
    if leftover:
        # Zero-fill the final quantum.  Build a new object instead of using
        # += so that a caller-supplied bytearray is never modified.
        s = s + b'\0' * (5 - leftover)

    pairs = _b32tab2
    encoded = bytearray()
    for start in range(0, len(s), 5):
        word = int.from_bytes(s[start: start + 5], 'big')
        # 40 bits -> four 10-bit indexes, most significant first.
        encoded += b''.join(pairs[(word >> shift) & 0x3ff]
                            for shift in (30, 20, 10, 0))

    # The digits that only encode fill bits are replaced by '=' padding.
    pad_count = {1: 6, 2: 4, 3: 3, 4: 1}.get(leftover, 0)
    if pad_count:
        encoded[-pad_count:] = b'=' * pad_count
    return bytes(encoded)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000179
def b32decode(s, casefold=False, map01=None):
    """Decode the Base32 encoded bytes-like object or ASCII string s.

    Optional casefold is a flag specifying whether a lowercase alphabet is
    acceptable as input.  For security purposes, the default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
    letter O (oh), and for optional mapping of the digit 1 (one) to
    either the letter I (eye) or letter L (el).  The optional argument
    map01 when not None, specifies which letter the digit 1 should be
    mapped to (when map01 is not None, the digit 0 is always mapped to
    the letter O).  For security purposes the default is None, so that
    0 and 1 are not allowed in the input.

    The result is returned as a bytes object.  A binascii.Error is raised if
    the input is incorrectly padded or if there are non-alphabet
    characters present in the input.
    """
    global _b32rev
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    if _b32rev is None:
        _b32rev = {v: k for k, v in enumerate(_b32alphabet)}
    s = _bytes_from_decode_data(s)
    if len(s) % 8:
        raise binascii.Error('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  map01 is either None or the
    # character to map the digit 1 (one) to.  It should be either L (el) or
    # I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this tells us how many bytes of the last decoded
    # quantum are real data.
    padded_length = len(s)
    s = s.rstrip(b'=')
    padchars = padded_length - len(s)
    # Now decode the full quanta
    decoded = bytearray()
    b32rev = _b32rev
    for i in range(0, len(s), 8):
        quanta = s[i: i + 8]
        acc = 0
        try:
            for c in quanta:
                acc = (acc << 5) + b32rev[c]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        decoded += acc.to_bytes(5, 'big')
    # Process the last, partial quanta
    if padchars:
        # Number of data bytes carried by the final quantum for each legal
        # amount of padding.  The count is validated *before* acc is used:
        # input made only of '=' never runs the loop above (acc would be
        # unbound), and an oversized pad run would overflow to_bytes(5).
        data_bytes = {1: 4, 3: 3, 4: 2, 6: 1}.get(padchars)
        if data_bytes is None:
            raise binascii.Error('Incorrect padding')
        acc <<= 5 * padchars
        last = acc.to_bytes(5, 'big')
        decoded[-5:] = last[:data_bytes]
    return bytes(decoded)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000248
249
Antoine Pitroufd036452008-08-19 17:56:33 +0000250
# The Base 16 alphabet of RFC 3548 is uppercase while hexlify() produces
# lowercase digits, hence the upper() call below.  The RFC also recommends
# against accepting input case insensitively.
def b16encode(s):
    """Return the Base16 encoding of the bytes-like object s as bytes."""
    lowercase_hex = binascii.hexlify(s)
    return lowercase_hex.upper()
Barry Warsaw4c904d12004-01-04 01:12:26 +0000258
259
def b16decode(s, casefold=False):
    """Decode the Base16 encoded bytes-like object or ASCII string s.

    When the optional casefold flag is true, lowercase digits are accepted
    too (the input is uppercased first).  For security purposes, the
    default is False.

    The result is returned as a bytes object.  A binascii.Error is raised
    when the input contains a character outside 0-9A-F; the final
    conversion is done by binascii.unhexlify().
    """
    data = _bytes_from_decode_data(s)
    if casefold:
        data = data.upper()
    has_foreign_digit = re.search(b'[^0-9A-F]', data) is not None
    if has_foreign_digit:
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(data)
276
Antoine Pitrou6dd0d462013-11-17 23:52:25 +0100277#
278# Ascii85 encoding/decoding
279#
Barry Warsaw4c904d12004-01-04 01:12:26 +0000280
_a85chars = None    # lazily built by a85encode(): the 85 single digits
_a85chars2 = None   # lazily built by a85encode(): all 85*85 digit pairs
_A85START = b"<~"
_A85END = b"~>"


def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Shared encoder behind a85encode() and b85encode().

    chars is the list of single-digit byte strings of the alphabet and
    chars2 the list of all two-digit combinations.  The input is zero-filled
    to a multiple of 4 bytes and every big-endian 32-bit word becomes five
    digits.  With foldnuls an all-zero word becomes b'z'; with foldspaces a
    word of four spaces becomes b'y'.  Unless pad is true, the digits that
    stand for the fill bytes are dropped from the end of the result.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding
    word_count = len(b) // 4
    words = struct.unpack('!%dI' % word_count, b)

    chunks = []
    for word in words:
        if foldnuls and not word:
            chunks.append(b'z')
        elif foldspaces and word == 0x20202020:
            chunks.append(b'y')
        else:
            # Split into two digit pairs (85**2 == 7225) and a last digit.
            upper, last_digit = divmod(word, 85)
            chunks.append(chars2[upper // 7225] +
                          chars2[upper % 7225] +
                          chars[last_digit])

    if padding and not pad:
        # A folded 'z' has to be expanded again before it can be truncated.
        tail = chunks[-1]
        if tail == b'z':
            tail = chars[0] * 5
        chunks[-1] = tail[:-padding]

    return b''.join(chunks)
309
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Return the Ascii85 encoding of the bytes-like object b as bytes.

    foldspaces is an optional flag; when true the short sequence 'y' is
    emitted instead of 4 consecutive spaces (ASCII 0x20), as supported by
    'btoa'.  This feature is not supported by the "standard" Adobe encoding.

    wrapcol controls whether newline (b'\\n') characters are added to the
    output.  If it is non-zero, each output line will be at most this many
    characters long.

    pad controls whether the input is padded to a multiple of 4 before
    encoding.  Note that the btoa implementation always pads.

    adobe controls whether the encoded byte sequence is framed with <~ and
    ~>, which is used by the Adobe implementation.
    """
    global _a85chars, _a85chars2
    # The digit tables are only built on first use so that programs which
    # never call this function do not pay for the memory.
    if _a85chars is None:
        _a85chars = [bytes((code,)) for code in range(33, 118)]
        _a85chars2 = [(first + second)
                      for first in _a85chars for second in _a85chars]

    # Ascii85 always folds all-zero words into 'z' (foldnuls=True).
    result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)

    if adobe:
        result = _A85START + result
    if wrapcol:
        # A line must at least be able to hold the two-byte end marker.
        minimum = 2 if adobe else 1
        wrapcol = max(minimum, wrapcol)
        lines = [result[start: start + wrapcol]
                 for start in range(0, len(result), wrapcol)]
        # Put the end marker on a line of its own when it no longer fits
        # on the last one.
        if adobe and len(lines[-1]) + 2 > wrapcol:
            lines.append(b'')
        result = b'\n'.join(lines)
    if adobe:
        result += _A85END

    return result
350
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode the Ascii85 encoded bytes-like object or ASCII string b.

    foldspaces is a flag that specifies whether the 'y' short sequence
    should be accepted as shorthand for 4 consecutive spaces (ASCII 0x20).
    This feature is not supported by the "standard" Adobe encoding.

    adobe controls whether the input sequence is in Adobe Ascii85 format
    (i.e. is framed with <~ and ~>).  The end marker is required in that
    case; the start marker is stripped when present.

    ignorechars should be a byte string containing characters to ignore
    from the input.  This should only contain whitespace characters, and by
    default contains all whitespace characters in ASCII.

    The result is returned as a bytes object.  ValueError is raised for a
    missing Adobe end marker, for a 5-tuple that overflows 32 bits, for
    'z' or 'y' inside a 5-tuple and for any other unexpected character.
    """
    b = _bytes_from_decode_data(b)
    if adobe:
        if not b.endswith(_A85END):
            raise ValueError(
                "Ascii85 encoded byte sequences must end with {!r}"
                .format(_A85END))
        # Strip off the end marker, and the start marker if there is one.
        b = b[2:-2] if b.startswith(_A85START) else b[:-2]

    # The input has to be walked byte by byte so that ignorable characters
    # and the special short sequences can be handled.
    lowest_digit, highest_digit = b'!'[0], b'u'[0]
    nul_fold, space_fold = b'z'[0], b'y'[0]
    pack_word = struct.Struct('!I').pack
    decoded = []
    group = []          # digits collected for the current 5-tuple
    # Four trailing 'u' digits complete a final, partial 5-tuple; what is
    # left in group afterwards tells how many fill bytes to drop.
    for x in b + b'u' * 4:
        if lowest_digit <= x <= highest_digit:
            group.append(x)
            if len(group) < 5:
                continue
            acc = 0
            for digit in group:
                acc = 85 * acc + (digit - 33)
            try:
                decoded.append(pack_word(acc))
            except struct.error:
                raise ValueError('Ascii85 overflow') from None
            group.clear()
        elif x == nul_fold:
            if group:
                raise ValueError('z inside Ascii85 5-tuple')
            decoded.append(b'\0\0\0\0')
        elif foldspaces and x == space_fold:
            if group:
                raise ValueError('y inside Ascii85 5-tuple')
            decoded.append(b'\x20\x20\x20\x20')
        elif x in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % x)

    result = b''.join(decoded)
    padding = 4 - len(group)
    if padding:
        # Throw away the bytes produced by the artificial 'u' digits
        result = result[:-padding]
    return result
420
421# The following code is originally taken (with permission) from Mercurial
422
_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
_b85chars = None    # lazily built by b85encode(): the 85 single digits
_b85chars2 = None   # lazily built by b85encode(): all 85*85 digit pairs
_b85dec = None      # lazily built by b85decode(): byte value -> digit value


def b85encode(b, pad=False):
    """Return the base85 encoding of the bytes-like object b as bytes.

    If pad is true, the input is padded with b'\\0' so its length is a
    multiple of 4 bytes before encoding.
    """
    global _b85chars, _b85chars2
    # The digit tables are only built on first use so that programs which
    # never call this function do not pay for the memory.
    if _b85chars is None:
        _b85chars = [bytes((code,)) for code in _b85alphabet]
        _b85chars2 = [(first + second)
                      for first in _b85chars for second in _b85chars]
    return _85encode(b, _b85chars, _b85chars2, pad)
442
def b85decode(b):
    """Decode the base85-encoded bytes-like object or ASCII string b.

    The result is returned as a bytes object.  ValueError is raised when
    the input contains a character outside the base85 alphabet or when a
    group of five digits does not fit in 32 bits.
    """
    global _b85dec
    # The reverse table is only built on first use so that programs which
    # never call this function do not pay for the memory.
    if _b85dec is None:
        table = [None] * 256
        for value, code in enumerate(_b85alphabet):
            table[code] = value
        _b85dec = table

    b = _bytes_from_decode_data(b)
    # Complete the last group with the highest digit ('~'); the bytes this
    # adds to the output are removed again at the end.
    padding = (-len(b)) % 5
    b = b + b'~' * padding
    pack_word = struct.Struct('!I').pack
    out = []
    for start in range(0, len(b), 5):
        chunk = b[start:start + 5]
        acc = 0
        try:
            for c in chunk:
                acc = acc * 85 + _b85dec[c]
        except TypeError:
            # Adding None failed: report where the offending character is.
            for offset, c in enumerate(chunk):
                if _b85dec[c] is None:
                    raise ValueError('bad base85 character at position %d'
                                     % (start + offset)) from None
            raise
        try:
            out.append(pack_word(acc))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % start) from None

    result = b''.join(out)
    if padding:
        result = result[:-padding]
    return result
Antoine Pitroufd036452008-08-19 17:56:33 +0000483
# Legacy interface.  This code could be cleaned up since binascii is not
# believed to have any line length limitations.  It just doesn't seem worth
# it though.  The files should be opened in binary mode.

MAXLINESIZE = 76  # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3


def encode(input, output):
    """Encode a file; input and output are binary files.

    The input is consumed in pieces of MAXBINSIZE bytes; each piece is
    written to output as one newline-terminated line of Base64 text.
    """
    while True:
        piece = input.read(MAXBINSIZE)
        if not piece:
            return
        # read() may return less than requested; keep asking until the
        # piece is complete or the input is exhausted.
        missing = MAXBINSIZE - len(piece)
        while missing > 0:
            extra = input.read(missing)
            if not extra:
                break
            piece += extra
            missing = MAXBINSIZE - len(piece)
        output.write(binascii.b2a_base64(piece))
Guido van Rossumf1945461995-06-14 23:43:44 +0000504
Barry Warsaw4c904d12004-01-04 01:12:26 +0000505
def decode(input, output):
    """Decode a file; input and output are binary files.

    The input is read line by line; each line is Base64 decoded and the
    resulting bytes are written to output.
    """
    line = input.readline()
    while line:
        output.write(binascii.a2b_base64(line))
        line = input.readline()
Guido van Rossumf1945461995-06-14 23:43:44 +0000514
Nick Coghlanfdf239a2013-10-03 00:43:22 +1000515def _input_type_check(s):
516 try:
517 m = memoryview(s)
518 except TypeError as err:
519 msg = "expected bytes-like object, not %s" % s.__class__.__name__
520 raise TypeError(msg) from err
521 if m.format not in ('c', 'b', 'B'):
522 msg = ("expected single byte elements, not %r from %s" %
523 (m.format, s.__class__.__name__))
524 raise TypeError(msg)
525 if m.ndim != 1:
526 msg = ("expected 1-D data, not %d-D data from %s" %
527 (m.ndim, s.__class__.__name__))
528 raise TypeError(msg)
529
Barry Warsaw4c904d12004-01-04 01:12:26 +0000530
def encodebytes(s):
    """Encode a bytestring into a bytes object containing multiple lines
    of base-64 data.

    The input is checked with _input_type_check() and then encoded in
    slices of MAXBINSIZE bytes, one newline-terminated line per slice.
    """
    _input_type_check(s)
    lines = [binascii.b2a_base64(s[start: start + MAXBINSIZE])
             for start in range(0, len(s), MAXBINSIZE)]
    return b"".join(lines)
Guido van Rossumf1945461995-06-14 23:43:44 +0000540
def encodestring(s):
    """Legacy alias of encodebytes().

    Emits a DeprecationWarning attributed to the caller and then delegates
    to encodebytes().
    """
    import warnings
    message = ("encodestring() is a deprecated alias since 3.1, "
               "use encodebytes()")
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return encodebytes(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000548
Guido van Rossum54a40cb2007-08-27 22:27:41 +0000549
def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytes object.

    The input is checked with _input_type_check() before it is handed to
    binascii.a2b_base64().
    """
    _input_type_check(s)
    decoded = binascii.a2b_base64(s)
    return decoded
Guido van Rossumf1945461995-06-14 23:43:44 +0000554
def decodestring(s):
    """Legacy alias of decodebytes().

    Emits a DeprecationWarning attributed to the caller and then delegates
    to decodebytes().
    """
    import warnings
    message = ("decodestring() is a deprecated alias since Python 3.1, "
               "use decodebytes()")
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return decodebytes(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000562
Antoine Pitroufd036452008-08-19 17:56:33 +0000563
Guido van Rossum4581ae52007-05-22 21:56:47 +0000564# Usable as a script...
def main():
    """Small main program: encode or decode a file from the command line.

    Options (parsed from sys.argv with getopt):
        -e      encode (the default)
        -d, -u  decode
        -t      run the built-in round-trip test() and return

    The optional positional argument names the input file; when it is
    missing or '-', standard input is used.  Output always goes to the
    binary layer of standard output.  On an invalid option the error and a
    usage message are written to standard error and the program exits with
    status 2.
    """
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Write straight to stderr instead of rebinding sys.stdout, which
        # would leave the process-wide stream redirected for any caller
        # that catches the SystemExit raised below.
        print(msg, file=sys.stderr)
        print("""usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'""" % sys.argv[0],
              file=sys.stderr)
        sys.exit(2)
    func = encode
    for o, a in opts:
        if o == '-e':
            func = encode
        elif o in ('-d', '-u'):
            func = decode
        elif o == '-t':
            test()
            return
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
    else:
        func(sys.stdin.buffer, sys.stdout.buffer)
Guido van Rossumf1945461995-06-14 23:43:44 +0000589
Barry Warsaw4c904d12004-01-04 01:12:26 +0000590
def test():
    """Round-trip a sample string through encodebytes() and decodebytes().

    The original, the encoded and the decoded values are printed, and the
    decoded value is asserted to equal the original.
    """
    original = b"Aladdin:open sesame"
    print(repr(original))
    encoded = encodebytes(original)
    print(repr(encoded))
    roundtrip = decodebytes(encoded)
    print(repr(roundtrip))
    assert original == roundtrip
Guido van Rossumf1945461995-06-14 23:43:44 +0000599
Barry Warsaw4c904d12004-01-04 01:12:26 +0000600
Guido van Rossumf1945461995-06-14 23:43:44 +0000601if __name__ == '__main__':
Guido van Rossum4581ae52007-05-22 21:56:47 +0000602 main()