blob: eb925cd4c55b91111e016a338cbf8ee3caee8572 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Guido van Rossumaa925a51997-04-02 05:47:39 +00002
Antoine Pitrou6dd0d462013-11-17 23:52:25 +01003"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""
Guido van Rossum4acc25b2000-02-02 15:10:15 +00004
# Modified 04-Oct-1995 by Jack Jansen to use binascii module
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
Jack Jansen951213e1995-10-04 16:39:20 +00008
Barry Warsaw4c904d12004-01-04 01:12:26 +00009import re
10import struct
Jack Jansen951213e1995-10-04 16:39:20 +000011import binascii
12
Barry Warsaw4c904d12004-01-04 01:12:26 +000013
__all__ = [
    # Legacy interface: the traditional RFC 1521 Base64 helpers
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for the other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b16encode', 'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode', 'b85decode', 'a85encode', 'a85decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see the
    # thread starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]
30
Barry Warsaw4c904d12004-01-04 01:12:26 +000031
# Concrete types that are accepted as binary data without conversion
bytes_types = (bytes, bytearray)
Guido van Rossum98297ee2007-11-06 21:34:58 +000033
def _bytes_from_decode_data(s):
    """Normalize decoder input to a bytes or bytearray object.

    bytes and bytearray objects are returned unchanged.  A str is encoded
    as ASCII; ValueError is raised if it contains non-ASCII characters.
    Any other object is read through memoryview() and copied to bytes;
    TypeError is raised if it does not support that.
    """
    if isinstance(s, bytes_types):
        return s
    if not isinstance(s, str):
        try:
            return memoryview(s).tobytes()
        except TypeError:
            raise TypeError("argument should be a bytes-like object or ASCII "
                            "string, not %r" % s.__class__.__name__) from None
    try:
        return s.encode('ascii')
    except UnicodeEncodeError:
        raise ValueError('string argument should contain only ASCII characters')
Barry Warsaw4c904d12004-01-04 01:12:26 +000047
Antoine Pitroufd036452008-08-19 17:56:33 +000048
# Base64 encoding/decoding uses binascii

def b64encode(s, altchars=None):
    """Encode the bytes-like object s using Base64 and return bytes.

    Optional altchars must be a bytes-like object of length 2 giving the
    characters to emit in place of '+' and '/', in that order.  This lets
    an application generate e.g. URL- or filesystem-safe Base64 strings.
    """
    encoded = binascii.b2a_base64(s, newline=False)
    if altchars is None:
        return encoded
    assert len(altchars) == 2, repr(altchars)
    substitution = bytes.maketrans(b'+/', altchars)
    return encoded.translate(substitution)
66
67
def b64decode(s, altchars=None, validate=False):
    """Decode the Base64 encoded bytes-like object or ASCII string s.

    Optional altchars must be a bytes-like object or ASCII string of
    length 2 which specifies the alternative alphabet used instead of the
    '+' and '/' characters.

    The decoded bytes are returned.  A binascii.Error is raised if s is
    incorrectly padded.

    If validate is False (the default), non-base64-alphabet characters are
    discarded prior to the padding check.  If validate is True, any
    non-base64-alphabet character in the input (including a trailing
    newline) results in a binascii.Error.
    """
    s = _bytes_from_decode_data(s)
    if altchars is not None:
        altchars = _bytes_from_decode_data(altchars)
        assert len(altchars) == 2, repr(altchars)
        s = s.translate(bytes.maketrans(altchars, b'+/'))
    # fullmatch() is required here: with match(), a '$' anchor also matches
    # just before a trailing newline, which would let b'....\n' slip through
    # strict validation.
    if validate and not re.fullmatch(b'[A-Za-z0-9+/]*={0,2}', s):
        raise binascii.Error('Non-base64 digit found')
    return binascii.a2b_base64(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +000090
91
def standard_b64encode(s):
    """Encode bytes-like object s using the standard Base64 alphabet.

    Equivalent to b64encode(s) with no alternative characters; the
    encoded bytes are returned.
    """
    encoded = b64encode(s)
    return encoded
98
def standard_b64decode(s):
    """Decode data encoded with the standard Base64 alphabet.

    s is the bytes-like object or ASCII string to decode.  This simply
    delegates to b64decode(s) with its default arguments, so the input is
    not strictly validated.  The decoded bytes are returned; binascii.Error
    is raised if the input is incorrectly padded.
    """
    decoded = b64decode(s)
    return decoded
108
Guido van Rossum95c1c482012-06-22 15:16:09 -0700109
# Translation tables between the standard ('+', '/') and the URL-safe
# ('-', '_') Base64 alphabets, built once at import time.
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
112
def urlsafe_b64encode(s):
    """Encode bytes-like object s using the URL-safe Base64 alphabet.

    The data is first encoded with b64encode() and then '+' is replaced
    by '-' and '/' by '_'.  The encoded bytes are returned.
    """
    standard = b64encode(s)
    return standard.translate(_urlsafe_encode_translation)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000121
def urlsafe_b64decode(s):
    """Decode data encoded with the URL-safe Base64 alphabet.

    s is the bytes-like object or ASCII string to decode.  '-' is mapped
    back to '+' and '_' back to '/', and the result is handed to
    b64decode() with its default (non-validating) arguments.  The decoded
    bytes are returned; binascii.Error is raised if the input is
    incorrectly padded.
    """
    data = _bytes_from_decode_data(s)
    standard = data.translate(_urlsafe_decode_translation)
    return b64decode(standard)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000135
136
Antoine Pitroufd036452008-08-19 17:56:33 +0000137
# Base32 encoding/decoding must be done in Python
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
# Lookup tables derived from the alphabet; built on first use by
# b32encode() and b32decode() respectively.
_b32tab2 = None
_b32rev = None
Barry Warsaw4c904d12004-01-04 01:12:26 +0000142
def b32encode(s):
    """Encode the bytes-like object s using Base32 and return bytes.

    The input is processed in 5-byte groups, each producing 8 output
    characters; a short final group is zero-padded and the unused output
    characters are replaced by '='.
    """
    global _b32tab2
    # Build the two-character lookup table lazily so that no memory is
    # spent on it unless this function is actually called.
    if _b32tab2 is None:
        singles = [bytes((code,)) for code in _b32alphabet]
        _b32tab2 = [first + second for first in singles for second in singles]

    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    remainder = len(s) % 5
    if remainder:
        # Zero-fill the last group.  Build a new object instead of using
        # += so a caller's bytearray is never modified in place.
        s = s + bytes(5 - remainder)

    pairs = _b32tab2
    to_int = int.from_bytes
    output = bytearray()
    for start in range(0, len(s), 5):
        word = to_int(s[start: start + 5], 'big')
        output += pairs[word >> 30]             # bits 1 - 10
        output += pairs[(word >> 20) & 0x3ff]   # bits 11 - 20
        output += pairs[(word >> 10) & 0x3ff]   # bits 21 - 30
        output += pairs[word & 0x3ff]           # bits 31 - 40

    # Number of trailing characters to overwrite with '=' for each
    # possible count of leftover input bytes.
    pad_length = {1: 6, 2: 4, 3: 3, 4: 1}.get(remainder, 0)
    if pad_length:
        output[-pad_length:] = b'=' * pad_length
    return bytes(output)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000182
def b32decode(s, casefold=False, map01=None):
    """Decode the Base32 encoded bytes-like object or ASCII string s.

    Optional casefold is a flag specifying whether a lowercase alphabet is
    acceptable as input.  For security purposes, the default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
    letter O (oh), and for optional mapping of the digit 1 (one) to
    either the letter I (eye) or letter L (el).  The optional argument
    map01 when not None, specifies which letter the digit 1 should be
    mapped to (when map01 is not None, the digit 0 is always mapped to
    the letter O).  For security purposes the default is None, so that
    0 and 1 are not allowed in the input.

    The decoded bytes are returned.  binascii.Error is raised if the
    input is incorrectly padded or if there are non-alphabet characters
    present in the input.
    """
    global _b32rev
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    if _b32rev is None:
        _b32rev = {v: k for k, v in enumerate(_b32alphabet)}
    s = _bytes_from_decode_data(s)
    if len(s) % 8:
        raise binascii.Error('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  map01 is either None, or
    # the character to map the digit 1 (one) to.  It should be either
    # L (el) or I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    l = len(s)
    s = s.rstrip(b'=')
    padchars = l - len(s)
    # Now decode the full quanta
    decoded = bytearray()
    b32rev = _b32rev
    for i in range(0, len(s), 8):
        quanta = s[i: i + 8]
        acc = 0
        try:
            for c in quanta:
                acc = (acc << 5) + b32rev[c]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        decoded += acc.to_bytes(5, 'big')
    # Process the last, partial quanta
    if padchars:
        # Map each legal pad count to the number of real bytes carried by
        # the final quantum.  Validating the count *before* touching acc
        # matters: with all-padding input the loop above never runs (acc is
        # unbound), and with 8 or more pad characters the shift below would
        # overflow the 5-byte conversion.
        kept = {1: 4, 3: 3, 4: 2, 6: 1}.get(padchars)
        if kept is None:
            raise binascii.Error('Incorrect padding')
        # A legal pad count together with len % 8 == 0 guarantees at least
        # two data characters, so acc holds the final partial quantum here.
        acc <<= 5 * padchars
        last = acc.to_bytes(5, 'big')
        decoded[-5:] = last[:kept]
    return bytes(decoded)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000252
253
Antoine Pitroufd036452008-08-19 17:56:33 +0000254
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
# lowercase.  The RFC also recommends against accepting input case
# insensitively.
def b16encode(s):
    """Encode the bytes-like object s using Base16 and return bytes.

    The result uses the uppercase hexadecimal digits.
    """
    lowercase_hex = binascii.hexlify(s)
    return lowercase_hex.upper()
Barry Warsaw4c904d12004-01-04 01:12:26 +0000264
265
def b16decode(s, casefold=False):
    """Decode the Base16 encoded bytes-like object or ASCII string s.

    Optional casefold is a flag specifying whether a lowercase alphabet is
    acceptable as input.  For security purposes, the default is False.

    The decoded bytes are returned.  binascii.Error is raised if the
    input contains characters outside 0-9 and A-F, or if unhexlify()
    rejects it.
    """
    data = _bytes_from_decode_data(s)
    if casefold:
        data = data.upper()
    bad_digit = re.search(b'[^0-9A-F]', data)
    if bad_digit is not None:
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(data)
283
#
# Ascii85 encoding/decoding
#

# Encoder lookup tables; filled in by a85encode() on first use.
_a85chars = None
_a85chars2 = None
# Markers that frame the data in the Adobe flavour of Ascii85.
_A85START = b"<~"
_A85END = b"~>"
292
def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Shared implementation behind a85encode() and b85encode().

    chars is the table of the 85 single-character encodings and chars2
    the table of all two-character combinations.  The input is zero-padded
    to a multiple of 4 bytes and encoded one 32-bit big-endian word at a
    time.  With foldnuls a zero word becomes b'z'; with foldspaces a word
    of four spaces becomes b'y'.  Unless pad is true, the characters that
    only encode padding bytes are stripped from the result.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding
    word_count = len(b) // 4
    words = struct.Struct('!%dI' % word_count).unpack(b)

    chunks = []
    for word in words:
        if foldnuls and not word:
            chunks.append(b'z')
        elif foldspaces and word == 0x20202020:
            chunks.append(b'y')
        else:
            # Split into the leading two digits, the middle two digits and
            # the last digit (614125 == 85 ** 3).
            leading, rest = divmod(word, 614125)
            middle, last = divmod(rest, 85)
            chunks.append(chars2[leading] + chars2[middle] + chars[last])

    if padding and not pad:
        # A folded 'z' must be expanded before it can be truncated.
        if chunks[-1] == b'z':
            chunks[-1] = chars[0] * 5
        chunks[-1] = chunks[-1][:-padding]

    return b''.join(chunks)
316
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Encode the bytes-like object b using Ascii85 and return bytes.

    foldspaces is an optional flag that uses the special short sequence 'y'
    instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
    feature is not supported by the "standard" Adobe encoding.

    wrapcol controls whether the output should have newline ('\\n') characters
    added to it. If this is non-zero, each output line will be at most this
    many characters long.

    pad controls whether the input is padded to a multiple of 4 before
    encoding. Note that the btoa implementation always pads.

    adobe controls whether the encoded byte sequence is framed with <~ and ~>,
    which is used by the Adobe implementation.
    """
    global _a85chars, _a85chars2
    # The lookup tables are only built once this function is first called,
    # so that unused encodings cost no memory.
    if _a85chars is None:
        _a85chars = [bytes((code,)) for code in range(33, 118)]
        _a85chars2 = [(first + second)
                      for first in _a85chars for second in _a85chars]

    body = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)
    result = _A85START + body if adobe else body

    if wrapcol:
        minimum_width = 2 if adobe else 1
        wrapcol = max(minimum_width, wrapcol)
        lines = [result[pos: pos + wrapcol]
                 for pos in range(0, len(result), wrapcol)]
        # Put the end marker on a line of its own when it would not fit
        # on the last line.
        if adobe and len(lines[-1]) + 2 > wrapcol:
            lines.append(b'')
        result = b'\n'.join(lines)
    if adobe:
        result += _A85END

    return result
359
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode the Ascii85 encoded bytes-like object or ASCII string b.

    foldspaces is a flag that specifies whether the 'y' short sequence should be
    accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is
    not supported by the "standard" Adobe encoding.

    adobe controls whether the input sequence is in Adobe Ascii85 format (i.e.
    is framed with <~ and ~>).

    ignorechars should be a byte string containing characters to ignore from the
    input. This should only contain whitespace characters, and by default
    contains all whitespace characters in ASCII.

    The decoded bytes are returned.  ValueError is raised for missing Adobe
    framing, for characters that are neither Ascii85 digits nor ignorable,
    for 'z' or 'y' inside a 5-character group and for groups whose value
    does not fit in 32 bits.
    """
    data = _bytes_from_decode_data(b)
    if adobe:
        framed = data.startswith(_A85START) and data.endswith(_A85END)
        if not framed:
            raise ValueError("Ascii85 encoded byte sequences must be bracketed "
                             "by {!r} and {!r}".format(_A85START, _A85END))
        data = data[2:-2]  # Strip off start/end markers

    # The input is walked character by character so that ignorable
    # characters can be skipped and the short sequences handled.
    pack_word = struct.Struct('!I').pack
    first_digit = b'!'[0]
    last_digit = b'u'[0]
    zero_marker = b'z'[0]
    space_marker = b'y'[0]
    pieces = []
    group = []
    # Four extra 'u' digits complete a trailing partial group; the bytes
    # they produce are removed again below.
    for code in data + b'u' * 4:
        if first_digit <= code <= last_digit:
            group.append(code)
            if len(group) == 5:
                value = 0
                for digit in group:
                    value = 85 * value + (digit - 33)
                try:
                    pieces.append(pack_word(value))
                except struct.error:
                    raise ValueError('Ascii85 overflow') from None
                group.clear()
        elif code == zero_marker:
            if group:
                raise ValueError('z inside Ascii85 5-tuple')
            pieces.append(b'\0\0\0\0')
        elif foldspaces and code == space_marker:
            if group:
                raise ValueError('y inside Ascii85 5-tuple')
            pieces.append(b'\x20\x20\x20\x20')
        elif code in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % code)

    result = b''.join(pieces)
    surplus = 4 - len(group)
    if surplus:
        # Throw away the extra padding
        result = result[:-surplus]
    return result
424
# The following code is originally taken (with permission) from Mercurial

_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
# Encoder tables (filled in by b85encode()) and decoder table (filled in
# by b85decode()); all are built on first use.
_b85chars = None
_b85chars2 = None
_b85dec = None
432
def b85encode(b, pad=False):
    """Encode the bytes-like object b in base85 format and return bytes.

    If pad is true, the input is padded with "\\0" so its length is a multiple of
    4 bytes before encoding.
    """
    global _b85chars, _b85chars2
    # The lookup tables are only built once this function is first called,
    # so that unused encodings cost no memory.
    if _b85chars is None:
        _b85chars = [bytes((code,)) for code in _b85alphabet]
        _b85chars2 = [(first + second)
                      for first in _b85chars for second in _b85chars]
    return _85encode(b, _b85chars, _b85chars2, pad)
446
def b85decode(b):
    """Decode the base85-encoded bytes-like object or ASCII string b.

    The decoded bytes are returned.  ValueError is raised if the input
    contains a character outside the base85 alphabet or a 5-character
    group whose value does not fit in 32 bits.
    """
    global _b85dec
    # Build the byte -> digit table on first use only.  Entries for bytes
    # outside the alphabet stay None.
    if _b85dec is None:
        table = [None] * 256
        for digit, code in enumerate(_b85alphabet):
            table[code] = digit
        _b85dec = table
    digits = _b85dec

    data = _bytes_from_decode_data(b)
    padding = (-len(data)) % 5
    # Complete the last group with the highest digit; the bytes this adds
    # are removed again at the end.
    data = data + b'~' * padding
    pack_word = struct.Struct('!I').pack
    words = []
    for start in range(0, len(data), 5):
        group = data[start:start + 5]
        value = 0
        try:
            for code in group:
                value = value * 85 + digits[code]
        except TypeError:
            # Adding None failed: report where the invalid character is.
            for offset, code in enumerate(group):
                if digits[code] is None:
                    raise ValueError('bad base85 character at position %d'
                                     % (start + offset)) from None
            raise
        try:
            words.append(pack_word(value))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % start) from None

    result = b''.join(words)
    if padding:
        result = result[:-padding]
    return result
Antoine Pitroufd036452008-08-19 17:56:33 +0000484
# Legacy interface.  This code could be cleaned up since I don't believe
# binascii has any line length limitations.  It just doesn't seem worth it
# though.  The files should be opened in binary mode.

MAXLINESIZE = 76  # Length of an encoded line, excluding the CRLF
# Number of input bytes that encode to one full line (4 chars per 3 bytes)
MAXBINSIZE = (MAXLINESIZE // 4) * 3
Guido van Rossumf1945461995-06-14 23:43:44 +0000491
def encode(input, output):
    """Base64-encode the binary file input, writing lines to output.

    The input is consumed in blocks of MAXBINSIZE bytes; every block is
    written as one newline-terminated Base64 line.
    """
    while True:
        block = input.read(MAXBINSIZE)
        if not block:
            break
        # A read may return fewer bytes than requested; keep reading until
        # the block is full or the input is exhausted.
        while len(block) < MAXBINSIZE:
            more = input.read(MAXBINSIZE - len(block))
            if not more:
                break
            block += more
        output.write(binascii.b2a_base64(block))
Guido van Rossumf1945461995-06-14 23:43:44 +0000505
Barry Warsaw4c904d12004-01-04 01:12:26 +0000506
def decode(input, output):
    """Base64-decode the binary file input line by line into output."""
    line = input.readline()
    while line:
        output.write(binascii.a2b_base64(line))
        line = input.readline()
Guido van Rossumf1945461995-06-14 23:43:44 +0000515
def _input_type_check(s):
    """Raise TypeError unless s is a 1-D buffer of single-byte elements.

    s must support memoryview(), use one of the formats 'c', 'b' or 'B'
    and be one-dimensional.  Nothing is returned.
    """
    try:
        view = memoryview(s)
    except TypeError as err:
        raise TypeError("expected bytes-like object, not %s"
                        % s.__class__.__name__) from err
    if view.format not in ('c', 'b', 'B'):
        raise TypeError("expected single byte elements, not %r from %s" %
                        (view.format, s.__class__.__name__))
    if view.ndim != 1:
        raise TypeError("expected 1-D data, not %d-D data from %s" %
                        (view.ndim, s.__class__.__name__))
529 raise TypeError(msg)
530
Barry Warsaw4c904d12004-01-04 01:12:26 +0000531
def encodebytes(s):
    """Encode a bytestring into a bytestring containing multiple lines
    of base-64 data.

    The input is split into pieces of MAXBINSIZE bytes and each piece is
    encoded as one newline-terminated line.
    """
    _input_type_check(s)
    return b"".join(
        binascii.b2a_base64(s[start: start + MAXBINSIZE])
        for start in range(0, len(s), MAXBINSIZE)
    )
Guido van Rossumf1945461995-06-14 23:43:44 +0000541
def encodestring(s):
    """Legacy alias of encodebytes(); issues a DeprecationWarning."""
    import warnings
    message = "encodestring() is a deprecated alias, use encodebytes()"
    # stacklevel=2 attributes the warning to the caller of this alias.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return encodebytes(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000548
Guido van Rossum54a40cb2007-08-27 22:27:41 +0000549
def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytestring.

    The argument is first checked to be a 1-D buffer of single bytes.
    """
    _input_type_check(s)
    decoded = binascii.a2b_base64(s)
    return decoded
Guido van Rossumf1945461995-06-14 23:43:44 +0000554
def decodestring(s):
    """Legacy alias of decodebytes(); issues a DeprecationWarning."""
    import warnings
    message = "decodestring() is a deprecated alias, use decodebytes()"
    # stacklevel=2 attributes the warning to the caller of this alias.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return decodebytes(s)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000561
Antoine Pitroufd036452008-08-19 17:56:33 +0000562
# Usable as a script...
def main():
    """Command-line entry point: encode or decode a file or stdin.

    Options: -e encode (the default), -d / -u decode, -t run the built-in
    round-trip test.  The optional positional argument names the input
    file; '-' or no argument means standard input.  Output always goes to
    standard output.  On an option error a usage message is printed to
    standard error and the process exits with status 2.
    """
    import getopt
    import sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        sys.stdout = sys.stderr
        print(msg)
        print("""usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0])
        sys.exit(2)

    handlers = {'-e': encode, '-d': decode, '-u': decode}
    func = encode
    for option, _value in opts:
        if option == '-t':
            test()
            return
        # The last -e/-d/-u on the command line wins.
        func = handlers.get(option, func)

    read_stdin = not args or args[0] == '-'
    if read_stdin:
        func(sys.stdin.buffer, sys.stdout.buffer)
    else:
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
Guido van Rossumf1945461995-06-14 23:43:44 +0000588
Barry Warsaw4c904d12004-01-04 01:12:26 +0000589
def test():
    """Print a sample string, its encodebytes() form and the decoded
    result, and assert that the round trip is lossless."""
    original = b"Aladdin:open sesame"
    print(repr(original))
    encoded = encodebytes(original)
    print(repr(encoded))
    decoded = decodebytes(encoded)
    print(repr(decoded))
    assert original == decoded
Guido van Rossumf1945461995-06-14 23:43:44 +0000598
Barry Warsaw4c904d12004-01-04 01:12:26 +0000599
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()