blob: e1256ad9358e7ae74069608f86fbe5c981bfe109 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Guido van Rossumaa925a51997-04-02 05:47:39 +00002
Antoine Pitrou6dd0d462013-11-17 23:52:25 +01003"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""
Guido van Rossum4acc25b2000-02-02 15:10:15 +00004
Barry Warsaw4c904d12004-01-04 01:12:26 +00005# Modified 04-Oct-1995 by Jack Jansen to use binascii module
6# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
Guido van Rossum4581ae52007-05-22 21:56:47 +00007# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
Jack Jansen951213e1995-10-04 16:39:20 +00008
Barry Warsaw4c904d12004-01-04 01:12:26 +00009import re
10import struct
Jack Jansen951213e1995-10-04 16:39:20 +000011import binascii
12
Barry Warsaw4c904d12004-01-04 01:12:26 +000013
__all__ = [
    # Legacy interface: the traditional RFC 2045 Base64 encodings
    'encode',
    'decode',
    'encodebytes',
    'decodebytes',
    # Generalized interface for other encodings
    'b64encode',
    'b64decode',
    'b32encode',
    'b32decode',
    'b32hexencode',
    'b32hexdecode',
    'b16encode',
    'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode',
    'b85decode',
    'a85encode',
    'a85decode',
    # Standard Base64 encoding
    'standard_b64encode',
    'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see the
    # thread starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode',
    'urlsafe_b64decode',
]
30
Barry Warsaw4c904d12004-01-04 01:12:26 +000031
bytes_types = (bytes, bytearray)  # Types acceptable as binary data


def _bytes_from_decode_data(s):
    """Normalize the input of a decoding function to binary data.

    bytes and bytearray objects are returned unchanged (same object).
    A str is encoded as ASCII; ValueError is raised when it contains
    non-ASCII characters.  Any other object is read through memoryview()
    and copied into a new bytes object; TypeError is raised when it does
    not support the buffer protocol.
    """
    if isinstance(s, bytes_types):
        return s
    if not isinstance(s, str):
        try:
            return memoryview(s).tobytes()
        except TypeError:
            raise TypeError("argument should be a bytes-like object or ASCII "
                            "string, not %r" % s.__class__.__name__) from None
    try:
        return s.encode('ascii')
    except UnicodeEncodeError:
        raise ValueError('string argument should contain only ASCII characters')
Barry Warsaw4c904d12004-01-04 01:12:26 +000047
Antoine Pitroufd036452008-08-19 17:56:33 +000048
Barry Warsaw4c904d12004-01-04 01:12:26 +000049# Base64 encoding/decoding uses binascii
50
def b64encode(s, altchars=None):
    """Return the Base64 encoding of the bytes-like object s as bytes.

    altchars, when given, must be a byte string of length 2; its two bytes
    replace '+' and '/' in the output.  This lets an application produce
    e.g. URL- or filesystem-safe Base64 strings.
    """
    encoded = binascii.b2a_base64(s, newline=False)
    if altchars is None:
        return encoded
    assert len(altchars) == 2, repr(altchars)
    substitution = bytes.maketrans(b'+/', altchars)
    return encoded.translate(substitution)
63
64
def b64decode(s, altchars=None, validate=False):
    """Decode the Base64 encoded bytes-like object or ASCII string s.

    altchars, when given, must be a bytes-like object or ASCII string of
    length 2 naming the characters that were used in place of '+' and '/'.

    The decoded data is returned as a bytes object.  binascii.Error is
    raised if s is incorrectly padded.

    With validate false (the default), characters outside both the normal
    base-64 alphabet and the alternative alphabet are discarded before the
    padding check.  With validate true, such characters cause a
    binascii.Error.
    """
    data = _bytes_from_decode_data(s)
    if altchars is not None:
        alt = _bytes_from_decode_data(altchars)
        assert len(alt) == 2, repr(alt)
        # Map the alternative characters back onto the standard alphabet.
        data = data.translate(bytes.maketrans(alt, b'+/'))
    if validate:
        if re.fullmatch(b'[A-Za-z0-9+/]*={0,2}', data) is None:
            raise binascii.Error('Non-base64 digit found')
    return binascii.a2b_base64(data)
Barry Warsaw4c904d12004-01-04 01:12:26 +000088
89
def standard_b64encode(s):
    """Encode the bytes-like object s with the standard Base64 alphabet.

    Returns the encoded data as a bytes object.
    """
    encoded = b64encode(s)
    return encoded
96
def standard_b64decode(s):
    """Decode data that was encoded with the standard Base64 alphabet.

    s is a bytes-like object or ASCII string.  The decoded data is returned
    as a bytes object.  binascii.Error is raised if the input is
    incorrectly padded.  Characters outside the standard alphabet are
    discarded before the padding check.
    """
    decoded = b64decode(s)
    return decoded
106
Guido van Rossum95c1c482012-06-22 15:16:09 -0700107
# Translation tables between the standard ('+', '/') and the URL-safe
# ('-', '_') Base64 alphabets.
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')


def urlsafe_b64encode(s):
    """Encode the bytes-like object s with the URL- and filesystem-safe
    Base64 alphabet.

    The result is a bytes object in which '-' takes the place of '+' and
    '_' takes the place of '/'.
    """
    standard = b64encode(s)
    return standard.translate(_urlsafe_encode_translation)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000119
def urlsafe_b64decode(s):
    """Decode data encoded with the URL- and filesystem-safe Base64 alphabet.

    s is a bytes-like object or ASCII string.  The decoded data is returned
    as a bytes object.  binascii.Error is raised if the input is
    incorrectly padded.  Characters that are neither in the URL-safe
    base-64 alphabet nor a plus '+' or slash '/' are discarded before the
    padding check.

    The URL-safe alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
    raw = _bytes_from_decode_data(s)
    # Convert to the standard alphabet, then reuse the generic decoder.
    standard = raw.translate(_urlsafe_decode_translation)
    return b64decode(standard)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000134
135
Antoine Pitroufd036452008-08-19 17:56:33 +0000136
Barry Warsaw4c904d12004-01-04 01:12:26 +0000137# Base32 encoding/decoding must be done in Python
# Docstring templates shared by the base32 and base32hex functions; they are
# filled in with str.format() and assigned to each wrapper's __doc__.
_B32_ENCODE_DOCSTRING = '''
Encode the bytes-like object s using {encoding} and return a bytes object.
'''
_B32_DECODE_DOCSTRING = '''
Decode the {encoding} encoded bytes-like object or ASCII string s.

Optional casefold is a flag specifying whether a lowercase alphabet is
acceptable as input. For security purposes, the default is False.
{extra_args}
The result is returned as a bytes object. A binascii.Error is raised if
the input is incorrectly padded or if there are non-alphabet
characters present in the input.
'''
_B32_DECODE_MAP01_DOCSTRING = '''
RFC 3548 allows for optional mapping of the digit 0 (zero) to the
letter O (oh), and for optional mapping of the digit 1 (one) to
either the letter I (eye) or letter L (el). The optional argument
map01 when not None, specifies which letter the digit 1 should be
mapped to (when map01 is not None, the digit 0 is always mapped to
the letter O). For security purposes the default is None, so that
0 and 1 are not allowed in the input.
'''
# The two 32-symbol alphabets (standard and "extended hex").
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
# Per-alphabet lookup tables, filled lazily by _b32encode() / _b32decode()
# and keyed by the alphabet bytes object.
_b32tab2 = {}
_b32rev = {}
164
def _b32encode(alphabet, s):
    """Encode the bytes-like object s with the given 32-symbol alphabet.

    alphabet is a bytes object of 32 symbols; it is also the key under
    which the derived lookup table is cached in _b32tab2.  The result is a
    bytes object, padded with '=' to a multiple of 8 characters.
    """
    # Build the table lazily so no memory is spent if the function is
    # never called.  Entry i holds the two symbols encoding the 10-bit
    # value i.
    pair_table = _b32tab2.get(alphabet)
    if pair_table is None:
        singles = [bytes((code,)) for code in alphabet]
        pair_table = [first + second for first in singles for second in singles]
        _b32tab2[alphabet] = pair_table

    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    remainder = len(s) % 5
    if remainder:
        # Zero-fill the last 5-byte quantum.  Build a new object instead of
        # using +=, which would modify a caller's bytearray in place.
        s = s + b'\0' * (5 - remainder)

    out = bytearray()
    for start in range(0, len(s), 5):
        quantum = int.from_bytes(s[start:start + 5], 'big')
        out += pair_table[quantum >> 30]              # bits 1 - 10
        out += pair_table[(quantum >> 20) & 0x3ff]    # bits 11 - 20
        out += pair_table[(quantum >> 10) & 0x3ff]    # bits 21 - 30
        out += pair_table[quantum & 0x3ff]            # bits 31 - 40

    # Replace the symbols produced purely by zero fill with '=' padding.
    # Keys are the number of real bytes in the final quantum.
    if remainder:
        pad_len = {1: 6, 2: 4, 3: 3, 4: 1}[remainder]
        out[-pad_len:] = b'=' * pad_len
    return bytes(out)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000200
def _b32decode(alphabet, s, casefold=False, map01=None):
    """Decode the bytes-like object or ASCII string s with the given alphabet.

    alphabet is the 32-symbol bytes object used for encoding.  casefold
    upper-cases the input before decoding.  map01, when not None, is the
    single letter the digit 1 is mapped to (the digit 0 is then mapped to
    the letter O).

    Returns a bytes object.  Raises binascii.Error on incorrect padding or
    when a character outside the alphabet is found.
    """
    # Build the symbol -> value table lazily so no memory is spent if the
    # function is never called.
    reverse = _b32rev.get(alphabet)
    if reverse is None:
        reverse = {symbol: value for value, symbol in enumerate(alphabet)}
        _b32rev[alphabet] = reverse

    s = _bytes_from_decode_data(s)
    if len(s) % 8 != 0:
        raise binascii.Error('Incorrect padding')

    # Handle section 2.4 zero and one mapping.  map01 is either None or the
    # character the digit 1 (one) maps to; it should be L (el) or I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()

    # Strip the pad characters from the right and count them: the count
    # determines how many bytes of the last quantum are real data.
    total = len(s)
    s = s.rstrip(b'=')
    padchars = total - len(s)

    # Decode 8 symbols (40 bits) at a time; the last slice may be shorter.
    decoded = bytearray()
    for start in range(0, len(s), 8):
        acc = 0
        try:
            for symbol in s[start:start + 8]:
                acc = (acc << 5) + reverse[symbol]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        decoded += acc.to_bytes(5, 'big')

    # Fix up the last, partial quantum.
    if total % 8 or padchars not in {0, 1, 3, 4, 6}:
        raise binascii.Error('Incorrect padding')
    if padchars and decoded:
        # acc still holds the short final group; shift it into place as if
        # the padding had been zero symbols, then keep only the real bytes.
        acc <<= 5 * padchars
        tail = acc.to_bytes(5, 'big')
        keep = (43 - 5 * padchars) // 8  # 1: 4, 3: 3, 4: 2, 6: 1
        decoded[-5:] = tail[:keep]
    return bytes(decoded)
Barry Warsaw4c904d12004-01-04 01:12:26 +0000246
247
def b32encode(s):
    # Standard base32 alphabet; the docstring is attached below.
    alphabet = _b32alphabet
    return _b32encode(alphabet, s)
b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32')
251
def b32decode(s, casefold=False, map01=None):
    # Standard base32 alphabet; the docstring is attached below.
    alphabet = _b32alphabet
    return _b32decode(alphabet, s, casefold, map01)
b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(
    encoding='base32',
    extra_args=_B32_DECODE_MAP01_DOCSTRING,
)
256
def b32hexencode(s):
    # "Extended hex" base32 alphabet; the docstring is attached below.
    alphabet = _b32hexalphabet
    return _b32encode(alphabet, s)
b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex')
260
def b32hexdecode(s, casefold=False):
    # base32hex has no 0/1 mapping, so map01 keeps its default (None).
    alphabet = _b32hexalphabet
    return _b32decode(alphabet, s, casefold)
b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(
    encoding='base32hex',
    extra_args='',
)
266
267
Barry Warsaw4c904d12004-01-04 01:12:26 +0000268# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
269# lowercase. The RFC also recommends against accepting input case
270# insensitively.
def b16encode(s):
    """Return the Base16 encoding of the bytes-like object s as bytes.

    The output uses uppercase hexadecimal digits.
    """
    lowercase_hex = binascii.hexlify(s)
    return lowercase_hex.upper()
Barry Warsaw4c904d12004-01-04 01:12:26 +0000275
276
def b16decode(s, casefold=False):
    """Decode the Base16 encoded bytes-like object or ASCII string s.

    casefold is a flag specifying whether lowercase hexadecimal digits are
    acceptable as input.  For security purposes, the default is False.

    The decoded data is returned as a bytes object.  binascii.Error is
    raised if s is incorrectly padded or contains characters outside the
    Base16 alphabet.
    """
    data = _bytes_from_decode_data(s)
    if casefold:
        data = data.upper()
    # Anything other than 0-9 / A-F is rejected before unhexlify() runs.
    if re.search(b'[^0-9A-F]', data) is not None:
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(data)
293
Antoine Pitrou6dd0d462013-11-17 23:52:25 +0100294#
295# Ascii85 encoding/decoding
296#
Barry Warsaw4c904d12004-01-04 01:12:26 +0000297
# Ascii85 encoding tables; a85encode() fills them in on first use.
_a85chars = _a85chars2 = None
# Markers framing Adobe-style Ascii85 data.
_A85START, _A85END = b"<~", b"~>"
302
def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Shared implementation of a85encode() and b85encode().

    chars is indexed by a single base-85 digit and chars2 by a pair of
    digits (first * 85 + second); both yield bytes.  foldnuls emits b'z'
    for an all-zero 32-bit word and foldspaces emits b'y' for a word of
    four spaces.  Unless pad is true, the output characters that correspond
    to zero fill added to the input are removed again.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    # Zero-fill the input to a whole number of 32-bit words.
    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding

    chunks = []
    for word in struct.unpack('!%dI' % (len(b) // 4), b):
        if foldnuls and not word:
            chunks.append(b'z')
        elif foldspaces and word == 0x20202020:
            chunks.append(b'y')
        else:
            # Split the word into five base-85 digits, grouped 2 + 2 + 1.
            rest, last = divmod(word, 85)
            first_pair, second_pair = divmod(rest, 7225)
            chunks.append(chars2[first_pair] + chars2[second_pair] + chars[last])

    if padding and not pad:
        # A folded 'z' has to be expanded before it can be truncated.
        if chunks[-1] == b'z':
            chunks[-1] = chars[0] * 5
        chunks[-1] = chunks[-1][:-padding]

    return b''.join(chunks)
326
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Encode bytes-like object b using Ascii85 and return a bytes object.

    foldspaces is an optional flag that uses the special short sequence 'y'
    instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
    feature is not supported by the "standard" Adobe encoding.

    wrapcol controls whether the output should have newline (b'\\n')
    characters added to it. If this is non-zero, each output line will be at
    most this many characters long.

    pad controls whether the input is padded to a multiple of 4 before
    encoding. Note that the btoa implementation always pads.

    adobe controls whether the encoded byte sequence is framed with <~ and ~>,
    which is used by the Adobe implementation.
    """
    global _a85chars, _a85chars2
    # Build the tables lazily so no memory is spent if the function is never
    # called.  _a85chars2 is assigned last and is the one tested, so the
    # check only passes once both tables exist.
    if _a85chars2 is None:
        _a85chars = [bytes((code,)) for code in range(33, 118)]
        _a85chars2 = [first + second
                      for first in _a85chars for second in _a85chars]

    encoded = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)
    framed = (_A85START + encoded) if adobe else encoded

    if wrapcol:
        # A line must be able to hold at least one character (two in Adobe
        # mode).
        width = max(2 if adobe else 1, wrapcol)
        lines = [framed[pos:pos + width]
                 for pos in range(0, len(framed), width)]
        # If the end marker does not fit on the last line, give it a line
        # of its own.
        if adobe and len(lines[-1]) + 2 > width:
            lines.append(b'')
        framed = b'\n'.join(lines)

    return (framed + _A85END) if adobe else framed
367
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode the Ascii85 encoded bytes-like object or ASCII string b.

    foldspaces is a flag that specifies whether the 'y' short sequence should
    be accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This
    feature is not supported by the "standard" Adobe encoding.

    adobe controls whether the input sequence is in Adobe Ascii85 format
    (i.e. is framed with <~ and ~>).

    ignorechars should be a byte string containing characters to ignore from
    the input. This should only contain whitespace characters, and by default
    contains all whitespace characters in ASCII.

    The result is returned as a bytes object.  ValueError is raised for a
    missing Adobe end marker, a character outside the encoding, a short
    sequence inside a 5-tuple, or a 5-tuple that overflows 32 bits.
    """
    data = _bytes_from_decode_data(b)
    if adobe:
        if not data.endswith(_A85END):
            raise ValueError(
                "Ascii85 encoded byte sequences must end "
                "with {!r}".format(_A85END)
                )
        # The end marker is required; the start marker is optional.
        data = data[2:-2] if data.startswith(_A85START) else data[:-2]

    # Go through the input byte by byte so that ignorable characters can be
    # skipped and the special short sequences handled.
    first_digit, last_digit = b'!'[0], b'u'[0]
    z_code, y_code = b'z'[0], b'y'[0]
    pack_word = struct.Struct('!I').pack
    pieces = []
    group = []
    # Four extra 'u' digits complete a trailing partial 5-tuple.
    for code in data + b'u' * 4:
        if first_digit <= code <= last_digit:
            group.append(code)
            if len(group) == 5:
                value = 0
                for digit in group:
                    value = 85 * value + (digit - 33)
                try:
                    pieces.append(pack_word(value))
                except struct.error:
                    raise ValueError('Ascii85 overflow') from None
                group.clear()
        elif code == z_code:
            if group:
                raise ValueError('z inside Ascii85 5-tuple')
            pieces.append(b'\0\0\0\0')
        elif foldspaces and code == y_code:
            if group:
                raise ValueError('y inside Ascii85 5-tuple')
            pieces.append(b'\x20\x20\x20\x20')
        elif code in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % code)

    result = b''.join(pieces)
    # Whatever is left in group are the filler digits that did not form a
    # full tuple; drop the bytes that came from the consumed filler.
    padding = 4 - len(group)
    if padding:
        result = result[:-padding]
    return result
437
438# The following code is originally taken (with permission) from Mercurial
439
_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
# Encoding / decoding tables, built on first use.
_b85chars = _b85chars2 = _b85dec = None


def b85encode(b, pad=False):
    """Encode bytes-like object b in base85 format and return a bytes object.

    If pad is true, the input is padded with b'\\0' so its length is a
    multiple of 4 bytes before encoding.
    """
    global _b85chars, _b85chars2
    # Build the tables lazily so no memory is spent if the function is never
    # called.  _b85chars2 is assigned last and is the one tested, so the
    # check only passes once both tables exist.
    if _b85chars2 is None:
        _b85chars = [_b85alphabet[pos:pos + 1]
                     for pos in range(len(_b85alphabet))]
        _b85chars2 = [first + second
                      for first in _b85chars for second in _b85chars]
    return _85encode(b, _b85chars, _b85chars2, pad)
459
def b85decode(b):
    """Decode the base85-encoded bytes-like object or ASCII string b

    The result is returned as a bytes object.  ValueError is raised when
    the input contains a character outside the base85 alphabet or when a
    5-character group does not fit in 32 bits.
    """
    global _b85dec
    # Build the table lazily so no memory is spent if the function is never
    # called.  It is filled in a local list and published only when
    # complete, so a concurrent caller can never observe a partly
    # initialized table (same idea as the _b85chars2 check in b85encode()).
    if _b85dec is None:
        table = [None] * 256
        for value, code in enumerate(_b85alphabet):
            table[code] = value
        _b85dec = table
    dec = _b85dec

    b = _bytes_from_decode_data(b)
    # Complete the last group with the highest digit ('~'); the surplus
    # output bytes are removed again at the end.
    padding = (-len(b)) % 5
    b = b + b'~' * padding
    out = []
    packI = struct.Struct('!I').pack
    for i in range(0, len(b), 5):
        chunk = b[i:i + 5]
        acc = 0
        try:
            for c in chunk:
                acc = acc * 85 + dec[c]
        except TypeError:
            # dec[c] was None: locate the offending character to report
            # its position.
            for j, c in enumerate(chunk):
                if dec[c] is None:
                    raise ValueError('bad base85 character at position %d'
                                     % (i + j)) from None
            raise
        try:
            out.append(packI(acc))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % i) from None

    result = b''.join(out)
    if padding:
        result = result[:-padding]
    return result
Antoine Pitroufd036452008-08-19 17:56:33 +0000500
Barry Warsaw4c904d12004-01-04 01:12:26 +0000501# Legacy interface. This code could be cleaned up since I don't believe
502# binascii has any line length limitations. It just doesn't seem worth it
Guido van Rossum4581ae52007-05-22 21:56:47 +0000503# though. The files should be opened in binary mode.
Skip Montanaroe99d5ea2001-01-20 19:54:20 +0000504
MAXLINESIZE = 76  # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE // 4) * 3  # Input bytes that fill one output line


def encode(input, output):
    """Encode a file; input and output are binary files.

    The input is consumed in MAXBINSIZE-byte pieces and each piece is
    written to output as one newline-terminated Base64 line.
    """
    read = input.read
    write = output.write
    piece = read(MAXBINSIZE)
    while piece:
        # A read may return fewer bytes than requested; keep reading until
        # the piece is full or the input is exhausted.
        missing = MAXBINSIZE - len(piece)
        while missing > 0:
            more = read(missing)
            if not more:
                break
            piece += more
            missing = MAXBINSIZE - len(piece)
        write(binascii.b2a_base64(piece))
        piece = read(MAXBINSIZE)
Guido van Rossumf1945461995-06-14 23:43:44 +0000521
Barry Warsaw4c904d12004-01-04 01:12:26 +0000522
def decode(input, output):
    """Decode a file; input and output are binary files.

    Each line read from input is decoded on its own and the resulting
    bytes are written to output.
    """
    line = input.readline()
    while line:
        output.write(binascii.a2b_base64(line))
        line = input.readline()
Guido van Rossumf1945461995-06-14 23:43:44 +0000531
def _input_type_check(s):
    """Raise TypeError unless s exposes a 1-D buffer of single bytes.

    s must support the buffer protocol, have an item format of 'c', 'b' or
    'B', and be one-dimensional.  Returns None when the check passes.
    """
    try:
        view = memoryview(s)
    except TypeError as err:
        raise TypeError("expected bytes-like object, not %s"
                        % s.__class__.__name__) from err
    item_format = view.format
    if item_format not in ('c', 'b', 'B'):
        raise TypeError("expected single byte elements, not %r from %s"
                        % (item_format, s.__class__.__name__))
    dimensions = view.ndim
    if dimensions != 1:
        raise TypeError("expected 1-D data, not %d-D data from %s"
                        % (dimensions, s.__class__.__name__))
546
Barry Warsaw4c904d12004-01-04 01:12:26 +0000547
def encodebytes(s):
    """Encode a bytestring into a bytes object containing multiple lines
    of base-64 data.

    The input is split into MAXBINSIZE-byte pieces and every piece becomes
    one newline-terminated line of output.
    """
    _input_type_check(s)
    return b"".join(
        binascii.b2a_base64(s[start:start + MAXBINSIZE])
        for start in range(0, len(s), MAXBINSIZE)
    )
Guido van Rossumf1945461995-06-14 23:43:44 +0000557
Guido van Rossum54a40cb2007-08-27 22:27:41 +0000558
def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytes object.

    s must be a one-dimensional bytes-like object of single-byte items;
    TypeError is raised otherwise.
    """
    _input_type_check(s)
    decoded = binascii.a2b_base64(s)
    return decoded
Guido van Rossumf1945461995-06-14 23:43:44 +0000563
Antoine Pitroufd036452008-08-19 17:56:33 +0000564
Guido van Rossum4581ae52007-05-22 21:56:47 +0000565# Usable as a script...
def main():
    """Small main program

    Reads options from sys.argv: -e encodes (the default), -d and -u
    decode, and -t runs the built-in round-trip test and returns.  Data is
    read from the named file, or from standard input when no file or '-'
    is given, and written to standard output.  On an option error a usage
    message is printed to standard error and the process exits with
    status 2.
    """
    import sys, getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Write directly to stderr instead of rebinding sys.stdout, which
        # would leave the interpreter's stdout redirected for any caller
        # that catches the SystemExit raised below.
        print(msg, file=sys.stderr)
        print("""usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'""" % sys.argv[0],
              file=sys.stderr)
        sys.exit(2)
    func = encode
    for o, _ in opts:
        if o == '-e':
            func = encode
        if o == '-d':
            func = decode
        if o == '-u':
            func = decode
        if o == '-t':
            test()
            return
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
    else:
        func(sys.stdin.buffer, sys.stdout.buffer)
Guido van Rossumf1945461995-06-14 23:43:44 +0000590
Barry Warsaw4c904d12004-01-04 01:12:26 +0000591
def test():
    """Round-trip a sample string through encodebytes() / decodebytes().

    Prints the repr of the original, the encoded and the decoded data, and
    asserts that decoding restores the original bytes.
    """
    original = b"Aladdin:open sesame"
    print(repr(original))
    encoded = encodebytes(original)
    print(repr(encoded))
    decoded = decodebytes(encoded)
    print(repr(decoded))
    assert original == decoded


# Usable as a script.
if __name__ == '__main__':
    main()