/*
** Routines to represent binary data in ASCII and vice-versa
**
** This module currently supports the following encodings:
** uuencode:
**      each line encodes 45 bytes (except possibly the last)
**      First char encodes (binary) length, rest data
**      each char encodes 6 bits, as follows:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**      short binary data is zero-extended (so the bits are always in the
**      right place), this does *not* reflect in the length.
** base64:
**      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
**      is done via a table.
**      Short binary data is filled (in ASCII) with '='.
** hqx:
**      File starts with introductory text, real data starts and ends
**      with colons.
**      Data consists of three similar parts: info, datafork, resourcefork.
**      Each part is protected (at the end) with a 16-bit crc
**      The binary data is run-length encoded, and then ascii-fied:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding is table-driven, see the code.
**      Short binary data results in the runt ascii-byte being output with
**      the bits in the right place.
**
** While I was reading dozens of programs that encode or decode the formats
** here (documentation? hihi:-) I have formulated Jansen's Observation:
**
**      Programs that encode binary data in ASCII are written in
**      such a style that they are as unreadable as possible. Devices used
**      include unnecessary global variables, burying important tables
**      in unrelated sourcefiles, putting functions in include files,
**      using seemingly-descriptive variable names for different purposes,
**      calls to empty subroutines and a host of others.
**
** I have attempted to break with this tradition, but I guess that that
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on rfc 1521 et al
** quoted-printable encoding specifies that non printable characters (anything
** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
** of the character.  It also specifies some other behavior to enable 8bit data
** in a mail message with little difficulty (maximum line sizes, protecting
** some cases of whitespace, etc).
**
** Brandon Long, September 2001.
*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
Gregory P. Smith9c6b9162015-04-26 00:42:13 +000059#include "pystrhex.h"
Christian Heimes1dc54002008-03-24 02:19:29 +000060#ifdef USE_ZLIB_CRC32
61#include "zlib.h"
62#endif
Jack Jansen72781191995-08-07 14:34:15 +000063
Marcel Plch33e71e02019-05-22 13:51:26 +020064typedef struct binascii_state {
65 PyObject *Error;
66 PyObject *Incomplete;
67} binascii_state;
Jack Jansen72781191995-08-07 14:34:15 +000068
/*
** hqx lookup table, ascii->binary.
**
** Indexed by the input byte.  Values 0x00..0x3F are the 6-bit payload of a
** legal hqx character; the remaining values are the markers defined below.
*/

#define RUNCHAR 0x90

#define DONE 0x7F   /* ':'  -- terminating colon            */
#define SKIP 0x7E   /* '\n' and '\r' -- ignored on input    */
#define FAIL 0x7D   /* any byte that is not legal hqx data  */

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*       (     )     *     +     ,     -     .     /    */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*       0     1     2     3     4     5     6     7    */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*       8     9     :     ;     <     =     >     ?    */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       @     A     B     C     D     E     F     G    */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*       H     I     J     K     L     M     N     O    */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*       P     Q     R     S     T     U     V     W    */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*       X     Y     Z     [     \     ]     ^     _    */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*       `     a     b     c     d     e     f     g    */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*       h     i     j     k     l     m     n     o    */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*       p     q     r     s     t     u     v     w    */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       x     y     z     {     |     }     ~    ^?    */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
129
/* hqx alphabet, binary->ascii: entry i is the character for 6-bit value i. */
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000132
/*
** base64 lookup table, ascii->binary, indexed by the input byte.
** Alphabet characters map to their 6-bit value (0..63).  Every other byte is
** written as -1, which is stored as 255 in an unsigned char, so callers can
** reject it with a single ">= 64" comparison.
*/
static const unsigned char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,

    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};
152
/* Character used to pad the final base64 quad. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/* base64 alphabet, binary->ascii: entry i is the character for 6-bit value i. */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000160
161
162
/*
** 16-bit CRC lookup table for the hqx CRC.
** Entry i is the CRC register after feeding byte i, most significant bit
** first, through the generator polynomial 0x1021 starting from zero.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
197
/*[clinic input]
module binascii
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/

/*[python input]

class ascii_buffer_converter(CConverter):
    type = 'Py_buffer'
    converter = 'ascii_buffer_converter'
    impl_by_reference = True
    c_default = "{NULL, NULL}"

    def cleanup(self):
        name = self.name
        return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"])

[python start generated code]*/
/*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200217
Antoine Pitrou08316762011-12-20 13:58:41 +0100218static int
219ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
220{
221 if (arg == NULL) {
222 PyBuffer_Release(buf);
223 return 1;
224 }
225 if (PyUnicode_Check(arg)) {
226 if (PyUnicode_READY(arg) < 0)
227 return 0;
228 if (!PyUnicode_IS_ASCII(arg)) {
229 PyErr_SetString(PyExc_ValueError,
230 "string argument should contain only ASCII characters");
231 return 0;
232 }
233 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
234 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
235 buf->len = PyUnicode_GET_LENGTH(arg);
236 buf->obj = NULL;
237 return 1;
238 }
239 if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
240 PyErr_Format(PyExc_TypeError,
241 "argument should be bytes, buffer or ASCII string, "
Berker Peksag3cd30c22015-02-15 00:31:00 +0200242 "not '%.100s'", Py_TYPE(arg)->tp_name);
Antoine Pitrou08316762011-12-20 13:58:41 +0100243 return 0;
244 }
245 if (!PyBuffer_IsContiguous(buf, 'C')) {
246 PyErr_Format(PyExc_TypeError,
247 "argument should be a contiguous buffer, "
Berker Peksag3cd30c22015-02-15 00:31:00 +0200248 "not '%.100s'", Py_TYPE(arg)->tp_name);
Antoine Pitrou08316762011-12-20 13:58:41 +0100249 PyBuffer_Release(buf);
250 return 0;
251 }
252 return Py_CLEANUP_SUPPORTED;
253}
254
Larry Hastingsf256c222014-01-25 21:30:37 -0800255#include "clinic/binascii.c.h"
Antoine Pitrou08316762011-12-20 13:58:41 +0100256
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200257/*[clinic input]
258binascii.a2b_uu
259
Serhiy Storchaka12785612014-01-25 11:49:49 +0200260 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200261 /
262
263Decode a line of uuencoded data.
264[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000265
266static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300267binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
268/*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
Jack Jansen72781191995-08-07 14:34:15 +0000269{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200270 const unsigned char *ascii_data;
271 unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 int leftbits = 0;
273 unsigned char this_ch;
274 unsigned int leftchar = 0;
275 PyObject *rv;
276 Py_ssize_t ascii_len, bin_len;
Marcel Plch33e71e02019-05-22 13:51:26 +0200277 binascii_state *state;
Tim Peters934c1a12002-07-02 22:24:50 +0000278
Serhiy Storchaka12785612014-01-25 11:49:49 +0200279 ascii_data = data->buf;
280 ascii_len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000281
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000282 assert(ascii_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000283
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000284 /* First byte: binary data length (in bytes) */
285 bin_len = (*ascii_data++ - ' ') & 077;
286 ascii_len--;
Jack Jansen72781191995-08-07 14:34:15 +0000287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000288 /* Allocate the buffer */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200289 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000290 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000292
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
294 /* XXX is it really best to add NULs if there's no more data */
295 this_ch = (ascii_len > 0) ? *ascii_data : 0;
296 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
297 /*
298 ** Whitespace. Assume some spaces got eaten at
299 ** end-of-line. (We check this later)
300 */
301 this_ch = 0;
302 } else {
303 /* Check the character for legality
304 ** The 64 in stead of the expected 63 is because
305 ** there are a few uuencodes out there that use
306 ** '`' as zero instead of space.
307 */
308 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200309 state = PyModule_GetState(module);
310 if (state == NULL) {
311 return NULL;
312 }
313 PyErr_SetString(state->Error, "Illegal char");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 Py_DECREF(rv);
315 return NULL;
316 }
317 this_ch = (this_ch - ' ') & 077;
318 }
319 /*
320 ** Shift it in on the low end, and see if there's
321 ** a byte ready for output.
322 */
323 leftchar = (leftchar << 6) | (this_ch);
324 leftbits += 6;
325 if ( leftbits >= 8 ) {
326 leftbits -= 8;
327 *bin_data++ = (leftchar >> leftbits) & 0xff;
328 leftchar &= ((1 << leftbits) - 1);
329 bin_len--;
330 }
331 }
332 /*
333 ** Finally, check that if there's anything left on the line
334 ** that it's whitespace only.
335 */
336 while( ascii_len-- > 0 ) {
337 this_ch = *ascii_data++;
338 /* Extra '`' may be written as padding in some cases */
339 if ( this_ch != ' ' && this_ch != ' '+64 &&
340 this_ch != '\n' && this_ch != '\r' ) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200341 state = PyModule_GetState(module);
342 if (state == NULL) {
343 return NULL;
344 }
345 PyErr_SetString(state->Error, "Trailing garbage");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 Py_DECREF(rv);
347 return NULL;
348 }
349 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000351}
352
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200353/*[clinic input]
354binascii.b2a_uu
355
356 data: Py_buffer
357 /
Xiang Zhang13f1f422017-05-03 11:16:21 +0800358 *
359 backtick: bool(accept={int}) = False
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200360
361Uuencode line of data.
362[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000363
Jack Jansen72781191995-08-07 14:34:15 +0000364static PyObject *
Xiang Zhang13f1f422017-05-03 11:16:21 +0800365binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
366/*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
Jack Jansen72781191995-08-07 14:34:15 +0000367{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200368 unsigned char *ascii_data;
369 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 int leftbits = 0;
371 unsigned char this_ch;
372 unsigned int leftchar = 0;
Marcel Plch33e71e02019-05-22 13:51:26 +0200373 binascii_state *state;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200374 Py_ssize_t bin_len, out_len;
375 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000376
Victor Stinnereaaaf132015-10-13 10:51:47 +0200377 _PyBytesWriter_Init(&writer);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200378 bin_data = data->buf;
379 bin_len = data->len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 if ( bin_len > 45 ) {
381 /* The 45 is a limit that appears in all uuencode's */
Marcel Plch33e71e02019-05-22 13:51:26 +0200382 state = PyModule_GetState(module);
383 if (state == NULL) {
384 return NULL;
385 }
386 PyErr_SetString(state->Error, "At most 45 bytes at once");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 return NULL;
388 }
Jack Jansen72781191995-08-07 14:34:15 +0000389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 /* We're lazy and allocate to much (fixed up later) */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200391 out_len = 2 + (bin_len + 2) / 3 * 4;
392 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
393 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000394 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* Store the length */
Xiang Zhang13f1f422017-05-03 11:16:21 +0800397 if (backtick && !bin_len)
398 *ascii_data++ = '`';
399 else
Segev Finer679b5662017-07-27 01:17:57 +0300400 *ascii_data++ = ' ' + (unsigned char)bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
403 /* Shift the data (or padding) into our buffer */
404 if ( bin_len > 0 ) /* Data */
405 leftchar = (leftchar << 8) | *bin_data;
406 else /* Padding */
407 leftchar <<= 8;
408 leftbits += 8;
Jack Jansen72781191995-08-07 14:34:15 +0000409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 /* See if there are 6-bit groups ready */
411 while ( leftbits >= 6 ) {
412 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
413 leftbits -= 6;
Xiang Zhang13f1f422017-05-03 11:16:21 +0800414 if (backtick && !this_ch)
415 *ascii_data++ = '`';
416 else
417 *ascii_data++ = this_ch + ' ';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 }
419 }
420 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000421
Victor Stinnereaaaf132015-10-13 10:51:47 +0200422 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen72781191995-08-07 14:34:15 +0000423}
424
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200425/*[clinic input]
426binascii.a2b_base64
427
Serhiy Storchaka12785612014-01-25 11:49:49 +0200428 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200429 /
430
431Decode a line of base64 data.
432[clinic start generated code]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000433
434static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300435binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
436/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000437{
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500438 assert(data->len >= 0);
Tim Peters934c1a12002-07-02 22:24:50 +0000439
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500440 const unsigned char *ascii_data = data->buf;
441 size_t ascii_len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 /* Allocate the buffer */
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500444 Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
445 _PyBytesWriter writer;
446 _PyBytesWriter_Init(&writer);
447 unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
Victor Stinnereaaaf132015-10-13 10:51:47 +0200448 if (bin_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 return NULL;
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500450 unsigned char *bin_data_start = bin_data;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000451
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500452 int quad_pos = 0;
453 unsigned char leftchar = 0;
454 int pads = 0;
455 for (size_t i = 0; i < ascii_len; i++) {
456 unsigned char this_ch = ascii_data[i];
Guido van Rossum2db4f471999-10-19 19:05:14 +0000457
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000458 /* Check for pad sequences and ignore
459 ** the invalid ones.
460 */
461 if (this_ch == BASE64_PAD) {
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500462 if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 /* A pad sequence means no more input.
464 ** We've already interpreted the data
465 ** from the quad at this point.
466 */
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500467 goto done;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 }
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500469 continue;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000471
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500472 this_ch = table_a2b_base64[this_ch];
473 if (this_ch >= 64) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 continue;
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500475 }
476 pads = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000477
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500478 switch (quad_pos) {
479 case 0:
480 quad_pos = 1;
481 leftchar = this_ch;
482 break;
483 case 1:
484 quad_pos = 2;
485 *bin_data++ = (leftchar << 2) | (this_ch >> 4);
486 leftchar = this_ch & 0x0f;
487 break;
488 case 2:
489 quad_pos = 3;
490 *bin_data++ = (leftchar << 4) | (this_ch >> 2);
491 leftchar = this_ch & 0x03;
492 break;
493 case 3:
494 quad_pos = 0;
495 *bin_data++ = (leftchar << 6) | (this_ch);
496 leftchar = 0;
497 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 }
499 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000500
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500501 if (quad_pos != 0) {
502 binascii_state *state = PyModule_GetState(module);
Marcel Plch33e71e02019-05-22 13:51:26 +0200503 if (state == NULL) {
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500504 /* error already set, from PyModule_GetState */
505 } else if (quad_pos == 1) {
Tal Einat1b85c712018-06-10 10:01:50 +0300506 /*
507 ** There is exactly one extra valid, non-padding, base64 character.
508 ** This is an invalid length, as there is no possible input that
509 ** could encoded into such a base64 string.
510 */
Marcel Plch33e71e02019-05-22 13:51:26 +0200511 PyErr_Format(state->Error,
Tal Einat1fba2ff2018-09-28 08:57:22 +0300512 "Invalid base64-encoded string: "
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +0200513 "number of data characters (%zd) cannot be 1 more "
Tal Einat1fba2ff2018-09-28 08:57:22 +0300514 "than a multiple of 4",
515 (bin_data - bin_data_start) / 3 * 4 + 1);
Tal Einat1b85c712018-06-10 10:01:50 +0300516 } else {
Marcel Plch33e71e02019-05-22 13:51:26 +0200517 PyErr_SetString(state->Error, "Incorrect padding");
Tal Einat1b85c712018-06-10 10:01:50 +0300518 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200519 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000520 return NULL;
521 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000522
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500523done:
Victor Stinnereaaaf132015-10-13 10:51:47 +0200524 return _PyBytesWriter_Finish(&writer, bin_data);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000525}
526
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200527
528/*[clinic input]
529binascii.b2a_base64
530
531 data: Py_buffer
Xiang Zhang1374dbb2017-05-01 13:12:07 +0800532 /
Victor Stinnere84c9762015-10-11 11:01:02 +0200533 *
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200534 newline: bool(accept={int}) = True
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200535
536Base64-code line of data.
537[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000538
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000539static PyObject *
Serhiy Storchaka2954f832016-07-07 18:20:03 +0300540binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
Xiang Zhang1374dbb2017-05-01 13:12:07 +0800541/*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000542{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200543 unsigned char *ascii_data;
544 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000545 int leftbits = 0;
546 unsigned char this_ch;
547 unsigned int leftchar = 0;
Victor Stinnere84c9762015-10-11 11:01:02 +0200548 Py_ssize_t bin_len, out_len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200549 _PyBytesWriter writer;
Marcel Plch33e71e02019-05-22 13:51:26 +0200550 binascii_state *state;
Tim Peters934c1a12002-07-02 22:24:50 +0000551
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200552 bin_data = data->buf;
553 bin_len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200554 _PyBytesWriter_Init(&writer);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000556 assert(bin_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000557
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000558 if ( bin_len > BASE64_MAXBIN ) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200559 state = PyModule_GetState(module);
560 if (state == NULL) {
561 return NULL;
562 }
563 PyErr_SetString(state->Error, "Too much data for base64 line");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000564 return NULL;
565 }
Tim Peters934c1a12002-07-02 22:24:50 +0000566
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 /* We're lazy and allocate too much (fixed up later).
Victor Stinnere84c9762015-10-11 11:01:02 +0200568 "+2" leaves room for up to two pad characters.
569 Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
570 out_len = bin_len*2 + 2;
571 if (newline)
572 out_len++;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200573 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
574 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 return NULL;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000576
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
578 /* Shift the data into our buffer */
579 leftchar = (leftchar << 8) | *bin_data;
580 leftbits += 8;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000581
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000582 /* See if there are 6-bit groups ready */
583 while ( leftbits >= 6 ) {
584 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
585 leftbits -= 6;
586 *ascii_data++ = table_b2a_base64[this_ch];
587 }
588 }
589 if ( leftbits == 2 ) {
590 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
591 *ascii_data++ = BASE64_PAD;
592 *ascii_data++ = BASE64_PAD;
593 } else if ( leftbits == 4 ) {
594 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
595 *ascii_data++ = BASE64_PAD;
596 }
Victor Stinnere84c9762015-10-11 11:01:02 +0200597 if (newline)
598 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000599
Victor Stinnereaaaf132015-10-13 10:51:47 +0200600 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000601}
602
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200603/*[clinic input]
604binascii.a2b_hqx
605
Serhiy Storchaka12785612014-01-25 11:49:49 +0200606 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200607 /
608
609Decode .hqx coding.
610[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000611
612static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300613binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data)
614/*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/
Jack Jansen72781191995-08-07 14:34:15 +0000615{
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100616 if (PyErr_WarnEx(PyExc_DeprecationWarning,
617 "binascii.a2b_hqx() is deprecated", 1) < 0) {
618 return NULL;
619 }
620
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200621 const unsigned char *ascii_data;
622 unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000623 int leftbits = 0;
624 unsigned char this_ch;
625 unsigned int leftchar = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200626 PyObject *res;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000627 Py_ssize_t len;
628 int done = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200629 _PyBytesWriter writer;
Marcel Plch33e71e02019-05-22 13:51:26 +0200630 binascii_state *state;
Tim Peters934c1a12002-07-02 22:24:50 +0000631
Serhiy Storchaka12785612014-01-25 11:49:49 +0200632 ascii_data = data->buf;
633 len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200634 _PyBytesWriter_Init(&writer);
Jack Jansen72781191995-08-07 14:34:15 +0000635
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000636 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000637
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200638 if (len > PY_SSIZE_T_MAX - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000639 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000641 /* Allocate a string that is too big (fixed later)
642 Add two to the initial length to prevent interning which
643 would preclude subsequent resizing. */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200644 bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
645 if (bin_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000648 for( ; len > 0 ; len--, ascii_data++ ) {
649 /* Get the byte and look it up */
650 this_ch = table_a2b_hqx[*ascii_data];
651 if ( this_ch == SKIP )
652 continue;
653 if ( this_ch == FAIL ) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200654 state = PyModule_GetState(module);
655 if (state == NULL) {
656 return NULL;
657 }
658 PyErr_SetString(state->Error, "Illegal char");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200659 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000660 return NULL;
661 }
662 if ( this_ch == DONE ) {
663 /* The terminating colon */
664 done = 1;
665 break;
666 }
Jack Jansen72781191995-08-07 14:34:15 +0000667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 /* Shift it into the buffer and see if any bytes are ready */
669 leftchar = (leftchar << 6) | (this_ch);
670 leftbits += 6;
671 if ( leftbits >= 8 ) {
672 leftbits -= 8;
673 *bin_data++ = (leftchar >> leftbits) & 0xff;
674 leftchar &= ((1 << leftbits) - 1);
675 }
676 }
Tim Peters934c1a12002-07-02 22:24:50 +0000677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 if ( leftbits && !done ) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200679 state = PyModule_GetState(module);
680 if (state == NULL) {
681 return NULL;
682 }
683 PyErr_SetString(state->Incomplete,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000684 "String has incomplete number of bytes");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200685 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000686 return NULL;
687 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000688
Victor Stinnereaaaf132015-10-13 10:51:47 +0200689 res = _PyBytesWriter_Finish(&writer, bin_data);
690 if (res == NULL)
691 return NULL;
692 return Py_BuildValue("Ni", res, done);
Jack Jansen72781191995-08-07 14:34:15 +0000693}
694
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200695
696/*[clinic input]
697binascii.rlecode_hqx
698
699 data: Py_buffer
700 /
701
702Binhex RLE-code binary data.
703[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000704
705static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300706binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data)
707/*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/
Jack Jansen72781191995-08-07 14:34:15 +0000708{
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100709 if (PyErr_WarnEx(PyExc_DeprecationWarning,
710 "binascii.rlecode_hqx() is deprecated", 1) < 0) {
711 return NULL;
712 }
713
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200714 const unsigned char *in_data;
715 unsigned char *out_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000716 unsigned char ch;
717 Py_ssize_t in, inend, len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200718 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000719
Victor Stinnereaaaf132015-10-13 10:51:47 +0200720 _PyBytesWriter_Init(&writer);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200721 in_data = data->buf;
722 len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000725
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200726 if (len > PY_SSIZE_T_MAX / 2 - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000727 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 /* Worst case: output is twice as big as input (fixed later) */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200730 out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
731 if (out_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000732 return NULL;
Tim Peters934c1a12002-07-02 22:24:50 +0000733
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000734 for( in=0; in<len; in++) {
735 ch = in_data[in];
736 if ( ch == RUNCHAR ) {
737 /* RUNCHAR. Escape it. */
738 *out_data++ = RUNCHAR;
739 *out_data++ = 0;
740 } else {
741 /* Check how many following are the same */
742 for(inend=in+1;
743 inend<len && in_data[inend] == ch &&
744 inend < in+255;
745 inend++) ;
746 if ( inend - in > 3 ) {
747 /* More than 3 in a row. Output RLE. */
748 *out_data++ = ch;
749 *out_data++ = RUNCHAR;
Antoine Pitrou40455752010-08-15 18:51:10 +0000750 *out_data++ = (unsigned char) (inend-in);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000751 in = inend-1;
752 } else {
753 /* Less than 3. Output the byte itself */
754 *out_data++ = ch;
755 }
756 }
757 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200758
759 return _PyBytesWriter_Finish(&writer, out_data);
Jack Jansen72781191995-08-07 14:34:15 +0000760}
761
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200762
763/*[clinic input]
764binascii.b2a_hqx
765
766 data: Py_buffer
767 /
768
769Encode .hqx data.
770[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000771
Jack Jansen72781191995-08-07 14:34:15 +0000772static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300773binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data)
774/*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/
Jack Jansen72781191995-08-07 14:34:15 +0000775{
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100776 if (PyErr_WarnEx(PyExc_DeprecationWarning,
777 "binascii.b2a_hqx() is deprecated", 1) < 0) {
778 return NULL;
779 }
780
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200781 unsigned char *ascii_data;
782 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000783 int leftbits = 0;
784 unsigned char this_ch;
785 unsigned int leftchar = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000786 Py_ssize_t len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200787 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000788
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200789 bin_data = data->buf;
790 len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200791 _PyBytesWriter_Init(&writer);
Jack Jansen72781191995-08-07 14:34:15 +0000792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000793 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000794
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200795 if (len > PY_SSIZE_T_MAX / 2 - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000796 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 /* Allocate a buffer that is at least large enough */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200799 ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
800 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 return NULL;
Tim Peters934c1a12002-07-02 22:24:50 +0000802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 for( ; len > 0 ; len--, bin_data++ ) {
804 /* Shift into our buffer, and output any 6bits ready */
805 leftchar = (leftchar << 8) | *bin_data;
806 leftbits += 8;
807 while ( leftbits >= 6 ) {
808 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
809 leftbits -= 6;
810 *ascii_data++ = table_b2a_hqx[this_ch];
811 }
812 }
813 /* Output a possible runt byte */
814 if ( leftbits ) {
815 leftchar <<= (6-leftbits);
816 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
817 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200818
819 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen72781191995-08-07 14:34:15 +0000820}
821
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200822
823/*[clinic input]
824binascii.rledecode_hqx
825
826 data: Py_buffer
827 /
828
829Decode hexbin RLE-coded string.
830[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000831
Jack Jansen72781191995-08-07 14:34:15 +0000832static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300833binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data)
834/*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/
Jack Jansen72781191995-08-07 14:34:15 +0000835{
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100836 if (PyErr_WarnEx(PyExc_DeprecationWarning,
837 "binascii.rledecode_hqx() is deprecated", 1) < 0) {
838 return NULL;
839 }
840
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200841 const unsigned char *in_data;
842 unsigned char *out_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000843 unsigned char in_byte, in_repeat;
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200844 Py_ssize_t in_len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200845 _PyBytesWriter writer;
Jack Jansen72781191995-08-07 14:34:15 +0000846
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200847 in_data = data->buf;
848 in_len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200849 _PyBytesWriter_Init(&writer);
Marcel Plch33e71e02019-05-22 13:51:26 +0200850 binascii_state *state;
Jack Jansen72781191995-08-07 14:34:15 +0000851
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000852 assert(in_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000853
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000854 /* Empty string is a special case */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200855 if ( in_len == 0 )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 return PyBytes_FromStringAndSize("", 0);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200857 else if (in_len > PY_SSIZE_T_MAX / 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000858 return PyErr_NoMemory();
Jack Jansen72781191995-08-07 14:34:15 +0000859
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000860 /* Allocate a buffer of reasonable size. Resized when needed */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200861 out_data = _PyBytesWriter_Alloc(&writer, in_len);
Victor Stinnereaaaf132015-10-13 10:51:47 +0200862 if (out_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000863 return NULL;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200864
865 /* Use overallocation */
866 writer.overallocate = 1;
Jack Jansen72781191995-08-07 14:34:15 +0000867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 /*
869 ** We need two macros here to get/put bytes and handle
870 ** end-of-buffer for input and output strings.
871 */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200872#define INBYTE(b) \
873 do { \
874 if ( --in_len < 0 ) { \
Marcel Plch33e71e02019-05-22 13:51:26 +0200875 state = PyModule_GetState(module); \
876 if (state == NULL) { \
877 return NULL; \
878 } \
879 PyErr_SetString(state->Incomplete, ""); \
Victor Stinnereaaaf132015-10-13 10:51:47 +0200880 goto error; \
881 } \
882 b = *in_data++; \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000883 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000884
Victor Stinnereaaaf132015-10-13 10:51:47 +0200885 /*
886 ** Handle first byte separately (since we have to get angry
887 ** in case of an orphaned RLE code).
888 */
889 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000890
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 if (in_byte == RUNCHAR) {
892 INBYTE(in_repeat);
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200893 /* only 1 byte will be written, but 2 bytes were preallocated:
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700894 subtract 1 byte to prevent overallocation */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200895 writer.min_size--;
896
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 if (in_repeat != 0) {
898 /* Note Error, not Incomplete (which is at the end
899 ** of the string only). This is a programmer error.
900 */
Marcel Plch33e71e02019-05-22 13:51:26 +0200901 state = PyModule_GetState(module);
902 if (state == NULL) {
903 return NULL;
904 }
905 PyErr_SetString(state->Error, "Orphaned RLE code at start");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200906 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000907 }
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200908 *out_data++ = RUNCHAR;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000909 } else {
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200910 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000911 }
Tim Peters934c1a12002-07-02 22:24:50 +0000912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000913 while( in_len > 0 ) {
914 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000915
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000916 if (in_byte == RUNCHAR) {
917 INBYTE(in_repeat);
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200918 /* only 1 byte will be written, but 2 bytes were preallocated:
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700919 subtract 1 byte to prevent overallocation */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200920 writer.min_size--;
921
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000922 if ( in_repeat == 0 ) {
923 /* Just an escaped RUNCHAR value */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200924 *out_data++ = RUNCHAR;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000925 } else {
926 /* Pick up value and output a sequence of it */
927 in_byte = out_data[-1];
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200928
929 /* enlarge the buffer if needed */
930 if (in_repeat > 1) {
931 /* -1 because we already preallocated 1 byte */
932 out_data = _PyBytesWriter_Prepare(&writer, out_data,
933 in_repeat - 1);
934 if (out_data == NULL)
935 goto error;
936 }
937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000938 while ( --in_repeat > 0 )
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200939 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000940 }
941 } else {
942 /* Normal byte */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200943 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000944 }
945 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200946 return _PyBytesWriter_Finish(&writer, out_data);
947
948error:
949 _PyBytesWriter_Dealloc(&writer);
950 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000951}
952
Jack Jansen72781191995-08-07 14:34:15 +0000953
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200954/*[clinic input]
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100955binascii.crc_hqx
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200956
957 data: Py_buffer
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300958 crc: unsigned_int(bitwise=True)
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200959 /
960
Martin Panter3310e142016-12-24 07:36:44 +0000961Compute CRC-CCITT incrementally.
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200962[clinic start generated code]*/
963
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100964static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300965binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100966/*[clinic end generated code: output=2fde213d0f547a98 input=56237755370a951c]*/
Jack Jansen72781191995-08-07 14:34:15 +0000967{
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100968 if (PyErr_WarnEx(PyExc_DeprecationWarning,
969 "binascii.crc_hqx() is deprecated", 1) < 0) {
970 return NULL;
971 }
972
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200973 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000974 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000975
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300976 crc &= 0xffff;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200977 bin_data = data->buf;
978 len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000979
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000980 while(len-- > 0) {
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300981 crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000982 }
Jack Jansen72781191995-08-07 14:34:15 +0000983
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100984 return PyLong_FromUnsignedLong(crc);
Jack Jansen72781191995-08-07 14:34:15 +0000985}
986
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200987#ifndef USE_ZLIB_CRC32
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000988/* Crc - 32 BIT ANSI X3.66 CRC checksum files
989 Also known as: ISO 3307
990**********************************************************************|
991* *|
992* Demonstration program to compute the 32-bit CRC used as the frame *|
993* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
994* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
995* protocol). The 32-bit FCS was added via the Federal Register, *|
996* 1 June 1982, p.23798. I presume but don't know for certain that *|
997* this polynomial is or will be included in CCITT V.41, which *|
998* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
999* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
1000* errors by a factor of 10^-5 over 16-bit FCS. *|
1001* *|
1002**********************************************************************|
1003
1004 Copyright (C) 1986 Gary S. Brown. You may use this program, or
1005 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001006
Tim Peters934c1a12002-07-02 22:24:50 +00001007 First, the polynomial itself and its table of feedback terms. The
1008 polynomial is
1009 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
1010 Note that we take it "backwards" and put the highest-order term in
1011 the lowest-order bit. The X^32 term is "implied"; the LSB is the
1012 X^31 term, etc. The X^0 term (usually shown as "+1") results in
1013 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001014
Tim Peters934c1a12002-07-02 22:24:50 +00001015 Note that the usual hardware shift register implementation, which
1016 is what we're using (we're merely optimizing it by doing eight-bit
1017 chunks at a time) shifts bits into the lowest-order term. In our
1018 implementation, that means shifting towards the right. Why do we
1019 do it this way? Because the calculated CRC must be transmitted in
1020 order from highest-order term to lowest-order term. UARTs transmit
1021 characters in order from LSB to MSB. By storing the CRC this way,
1022 we hand it to the UART in the order low-byte to high-byte; the UART
 1023 sends each low-bit to high-bit; and the result is transmission bit
1024 by bit from highest- to lowest-order term without requiring any bit
1025 shuffling on our part. Reception works similarly.
1026
1027 The feedback terms table consists of 256, 32-bit entries. Notes:
1028
1029 1. The table can be generated at runtime if desired; code to do so
1030 is shown later. It might not be obvious, but the feedback
1031 terms simply represent the results of eight shift/xor opera-
1032 tions for all combinations of data and CRC register values.
1033
1034 2. The CRC accumulation logic is the same for all CRC polynomials,
1035 be they sixteen or thirty-two bits wide. You simply choose the
1036 appropriate table. Alternatively, because the table can be
1037 generated at runtime, you can start by generating the table for
1038 the polynomial in question and use exactly the same "updcrc",
1039 if your application needn't simultaneously handle two CRC
1040 polynomials. (Note, however, that XMODEM is strange.)
1041
1042 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
1043 of course, 32-bit entries work OK if the high 16 bits are zero.
1044
1045 4. The values must be right-shifted by eight bits by the "updcrc"
1046 logic; the shift must be unsigned (bring in zeroes). On some
1047 hardware you could probably optimize the shift in assembler by
1048 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001049********************************************************************/
1050
/* CRC-32 feedback terms for the bit-reversed polynomial described above.
   Entry i is the result of eight shift/xor steps applied to the byte value
   i (entry 0x80 is the polynomial constant itself), so the table has
   exactly 256 entries, listed here four per row. */
static const unsigned int crc_32_tab[256] = {
    0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU,
    0x076dc419U, 0x706af48fU, 0xe963a535U, 0x9e6495a3U,
    0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U,
    0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U,
    0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
    0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U,
    0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU,
    0x14015c4fU, 0x63066cd9U, 0xfa0f3d63U, 0x8d080df5U,
    0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U,
    0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
    0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U,
    0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U,
    0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U,
    0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U, 0xb8bda50fU,
    0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
    0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU,
    0x76dc4190U, 0x01db7106U, 0x98d220bcU, 0xefd5102aU,
    0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U,
    0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U,
    0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
    0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU,
    0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U,
    0x65b0d9c6U, 0x12b7e950U, 0x8bbeb8eaU, 0xfcb9887cU,
    0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U,
    0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
    0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU,
    0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U,
    0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U,
    0x5005713cU, 0x270241aaU, 0xbe0b1010U, 0xc90c2086U,
    0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
    0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U,
    0x59b33d17U, 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU,
    0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU,
    0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U,
    0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
    0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U,
    0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU,
    0xf762575dU, 0x806567cbU, 0x196c3671U, 0x6e6b06e7U,
    0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU,
    0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
    0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U,
    0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU,
    0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U,
    0xdf60efc3U, 0xa867df55U, 0x316e8eefU, 0x4669be79U,
    0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
    0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU,
    0xc5ba3bbeU, 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U,
    0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU,
    0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU,
    0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
    0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U,
    0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U,
    0x86d3d2d4U, 0xf1d4e242U, 0x68ddb3f8U, 0x1fda836eU,
    0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U,
    0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
    0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U,
    0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U,
    0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU,
    0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U, 0x37d83bf0U,
    0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
    0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U,
    0xbad03605U, 0xcdd70693U, 0x54de5729U, 0x23d967bfU,
    0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U,
    0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU
};
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001105#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001106
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001107/*[clinic input]
1108binascii.crc32 -> unsigned_int
1109
1110 data: Py_buffer
1111 crc: unsigned_int(bitwise=True) = 0
1112 /
1113
1114Compute CRC-32 incrementally.
1115[clinic start generated code]*/
1116
1117static unsigned int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001118binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
1119/*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001120
1121#ifdef USE_ZLIB_CRC32
1122/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
1123{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001124 const Byte *buf;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001125 Py_ssize_t len;
1126 int signed_val;
1127
1128 buf = (Byte*)data->buf;
1129 len = data->len;
1130 signed_val = crc32(crc, buf, len);
1131 return (unsigned int)signed_val & 0xffffffffU;
1132}
1133#else /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001134{ /* By Jim Ahlstrom; All rights transferred to CNRI */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001135 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 Py_ssize_t len;
1137 unsigned int result;
Tim Peters934c1a12002-07-02 22:24:50 +00001138
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001139 bin_data = data->buf;
1140 len = data->len;
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 crc = ~ crc;
1143 while (len-- > 0) {
1144 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1145 /* Note: (crc >> 8) MUST zero fill on left */
1146 }
Tim Petersa98011c2002-07-02 20:20:08 +00001147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 result = (crc ^ 0xFFFFFFFF);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001149 return result & 0xffffffff;
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001150}
Christian Heimes1dc54002008-03-24 02:19:29 +00001151#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001152
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001153/*[clinic input]
1154binascii.b2a_hex
1155
1156 data: Py_buffer
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001157 sep: object = NULL
1158 An optional single character or byte to separate hex bytes.
1159 bytes_per_sep: int = 1
1160 How many bytes between separators. Positive values count from the
1161 right, negative values count from the left.
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001162
1163Hexadecimal representation of binary data.
1164
1165The return value is a bytes object. This function is also
1166available as "hexlify()".
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001167
1168Example:
1169>>> binascii.b2a_hex(b'\xb9\x01\xef')
1170b'b901ef'
1171>>> binascii.hexlify(b'\xb9\x01\xef', ':')
1172b'b9:01:ef'
1173>>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
1174b'b9_01ef'
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001175[clinic start generated code]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001176
1177static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001178binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1179 int bytes_per_sep)
1180/*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001181{
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001182 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1183 sep, bytes_per_sep);
Barry Warsawe977c212000-08-15 06:07:13 +00001184}
1185
Zachary Wareb176d402015-01-20 13:59:46 -06001186/*[clinic input]
1187binascii.hexlify = binascii.b2a_hex
1188
1189Hexadecimal representation of binary data.
1190
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001191The return value is a bytes object. This function is also
1192available as "b2a_hex()".
Zachary Wareb176d402015-01-20 13:59:46 -06001193[clinic start generated code]*/
1194
1195static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001196binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1197 int bytes_per_sep)
1198/*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
Zachary Wareb176d402015-01-20 13:59:46 -06001199{
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001200 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1201 sep, bytes_per_sep);
Zachary Wareb176d402015-01-20 13:59:46 -06001202}
Barry Warsawe977c212000-08-15 06:07:13 +00001203
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001204/*[clinic input]
1205binascii.a2b_hex
1206
1207 hexstr: ascii_buffer
1208 /
1209
1210Binary data of hexadecimal representation.
1211
1212hexstr must contain an even number of hex digits (upper or lower case).
1213This function is also available as "unhexlify()".
1214[clinic start generated code]*/
1215
Barry Warsawe977c212000-08-15 06:07:13 +00001216static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001217binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
1218/*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001219{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001220 const char* argbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 Py_ssize_t arglen;
1222 PyObject *retval;
1223 char* retbuf;
1224 Py_ssize_t i, j;
Marcel Plch33e71e02019-05-22 13:51:26 +02001225 binascii_state *state;
Barry Warsawe977c212000-08-15 06:07:13 +00001226
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001227 argbuf = hexstr->buf;
1228 arglen = hexstr->len;
Barry Warsawe977c212000-08-15 06:07:13 +00001229
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 assert(arglen >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001231
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001232 /* XXX What should we do about strings with an odd length? Should
1233 * we add an implicit leading zero, or a trailing zero? For now,
1234 * raise an exception.
1235 */
1236 if (arglen % 2) {
Marcel Plch33e71e02019-05-22 13:51:26 +02001237 state = PyModule_GetState(module);
1238 if (state == NULL) {
1239 return NULL;
1240 }
1241 PyErr_SetString(state->Error, "Odd-length string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 return NULL;
1243 }
Barry Warsawe977c212000-08-15 06:07:13 +00001244
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001245 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001246 if (!retval)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001247 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001248 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 for (i=j=0; i < arglen; i += 2) {
Sergey Fedoseev6b5df902018-02-27 01:35:41 +05001251 unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
1252 unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
1253 if (top >= 16 || bot >= 16) {
Marcel Plch33e71e02019-05-22 13:51:26 +02001254 state = PyModule_GetState(module);
1255 if (state == NULL) {
1256 return NULL;
1257 }
1258 PyErr_SetString(state->Error,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001259 "Non-hexadecimal digit found");
1260 goto finally;
1261 }
1262 retbuf[j++] = (top << 4) + bot;
1263 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001264 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +00001265
1266 finally:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 Py_DECREF(retval);
1268 return NULL;
Barry Warsawe977c212000-08-15 06:07:13 +00001269}
1270
Zachary Wareb176d402015-01-20 13:59:46 -06001271/*[clinic input]
1272binascii.unhexlify = binascii.a2b_hex
1273
1274Binary data of hexadecimal representation.
1275
1276hexstr must contain an even number of hex digits (upper or lower case).
1277[clinic start generated code]*/
1278
1279static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001280binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
1281/*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
Zachary Wareb176d402015-01-20 13:59:46 -06001282{
1283 return binascii_a2b_hex_impl(module, hexstr);
1284}
1285
/* Line-length limit for quoted-printable output: b2a_qp inserts a soft
 * line break ("=" followed by a newline) before emitting a character
 * that would make the current output line reach this many characters. */
#define MAXLINESIZE 76
1287
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001288
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001289/*[clinic input]
1290binascii.a2b_qp
1291
Serhiy Storchaka12785612014-01-25 11:49:49 +02001292 data: ascii_buffer
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001293 header: bool(accept={int}) = False
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001294
1295Decode a string of qp-encoded data.
1296[clinic start generated code]*/
1297
1298static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001299binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001300/*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001301{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001302 Py_ssize_t in, out;
1303 char ch;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001304 const unsigned char *ascii_data;
1305 unsigned char *odata;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 Py_ssize_t datalen = 0;
1307 PyObject *rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001308
Serhiy Storchaka12785612014-01-25 11:49:49 +02001309 ascii_data = data->buf;
1310 datalen = data->len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001311
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 /* We allocate the output same size as input, this is overkill.
1313 * The previous implementation used calloc() so we'll zero out the
1314 * memory here too, since PyMem_Malloc() does not guarantee that.
1315 */
1316 odata = (unsigned char *) PyMem_Malloc(datalen);
1317 if (odata == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001318 PyErr_NoMemory();
1319 return NULL;
1320 }
1321 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001322
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 in = out = 0;
1324 while (in < datalen) {
Serhiy Storchaka12785612014-01-25 11:49:49 +02001325 if (ascii_data[in] == '=') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001326 in++;
1327 if (in >= datalen) break;
1328 /* Soft line breaks */
Serhiy Storchaka12785612014-01-25 11:49:49 +02001329 if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
1330 if (ascii_data[in] != '\n') {
1331 while (in < datalen && ascii_data[in] != '\n') in++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001332 }
1333 if (in < datalen) in++;
1334 }
Serhiy Storchaka12785612014-01-25 11:49:49 +02001335 else if (ascii_data[in] == '=') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 /* broken case from broken python qp */
1337 odata[out++] = '=';
1338 in++;
1339 }
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001340 else if ((in + 1 < datalen) &&
1341 ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
Serhiy Storchaka12785612014-01-25 11:49:49 +02001342 (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1343 (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1344 ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1345 (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1346 (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 /* hexval */
Sergey Fedoseev6b5df902018-02-27 01:35:41 +05001348 ch = _PyLong_DigitValue[ascii_data[in]] << 4;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 in++;
Sergey Fedoseev6b5df902018-02-27 01:35:41 +05001350 ch |= _PyLong_DigitValue[ascii_data[in]];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 in++;
1352 odata[out++] = ch;
1353 }
1354 else {
1355 odata[out++] = '=';
1356 }
1357 }
Serhiy Storchaka12785612014-01-25 11:49:49 +02001358 else if (header && ascii_data[in] == '_') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001359 odata[out++] = ' ';
1360 in++;
1361 }
1362 else {
Serhiy Storchaka12785612014-01-25 11:49:49 +02001363 odata[out] = ascii_data[in];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 in++;
1365 out++;
1366 }
1367 }
1368 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 PyMem_Free(odata);
1370 return NULL;
1371 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 PyMem_Free(odata);
1373 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001374}
1375
/* Store the two uppercase hexadecimal digits of ch into s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.
 * Always returns 0. */
static int
to_hex(unsigned char ch, unsigned char *s)
{
    static const char hexdigits[] = "0123456789ABCDEF";

    s[0] = hexdigits[(ch >> 4) & 0x0F];
    s[1] = hexdigits[ch & 0x0F];
    return 0;
}
1386
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001387/* XXX: This is ridiculously complicated to be backward compatible
1388 * (mostly) with the quopri module. It doesn't re-create the quopri
1389 * module bug where text ending in CRLF has the CR encoded */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001390
1391/*[clinic input]
1392binascii.b2a_qp
1393
1394 data: Py_buffer
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001395 quotetabs: bool(accept={int}) = False
1396 istext: bool(accept={int}) = True
1397 header: bool(accept={int}) = False
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001398
1399Encode a string using quoted-printable encoding.
1400
1401On encoding, when istext is set, newlines are not encoded, and white
1402space at end of lines is. When istext is not set, \r and \n (CR/LF)
1403are both encoded. When quotetabs is set, space and tabs are encoded.
1404[clinic start generated code]*/
1405
1406static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001407binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
Larry Hastings89964c42015-04-14 18:07:59 -04001408 int istext, int header)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001409/*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001410{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001411 Py_ssize_t in, out;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001412 const unsigned char *databuf;
1413 unsigned char *odata;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 Py_ssize_t datalen = 0, odatalen = 0;
1415 PyObject *rv;
1416 unsigned int linelen = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 unsigned char ch;
1418 int crlf = 0;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001419 const unsigned char *p;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001420
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001421 databuf = data->buf;
1422 datalen = data->len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 /* See if this string is using CRLF line ends */
1425 /* XXX: this function has the side effect of converting all of
1426 * the end of lines to be the same depending on this detection
1427 * here */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001428 p = (const unsigned char *) memchr(databuf, '\n', datalen);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001429 if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 crlf = 1;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001432 /* First, scan to see how many characters need to be encoded */
1433 in = 0;
1434 while (in < datalen) {
Benjamin Peterson4f976512016-08-13 18:33:33 -07001435 Py_ssize_t delta = 0;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001436 if ((databuf[in] > 126) ||
1437 (databuf[in] == '=') ||
1438 (header && databuf[in] == '_') ||
1439 ((databuf[in] == '.') && (linelen == 0) &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001440 (in + 1 == datalen || databuf[in+1] == '\n' ||
1441 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001442 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1443 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1444 ((databuf[in] < 33) &&
1445 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1446 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 {
1448 if ((linelen + 3) >= MAXLINESIZE) {
1449 linelen = 0;
1450 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001451 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001452 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001453 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 }
1455 linelen += 3;
Benjamin Peterson4f976512016-08-13 18:33:33 -07001456 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 in++;
1458 }
1459 else {
1460 if (istext &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001461 ((databuf[in] == '\n') ||
1462 ((in+1 < datalen) && (databuf[in] == '\r') &&
1463 (databuf[in+1] == '\n'))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001464 {
1465 linelen = 0;
1466 /* Protect against whitespace on end of line */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001467 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
Benjamin Peterson4f976512016-08-13 18:33:33 -07001468 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001470 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001472 delta += 1;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001473 if (databuf[in] == '\r')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 in += 2;
1475 else
1476 in++;
1477 }
1478 else {
1479 if ((in + 1 != datalen) &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001480 (databuf[in+1] != '\n') &&
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001481 (linelen + 1) >= MAXLINESIZE) {
1482 linelen = 0;
1483 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001484 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001486 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 }
1488 linelen++;
Benjamin Peterson4f976512016-08-13 18:33:33 -07001489 delta++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 in++;
1491 }
1492 }
Benjamin Peterson4f976512016-08-13 18:33:33 -07001493 if (PY_SSIZE_T_MAX - delta < odatalen) {
Benjamin Peterson4f976512016-08-13 18:33:33 -07001494 PyErr_NoMemory();
1495 return NULL;
1496 }
1497 odatalen += delta;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001498 }
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001499
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001500 /* We allocate the output same size as input, this is overkill.
1501 * The previous implementation used calloc() so we'll zero out the
1502 * memory here too, since PyMem_Malloc() does not guarantee that.
1503 */
1504 odata = (unsigned char *) PyMem_Malloc(odatalen);
1505 if (odata == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001506 PyErr_NoMemory();
1507 return NULL;
1508 }
1509 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001510
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 in = out = linelen = 0;
1512 while (in < datalen) {
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001513 if ((databuf[in] > 126) ||
1514 (databuf[in] == '=') ||
1515 (header && databuf[in] == '_') ||
1516 ((databuf[in] == '.') && (linelen == 0) &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001517 (in + 1 == datalen || databuf[in+1] == '\n' ||
1518 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001519 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1520 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1521 ((databuf[in] < 33) &&
1522 (databuf[in] != '\r') && (databuf[in] != '\n') &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001523 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 {
1525 if ((linelen + 3 )>= MAXLINESIZE) {
1526 odata[out++] = '=';
1527 if (crlf) odata[out++] = '\r';
1528 odata[out++] = '\n';
1529 linelen = 0;
1530 }
1531 odata[out++] = '=';
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001532 to_hex(databuf[in], &odata[out]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 out += 2;
1534 in++;
1535 linelen += 3;
1536 }
1537 else {
1538 if (istext &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001539 ((databuf[in] == '\n') ||
1540 ((in+1 < datalen) && (databuf[in] == '\r') &&
1541 (databuf[in+1] == '\n'))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001542 {
1543 linelen = 0;
1544 /* Protect against whitespace on end of line */
1545 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1546 ch = odata[out-1];
1547 odata[out-1] = '=';
1548 to_hex(ch, &odata[out]);
1549 out += 2;
1550 }
Tim Peters934c1a12002-07-02 22:24:50 +00001551
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001552 if (crlf) odata[out++] = '\r';
1553 odata[out++] = '\n';
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001554 if (databuf[in] == '\r')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001555 in += 2;
1556 else
1557 in++;
1558 }
1559 else {
1560 if ((in + 1 != datalen) &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001561 (databuf[in+1] != '\n') &&
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001562 (linelen + 1) >= MAXLINESIZE) {
1563 odata[out++] = '=';
1564 if (crlf) odata[out++] = '\r';
1565 odata[out++] = '\n';
1566 linelen = 0;
1567 }
1568 linelen++;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001569 if (header && databuf[in] == ' ') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001570 odata[out++] = '_';
1571 in++;
1572 }
1573 else {
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001574 odata[out++] = databuf[in++];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 }
1576 }
1577 }
1578 }
1579 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001580 PyMem_Free(odata);
1581 return NULL;
1582 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 PyMem_Free(odata);
1584 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001585}
Barry Warsawe977c212000-08-15 06:07:13 +00001586
Jack Jansen72781191995-08-07 14:34:15 +00001587/* List of functions defined in the module */
1588
1589static struct PyMethodDef binascii_module_methods[] = {
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001590 BINASCII_A2B_UU_METHODDEF
1591 BINASCII_B2A_UU_METHODDEF
1592 BINASCII_A2B_BASE64_METHODDEF
1593 BINASCII_B2A_BASE64_METHODDEF
1594 BINASCII_A2B_HQX_METHODDEF
1595 BINASCII_B2A_HQX_METHODDEF
1596 BINASCII_A2B_HEX_METHODDEF
1597 BINASCII_B2A_HEX_METHODDEF
Zachary Wareb176d402015-01-20 13:59:46 -06001598 BINASCII_HEXLIFY_METHODDEF
1599 BINASCII_UNHEXLIFY_METHODDEF
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001600 BINASCII_RLECODE_HQX_METHODDEF
1601 BINASCII_RLEDECODE_HQX_METHODDEF
1602 BINASCII_CRC_HQX_METHODDEF
1603 BINASCII_CRC32_METHODDEF
1604 BINASCII_A2B_QP_METHODDEF
1605 BINASCII_B2A_QP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001606 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001607};
1608
1609
Martin v. Löwis1a214512008-06-11 05:26:20 +00001610/* Initialization function for the module (*must* be called PyInit_binascii) */
/* Module-level docstring; passed to the interpreter through the
 * binasciimodule definition. */
PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001612
Marcel Plch33e71e02019-05-22 13:51:26 +02001613static int
1614binascii_exec(PyObject *m) {
1615 int result;
1616 binascii_state *state = PyModule_GetState(m);
1617 if (state == NULL) {
1618 return -1;
1619 }
1620
1621 state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1622 if (state->Error == NULL) {
1623 return -1;
1624 }
1625 result = PyModule_AddObject(m, "Error", state->Error);
1626 if (result == -1) {
1627 return -1;
1628 }
1629
1630 state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1631 if (state->Incomplete == NULL) {
1632 return -1;
1633 }
1634 result = PyModule_AddObject(m, "Incomplete", state->Incomplete);
1635 if (result == -1) {
1636 return -1;
1637 }
1638
1639 return 0;
1640}
1641
1642static PyModuleDef_Slot binascii_slots[] = {
1643 {Py_mod_exec, binascii_exec},
1644 {0, NULL}
1645};
Martin v. Löwis1a214512008-06-11 05:26:20 +00001646
1647static struct PyModuleDef binasciimodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 PyModuleDef_HEAD_INIT,
1649 "binascii",
1650 doc_binascii,
Marcel Plch33e71e02019-05-22 13:51:26 +02001651 sizeof(binascii_state),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001652 binascii_module_methods,
Marcel Plch33e71e02019-05-22 13:51:26 +02001653 binascii_slots,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 NULL,
1655 NULL,
1656 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001657};
1658
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001659PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001660PyInit_binascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001661{
Marcel Plch33e71e02019-05-22 13:51:26 +02001662 return PyModuleDef_Init(&binasciimodule);
Jack Jansen72781191995-08-07 14:34:15 +00001663}