blob: 1c7dc35882dee6ddc5beafdad35f2517ad2a0a73 [file] [log] [blame]
/*
** Routines to represent binary data in ASCII and vice-versa
**
** This module currently supports the following encodings:
** uuencode:
**      each line encodes 45 bytes (except possibly the last)
**      First char encodes (binary) length, rest data
**      each char encodes 6 bits, as follows:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**      short binary data is zero-extended (so the bits are always in the
**      right place), this does *not* reflect in the length.
** base64:
**      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
**      is done via a table.
**      Short binary data is filled (in ASCII) with '='.
** hqx:
**      File starts with introductory text, real data starts and ends
**      with colons.
**      Data consists of three similar parts: info, datafork, resourcefork.
**      Each part is protected (at the end) with a 16-bit crc
**      The binary data is run-length encoded, and then ascii-fied:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding is table-driven, see the code.
**      Short binary data results in the runt ascii-byte being output with
**      the bits in the right place.
**
** While I was reading dozens of programs that encode or decode the formats
** here (documentation? hihi:-) I have formulated Jansen's Observation:
**
**      Programs that encode binary data in ASCII are written in
**      such a style that they are as unreadable as possible. Devices used
**      include unnecessary global variables, burying important tables
**      in unrelated sourcefiles, putting functions in include files,
**      using seemingly-descriptive variable names for different purposes,
**      calls to empty subroutines and a host of others.
**
** I have attempted to break with this tradition, but I guess that that
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on rfc 1521 et al
** quoted-printable encoding specifies that non printable characters (anything
** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
** of the character. It also specifies some other behavior to enable 8bit data
** in a mail message with little difficulty (maximum line sizes, protecting
** some cases of whitespace, etc).
**
** Brandon Long, September 2001.
*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
Gregory P. Smith9c6b9162015-04-26 00:42:13 +000059#include "pystrhex.h"
Christian Heimes1dc54002008-03-24 02:19:29 +000060#ifdef USE_ZLIB_CRC32
61#include "zlib.h"
62#endif
Jack Jansen72781191995-08-07 14:34:15 +000063
Marcel Plch33e71e02019-05-22 13:51:26 +020064typedef struct binascii_state {
65 PyObject *Error;
66 PyObject *Incomplete;
67} binascii_state;
Jack Jansen72781191995-08-07 14:34:15 +000068
/*
** hqx lookup tables.
**
** table_a2b_hqx (ascii->binary) is indexed by the input byte and yields
** either the 6-bit value of that character (0x00..0x3F) or one of the
** control codes DONE/SKIP/FAIL defined below.
** table_b2a_hqx (binary->ascii) is the inverse: indexed by a 6-bit value
** it yields the character that encodes it.
*/

/* NOTE(review): presumably the run-length marker byte of the hqx RLE
   layer; it is not used in the part of the file visible here. */
#define RUNCHAR 0x90

#define DONE 0x7F   /* table entry for ':' */
#define SKIP 0x7E   /* table entry for '\n' and '\r' */
#define FAIL 0x7D   /* table entry for every other non-alphabet byte */

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*      ' '    !     "     #     $     %     &     '    */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*       (     )     *     +     ,     -     .     /    */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*       0     1     2     3     4     5     6     7    */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*       8     9     :     ;     <     =     >     ?    */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       @     A     B     C     D     E     F     G    */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*       H     I     J     K     L     M     N     O    */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*       P     Q     R     S     T     U     V     W    */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*       X     Y     Z     [     \     ]     ^     _    */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*       `     a     b     c     d     e     f     g    */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*       h     i     j     k     l     m     n     o    */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*       p     q     r     s     t     u     v     w    */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       x     y     z     {     |     }     ~    ^?    */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/* Bytes 0x80..0xFF are never part of the alphabet. */
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};

/* 64 alphabet characters plus the string literal's terminating NUL. */
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000132
/*
** base64 decode table, indexed by a 7-bit ASCII code (128 entries).
** Each entry is the 6-bit value of the character, or -1 when the character
** is not part of the base64 alphabet. The users in this file store an entry
** in an unsigned char and compare it against (unsigned char)-1, so the
** check does not depend on whether plain char is signed.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* Padding character that fills an incomplete final quad. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory.
   The bound keeps the output size estimate used by b2a_base64
   (2 * len + 2, plus 1 for the newline) within Py_ssize_t. */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/* base64 encode table: 6-bit value -> ASCII character
   (64 characters plus the string literal's terminating NUL). */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000151
152
153
/*
** Lookup table for the 16-bit CRC with generator polynomial 0x1021,
** processed most-significant bit first: entry i is the CRC register value
** obtained by starting from (i << 8) and shifting out eight bits.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
188
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200189/*[clinic input]
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200190module binascii
191[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300192/*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200193
194/*[python input]
195
196class ascii_buffer_converter(CConverter):
197 type = 'Py_buffer'
198 converter = 'ascii_buffer_converter'
199 impl_by_reference = True
Benjamin Petersonb62deac2014-01-26 10:41:58 -0500200 c_default = "{NULL, NULL}"
201
202 def cleanup(self):
203 name = self.name
204 return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"])
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200205
206[python start generated code]*/
Larry Hastings581ee362014-01-28 05:00:08 -0800207/*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200208
Antoine Pitrou08316762011-12-20 13:58:41 +0100209static int
210ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
211{
212 if (arg == NULL) {
213 PyBuffer_Release(buf);
214 return 1;
215 }
216 if (PyUnicode_Check(arg)) {
217 if (PyUnicode_READY(arg) < 0)
218 return 0;
219 if (!PyUnicode_IS_ASCII(arg)) {
220 PyErr_SetString(PyExc_ValueError,
221 "string argument should contain only ASCII characters");
222 return 0;
223 }
224 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
225 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
226 buf->len = PyUnicode_GET_LENGTH(arg);
227 buf->obj = NULL;
228 return 1;
229 }
230 if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
231 PyErr_Format(PyExc_TypeError,
232 "argument should be bytes, buffer or ASCII string, "
Berker Peksag3cd30c22015-02-15 00:31:00 +0200233 "not '%.100s'", Py_TYPE(arg)->tp_name);
Antoine Pitrou08316762011-12-20 13:58:41 +0100234 return 0;
235 }
236 if (!PyBuffer_IsContiguous(buf, 'C')) {
237 PyErr_Format(PyExc_TypeError,
238 "argument should be a contiguous buffer, "
Berker Peksag3cd30c22015-02-15 00:31:00 +0200239 "not '%.100s'", Py_TYPE(arg)->tp_name);
Antoine Pitrou08316762011-12-20 13:58:41 +0100240 PyBuffer_Release(buf);
241 return 0;
242 }
243 return Py_CLEANUP_SUPPORTED;
244}
245
Larry Hastingsf256c222014-01-25 21:30:37 -0800246#include "clinic/binascii.c.h"
Antoine Pitrou08316762011-12-20 13:58:41 +0100247
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200248/*[clinic input]
249binascii.a2b_uu
250
Serhiy Storchaka12785612014-01-25 11:49:49 +0200251 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200252 /
253
254Decode a line of uuencoded data.
255[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000256
257static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300258binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
259/*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
Jack Jansen72781191995-08-07 14:34:15 +0000260{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200261 const unsigned char *ascii_data;
262 unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000263 int leftbits = 0;
264 unsigned char this_ch;
265 unsigned int leftchar = 0;
266 PyObject *rv;
267 Py_ssize_t ascii_len, bin_len;
Marcel Plch33e71e02019-05-22 13:51:26 +0200268 binascii_state *state;
Tim Peters934c1a12002-07-02 22:24:50 +0000269
Serhiy Storchaka12785612014-01-25 11:49:49 +0200270 ascii_data = data->buf;
271 ascii_len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000272
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 assert(ascii_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000274
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 /* First byte: binary data length (in bytes) */
276 bin_len = (*ascii_data++ - ' ') & 077;
277 ascii_len--;
Jack Jansen72781191995-08-07 14:34:15 +0000278
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 /* Allocate the buffer */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200280 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000282 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000283
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000284 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
285 /* XXX is it really best to add NULs if there's no more data */
286 this_ch = (ascii_len > 0) ? *ascii_data : 0;
287 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
288 /*
289 ** Whitespace. Assume some spaces got eaten at
290 ** end-of-line. (We check this later)
291 */
292 this_ch = 0;
293 } else {
294 /* Check the character for legality
295 ** The 64 in stead of the expected 63 is because
296 ** there are a few uuencodes out there that use
297 ** '`' as zero instead of space.
298 */
299 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200300 state = PyModule_GetState(module);
301 if (state == NULL) {
302 return NULL;
303 }
304 PyErr_SetString(state->Error, "Illegal char");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 Py_DECREF(rv);
306 return NULL;
307 }
308 this_ch = (this_ch - ' ') & 077;
309 }
310 /*
311 ** Shift it in on the low end, and see if there's
312 ** a byte ready for output.
313 */
314 leftchar = (leftchar << 6) | (this_ch);
315 leftbits += 6;
316 if ( leftbits >= 8 ) {
317 leftbits -= 8;
318 *bin_data++ = (leftchar >> leftbits) & 0xff;
319 leftchar &= ((1 << leftbits) - 1);
320 bin_len--;
321 }
322 }
323 /*
324 ** Finally, check that if there's anything left on the line
325 ** that it's whitespace only.
326 */
327 while( ascii_len-- > 0 ) {
328 this_ch = *ascii_data++;
329 /* Extra '`' may be written as padding in some cases */
330 if ( this_ch != ' ' && this_ch != ' '+64 &&
331 this_ch != '\n' && this_ch != '\r' ) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200332 state = PyModule_GetState(module);
333 if (state == NULL) {
334 return NULL;
335 }
336 PyErr_SetString(state->Error, "Trailing garbage");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000337 Py_DECREF(rv);
338 return NULL;
339 }
340 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000342}
343
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200344/*[clinic input]
345binascii.b2a_uu
346
347 data: Py_buffer
348 /
Xiang Zhang13f1f422017-05-03 11:16:21 +0800349 *
350 backtick: bool(accept={int}) = False
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200351
352Uuencode line of data.
353[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000354
Jack Jansen72781191995-08-07 14:34:15 +0000355static PyObject *
Xiang Zhang13f1f422017-05-03 11:16:21 +0800356binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
357/*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
Jack Jansen72781191995-08-07 14:34:15 +0000358{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200359 unsigned char *ascii_data;
360 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 int leftbits = 0;
362 unsigned char this_ch;
363 unsigned int leftchar = 0;
Marcel Plch33e71e02019-05-22 13:51:26 +0200364 binascii_state *state;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200365 Py_ssize_t bin_len, out_len;
366 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000367
Victor Stinnereaaaf132015-10-13 10:51:47 +0200368 _PyBytesWriter_Init(&writer);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200369 bin_data = data->buf;
370 bin_len = data->len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 if ( bin_len > 45 ) {
372 /* The 45 is a limit that appears in all uuencode's */
Marcel Plch33e71e02019-05-22 13:51:26 +0200373 state = PyModule_GetState(module);
374 if (state == NULL) {
375 return NULL;
376 }
377 PyErr_SetString(state->Error, "At most 45 bytes at once");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 return NULL;
379 }
Jack Jansen72781191995-08-07 14:34:15 +0000380
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 /* We're lazy and allocate to much (fixed up later) */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200382 out_len = 2 + (bin_len + 2) / 3 * 4;
383 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
384 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000385 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 /* Store the length */
Xiang Zhang13f1f422017-05-03 11:16:21 +0800388 if (backtick && !bin_len)
389 *ascii_data++ = '`';
390 else
Segev Finer679b5662017-07-27 01:17:57 +0300391 *ascii_data++ = ' ' + (unsigned char)bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
394 /* Shift the data (or padding) into our buffer */
395 if ( bin_len > 0 ) /* Data */
396 leftchar = (leftchar << 8) | *bin_data;
397 else /* Padding */
398 leftchar <<= 8;
399 leftbits += 8;
Jack Jansen72781191995-08-07 14:34:15 +0000400
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000401 /* See if there are 6-bit groups ready */
402 while ( leftbits >= 6 ) {
403 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
404 leftbits -= 6;
Xiang Zhang13f1f422017-05-03 11:16:21 +0800405 if (backtick && !this_ch)
406 *ascii_data++ = '`';
407 else
408 *ascii_data++ = this_ch + ' ';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000409 }
410 }
411 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000412
Victor Stinnereaaaf132015-10-13 10:51:47 +0200413 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen72781191995-08-07 14:34:15 +0000414}
415
Guido van Rossum2db4f471999-10-19 19:05:14 +0000416
417static int
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200418binascii_find_valid(const unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000419{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000420 /* Finds & returns the (num+1)th
421 ** valid character for base64, or -1 if none.
422 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000424 int ret = -1;
425 unsigned char c, b64val;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000426
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000427 while ((slen > 0) && (ret == -1)) {
428 c = *s;
429 b64val = table_a2b_base64[c & 0x7f];
430 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
431 if (num == 0)
432 ret = *s;
433 num--;
434 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000435
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000436 s++;
437 slen--;
438 }
439 return ret;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000440}
441
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200442/*[clinic input]
443binascii.a2b_base64
444
Serhiy Storchaka12785612014-01-25 11:49:49 +0200445 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200446 /
447
448Decode a line of base64 data.
449[clinic start generated code]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000450
451static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300452binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
453/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000454{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200455 const unsigned char *ascii_data;
456 unsigned char *bin_data;
Tal Einat1fba2ff2018-09-28 08:57:22 +0300457 unsigned char *bin_data_start;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000458 int leftbits = 0;
459 unsigned char this_ch;
460 unsigned int leftchar = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 Py_ssize_t ascii_len, bin_len;
462 int quad_pos = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200463 _PyBytesWriter writer;
Marcel Plch33e71e02019-05-22 13:51:26 +0200464 binascii_state *state;
Tim Peters934c1a12002-07-02 22:24:50 +0000465
Serhiy Storchaka12785612014-01-25 11:49:49 +0200466 ascii_data = data->buf;
467 ascii_len = data->len;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000468
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000469 assert(ascii_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000470
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200471 if (ascii_len > PY_SSIZE_T_MAX - 3)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000475
Victor Stinnereaaaf132015-10-13 10:51:47 +0200476 _PyBytesWriter_Init(&writer);
477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000478 /* Allocate the buffer */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200479 bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
480 if (bin_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000481 return NULL;
Tal Einat1fba2ff2018-09-28 08:57:22 +0300482 bin_data_start = bin_data;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000483
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000484 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
485 this_ch = *ascii_data;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000486
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 if (this_ch > 0x7f ||
488 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
489 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000491 /* Check for pad sequences and ignore
492 ** the invalid ones.
493 */
494 if (this_ch == BASE64_PAD) {
495 if ( (quad_pos < 2) ||
496 ((quad_pos == 2) &&
497 (binascii_find_valid(ascii_data, ascii_len, 1)
498 != BASE64_PAD)) )
499 {
500 continue;
501 }
502 else {
503 /* A pad sequence means no more input.
504 ** We've already interpreted the data
505 ** from the quad at this point.
506 */
507 leftbits = 0;
508 break;
509 }
510 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000511
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000512 this_ch = table_a2b_base64[*ascii_data];
513 if ( this_ch == (unsigned char) -1 )
514 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000515
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000516 /*
517 ** Shift it in on the low end, and see if there's
518 ** a byte ready for output.
519 */
520 quad_pos = (quad_pos + 1) & 0x03;
521 leftchar = (leftchar << 6) | (this_ch);
522 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 if ( leftbits >= 8 ) {
525 leftbits -= 8;
526 *bin_data++ = (leftchar >> leftbits) & 0xff;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000527 leftchar &= ((1 << leftbits) - 1);
528 }
529 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000530
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 if (leftbits != 0) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200532 state = PyModule_GetState(module);
533 if (state == NULL) {
534 return NULL;
535 }
Tal Einat1b85c712018-06-10 10:01:50 +0300536 if (leftbits == 6) {
537 /*
538 ** There is exactly one extra valid, non-padding, base64 character.
539 ** This is an invalid length, as there is no possible input that
540 ** could encoded into such a base64 string.
541 */
Marcel Plch33e71e02019-05-22 13:51:26 +0200542 PyErr_Format(state->Error,
Tal Einat1fba2ff2018-09-28 08:57:22 +0300543 "Invalid base64-encoded string: "
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +0200544 "number of data characters (%zd) cannot be 1 more "
Tal Einat1fba2ff2018-09-28 08:57:22 +0300545 "than a multiple of 4",
546 (bin_data - bin_data_start) / 3 * 4 + 1);
Tal Einat1b85c712018-06-10 10:01:50 +0300547 } else {
Marcel Plch33e71e02019-05-22 13:51:26 +0200548 PyErr_SetString(state->Error, "Incorrect padding");
Tal Einat1b85c712018-06-10 10:01:50 +0300549 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200550 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 return NULL;
552 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000553
Victor Stinnereaaaf132015-10-13 10:51:47 +0200554 return _PyBytesWriter_Finish(&writer, bin_data);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000555}
556
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200557
558/*[clinic input]
559binascii.b2a_base64
560
561 data: Py_buffer
Xiang Zhang1374dbb2017-05-01 13:12:07 +0800562 /
Victor Stinnere84c9762015-10-11 11:01:02 +0200563 *
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200564 newline: bool(accept={int}) = True
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200565
566Base64-code line of data.
567[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000568
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000569static PyObject *
Serhiy Storchaka2954f832016-07-07 18:20:03 +0300570binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
Xiang Zhang1374dbb2017-05-01 13:12:07 +0800571/*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000572{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200573 unsigned char *ascii_data;
574 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 int leftbits = 0;
576 unsigned char this_ch;
577 unsigned int leftchar = 0;
Victor Stinnere84c9762015-10-11 11:01:02 +0200578 Py_ssize_t bin_len, out_len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200579 _PyBytesWriter writer;
Marcel Plch33e71e02019-05-22 13:51:26 +0200580 binascii_state *state;
Tim Peters934c1a12002-07-02 22:24:50 +0000581
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200582 bin_data = data->buf;
583 bin_len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200584 _PyBytesWriter_Init(&writer);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000586 assert(bin_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000588 if ( bin_len > BASE64_MAXBIN ) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200589 state = PyModule_GetState(module);
590 if (state == NULL) {
591 return NULL;
592 }
593 PyErr_SetString(state->Error, "Too much data for base64 line");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000594 return NULL;
595 }
Tim Peters934c1a12002-07-02 22:24:50 +0000596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000597 /* We're lazy and allocate too much (fixed up later).
Victor Stinnere84c9762015-10-11 11:01:02 +0200598 "+2" leaves room for up to two pad characters.
599 Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
600 out_len = bin_len*2 + 2;
601 if (newline)
602 out_len++;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200603 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
604 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 return NULL;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000607 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
608 /* Shift the data into our buffer */
609 leftchar = (leftchar << 8) | *bin_data;
610 leftbits += 8;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000611
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000612 /* See if there are 6-bit groups ready */
613 while ( leftbits >= 6 ) {
614 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
615 leftbits -= 6;
616 *ascii_data++ = table_b2a_base64[this_ch];
617 }
618 }
619 if ( leftbits == 2 ) {
620 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
621 *ascii_data++ = BASE64_PAD;
622 *ascii_data++ = BASE64_PAD;
623 } else if ( leftbits == 4 ) {
624 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
625 *ascii_data++ = BASE64_PAD;
626 }
Victor Stinnere84c9762015-10-11 11:01:02 +0200627 if (newline)
628 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000629
Victor Stinnereaaaf132015-10-13 10:51:47 +0200630 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000631}
632
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200633/*[clinic input]
634binascii.a2b_hqx
635
Serhiy Storchaka12785612014-01-25 11:49:49 +0200636 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200637 /
638
639Decode .hqx coding.
640[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000641
642static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300643binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data)
644/*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/
Jack Jansen72781191995-08-07 14:34:15 +0000645{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200646 const unsigned char *ascii_data;
647 unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000648 int leftbits = 0;
649 unsigned char this_ch;
650 unsigned int leftchar = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200651 PyObject *res;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 Py_ssize_t len;
653 int done = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200654 _PyBytesWriter writer;
Marcel Plch33e71e02019-05-22 13:51:26 +0200655 binascii_state *state;
Tim Peters934c1a12002-07-02 22:24:50 +0000656
Serhiy Storchaka12785612014-01-25 11:49:49 +0200657 ascii_data = data->buf;
658 len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200659 _PyBytesWriter_Init(&writer);
Jack Jansen72781191995-08-07 14:34:15 +0000660
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000661 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000662
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200663 if (len > PY_SSIZE_T_MAX - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000664 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000666 /* Allocate a string that is too big (fixed later)
667 Add two to the initial length to prevent interning which
668 would preclude subsequent resizing. */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200669 bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
670 if (bin_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000671 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000673 for( ; len > 0 ; len--, ascii_data++ ) {
674 /* Get the byte and look it up */
675 this_ch = table_a2b_hqx[*ascii_data];
676 if ( this_ch == SKIP )
677 continue;
678 if ( this_ch == FAIL ) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200679 state = PyModule_GetState(module);
680 if (state == NULL) {
681 return NULL;
682 }
683 PyErr_SetString(state->Error, "Illegal char");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200684 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000685 return NULL;
686 }
687 if ( this_ch == DONE ) {
688 /* The terminating colon */
689 done = 1;
690 break;
691 }
Jack Jansen72781191995-08-07 14:34:15 +0000692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000693 /* Shift it into the buffer and see if any bytes are ready */
694 leftchar = (leftchar << 6) | (this_ch);
695 leftbits += 6;
696 if ( leftbits >= 8 ) {
697 leftbits -= 8;
698 *bin_data++ = (leftchar >> leftbits) & 0xff;
699 leftchar &= ((1 << leftbits) - 1);
700 }
701 }
Tim Peters934c1a12002-07-02 22:24:50 +0000702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000703 if ( leftbits && !done ) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200704 state = PyModule_GetState(module);
705 if (state == NULL) {
706 return NULL;
707 }
708 PyErr_SetString(state->Incomplete,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000709 "String has incomplete number of bytes");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200710 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000711 return NULL;
712 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000713
Victor Stinnereaaaf132015-10-13 10:51:47 +0200714 res = _PyBytesWriter_Finish(&writer, bin_data);
715 if (res == NULL)
716 return NULL;
717 return Py_BuildValue("Ni", res, done);
Jack Jansen72781191995-08-07 14:34:15 +0000718}
719
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200720
721/*[clinic input]
722binascii.rlecode_hqx
723
724 data: Py_buffer
725 /
726
727Binhex RLE-code binary data.
728[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000729
730static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300731binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data)
732/*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/
Jack Jansen72781191995-08-07 14:34:15 +0000733{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200734 const unsigned char *in_data;
735 unsigned char *out_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000736 unsigned char ch;
737 Py_ssize_t in, inend, len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200738 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000739
Victor Stinnereaaaf132015-10-13 10:51:47 +0200740 _PyBytesWriter_Init(&writer);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200741 in_data = data->buf;
742 len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000743
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000745
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200746 if (len > PY_SSIZE_T_MAX / 2 - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000747 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000748
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000749 /* Worst case: output is twice as big as input (fixed later) */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200750 out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
751 if (out_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 return NULL;
Tim Peters934c1a12002-07-02 22:24:50 +0000753
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000754 for( in=0; in<len; in++) {
755 ch = in_data[in];
756 if ( ch == RUNCHAR ) {
757 /* RUNCHAR. Escape it. */
758 *out_data++ = RUNCHAR;
759 *out_data++ = 0;
760 } else {
761 /* Check how many following are the same */
762 for(inend=in+1;
763 inend<len && in_data[inend] == ch &&
764 inend < in+255;
765 inend++) ;
766 if ( inend - in > 3 ) {
767 /* More than 3 in a row. Output RLE. */
768 *out_data++ = ch;
769 *out_data++ = RUNCHAR;
Antoine Pitrou40455752010-08-15 18:51:10 +0000770 *out_data++ = (unsigned char) (inend-in);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000771 in = inend-1;
772 } else {
773 /* Less than 3. Output the byte itself */
774 *out_data++ = ch;
775 }
776 }
777 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200778
779 return _PyBytesWriter_Finish(&writer, out_data);
Jack Jansen72781191995-08-07 14:34:15 +0000780}
781
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200782
783/*[clinic input]
784binascii.b2a_hqx
785
786 data: Py_buffer
787 /
788
789Encode .hqx data.
790[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000791
Jack Jansen72781191995-08-07 14:34:15 +0000792static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300793binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data)
794/*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/
Jack Jansen72781191995-08-07 14:34:15 +0000795{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200796 unsigned char *ascii_data;
797 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 int leftbits = 0;
799 unsigned char this_ch;
800 unsigned int leftchar = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 Py_ssize_t len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200802 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000803
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200804 bin_data = data->buf;
805 len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200806 _PyBytesWriter_Init(&writer);
Jack Jansen72781191995-08-07 14:34:15 +0000807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000809
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200810 if (len > PY_SSIZE_T_MAX / 2 - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000812
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000813 /* Allocate a buffer that is at least large enough */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200814 ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
815 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000816 return NULL;
Tim Peters934c1a12002-07-02 22:24:50 +0000817
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000818 for( ; len > 0 ; len--, bin_data++ ) {
819 /* Shift into our buffer, and output any 6bits ready */
820 leftchar = (leftchar << 8) | *bin_data;
821 leftbits += 8;
822 while ( leftbits >= 6 ) {
823 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
824 leftbits -= 6;
825 *ascii_data++ = table_b2a_hqx[this_ch];
826 }
827 }
828 /* Output a possible runt byte */
829 if ( leftbits ) {
830 leftchar <<= (6-leftbits);
831 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
832 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200833
834 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen72781191995-08-07 14:34:15 +0000835}
836
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200837
838/*[clinic input]
839binascii.rledecode_hqx
840
841 data: Py_buffer
842 /
843
844Decode hexbin RLE-coded string.
845[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000846
Jack Jansen72781191995-08-07 14:34:15 +0000847static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300848binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data)
849/*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/
Jack Jansen72781191995-08-07 14:34:15 +0000850{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200851 const unsigned char *in_data;
852 unsigned char *out_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000853 unsigned char in_byte, in_repeat;
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200854 Py_ssize_t in_len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200855 _PyBytesWriter writer;
Jack Jansen72781191995-08-07 14:34:15 +0000856
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200857 in_data = data->buf;
858 in_len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200859 _PyBytesWriter_Init(&writer);
Marcel Plch33e71e02019-05-22 13:51:26 +0200860 binascii_state *state;
Jack Jansen72781191995-08-07 14:34:15 +0000861
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 assert(in_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000863
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000864 /* Empty string is a special case */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200865 if ( in_len == 0 )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000866 return PyBytes_FromStringAndSize("", 0);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200867 else if (in_len > PY_SSIZE_T_MAX / 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 return PyErr_NoMemory();
Jack Jansen72781191995-08-07 14:34:15 +0000869
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 /* Allocate a buffer of reasonable size. Resized when needed */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200871 out_data = _PyBytesWriter_Alloc(&writer, in_len);
Victor Stinnereaaaf132015-10-13 10:51:47 +0200872 if (out_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000873 return NULL;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200874
875 /* Use overallocation */
876 writer.overallocate = 1;
Jack Jansen72781191995-08-07 14:34:15 +0000877
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000878 /*
879 ** We need two macros here to get/put bytes and handle
880 ** end-of-buffer for input and output strings.
881 */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200882#define INBYTE(b) \
883 do { \
884 if ( --in_len < 0 ) { \
Marcel Plch33e71e02019-05-22 13:51:26 +0200885 state = PyModule_GetState(module); \
886 if (state == NULL) { \
887 return NULL; \
888 } \
889 PyErr_SetString(state->Incomplete, ""); \
Victor Stinnereaaaf132015-10-13 10:51:47 +0200890 goto error; \
891 } \
892 b = *in_data++; \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000893 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000894
Victor Stinnereaaaf132015-10-13 10:51:47 +0200895 /*
896 ** Handle first byte separately (since we have to get angry
897 ** in case of an orphaned RLE code).
898 */
899 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000900
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000901 if (in_byte == RUNCHAR) {
902 INBYTE(in_repeat);
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200903 /* only 1 byte will be written, but 2 bytes were preallocated:
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700904 subtract 1 byte to prevent overallocation */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200905 writer.min_size--;
906
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000907 if (in_repeat != 0) {
908 /* Note Error, not Incomplete (which is at the end
909 ** of the string only). This is a programmer error.
910 */
Marcel Plch33e71e02019-05-22 13:51:26 +0200911 state = PyModule_GetState(module);
912 if (state == NULL) {
913 return NULL;
914 }
915 PyErr_SetString(state->Error, "Orphaned RLE code at start");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200916 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 }
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200918 *out_data++ = RUNCHAR;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000919 } else {
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200920 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000921 }
Tim Peters934c1a12002-07-02 22:24:50 +0000922
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000923 while( in_len > 0 ) {
924 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000925
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 if (in_byte == RUNCHAR) {
927 INBYTE(in_repeat);
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200928 /* only 1 byte will be written, but 2 bytes were preallocated:
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700929 subtract 1 byte to prevent overallocation */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200930 writer.min_size--;
931
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000932 if ( in_repeat == 0 ) {
933 /* Just an escaped RUNCHAR value */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200934 *out_data++ = RUNCHAR;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000935 } else {
936 /* Pick up value and output a sequence of it */
937 in_byte = out_data[-1];
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200938
939 /* enlarge the buffer if needed */
940 if (in_repeat > 1) {
941 /* -1 because we already preallocated 1 byte */
942 out_data = _PyBytesWriter_Prepare(&writer, out_data,
943 in_repeat - 1);
944 if (out_data == NULL)
945 goto error;
946 }
947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000948 while ( --in_repeat > 0 )
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200949 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000950 }
951 } else {
952 /* Normal byte */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200953 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 }
955 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200956 return _PyBytesWriter_Finish(&writer, out_data);
957
958error:
959 _PyBytesWriter_Dealloc(&writer);
960 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000961}
962
Jack Jansen72781191995-08-07 14:34:15 +0000963
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200964/*[clinic input]
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300965binascii.crc_hqx -> unsigned_int
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200966
967 data: Py_buffer
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300968 crc: unsigned_int(bitwise=True)
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200969 /
970
Martin Panter3310e142016-12-24 07:36:44 +0000971Compute CRC-CCITT incrementally.
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200972[clinic start generated code]*/
973
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300974static unsigned int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300975binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
Martin Panter3310e142016-12-24 07:36:44 +0000976/*[clinic end generated code: output=8ec2a78590d19170 input=f18240ff8c705b79]*/
Jack Jansen72781191995-08-07 14:34:15 +0000977{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200978 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000979 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000980
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300981 crc &= 0xffff;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200982 bin_data = data->buf;
983 len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000984
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000985 while(len-- > 0) {
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300986 crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000987 }
Jack Jansen72781191995-08-07 14:34:15 +0000988
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300989 return crc;
Jack Jansen72781191995-08-07 14:34:15 +0000990}
991
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200992#ifndef USE_ZLIB_CRC32
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000993/* Crc - 32 BIT ANSI X3.66 CRC checksum files
994 Also known as: ISO 3307
995**********************************************************************|
996* *|
997* Demonstration program to compute the 32-bit CRC used as the frame *|
998* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
999* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
1000* protocol). The 32-bit FCS was added via the Federal Register, *|
1001* 1 June 1982, p.23798. I presume but don't know for certain that *|
1002* this polynomial is or will be included in CCITT V.41, which *|
1003* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
1004* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
1005* errors by a factor of 10^-5 over 16-bit FCS. *|
1006* *|
1007**********************************************************************|
1008
1009 Copyright (C) 1986 Gary S. Brown. You may use this program, or
1010 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001011
Tim Peters934c1a12002-07-02 22:24:50 +00001012 First, the polynomial itself and its table of feedback terms. The
1013 polynomial is
1014 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
1015 Note that we take it "backwards" and put the highest-order term in
1016 the lowest-order bit. The X^32 term is "implied"; the LSB is the
1017 X^31 term, etc. The X^0 term (usually shown as "+1") results in
1018 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001019
Tim Peters934c1a12002-07-02 22:24:50 +00001020 Note that the usual hardware shift register implementation, which
1021 is what we're using (we're merely optimizing it by doing eight-bit
1022 chunks at a time) shifts bits into the lowest-order term. In our
1023 implementation, that means shifting towards the right. Why do we
1024 do it this way? Because the calculated CRC must be transmitted in
1025 order from highest-order term to lowest-order term. UARTs transmit
1026 characters in order from LSB to MSB. By storing the CRC this way,
1027 we hand it to the UART in the order low-byte to high-byte; the UART
1028 sends each low-bit to high-bit; and the result is transmission bit
1029 by bit from highest- to lowest-order term without requiring any bit
1030 shuffling on our part. Reception works similarly.
1031
1032 The feedback terms table consists of 256, 32-bit entries. Notes:
1033
1034 1. The table can be generated at runtime if desired; code to do so
1035 is shown later. It might not be obvious, but the feedback
1036 terms simply represent the results of eight shift/xor opera-
1037 tions for all combinations of data and CRC register values.
1038
1039 2. The CRC accumulation logic is the same for all CRC polynomials,
1040 be they sixteen or thirty-two bits wide. You simply choose the
1041 appropriate table. Alternatively, because the table can be
1042 generated at runtime, you can start by generating the table for
1043 the polynomial in question and use exactly the same "updcrc",
1044 if your application needn't simultaneously handle two CRC
1045 polynomials. (Note, however, that XMODEM is strange.)
1046
1047 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
1048 of course, 32-bit entries work OK if the high 16 bits are zero.
1049
1050 4. The values must be right-shifted by eight bits by the "updcrc"
1051 logic; the shift must be unsigned (bring in zeroes). On some
1052 hardware you could probably optimize the shift in assembler by
1053 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001054********************************************************************/
1055
/* 256-entry CRC-32 lookup table, indexed by one byte value.  Only compiled
   when USE_ZLIB_CRC32 is not defined; binascii_crc32_impl indexes it with
   (crc ^ byte) & 0xff. */
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001056static const unsigned int crc_32_tab[256] = {
Gregory P. Smith3c0e4d22008-03-25 07:51:12 +000010570x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
10580x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
10590xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
10600x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
10610x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
10620x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
10630xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
10640xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
10650x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
10660x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
10670xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
10680xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
10690x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
10700x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
10710x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
10720xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
10730x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
10740x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
10750x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
10760xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
10770x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
10780x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
10790xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
10800xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
10810x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
10820x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
10830x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
10840x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
10850xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
10860x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
10870x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
10880x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
10890xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
10900xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
10910x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
10920x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
10930xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
10940xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
10950x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
10960x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
10970x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
10980xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
10990x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
11000x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
11010x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
11020xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
11030x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
11040x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
11050xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
11060xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
11070x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
11080x2d02ef8dU
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001109};
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001110#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001111
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001112/*[clinic input]
1113binascii.crc32 -> unsigned_int
1114
1115 data: Py_buffer
1116 crc: unsigned_int(bitwise=True) = 0
1117 /
1118
1119Compute CRC-32 incrementally.
1120[clinic start generated code]*/
1121
1122static unsigned int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001123binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
1124/*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001125
1126#ifdef USE_ZLIB_CRC32
1127/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
1128{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001129 const Byte *buf;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001130 Py_ssize_t len;
1131 int signed_val;
1132
1133 buf = (Byte*)data->buf;
1134 len = data->len;
1135 signed_val = crc32(crc, buf, len);
1136 return (unsigned int)signed_val & 0xffffffffU;
1137}
1138#else /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001139{ /* By Jim Ahlstrom; All rights transferred to CNRI */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001140 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 Py_ssize_t len;
1142 unsigned int result;
Tim Peters934c1a12002-07-02 22:24:50 +00001143
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001144 bin_data = data->buf;
1145 len = data->len;
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001146
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 crc = ~ crc;
1148 while (len-- > 0) {
1149 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1150 /* Note: (crc >> 8) MUST zero fill on left */
1151 }
Tim Petersa98011c2002-07-02 20:20:08 +00001152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 result = (crc ^ 0xFFFFFFFF);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001154 return result & 0xffffffff;
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001155}
Christian Heimes1dc54002008-03-24 02:19:29 +00001156#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001157
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001158/*[clinic input]
1159binascii.b2a_hex
1160
1161 data: Py_buffer
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001162 sep: object = NULL
1163 An optional single character or byte to separate hex bytes.
1164 bytes_per_sep: int = 1
1165 How many bytes between separators. Positive values count from the
1166 right, negative values count from the left.
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001167
1168Hexadecimal representation of binary data.
1169
1170The return value is a bytes object. This function is also
1171available as "hexlify()".
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001172
1173Example:
1174>>> binascii.b2a_hex(b'\xb9\x01\xef')
1175b'b901ef'
1176>>> binascii.hexlify(b'\xb9\x01\xef', ':')
1177b'b9:01:ef'
1178>>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
1179b'b9_01ef'
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001180[clinic start generated code]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001181
1182static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001183binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1184 int bytes_per_sep)
1185/*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001186{
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001187 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1188 sep, bytes_per_sep);
Barry Warsawe977c212000-08-15 06:07:13 +00001189}
1190
Zachary Wareb176d402015-01-20 13:59:46 -06001191/*[clinic input]
1192binascii.hexlify = binascii.b2a_hex
1193
1194Hexadecimal representation of binary data.
1195
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001196The return value is a bytes object. This function is also
1197available as "b2a_hex()".
Zachary Wareb176d402015-01-20 13:59:46 -06001198[clinic start generated code]*/
1199
1200static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001201binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1202 int bytes_per_sep)
1203/*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
Zachary Wareb176d402015-01-20 13:59:46 -06001204{
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001205 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1206 sep, bytes_per_sep);
Zachary Wareb176d402015-01-20 13:59:46 -06001207}
Barry Warsawe977c212000-08-15 06:07:13 +00001208
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001209/*[clinic input]
1210binascii.a2b_hex
1211
1212 hexstr: ascii_buffer
1213 /
1214
1215Binary data of hexadecimal representation.
1216
1217hexstr must contain an even number of hex digits (upper or lower case).
1218This function is also available as "unhexlify()".
1219[clinic start generated code]*/
1220
Barry Warsawe977c212000-08-15 06:07:13 +00001221static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001222binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
1223/*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001224{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001225 const char* argbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 Py_ssize_t arglen;
1227 PyObject *retval;
1228 char* retbuf;
1229 Py_ssize_t i, j;
Marcel Plch33e71e02019-05-22 13:51:26 +02001230 binascii_state *state;
Barry Warsawe977c212000-08-15 06:07:13 +00001231
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001232 argbuf = hexstr->buf;
1233 arglen = hexstr->len;
Barry Warsawe977c212000-08-15 06:07:13 +00001234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 assert(arglen >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001237 /* XXX What should we do about strings with an odd length? Should
1238 * we add an implicit leading zero, or a trailing zero? For now,
1239 * raise an exception.
1240 */
1241 if (arglen % 2) {
Marcel Plch33e71e02019-05-22 13:51:26 +02001242 state = PyModule_GetState(module);
1243 if (state == NULL) {
1244 return NULL;
1245 }
1246 PyErr_SetString(state->Error, "Odd-length string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001247 return NULL;
1248 }
Barry Warsawe977c212000-08-15 06:07:13 +00001249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001251 if (!retval)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001252 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001253 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001255 for (i=j=0; i < arglen; i += 2) {
Sergey Fedoseev6b5df902018-02-27 01:35:41 +05001256 unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
1257 unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
1258 if (top >= 16 || bot >= 16) {
Marcel Plch33e71e02019-05-22 13:51:26 +02001259 state = PyModule_GetState(module);
1260 if (state == NULL) {
1261 return NULL;
1262 }
1263 PyErr_SetString(state->Error,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001264 "Non-hexadecimal digit found");
1265 goto finally;
1266 }
1267 retbuf[j++] = (top << 4) + bot;
1268 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001269 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +00001270
1271 finally:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272 Py_DECREF(retval);
1273 return NULL;
Barry Warsawe977c212000-08-15 06:07:13 +00001274}
1275
Zachary Wareb176d402015-01-20 13:59:46 -06001276/*[clinic input]
1277binascii.unhexlify = binascii.a2b_hex
1278
1279Binary data of hexadecimal representation.
1280
1281hexstr must contain an even number of hex digits (upper or lower case).
1282[clinic start generated code]*/
1283
1284static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001285binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
1286/*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
Zachary Wareb176d402015-01-20 13:59:46 -06001287{
1288 return binascii_a2b_hex_impl(module, hexstr);
1289}
1290
/* Line-length limit used by b2a_qp(): a soft line break is inserted
 * whenever appending the next output item would bring the current
 * line length to this value or beyond. */
#define MAXLINESIZE 76
1292
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001293
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001294/*[clinic input]
1295binascii.a2b_qp
1296
Serhiy Storchaka12785612014-01-25 11:49:49 +02001297 data: ascii_buffer
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001298 header: bool(accept={int}) = False
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001299
1300Decode a string of qp-encoded data.
1301[clinic start generated code]*/
1302
1303static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001304binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001305/*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001306{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001307 Py_ssize_t in, out;
1308 char ch;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001309 const unsigned char *ascii_data;
1310 unsigned char *odata;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001311 Py_ssize_t datalen = 0;
1312 PyObject *rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001313
Serhiy Storchaka12785612014-01-25 11:49:49 +02001314 ascii_data = data->buf;
1315 datalen = data->len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001316
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 /* We allocate the output same size as input, this is overkill.
1318 * The previous implementation used calloc() so we'll zero out the
1319 * memory here too, since PyMem_Malloc() does not guarantee that.
1320 */
1321 odata = (unsigned char *) PyMem_Malloc(datalen);
1322 if (odata == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 PyErr_NoMemory();
1324 return NULL;
1325 }
1326 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001327
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 in = out = 0;
1329 while (in < datalen) {
Serhiy Storchaka12785612014-01-25 11:49:49 +02001330 if (ascii_data[in] == '=') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001331 in++;
1332 if (in >= datalen) break;
1333 /* Soft line breaks */
Serhiy Storchaka12785612014-01-25 11:49:49 +02001334 if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
1335 if (ascii_data[in] != '\n') {
1336 while (in < datalen && ascii_data[in] != '\n') in++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001337 }
1338 if (in < datalen) in++;
1339 }
Serhiy Storchaka12785612014-01-25 11:49:49 +02001340 else if (ascii_data[in] == '=') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001341 /* broken case from broken python qp */
1342 odata[out++] = '=';
1343 in++;
1344 }
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001345 else if ((in + 1 < datalen) &&
1346 ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
Serhiy Storchaka12785612014-01-25 11:49:49 +02001347 (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1348 (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1349 ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1350 (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1351 (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 /* hexval */
Sergey Fedoseev6b5df902018-02-27 01:35:41 +05001353 ch = _PyLong_DigitValue[ascii_data[in]] << 4;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001354 in++;
Sergey Fedoseev6b5df902018-02-27 01:35:41 +05001355 ch |= _PyLong_DigitValue[ascii_data[in]];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001356 in++;
1357 odata[out++] = ch;
1358 }
1359 else {
1360 odata[out++] = '=';
1361 }
1362 }
Serhiy Storchaka12785612014-01-25 11:49:49 +02001363 else if (header && ascii_data[in] == '_') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 odata[out++] = ' ';
1365 in++;
1366 }
1367 else {
Serhiy Storchaka12785612014-01-25 11:49:49 +02001368 odata[out] = ascii_data[in];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 in++;
1370 out++;
1371 }
1372 }
1373 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 PyMem_Free(odata);
1375 return NULL;
1376 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001377 PyMem_Free(odata);
1378 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001379}
1380
/* Write the two uppercase hexadecimal digits of `ch` to s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.  Always
 * returns 0. */
static int
to_hex(unsigned char ch, unsigned char *s)
{
    static const char hexdigits[] = "0123456789ABCDEF";

    s[0] = hexdigits[(ch >> 4) & 0x0F];
    s[1] = hexdigits[ch & 0x0F];
    return 0;
}
1391
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001392/* XXX: This is ridiculously complicated to be backward compatible
1393 * (mostly) with the quopri module. It doesn't re-create the quopri
1394 * module bug where text ending in CRLF has the CR encoded */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001395
1396/*[clinic input]
1397binascii.b2a_qp
1398
1399 data: Py_buffer
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001400 quotetabs: bool(accept={int}) = False
1401 istext: bool(accept={int}) = True
1402 header: bool(accept={int}) = False
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001403
1404Encode a string using quoted-printable encoding.
1405
1406On encoding, when istext is set, newlines are not encoded, and white
1407space at end of lines is. When istext is not set, \r and \n (CR/LF)
1408are both encoded. When quotetabs is set, space and tabs are encoded.
1409[clinic start generated code]*/
1410
1411static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001412binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
Larry Hastings89964c42015-04-14 18:07:59 -04001413 int istext, int header)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001414/*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001415{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001416 Py_ssize_t in, out;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001417 const unsigned char *databuf;
1418 unsigned char *odata;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 Py_ssize_t datalen = 0, odatalen = 0;
1420 PyObject *rv;
1421 unsigned int linelen = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 unsigned char ch;
1423 int crlf = 0;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001424 const unsigned char *p;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001425
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001426 databuf = data->buf;
1427 datalen = data->len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 /* See if this string is using CRLF line ends */
1430 /* XXX: this function has the side effect of converting all of
1431 * the end of lines to be the same depending on this detection
1432 * here */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001433 p = (const unsigned char *) memchr(databuf, '\n', datalen);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001434 if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 crlf = 1;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001436
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001437 /* First, scan to see how many characters need to be encoded */
1438 in = 0;
1439 while (in < datalen) {
Benjamin Peterson4f976512016-08-13 18:33:33 -07001440 Py_ssize_t delta = 0;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001441 if ((databuf[in] > 126) ||
1442 (databuf[in] == '=') ||
1443 (header && databuf[in] == '_') ||
1444 ((databuf[in] == '.') && (linelen == 0) &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001445 (in + 1 == datalen || databuf[in+1] == '\n' ||
1446 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001447 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1448 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1449 ((databuf[in] < 33) &&
1450 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1451 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001452 {
1453 if ((linelen + 3) >= MAXLINESIZE) {
1454 linelen = 0;
1455 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001456 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001458 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001459 }
1460 linelen += 3;
Benjamin Peterson4f976512016-08-13 18:33:33 -07001461 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 in++;
1463 }
1464 else {
1465 if (istext &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001466 ((databuf[in] == '\n') ||
1467 ((in+1 < datalen) && (databuf[in] == '\r') &&
1468 (databuf[in+1] == '\n'))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 {
1470 linelen = 0;
1471 /* Protect against whitespace on end of line */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001472 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
Benjamin Peterson4f976512016-08-13 18:33:33 -07001473 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001475 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001476 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001477 delta += 1;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001478 if (databuf[in] == '\r')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001479 in += 2;
1480 else
1481 in++;
1482 }
1483 else {
1484 if ((in + 1 != datalen) &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001485 (databuf[in+1] != '\n') &&
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001486 (linelen + 1) >= MAXLINESIZE) {
1487 linelen = 0;
1488 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001489 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001491 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 }
1493 linelen++;
Benjamin Peterson4f976512016-08-13 18:33:33 -07001494 delta++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 in++;
1496 }
1497 }
Benjamin Peterson4f976512016-08-13 18:33:33 -07001498 if (PY_SSIZE_T_MAX - delta < odatalen) {
Benjamin Peterson4f976512016-08-13 18:33:33 -07001499 PyErr_NoMemory();
1500 return NULL;
1501 }
1502 odatalen += delta;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001503 }
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001505 /* We allocate the output same size as input, this is overkill.
1506 * The previous implementation used calloc() so we'll zero out the
1507 * memory here too, since PyMem_Malloc() does not guarantee that.
1508 */
1509 odata = (unsigned char *) PyMem_Malloc(odatalen);
1510 if (odata == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 PyErr_NoMemory();
1512 return NULL;
1513 }
1514 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001515
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001516 in = out = linelen = 0;
1517 while (in < datalen) {
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001518 if ((databuf[in] > 126) ||
1519 (databuf[in] == '=') ||
1520 (header && databuf[in] == '_') ||
1521 ((databuf[in] == '.') && (linelen == 0) &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001522 (in + 1 == datalen || databuf[in+1] == '\n' ||
1523 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001524 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1525 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1526 ((databuf[in] < 33) &&
1527 (databuf[in] != '\r') && (databuf[in] != '\n') &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001528 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 {
1530 if ((linelen + 3 )>= MAXLINESIZE) {
1531 odata[out++] = '=';
1532 if (crlf) odata[out++] = '\r';
1533 odata[out++] = '\n';
1534 linelen = 0;
1535 }
1536 odata[out++] = '=';
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001537 to_hex(databuf[in], &odata[out]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 out += 2;
1539 in++;
1540 linelen += 3;
1541 }
1542 else {
1543 if (istext &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001544 ((databuf[in] == '\n') ||
1545 ((in+1 < datalen) && (databuf[in] == '\r') &&
1546 (databuf[in+1] == '\n'))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001547 {
1548 linelen = 0;
1549 /* Protect against whitespace on end of line */
1550 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1551 ch = odata[out-1];
1552 odata[out-1] = '=';
1553 to_hex(ch, &odata[out]);
1554 out += 2;
1555 }
Tim Peters934c1a12002-07-02 22:24:50 +00001556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001557 if (crlf) odata[out++] = '\r';
1558 odata[out++] = '\n';
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001559 if (databuf[in] == '\r')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 in += 2;
1561 else
1562 in++;
1563 }
1564 else {
1565 if ((in + 1 != datalen) &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001566 (databuf[in+1] != '\n') &&
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001567 (linelen + 1) >= MAXLINESIZE) {
1568 odata[out++] = '=';
1569 if (crlf) odata[out++] = '\r';
1570 odata[out++] = '\n';
1571 linelen = 0;
1572 }
1573 linelen++;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001574 if (header && databuf[in] == ' ') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 odata[out++] = '_';
1576 in++;
1577 }
1578 else {
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001579 odata[out++] = databuf[in++];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001580 }
1581 }
1582 }
1583 }
1584 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001585 PyMem_Free(odata);
1586 return NULL;
1587 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001588 PyMem_Free(odata);
1589 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001590}
Barry Warsawe977c212000-08-15 06:07:13 +00001591
Jack Jansen72781191995-08-07 14:34:15 +00001592/* List of functions defined in the module */
1593
1594static struct PyMethodDef binascii_module_methods[] = {
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001595 BINASCII_A2B_UU_METHODDEF
1596 BINASCII_B2A_UU_METHODDEF
1597 BINASCII_A2B_BASE64_METHODDEF
1598 BINASCII_B2A_BASE64_METHODDEF
1599 BINASCII_A2B_HQX_METHODDEF
1600 BINASCII_B2A_HQX_METHODDEF
1601 BINASCII_A2B_HEX_METHODDEF
1602 BINASCII_B2A_HEX_METHODDEF
Zachary Wareb176d402015-01-20 13:59:46 -06001603 BINASCII_HEXLIFY_METHODDEF
1604 BINASCII_UNHEXLIFY_METHODDEF
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001605 BINASCII_RLECODE_HQX_METHODDEF
1606 BINASCII_RLEDECODE_HQX_METHODDEF
1607 BINASCII_CRC_HQX_METHODDEF
1608 BINASCII_CRC32_METHODDEF
1609 BINASCII_A2B_QP_METHODDEF
1610 BINASCII_B2A_QP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001612};
1613
1614
Martin v. Löwis1a214512008-06-11 05:26:20 +00001615/* Initialization function for the module (*must* be called PyInit_binascii) */
/* Module docstring; referenced as the doc field of binasciimodule below. */
PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001617
Marcel Plch33e71e02019-05-22 13:51:26 +02001618static int
1619binascii_exec(PyObject *m) {
1620 int result;
1621 binascii_state *state = PyModule_GetState(m);
1622 if (state == NULL) {
1623 return -1;
1624 }
1625
1626 state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1627 if (state->Error == NULL) {
1628 return -1;
1629 }
1630 result = PyModule_AddObject(m, "Error", state->Error);
1631 if (result == -1) {
1632 return -1;
1633 }
1634
1635 state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1636 if (state->Incomplete == NULL) {
1637 return -1;
1638 }
1639 result = PyModule_AddObject(m, "Incomplete", state->Incomplete);
1640 if (result == -1) {
1641 return -1;
1642 }
1643
1644 return 0;
1645}
1646
1647static PyModuleDef_Slot binascii_slots[] = {
1648 {Py_mod_exec, binascii_exec},
1649 {0, NULL}
1650};
Martin v. Löwis1a214512008-06-11 05:26:20 +00001651
1652static struct PyModuleDef binasciimodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 PyModuleDef_HEAD_INIT,
1654 "binascii",
1655 doc_binascii,
Marcel Plch33e71e02019-05-22 13:51:26 +02001656 sizeof(binascii_state),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 binascii_module_methods,
Marcel Plch33e71e02019-05-22 13:51:26 +02001658 binascii_slots,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 NULL,
1660 NULL,
1661 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001662};
1663
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001664PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001665PyInit_binascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001666{
Marcel Plch33e71e02019-05-22 13:51:26 +02001667 return PyModuleDef_Init(&binasciimodule);
Jack Jansen72781191995-08-07 14:34:15 +00001668}