blob: c13bed6bbfdcaebbdcb993c62286463367263f84 [file] [log] [blame]
/*
** Routines to represent binary data in ASCII and vice-versa
**
** This module currently supports the following encodings:
** uuencode:
**      each line encodes 45 bytes (except possibly the last)
**      First char encodes (binary) length, rest data
**      each char encodes 6 bits, as follows:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**      short binary data is zero-extended (so the bits are always in the
**      right place), this does *not* reflect in the length.
** base64:
**      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
**      is done via a table.
**      Short binary data is filled (in ASCII) with '='.
** hqx:
**      File starts with introductory text, real data starts and ends
**      with colons.
**      Data consists of three similar parts: info, datafork, resourcefork.
**      Each part is protected (at the end) with a 16-bit crc
**      The binary data is run-length encoded, and then ascii-fied:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding is table-driven, see the code.
**      Short binary data results in the runt ascii-byte being output with
**      the bits in the right place.
**
** While I was reading dozens of programs that encode or decode the formats
** here (documentation? hihi:-) I have formulated Jansen's Observation:
**
**      Programs that encode binary data in ASCII are written in
**      such a style that they are as unreadable as possible. Devices used
**      include unnecessary global variables, burying important tables
**      in unrelated sourcefiles, putting functions in include files,
**      using seemingly-descriptive variable names for different purposes,
**      calls to empty subroutines and a host of others.
**
** I have attempted to break with this tradition, but I guess that that
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on rfc 1521 et al
** quoted-printable encoding specifies that non printable characters (anything
** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
** of the character.  It also specifies some other behavior to enable 8bit data
** in a mail message with little difficulty (maximum line sizes, protecting
** some cases of whitespace, etc).
**
** Brandon Long, September 2001.
*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
Gregory P. Smith9c6b9162015-04-26 00:42:13 +000059#include "pystrhex.h"
Christian Heimes1dc54002008-03-24 02:19:29 +000060#ifdef USE_ZLIB_CRC32
61#include "zlib.h"
62#endif
Jack Jansen72781191995-08-07 14:34:15 +000063
64static PyObject *Error;
65static PyObject *Incomplete;
66
/*
** hqx lookup table, ascii->binary.
**
** Each entry is either the 6-bit value (0x00..0x3F) carried by that ASCII
** character, or one of the sentinel codes below.  The sentinels are all
** above 0x3F so they can never collide with a data value.
*/

/* NOTE(review): RUNCHAR is not used in this part of the file; presumably it
 * is the run-length marker byte of the hqx RLE layer -- confirm at its users. */
#define RUNCHAR 0x90

#define DONE 0x7F   /* ':'  -- terminating colon, end of data        */
#define SKIP 0x7E   /* '\n' and '\r' -- ignored by the decoder       */
#define FAIL 0x7D   /* any character that is not part of the alphabet */

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};

/* hqx binary->ascii: the character at index i encodes the 6-bit value i.
 * This is the inverse of the data entries in table_a2b_hqx. */
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000130
/* base64 ascii->binary, indexed by a 7-bit ASCII code (128 entries).
 * Entries are the 6-bit value of the character, or -1 for characters that
 * are not part of the alphabet.  The pad character '=' maps to 0 rather
 * than -1, so callers must test for BASE64_PAD before consulting the table. */
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory.
 * The encoder sizes its output as 2*len + 3 at most, so this bound keeps
 * that computation within Py_ssize_t. */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/* base64 binary->ascii: the character at index i encodes the 6-bit value i. */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000149
150
151
/* 256-entry lookup table for the 16-bit CRC that protects each hqx part
 * (see the file header).  Entry 1 is 0x1021, i.e. the table is built for
 * the generator polynomial x^16 + x^12 + x^5 + 1. */
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
186
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200187/*[clinic input]
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200188module binascii
189[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300190/*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200191
192/*[python input]
193
194class ascii_buffer_converter(CConverter):
195 type = 'Py_buffer'
196 converter = 'ascii_buffer_converter'
197 impl_by_reference = True
Benjamin Petersonb62deac2014-01-26 10:41:58 -0500198 c_default = "{NULL, NULL}"
199
200 def cleanup(self):
201 name = self.name
202 return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"])
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200203
204[python start generated code]*/
Larry Hastings581ee362014-01-28 05:00:08 -0800205/*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200206
Antoine Pitrou08316762011-12-20 13:58:41 +0100207static int
208ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
209{
210 if (arg == NULL) {
211 PyBuffer_Release(buf);
212 return 1;
213 }
214 if (PyUnicode_Check(arg)) {
215 if (PyUnicode_READY(arg) < 0)
216 return 0;
217 if (!PyUnicode_IS_ASCII(arg)) {
218 PyErr_SetString(PyExc_ValueError,
219 "string argument should contain only ASCII characters");
220 return 0;
221 }
222 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
223 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
224 buf->len = PyUnicode_GET_LENGTH(arg);
225 buf->obj = NULL;
226 return 1;
227 }
228 if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
229 PyErr_Format(PyExc_TypeError,
230 "argument should be bytes, buffer or ASCII string, "
Berker Peksag3cd30c22015-02-15 00:31:00 +0200231 "not '%.100s'", Py_TYPE(arg)->tp_name);
Antoine Pitrou08316762011-12-20 13:58:41 +0100232 return 0;
233 }
234 if (!PyBuffer_IsContiguous(buf, 'C')) {
235 PyErr_Format(PyExc_TypeError,
236 "argument should be a contiguous buffer, "
Berker Peksag3cd30c22015-02-15 00:31:00 +0200237 "not '%.100s'", Py_TYPE(arg)->tp_name);
Antoine Pitrou08316762011-12-20 13:58:41 +0100238 PyBuffer_Release(buf);
239 return 0;
240 }
241 return Py_CLEANUP_SUPPORTED;
242}
243
Larry Hastingsf256c222014-01-25 21:30:37 -0800244#include "clinic/binascii.c.h"
Antoine Pitrou08316762011-12-20 13:58:41 +0100245
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200246/*[clinic input]
247binascii.a2b_uu
248
Serhiy Storchaka12785612014-01-25 11:49:49 +0200249 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200250 /
251
252Decode a line of uuencoded data.
253[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000254
255static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300256binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
257/*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
Jack Jansen72781191995-08-07 14:34:15 +0000258{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200259 const unsigned char *ascii_data;
260 unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 int leftbits = 0;
262 unsigned char this_ch;
263 unsigned int leftchar = 0;
264 PyObject *rv;
265 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000266
Serhiy Storchaka12785612014-01-25 11:49:49 +0200267 ascii_data = data->buf;
268 ascii_len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000269
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 assert(ascii_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000271
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 /* First byte: binary data length (in bytes) */
273 bin_len = (*ascii_data++ - ' ') & 077;
274 ascii_len--;
Jack Jansen72781191995-08-07 14:34:15 +0000275
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000276 /* Allocate the buffer */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200277 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000278 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
282 /* XXX is it really best to add NULs if there's no more data */
283 this_ch = (ascii_len > 0) ? *ascii_data : 0;
284 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
285 /*
286 ** Whitespace. Assume some spaces got eaten at
287 ** end-of-line. (We check this later)
288 */
289 this_ch = 0;
290 } else {
291 /* Check the character for legality
292 ** The 64 in stead of the expected 63 is because
293 ** there are a few uuencodes out there that use
294 ** '`' as zero instead of space.
295 */
296 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
297 PyErr_SetString(Error, "Illegal char");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 Py_DECREF(rv);
299 return NULL;
300 }
301 this_ch = (this_ch - ' ') & 077;
302 }
303 /*
304 ** Shift it in on the low end, and see if there's
305 ** a byte ready for output.
306 */
307 leftchar = (leftchar << 6) | (this_ch);
308 leftbits += 6;
309 if ( leftbits >= 8 ) {
310 leftbits -= 8;
311 *bin_data++ = (leftchar >> leftbits) & 0xff;
312 leftchar &= ((1 << leftbits) - 1);
313 bin_len--;
314 }
315 }
316 /*
317 ** Finally, check that if there's anything left on the line
318 ** that it's whitespace only.
319 */
320 while( ascii_len-- > 0 ) {
321 this_ch = *ascii_data++;
322 /* Extra '`' may be written as padding in some cases */
323 if ( this_ch != ' ' && this_ch != ' '+64 &&
324 this_ch != '\n' && this_ch != '\r' ) {
325 PyErr_SetString(Error, "Trailing garbage");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 Py_DECREF(rv);
327 return NULL;
328 }
329 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000330 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000331}
332
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200333/*[clinic input]
334binascii.b2a_uu
335
336 data: Py_buffer
337 /
Xiang Zhang13f1f422017-05-03 11:16:21 +0800338 *
339 backtick: bool(accept={int}) = False
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200340
341Uuencode line of data.
342[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000343
Jack Jansen72781191995-08-07 14:34:15 +0000344static PyObject *
Xiang Zhang13f1f422017-05-03 11:16:21 +0800345binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
346/*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
Jack Jansen72781191995-08-07 14:34:15 +0000347{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200348 unsigned char *ascii_data;
349 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 int leftbits = 0;
351 unsigned char this_ch;
352 unsigned int leftchar = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200353 Py_ssize_t bin_len, out_len;
354 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000355
Victor Stinnereaaaf132015-10-13 10:51:47 +0200356 _PyBytesWriter_Init(&writer);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200357 bin_data = data->buf;
358 bin_len = data->len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 if ( bin_len > 45 ) {
360 /* The 45 is a limit that appears in all uuencode's */
361 PyErr_SetString(Error, "At most 45 bytes at once");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000362 return NULL;
363 }
Jack Jansen72781191995-08-07 14:34:15 +0000364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 /* We're lazy and allocate to much (fixed up later) */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200366 out_len = 2 + (bin_len + 2) / 3 * 4;
367 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
368 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 /* Store the length */
Xiang Zhang13f1f422017-05-03 11:16:21 +0800372 if (backtick && !bin_len)
373 *ascii_data++ = '`';
374 else
Segev Finer679b5662017-07-27 01:17:57 +0300375 *ascii_data++ = ' ' + (unsigned char)bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
378 /* Shift the data (or padding) into our buffer */
379 if ( bin_len > 0 ) /* Data */
380 leftchar = (leftchar << 8) | *bin_data;
381 else /* Padding */
382 leftchar <<= 8;
383 leftbits += 8;
Jack Jansen72781191995-08-07 14:34:15 +0000384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000385 /* See if there are 6-bit groups ready */
386 while ( leftbits >= 6 ) {
387 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
388 leftbits -= 6;
Xiang Zhang13f1f422017-05-03 11:16:21 +0800389 if (backtick && !this_ch)
390 *ascii_data++ = '`';
391 else
392 *ascii_data++ = this_ch + ' ';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 }
394 }
395 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000396
Victor Stinnereaaaf132015-10-13 10:51:47 +0200397 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen72781191995-08-07 14:34:15 +0000398}
399
Guido van Rossum2db4f471999-10-19 19:05:14 +0000400
401static int
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200402binascii_find_valid(const unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000403{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000404 /* Finds & returns the (num+1)th
405 ** valid character for base64, or -1 if none.
406 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 int ret = -1;
409 unsigned char c, b64val;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 while ((slen > 0) && (ret == -1)) {
412 c = *s;
413 b64val = table_a2b_base64[c & 0x7f];
414 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
415 if (num == 0)
416 ret = *s;
417 num--;
418 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000419
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000420 s++;
421 slen--;
422 }
423 return ret;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000424}
425
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200426/*[clinic input]
427binascii.a2b_base64
428
Serhiy Storchaka12785612014-01-25 11:49:49 +0200429 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200430 /
431
432Decode a line of base64 data.
433[clinic start generated code]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000434
435static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300436binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
437/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000438{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200439 const unsigned char *ascii_data;
440 unsigned char *bin_data;
Miss Islington (bot)7e350812018-09-27 23:12:54 -0700441 unsigned char *bin_data_start;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000442 int leftbits = 0;
443 unsigned char this_ch;
444 unsigned int leftchar = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 Py_ssize_t ascii_len, bin_len;
446 int quad_pos = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200447 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000448
Serhiy Storchaka12785612014-01-25 11:49:49 +0200449 ascii_data = data->buf;
450 ascii_len = data->len;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 assert(ascii_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000453
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200454 if (ascii_len > PY_SSIZE_T_MAX - 3)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000455 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000456
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000457 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000458
Victor Stinnereaaaf132015-10-13 10:51:47 +0200459 _PyBytesWriter_Init(&writer);
460
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 /* Allocate the buffer */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200462 bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
463 if (bin_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 return NULL;
Miss Islington (bot)7e350812018-09-27 23:12:54 -0700465 bin_data_start = bin_data;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000466
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000467 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
468 this_ch = *ascii_data;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000469
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 if (this_ch > 0x7f ||
471 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
472 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 /* Check for pad sequences and ignore
475 ** the invalid ones.
476 */
477 if (this_ch == BASE64_PAD) {
478 if ( (quad_pos < 2) ||
479 ((quad_pos == 2) &&
480 (binascii_find_valid(ascii_data, ascii_len, 1)
481 != BASE64_PAD)) )
482 {
483 continue;
484 }
485 else {
486 /* A pad sequence means no more input.
487 ** We've already interpreted the data
488 ** from the quad at this point.
489 */
490 leftbits = 0;
491 break;
492 }
493 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000494
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000495 this_ch = table_a2b_base64[*ascii_data];
496 if ( this_ch == (unsigned char) -1 )
497 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000498
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000499 /*
500 ** Shift it in on the low end, and see if there's
501 ** a byte ready for output.
502 */
503 quad_pos = (quad_pos + 1) & 0x03;
504 leftchar = (leftchar << 6) | (this_ch);
505 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000506
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000507 if ( leftbits >= 8 ) {
508 leftbits -= 8;
509 *bin_data++ = (leftchar >> leftbits) & 0xff;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 leftchar &= ((1 << leftbits) - 1);
511 }
512 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000513
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000514 if (leftbits != 0) {
Miss Islington (bot)053d6c52018-06-10 14:37:14 -0700515 if (leftbits == 6) {
516 /*
517 ** There is exactly one extra valid, non-padding, base64 character.
518 ** This is an invalid length, as there is no possible input that
519 ** could encoded into such a base64 string.
520 */
Miss Islington (bot)7e350812018-09-27 23:12:54 -0700521 PyErr_Format(Error,
522 "Invalid base64-encoded string: "
Serhiy Storchaka783bed42019-03-14 10:47:27 +0200523 "number of data characters (%zd) cannot be 1 more "
Miss Islington (bot)7e350812018-09-27 23:12:54 -0700524 "than a multiple of 4",
525 (bin_data - bin_data_start) / 3 * 4 + 1);
Miss Islington (bot)053d6c52018-06-10 14:37:14 -0700526 } else {
527 PyErr_SetString(Error, "Incorrect padding");
528 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200529 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000530 return NULL;
531 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000532
Victor Stinnereaaaf132015-10-13 10:51:47 +0200533 return _PyBytesWriter_Finish(&writer, bin_data);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000534}
535
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200536
537/*[clinic input]
538binascii.b2a_base64
539
540 data: Py_buffer
Xiang Zhang1374dbb2017-05-01 13:12:07 +0800541 /
Victor Stinnere84c9762015-10-11 11:01:02 +0200542 *
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200543 newline: bool(accept={int}) = True
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200544
545Base64-code line of data.
546[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000547
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000548static PyObject *
Serhiy Storchaka2954f832016-07-07 18:20:03 +0300549binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
Xiang Zhang1374dbb2017-05-01 13:12:07 +0800550/*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000551{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200552 unsigned char *ascii_data;
553 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000554 int leftbits = 0;
555 unsigned char this_ch;
556 unsigned int leftchar = 0;
Victor Stinnere84c9762015-10-11 11:01:02 +0200557 Py_ssize_t bin_len, out_len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200558 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000559
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200560 bin_data = data->buf;
561 bin_len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200562 _PyBytesWriter_Init(&writer);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000563
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000564 assert(bin_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000566 if ( bin_len > BASE64_MAXBIN ) {
567 PyErr_SetString(Error, "Too much data for base64 line");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000568 return NULL;
569 }
Tim Peters934c1a12002-07-02 22:24:50 +0000570
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 /* We're lazy and allocate too much (fixed up later).
Victor Stinnere84c9762015-10-11 11:01:02 +0200572 "+2" leaves room for up to two pad characters.
573 Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
574 out_len = bin_len*2 + 2;
575 if (newline)
576 out_len++;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200577 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
578 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000579 return NULL;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000580
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000581 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
582 /* Shift the data into our buffer */
583 leftchar = (leftchar << 8) | *bin_data;
584 leftbits += 8;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000586 /* See if there are 6-bit groups ready */
587 while ( leftbits >= 6 ) {
588 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
589 leftbits -= 6;
590 *ascii_data++ = table_b2a_base64[this_ch];
591 }
592 }
593 if ( leftbits == 2 ) {
594 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
595 *ascii_data++ = BASE64_PAD;
596 *ascii_data++ = BASE64_PAD;
597 } else if ( leftbits == 4 ) {
598 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
599 *ascii_data++ = BASE64_PAD;
600 }
Victor Stinnere84c9762015-10-11 11:01:02 +0200601 if (newline)
602 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000603
Victor Stinnereaaaf132015-10-13 10:51:47 +0200604 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000605}
606
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200607/*[clinic input]
608binascii.a2b_hqx
609
Serhiy Storchaka12785612014-01-25 11:49:49 +0200610 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200611 /
612
613Decode .hqx coding.
614[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000615
616static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300617binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data)
618/*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/
Jack Jansen72781191995-08-07 14:34:15 +0000619{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200620 const unsigned char *ascii_data;
621 unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000622 int leftbits = 0;
623 unsigned char this_ch;
624 unsigned int leftchar = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200625 PyObject *res;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000626 Py_ssize_t len;
627 int done = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200628 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000629
Serhiy Storchaka12785612014-01-25 11:49:49 +0200630 ascii_data = data->buf;
631 len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200632 _PyBytesWriter_Init(&writer);
Jack Jansen72781191995-08-07 14:34:15 +0000633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000634 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000635
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200636 if (len > PY_SSIZE_T_MAX - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000637 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000639 /* Allocate a string that is too big (fixed later)
640 Add two to the initial length to prevent interning which
641 would preclude subsequent resizing. */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200642 bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
643 if (bin_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000644 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 for( ; len > 0 ; len--, ascii_data++ ) {
647 /* Get the byte and look it up */
648 this_ch = table_a2b_hqx[*ascii_data];
649 if ( this_ch == SKIP )
650 continue;
651 if ( this_ch == FAIL ) {
652 PyErr_SetString(Error, "Illegal char");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200653 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000654 return NULL;
655 }
656 if ( this_ch == DONE ) {
657 /* The terminating colon */
658 done = 1;
659 break;
660 }
Jack Jansen72781191995-08-07 14:34:15 +0000661
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000662 /* Shift it into the buffer and see if any bytes are ready */
663 leftchar = (leftchar << 6) | (this_ch);
664 leftbits += 6;
665 if ( leftbits >= 8 ) {
666 leftbits -= 8;
667 *bin_data++ = (leftchar >> leftbits) & 0xff;
668 leftchar &= ((1 << leftbits) - 1);
669 }
670 }
Tim Peters934c1a12002-07-02 22:24:50 +0000671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000672 if ( leftbits && !done ) {
673 PyErr_SetString(Incomplete,
674 "String has incomplete number of bytes");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200675 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000676 return NULL;
677 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000678
Victor Stinnereaaaf132015-10-13 10:51:47 +0200679 res = _PyBytesWriter_Finish(&writer, bin_data);
680 if (res == NULL)
681 return NULL;
682 return Py_BuildValue("Ni", res, done);
Jack Jansen72781191995-08-07 14:34:15 +0000683}
684
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200685
686/*[clinic input]
687binascii.rlecode_hqx
688
689 data: Py_buffer
690 /
691
692Binhex RLE-code binary data.
693[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000694
695static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300696binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data)
697/*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/
Jack Jansen72781191995-08-07 14:34:15 +0000698{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200699 const unsigned char *in_data;
700 unsigned char *out_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000701 unsigned char ch;
702 Py_ssize_t in, inend, len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200703 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000704
Victor Stinnereaaaf132015-10-13 10:51:47 +0200705 _PyBytesWriter_Init(&writer);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200706 in_data = data->buf;
707 len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000709 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000710
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200711 if (len > PY_SSIZE_T_MAX / 2 - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000712 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000714 /* Worst case: output is twice as big as input (fixed later) */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200715 out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
716 if (out_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 return NULL;
Tim Peters934c1a12002-07-02 22:24:50 +0000718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000719 for( in=0; in<len; in++) {
720 ch = in_data[in];
721 if ( ch == RUNCHAR ) {
722 /* RUNCHAR. Escape it. */
723 *out_data++ = RUNCHAR;
724 *out_data++ = 0;
725 } else {
726 /* Check how many following are the same */
727 for(inend=in+1;
728 inend<len && in_data[inend] == ch &&
729 inend < in+255;
730 inend++) ;
731 if ( inend - in > 3 ) {
732 /* More than 3 in a row. Output RLE. */
733 *out_data++ = ch;
734 *out_data++ = RUNCHAR;
Antoine Pitrou40455752010-08-15 18:51:10 +0000735 *out_data++ = (unsigned char) (inend-in);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000736 in = inend-1;
737 } else {
738 /* Less than 3. Output the byte itself */
739 *out_data++ = ch;
740 }
741 }
742 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200743
744 return _PyBytesWriter_Finish(&writer, out_data);
Jack Jansen72781191995-08-07 14:34:15 +0000745}
746
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200747
748/*[clinic input]
749binascii.b2a_hqx
750
751 data: Py_buffer
752 /
753
754Encode .hqx data.
755[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000756
Jack Jansen72781191995-08-07 14:34:15 +0000757static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300758binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data)
759/*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/
Jack Jansen72781191995-08-07 14:34:15 +0000760{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200761 unsigned char *ascii_data;
762 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000763 int leftbits = 0;
764 unsigned char this_ch;
765 unsigned int leftchar = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000766 Py_ssize_t len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200767 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000768
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200769 bin_data = data->buf;
770 len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200771 _PyBytesWriter_Init(&writer);
Jack Jansen72781191995-08-07 14:34:15 +0000772
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000773 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000774
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200775 if (len > PY_SSIZE_T_MAX / 2 - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000776 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000777
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000778 /* Allocate a buffer that is at least large enough */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200779 ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
780 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000781 return NULL;
Tim Peters934c1a12002-07-02 22:24:50 +0000782
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000783 for( ; len > 0 ; len--, bin_data++ ) {
784 /* Shift into our buffer, and output any 6bits ready */
785 leftchar = (leftchar << 8) | *bin_data;
786 leftbits += 8;
787 while ( leftbits >= 6 ) {
788 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
789 leftbits -= 6;
790 *ascii_data++ = table_b2a_hqx[this_ch];
791 }
792 }
793 /* Output a possible runt byte */
794 if ( leftbits ) {
795 leftchar <<= (6-leftbits);
796 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
797 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200798
799 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen72781191995-08-07 14:34:15 +0000800}
801
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200802
803/*[clinic input]
804binascii.rledecode_hqx
805
806 data: Py_buffer
807 /
808
809Decode hexbin RLE-coded string.
810[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000811
Jack Jansen72781191995-08-07 14:34:15 +0000812static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300813binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data)
814/*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/
Jack Jansen72781191995-08-07 14:34:15 +0000815{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200816 const unsigned char *in_data;
817 unsigned char *out_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000818 unsigned char in_byte, in_repeat;
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200819 Py_ssize_t in_len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200820 _PyBytesWriter writer;
Jack Jansen72781191995-08-07 14:34:15 +0000821
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200822 in_data = data->buf;
823 in_len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200824 _PyBytesWriter_Init(&writer);
Jack Jansen72781191995-08-07 14:34:15 +0000825
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000826 assert(in_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000827
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000828 /* Empty string is a special case */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200829 if ( in_len == 0 )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000830 return PyBytes_FromStringAndSize("", 0);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200831 else if (in_len > PY_SSIZE_T_MAX / 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000832 return PyErr_NoMemory();
Jack Jansen72781191995-08-07 14:34:15 +0000833
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000834 /* Allocate a buffer of reasonable size. Resized when needed */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200835 out_data = _PyBytesWriter_Alloc(&writer, in_len);
Victor Stinnereaaaf132015-10-13 10:51:47 +0200836 if (out_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000837 return NULL;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200838
839 /* Use overallocation */
840 writer.overallocate = 1;
Jack Jansen72781191995-08-07 14:34:15 +0000841
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000842 /*
843 ** We need two macros here to get/put bytes and handle
844 ** end-of-buffer for input and output strings.
845 */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200846#define INBYTE(b) \
847 do { \
848 if ( --in_len < 0 ) { \
849 PyErr_SetString(Incomplete, ""); \
850 goto error; \
851 } \
852 b = *in_data++; \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000853 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000854
Victor Stinnereaaaf132015-10-13 10:51:47 +0200855 /*
856 ** Handle first byte separately (since we have to get angry
857 ** in case of an orphaned RLE code).
858 */
859 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000860
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000861 if (in_byte == RUNCHAR) {
862 INBYTE(in_repeat);
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200863 /* only 1 byte will be written, but 2 bytes were preallocated:
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700864 subtract 1 byte to prevent overallocation */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200865 writer.min_size--;
866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 if (in_repeat != 0) {
868 /* Note Error, not Incomplete (which is at the end
869 ** of the string only). This is a programmer error.
870 */
871 PyErr_SetString(Error, "Orphaned RLE code at start");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200872 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000873 }
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200874 *out_data++ = RUNCHAR;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000875 } else {
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200876 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000877 }
Tim Peters934c1a12002-07-02 22:24:50 +0000878
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000879 while( in_len > 0 ) {
880 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 if (in_byte == RUNCHAR) {
883 INBYTE(in_repeat);
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200884 /* only 1 byte will be written, but 2 bytes were preallocated:
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700885 subtract 1 byte to prevent overallocation */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200886 writer.min_size--;
887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000888 if ( in_repeat == 0 ) {
889 /* Just an escaped RUNCHAR value */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200890 *out_data++ = RUNCHAR;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 } else {
892 /* Pick up value and output a sequence of it */
893 in_byte = out_data[-1];
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200894
895 /* enlarge the buffer if needed */
896 if (in_repeat > 1) {
897 /* -1 because we already preallocated 1 byte */
898 out_data = _PyBytesWriter_Prepare(&writer, out_data,
899 in_repeat - 1);
900 if (out_data == NULL)
901 goto error;
902 }
903
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 while ( --in_repeat > 0 )
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200905 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000906 }
907 } else {
908 /* Normal byte */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200909 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000910 }
911 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200912 return _PyBytesWriter_Finish(&writer, out_data);
913
914error:
915 _PyBytesWriter_Dealloc(&writer);
916 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000917}
918
Jack Jansen72781191995-08-07 14:34:15 +0000919
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200920/*[clinic input]
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300921binascii.crc_hqx -> unsigned_int
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200922
923 data: Py_buffer
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300924 crc: unsigned_int(bitwise=True)
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200925 /
926
Martin Panter3310e142016-12-24 07:36:44 +0000927Compute CRC-CCITT incrementally.
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200928[clinic start generated code]*/
929
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300930static unsigned int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300931binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
Martin Panter3310e142016-12-24 07:36:44 +0000932/*[clinic end generated code: output=8ec2a78590d19170 input=f18240ff8c705b79]*/
Jack Jansen72781191995-08-07 14:34:15 +0000933{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200934 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000935 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000936
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300937 crc &= 0xffff;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200938 bin_data = data->buf;
939 len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000940
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000941 while(len-- > 0) {
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300942 crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000943 }
Jack Jansen72781191995-08-07 14:34:15 +0000944
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300945 return crc;
Jack Jansen72781191995-08-07 14:34:15 +0000946}
947
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200948#ifndef USE_ZLIB_CRC32
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000949/* Crc - 32 BIT ANSI X3.66 CRC checksum files
950 Also known as: ISO 3307
951**********************************************************************|
952* *|
953* Demonstration program to compute the 32-bit CRC used as the frame *|
954* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
955* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
956* protocol). The 32-bit FCS was added via the Federal Register, *|
957* 1 June 1982, p.23798. I presume but don't know for certain that *|
958* this polynomial is or will be included in CCITT V.41, which *|
959* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
960* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
961* errors by a factor of 10^-5 over 16-bit FCS. *|
962* *|
963**********************************************************************|
964
965 Copyright (C) 1986 Gary S. Brown. You may use this program, or
966 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000967
Tim Peters934c1a12002-07-02 22:24:50 +0000968 First, the polynomial itself and its table of feedback terms. The
969 polynomial is
970 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
971 Note that we take it "backwards" and put the highest-order term in
972 the lowest-order bit. The X^32 term is "implied"; the LSB is the
973 X^31 term, etc. The X^0 term (usually shown as "+1") results in
974 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000975
Tim Peters934c1a12002-07-02 22:24:50 +0000976 Note that the usual hardware shift register implementation, which
977 is what we're using (we're merely optimizing it by doing eight-bit
978 chunks at a time) shifts bits into the lowest-order term. In our
979 implementation, that means shifting towards the right. Why do we
980 do it this way? Because the calculated CRC must be transmitted in
981 order from highest-order term to lowest-order term. UARTs transmit
982 characters in order from LSB to MSB. By storing the CRC this way,
983 we hand it to the UART in the order low-byte to high-byte; the UART
984 sends each low-bit to high-bit; and the result is transmission bit
985 by bit from highest- to lowest-order term without requiring any bit
986 shuffling on our part. Reception works similarly.
987
988 The feedback terms table consists of 256, 32-bit entries. Notes:
989
990 1. The table can be generated at runtime if desired; code to do so
991 is shown later. It might not be obvious, but the feedback
992 terms simply represent the results of eight shift/xor opera-
993 tions for all combinations of data and CRC register values.
994
995 2. The CRC accumulation logic is the same for all CRC polynomials,
996 be they sixteen or thirty-two bits wide. You simply choose the
997 appropriate table. Alternatively, because the table can be
998 generated at runtime, you can start by generating the table for
999 the polynomial in question and use exactly the same "updcrc",
1000 if your application needn't simultaneously handle two CRC
1001 polynomials. (Note, however, that XMODEM is strange.)
1002
1003 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
1004 of course, 32-bit entries work OK if the high 16 bits are zero.
1005
1006 4. The values must be right-shifted by eight bits by the "updcrc"
1007 logic; the shift must be unsigned (bring in zeroes). On some
1008 hardware you could probably optimize the shift in assembler by
1009 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001010********************************************************************/
1011
/* CRC-32 feedback terms, indexed by byte value; four entries per row. */
static const unsigned int crc_32_tab[256] = {
    0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU,
    0x076dc419U, 0x706af48fU, 0xe963a535U, 0x9e6495a3U,
    0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U,
    0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U,
    0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
    0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U,
    0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU,
    0x14015c4fU, 0x63066cd9U, 0xfa0f3d63U, 0x8d080df5U,
    0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U,
    0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
    0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U,
    0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U,
    0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U,
    0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U, 0xb8bda50fU,
    0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
    0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU,
    0x76dc4190U, 0x01db7106U, 0x98d220bcU, 0xefd5102aU,
    0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U,
    0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U,
    0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
    0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU,
    0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U,
    0x65b0d9c6U, 0x12b7e950U, 0x8bbeb8eaU, 0xfcb9887cU,
    0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U,
    0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
    0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU,
    0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U,
    0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U,
    0x5005713cU, 0x270241aaU, 0xbe0b1010U, 0xc90c2086U,
    0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
    0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U,
    0x59b33d17U, 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU,
    0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU,
    0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U,
    0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
    0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U,
    0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU,
    0xf762575dU, 0x806567cbU, 0x196c3671U, 0x6e6b06e7U,
    0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU,
    0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
    0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U,
    0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU,
    0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U,
    0xdf60efc3U, 0xa867df55U, 0x316e8eefU, 0x4669be79U,
    0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
    0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU,
    0xc5ba3bbeU, 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U,
    0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU,
    0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU,
    0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
    0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U,
    0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U,
    0x86d3d2d4U, 0xf1d4e242U, 0x68ddb3f8U, 0x1fda836eU,
    0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U,
    0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
    0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U,
    0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U,
    0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU,
    0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U, 0x37d83bf0U,
    0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
    0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U,
    0xbad03605U, 0xcdd70693U, 0x54de5729U, 0x23d967bfU,
    0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U,
    0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU
};
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001066#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001067
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001068/*[clinic input]
1069binascii.crc32 -> unsigned_int
1070
1071 data: Py_buffer
1072 crc: unsigned_int(bitwise=True) = 0
1073 /
1074
1075Compute CRC-32 incrementally.
1076[clinic start generated code]*/
1077
1078static unsigned int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001079binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
1080/*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001081
1082#ifdef USE_ZLIB_CRC32
1083/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
1084{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001085 const Byte *buf;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001086 Py_ssize_t len;
1087 int signed_val;
1088
1089 buf = (Byte*)data->buf;
1090 len = data->len;
1091 signed_val = crc32(crc, buf, len);
1092 return (unsigned int)signed_val & 0xffffffffU;
1093}
1094#else /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001095{ /* By Jim Ahlstrom; All rights transferred to CNRI */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001096 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 Py_ssize_t len;
1098 unsigned int result;
Tim Peters934c1a12002-07-02 22:24:50 +00001099
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001100 bin_data = data->buf;
1101 len = data->len;
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 crc = ~ crc;
1104 while (len-- > 0) {
1105 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1106 /* Note: (crc >> 8) MUST zero fill on left */
1107 }
Tim Petersa98011c2002-07-02 20:20:08 +00001108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 result = (crc ^ 0xFFFFFFFF);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001110 return result & 0xffffffff;
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001111}
Christian Heimes1dc54002008-03-24 02:19:29 +00001112#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001113
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001114/*[clinic input]
1115binascii.b2a_hex
1116
1117 data: Py_buffer
1118 /
1119
1120Hexadecimal representation of binary data.
1121
1122The return value is a bytes object. This function is also
1123available as "hexlify()".
1124[clinic start generated code]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001125
1126static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001127binascii_b2a_hex_impl(PyObject *module, Py_buffer *data)
1128/*[clinic end generated code: output=92fec1a95c9897a0 input=96423cfa299ff3b1]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001129{
Gregory P. Smith9c6b9162015-04-26 00:42:13 +00001130 return _Py_strhex_bytes((const char *)data->buf, data->len);
Barry Warsawe977c212000-08-15 06:07:13 +00001131}
1132
Zachary Wareb176d402015-01-20 13:59:46 -06001133/*[clinic input]
1134binascii.hexlify = binascii.b2a_hex
1135
1136Hexadecimal representation of binary data.
1137
1138The return value is a bytes object.
1139[clinic start generated code]*/
1140
1141static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001142binascii_hexlify_impl(PyObject *module, Py_buffer *data)
1143/*[clinic end generated code: output=749e95e53c14880c input=2e3afae7f083f061]*/
Zachary Wareb176d402015-01-20 13:59:46 -06001144{
Gregory P. Smith9c6b9162015-04-26 00:42:13 +00001145 return _Py_strhex_bytes((const char *)data->buf, data->len);
Zachary Wareb176d402015-01-20 13:59:46 -06001146}
Barry Warsawe977c212000-08-15 06:07:13 +00001147
/*
** Convert one hexadecimal digit character to its numeric value.
**
** c is the character code of the digit ('0'-'9', 'a'-'f' or 'A'-'F').
** Returns the value 0..15, or -1 when c is not a hexadecimal digit.
** Codes outside those three ranges, including values that do not fit
** in a byte, are rejected rather than being reduced to their low byte.
*/
static int
to_int(int c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}
1161
1162
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001163/*[clinic input]
1164binascii.a2b_hex
1165
1166 hexstr: ascii_buffer
1167 /
1168
1169Binary data of hexadecimal representation.
1170
1171hexstr must contain an even number of hex digits (upper or lower case).
1172This function is also available as "unhexlify()".
1173[clinic start generated code]*/
1174
Barry Warsawe977c212000-08-15 06:07:13 +00001175static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001176binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
1177/*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001178{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001179 const char* argbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001180 Py_ssize_t arglen;
1181 PyObject *retval;
1182 char* retbuf;
1183 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +00001184
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001185 argbuf = hexstr->buf;
1186 arglen = hexstr->len;
Barry Warsawe977c212000-08-15 06:07:13 +00001187
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001188 assert(arglen >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001189
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 /* XXX What should we do about strings with an odd length? Should
1191 * we add an implicit leading zero, or a trailing zero? For now,
1192 * raise an exception.
1193 */
1194 if (arglen % 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001195 PyErr_SetString(Error, "Odd-length string");
1196 return NULL;
1197 }
Barry Warsawe977c212000-08-15 06:07:13 +00001198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001200 if (!retval)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001204 for (i=j=0; i < arglen; i += 2) {
1205 int top = to_int(Py_CHARMASK(argbuf[i]));
1206 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1207 if (top == -1 || bot == -1) {
1208 PyErr_SetString(Error,
1209 "Non-hexadecimal digit found");
1210 goto finally;
1211 }
1212 retbuf[j++] = (top << 4) + bot;
1213 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001214 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +00001215
1216 finally:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001217 Py_DECREF(retval);
1218 return NULL;
Barry Warsawe977c212000-08-15 06:07:13 +00001219}
1220
Zachary Wareb176d402015-01-20 13:59:46 -06001221/*[clinic input]
1222binascii.unhexlify = binascii.a2b_hex
1223
1224Binary data of hexadecimal representation.
1225
1226hexstr must contain an even number of hex digits (upper or lower case).
1227[clinic start generated code]*/
1228
1229static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001230binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
1231/*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
Zachary Wareb176d402015-01-20 13:59:46 -06001232{
1233 return binascii_a2b_hex_impl(module, hexstr);
1234}
1235
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001236static const int table_hex[128] = {
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001237 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
1238 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
1239 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
1240 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1,
1241 -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
1242 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
1243 -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
1244 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
1245};
1246
1247#define hexval(c) table_hex[(unsigned int)(c)]
1248
1249#define MAXLINESIZE 76
1250
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001251
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001252/*[clinic input]
1253binascii.a2b_qp
1254
Serhiy Storchaka12785612014-01-25 11:49:49 +02001255 data: ascii_buffer
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001256 header: bool(accept={int}) = False
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001257
1258Decode a string of qp-encoded data.
1259[clinic start generated code]*/
1260
1261static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001262binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001263/*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001264{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001265 Py_ssize_t in, out;
1266 char ch;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001267 const unsigned char *ascii_data;
1268 unsigned char *odata;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001269 Py_ssize_t datalen = 0;
1270 PyObject *rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001271
Serhiy Storchaka12785612014-01-25 11:49:49 +02001272 ascii_data = data->buf;
1273 datalen = data->len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001274
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 /* We allocate the output same size as input, this is overkill.
1276 * The previous implementation used calloc() so we'll zero out the
1277 * memory here too, since PyMem_Malloc() does not guarantee that.
1278 */
1279 odata = (unsigned char *) PyMem_Malloc(datalen);
1280 if (odata == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 PyErr_NoMemory();
1282 return NULL;
1283 }
1284 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001285
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001286 in = out = 0;
1287 while (in < datalen) {
Serhiy Storchaka12785612014-01-25 11:49:49 +02001288 if (ascii_data[in] == '=') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001289 in++;
1290 if (in >= datalen) break;
1291 /* Soft line breaks */
Serhiy Storchaka12785612014-01-25 11:49:49 +02001292 if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
1293 if (ascii_data[in] != '\n') {
1294 while (in < datalen && ascii_data[in] != '\n') in++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 }
1296 if (in < datalen) in++;
1297 }
Serhiy Storchaka12785612014-01-25 11:49:49 +02001298 else if (ascii_data[in] == '=') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 /* broken case from broken python qp */
1300 odata[out++] = '=';
1301 in++;
1302 }
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001303 else if ((in + 1 < datalen) &&
1304 ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
Serhiy Storchaka12785612014-01-25 11:49:49 +02001305 (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1306 (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1307 ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1308 (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1309 (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001310 /* hexval */
Serhiy Storchaka12785612014-01-25 11:49:49 +02001311 ch = hexval(ascii_data[in]) << 4;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 in++;
Serhiy Storchaka12785612014-01-25 11:49:49 +02001313 ch |= hexval(ascii_data[in]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 in++;
1315 odata[out++] = ch;
1316 }
1317 else {
1318 odata[out++] = '=';
1319 }
1320 }
Serhiy Storchaka12785612014-01-25 11:49:49 +02001321 else if (header && ascii_data[in] == '_') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001322 odata[out++] = ' ';
1323 in++;
1324 }
1325 else {
Serhiy Storchaka12785612014-01-25 11:49:49 +02001326 odata[out] = ascii_data[in];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 in++;
1328 out++;
1329 }
1330 }
1331 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001332 PyMem_Free(odata);
1333 return NULL;
1334 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001335 PyMem_Free(odata);
1336 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001337}
1338
/* Store the two uppercase hexadecimal digits of ch at s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char hexdigits[] = "0123456789ABCDEF";

    s[0] = hexdigits[(ch >> 4) & 0x0F];
    s[1] = hexdigits[ch & 0x0F];
    return 0;
}
1349
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001350/* XXX: This is ridiculously complicated to be backward compatible
1351 * (mostly) with the quopri module. It doesn't re-create the quopri
1352 * module bug where text ending in CRLF has the CR encoded */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001353
1354/*[clinic input]
1355binascii.b2a_qp
1356
1357 data: Py_buffer
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001358 quotetabs: bool(accept={int}) = False
1359 istext: bool(accept={int}) = True
1360 header: bool(accept={int}) = False
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001361
1362Encode a string using quoted-printable encoding.
1363
1364On encoding, when istext is set, newlines are not encoded, and white
1365space at end of lines is. When istext is not set, \r and \n (CR/LF)
1366are both encoded. When quotetabs is set, space and tabs are encoded.
1367[clinic start generated code]*/
1368
1369static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001370binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
Larry Hastings89964c42015-04-14 18:07:59 -04001371 int istext, int header)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001372/*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001373{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 Py_ssize_t in, out;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001375 const unsigned char *databuf;
1376 unsigned char *odata;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001377 Py_ssize_t datalen = 0, odatalen = 0;
1378 PyObject *rv;
1379 unsigned int linelen = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 unsigned char ch;
1381 int crlf = 0;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001382 const unsigned char *p;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001383
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001384 databuf = data->buf;
1385 datalen = data->len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 /* See if this string is using CRLF line ends */
1388 /* XXX: this function has the side effect of converting all of
1389 * the end of lines to be the same depending on this detection
1390 * here */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001391 p = (const unsigned char *) memchr(databuf, '\n', datalen);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001392 if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 crlf = 1;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001394
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 /* First, scan to see how many characters need to be encoded */
1396 in = 0;
1397 while (in < datalen) {
Benjamin Peterson4f976512016-08-13 18:33:33 -07001398 Py_ssize_t delta = 0;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001399 if ((databuf[in] > 126) ||
1400 (databuf[in] == '=') ||
1401 (header && databuf[in] == '_') ||
1402 ((databuf[in] == '.') && (linelen == 0) &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001403 (in + 1 == datalen || databuf[in+1] == '\n' ||
1404 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001405 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1406 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1407 ((databuf[in] < 33) &&
1408 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1409 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 {
1411 if ((linelen + 3) >= MAXLINESIZE) {
1412 linelen = 0;
1413 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001414 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001416 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 }
1418 linelen += 3;
Benjamin Peterson4f976512016-08-13 18:33:33 -07001419 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 in++;
1421 }
1422 else {
1423 if (istext &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001424 ((databuf[in] == '\n') ||
1425 ((in+1 < datalen) && (databuf[in] == '\r') &&
1426 (databuf[in+1] == '\n'))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 {
1428 linelen = 0;
1429 /* Protect against whitespace on end of line */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001430 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
Benjamin Peterson4f976512016-08-13 18:33:33 -07001431 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001432 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001433 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001435 delta += 1;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001436 if (databuf[in] == '\r')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001437 in += 2;
1438 else
1439 in++;
1440 }
1441 else {
1442 if ((in + 1 != datalen) &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001443 (databuf[in+1] != '\n') &&
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 (linelen + 1) >= MAXLINESIZE) {
1445 linelen = 0;
1446 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001447 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001448 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001449 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 }
1451 linelen++;
Benjamin Peterson4f976512016-08-13 18:33:33 -07001452 delta++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 in++;
1454 }
1455 }
Benjamin Peterson4f976512016-08-13 18:33:33 -07001456 if (PY_SSIZE_T_MAX - delta < odatalen) {
Benjamin Peterson4f976512016-08-13 18:33:33 -07001457 PyErr_NoMemory();
1458 return NULL;
1459 }
1460 odatalen += delta;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001461 }
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001462
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001463 /* We allocate the output same size as input, this is overkill.
1464 * The previous implementation used calloc() so we'll zero out the
1465 * memory here too, since PyMem_Malloc() does not guarantee that.
1466 */
1467 odata = (unsigned char *) PyMem_Malloc(odatalen);
1468 if (odata == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 PyErr_NoMemory();
1470 return NULL;
1471 }
1472 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 in = out = linelen = 0;
1475 while (in < datalen) {
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001476 if ((databuf[in] > 126) ||
1477 (databuf[in] == '=') ||
1478 (header && databuf[in] == '_') ||
1479 ((databuf[in] == '.') && (linelen == 0) &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001480 (in + 1 == datalen || databuf[in+1] == '\n' ||
1481 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001482 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1483 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1484 ((databuf[in] < 33) &&
1485 (databuf[in] != '\r') && (databuf[in] != '\n') &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001486 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 {
1488 if ((linelen + 3 )>= MAXLINESIZE) {
1489 odata[out++] = '=';
1490 if (crlf) odata[out++] = '\r';
1491 odata[out++] = '\n';
1492 linelen = 0;
1493 }
1494 odata[out++] = '=';
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001495 to_hex(databuf[in], &odata[out]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 out += 2;
1497 in++;
1498 linelen += 3;
1499 }
1500 else {
1501 if (istext &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001502 ((databuf[in] == '\n') ||
1503 ((in+1 < datalen) && (databuf[in] == '\r') &&
1504 (databuf[in+1] == '\n'))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001505 {
1506 linelen = 0;
1507 /* Protect against whitespace on end of line */
1508 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1509 ch = odata[out-1];
1510 odata[out-1] = '=';
1511 to_hex(ch, &odata[out]);
1512 out += 2;
1513 }
Tim Peters934c1a12002-07-02 22:24:50 +00001514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 if (crlf) odata[out++] = '\r';
1516 odata[out++] = '\n';
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001517 if (databuf[in] == '\r')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001518 in += 2;
1519 else
1520 in++;
1521 }
1522 else {
1523 if ((in + 1 != datalen) &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001524 (databuf[in+1] != '\n') &&
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001525 (linelen + 1) >= MAXLINESIZE) {
1526 odata[out++] = '=';
1527 if (crlf) odata[out++] = '\r';
1528 odata[out++] = '\n';
1529 linelen = 0;
1530 }
1531 linelen++;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001532 if (header && databuf[in] == ' ') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 odata[out++] = '_';
1534 in++;
1535 }
1536 else {
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001537 odata[out++] = databuf[in++];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 }
1539 }
1540 }
1541 }
1542 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001543 PyMem_Free(odata);
1544 return NULL;
1545 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 PyMem_Free(odata);
1547 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001548}
Barry Warsawe977c212000-08-15 06:07:13 +00001549
Jack Jansen72781191995-08-07 14:34:15 +00001550/* List of functions defined in the module */
1551
1552static struct PyMethodDef binascii_module_methods[] = {
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001553 BINASCII_A2B_UU_METHODDEF
1554 BINASCII_B2A_UU_METHODDEF
1555 BINASCII_A2B_BASE64_METHODDEF
1556 BINASCII_B2A_BASE64_METHODDEF
1557 BINASCII_A2B_HQX_METHODDEF
1558 BINASCII_B2A_HQX_METHODDEF
1559 BINASCII_A2B_HEX_METHODDEF
1560 BINASCII_B2A_HEX_METHODDEF
Zachary Wareb176d402015-01-20 13:59:46 -06001561 BINASCII_HEXLIFY_METHODDEF
1562 BINASCII_UNHEXLIFY_METHODDEF
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001563 BINASCII_RLECODE_HQX_METHODDEF
1564 BINASCII_RLEDECODE_HQX_METHODDEF
1565 BINASCII_CRC_HQX_METHODDEF
1566 BINASCII_CRC32_METHODDEF
1567 BINASCII_A2B_QP_METHODDEF
1568 BINASCII_B2A_QP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001570};
1571
1572
Martin v. Löwis1a214512008-06-11 05:26:20 +00001573/* Initialization function for the module (*must* be called PyInit_binascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001574PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001575
Martin v. Löwis1a214512008-06-11 05:26:20 +00001576
1577static struct PyModuleDef binasciimodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001578 PyModuleDef_HEAD_INIT,
1579 "binascii",
1580 doc_binascii,
1581 -1,
1582 binascii_module_methods,
1583 NULL,
1584 NULL,
1585 NULL,
1586 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001587};
1588
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001589PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001590PyInit_binascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001591{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001592 PyObject *m, *d;
Jack Jansen72781191995-08-07 14:34:15 +00001593
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 /* Create the module and add the functions */
1595 m = PyModule_Create(&binasciimodule);
1596 if (m == NULL)
1597 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +00001598
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001599 d = PyModule_GetDict(m);
Jack Jansen72781191995-08-07 14:34:15 +00001600
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1602 PyDict_SetItemString(d, "Error", Error);
1603 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1604 PyDict_SetItemString(d, "Incomplete", Incomplete);
1605 if (PyErr_Occurred()) {
1606 Py_DECREF(m);
1607 m = NULL;
1608 }
1609 return m;
Jack Jansen72781191995-08-07 14:34:15 +00001610}