/*
** Routines to represent binary data in ASCII and vice-versa
**
** This module currently supports the following encodings:
** uuencode:
**      each line encodes 45 bytes (except possibly the last)
**      First char encodes (binary) length, rest data
**      each char encodes 6 bits, as follows:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**      short binary data is zero-extended (so the bits are always in the
**      right place), this does *not* reflect in the length.
** base64:
**      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
**      is done via a table.
**      Short binary data is filled (in ASCII) with '='.
** hqx:
**      File starts with introductory text, real data starts and ends
**      with colons.
**      Data consists of three similar parts: info, datafork, resourcefork.
**      Each part is protected (at the end) with a 16-bit crc
**      The binary data is run-length encoded, and then ascii-fied:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding is table-driven, see the code.
**      Short binary data results in the runt ascii-byte being output with
**      the bits in the right place.
**
** While I was reading dozens of programs that encode or decode the formats
** here (documentation? hihi:-) I have formulated Jansen's Observation:
**
**      Programs that encode binary data in ASCII are written in
**      such a style that they are as unreadable as possible. Devices used
**      include unnecessary global variables, burying important tables
**      in unrelated sourcefiles, putting functions in include files,
**      using seemingly-descriptive variable names for different purposes,
**      calls to empty subroutines and a host of others.
**
** I have attempted to break with this tradition, but I guess that that
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on rfc 1521 et al
** quoted-printable encoding specifies that non printable characters (anything
** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
** of the character.  It also specifies some other behavior to enable 8bit data
** in a mail message with little difficulty (maximum line sizes, protecting
** some cases of whitespace, etc).
**
** Brandon Long, September 2001.
*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
Gregory P. Smith9c6b9162015-04-26 00:42:13 +000059#include "pystrhex.h"
Christian Heimes1dc54002008-03-24 02:19:29 +000060#ifdef USE_ZLIB_CRC32
61#include "zlib.h"
62#endif
Jack Jansen72781191995-08-07 14:34:15 +000063
64static PyObject *Error;
65static PyObject *Incomplete;
66
/*
** hqx lookup tables.
**
** table_a2b_hqx maps an input byte to its 6-bit value (0x00..0x3F) or to one
** of the marker codes defined below.  table_b2a_hqx is the inverse mapping:
** index it with a 6-bit value to get the ASCII character.
*/

/* NOTE(review): RUNCHAR is not referenced by the code in this part of the
   file; presumably the run-length marker of the hqx RLE routines -- confirm. */
#define RUNCHAR 0x90

/* Marker codes stored in table_a2b_hqx; all lie above the 6-bit data range. */
#define DONE 0x7F   /* ':'  -- terminating colon */
#define SKIP 0x7E   /* '\n' and '\r' -- ignored on input */
#define FAIL 0x7D   /* every other byte that is not in the hqx alphabet */

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/* Bytes 0x80..0xFF are never part of the hqx alphabet. */
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};

/* 6-bit value -> hqx character (64 characters plus the terminating NUL). */
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000130
/* ASCII (0..127) -> 6-bit base64 value; -1 marks characters outside the
   alphabet.  The table has only 128 entries, so callers must reject or mask
   bytes above 0x7f before indexing. */
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* Padding character that fills up the last quad of encoded output. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/* 6-bit value -> base64 character (64 characters plus the terminating NUL). */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000149
150
151
/* 16-bit CRC lookup table: entry i is the CRC of the single byte i, computed
   most-significant-bit first with polynomial 0x1021 and a zero start value. */
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
186
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200187/*[clinic input]
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200188module binascii
189[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300190/*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200191
192/*[python input]
193
194class ascii_buffer_converter(CConverter):
195 type = 'Py_buffer'
196 converter = 'ascii_buffer_converter'
197 impl_by_reference = True
Benjamin Petersonb62deac2014-01-26 10:41:58 -0500198 c_default = "{NULL, NULL}"
199
200 def cleanup(self):
201 name = self.name
202 return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"])
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200203
204[python start generated code]*/
Larry Hastings581ee362014-01-28 05:00:08 -0800205/*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200206
Antoine Pitrou08316762011-12-20 13:58:41 +0100207static int
208ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
209{
210 if (arg == NULL) {
211 PyBuffer_Release(buf);
212 return 1;
213 }
214 if (PyUnicode_Check(arg)) {
215 if (PyUnicode_READY(arg) < 0)
216 return 0;
217 if (!PyUnicode_IS_ASCII(arg)) {
218 PyErr_SetString(PyExc_ValueError,
219 "string argument should contain only ASCII characters");
220 return 0;
221 }
222 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
223 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
224 buf->len = PyUnicode_GET_LENGTH(arg);
225 buf->obj = NULL;
226 return 1;
227 }
228 if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
229 PyErr_Format(PyExc_TypeError,
230 "argument should be bytes, buffer or ASCII string, "
Berker Peksag3cd30c22015-02-15 00:31:00 +0200231 "not '%.100s'", Py_TYPE(arg)->tp_name);
Antoine Pitrou08316762011-12-20 13:58:41 +0100232 return 0;
233 }
234 if (!PyBuffer_IsContiguous(buf, 'C')) {
235 PyErr_Format(PyExc_TypeError,
236 "argument should be a contiguous buffer, "
Berker Peksag3cd30c22015-02-15 00:31:00 +0200237 "not '%.100s'", Py_TYPE(arg)->tp_name);
Antoine Pitrou08316762011-12-20 13:58:41 +0100238 PyBuffer_Release(buf);
239 return 0;
240 }
241 return Py_CLEANUP_SUPPORTED;
242}
243
Larry Hastingsf256c222014-01-25 21:30:37 -0800244#include "clinic/binascii.c.h"
Antoine Pitrou08316762011-12-20 13:58:41 +0100245
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200246/*[clinic input]
247binascii.a2b_uu
248
Serhiy Storchaka12785612014-01-25 11:49:49 +0200249 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200250 /
251
252Decode a line of uuencoded data.
253[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000254
255static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300256binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
257/*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
Jack Jansen72781191995-08-07 14:34:15 +0000258{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200259 const unsigned char *ascii_data;
260 unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 int leftbits = 0;
262 unsigned char this_ch;
263 unsigned int leftchar = 0;
264 PyObject *rv;
265 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000266
Serhiy Storchaka12785612014-01-25 11:49:49 +0200267 ascii_data = data->buf;
268 ascii_len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000269
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 assert(ascii_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000271
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 /* First byte: binary data length (in bytes) */
273 bin_len = (*ascii_data++ - ' ') & 077;
274 ascii_len--;
Jack Jansen72781191995-08-07 14:34:15 +0000275
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000276 /* Allocate the buffer */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200277 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000278 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
282 /* XXX is it really best to add NULs if there's no more data */
283 this_ch = (ascii_len > 0) ? *ascii_data : 0;
284 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
285 /*
286 ** Whitespace. Assume some spaces got eaten at
287 ** end-of-line. (We check this later)
288 */
289 this_ch = 0;
290 } else {
291 /* Check the character for legality
292 ** The 64 in stead of the expected 63 is because
293 ** there are a few uuencodes out there that use
294 ** '`' as zero instead of space.
295 */
296 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
297 PyErr_SetString(Error, "Illegal char");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 Py_DECREF(rv);
299 return NULL;
300 }
301 this_ch = (this_ch - ' ') & 077;
302 }
303 /*
304 ** Shift it in on the low end, and see if there's
305 ** a byte ready for output.
306 */
307 leftchar = (leftchar << 6) | (this_ch);
308 leftbits += 6;
309 if ( leftbits >= 8 ) {
310 leftbits -= 8;
311 *bin_data++ = (leftchar >> leftbits) & 0xff;
312 leftchar &= ((1 << leftbits) - 1);
313 bin_len--;
314 }
315 }
316 /*
317 ** Finally, check that if there's anything left on the line
318 ** that it's whitespace only.
319 */
320 while( ascii_len-- > 0 ) {
321 this_ch = *ascii_data++;
322 /* Extra '`' may be written as padding in some cases */
323 if ( this_ch != ' ' && this_ch != ' '+64 &&
324 this_ch != '\n' && this_ch != '\r' ) {
325 PyErr_SetString(Error, "Trailing garbage");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 Py_DECREF(rv);
327 return NULL;
328 }
329 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000330 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000331}
332
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200333/*[clinic input]
334binascii.b2a_uu
335
336 data: Py_buffer
337 /
338
339Uuencode line of data.
340[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000341
Jack Jansen72781191995-08-07 14:34:15 +0000342static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300343binascii_b2a_uu_impl(PyObject *module, Py_buffer *data)
344/*[clinic end generated code: output=0070670e52e4aa6b input=00fdf458ce8b465b]*/
Jack Jansen72781191995-08-07 14:34:15 +0000345{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200346 unsigned char *ascii_data;
347 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 int leftbits = 0;
349 unsigned char this_ch;
350 unsigned int leftchar = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200351 Py_ssize_t bin_len, out_len;
352 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000353
Victor Stinnereaaaf132015-10-13 10:51:47 +0200354 _PyBytesWriter_Init(&writer);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200355 bin_data = data->buf;
356 bin_len = data->len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 if ( bin_len > 45 ) {
358 /* The 45 is a limit that appears in all uuencode's */
359 PyErr_SetString(Error, "At most 45 bytes at once");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 return NULL;
361 }
Jack Jansen72781191995-08-07 14:34:15 +0000362
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 /* We're lazy and allocate to much (fixed up later) */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200364 out_len = 2 + (bin_len + 2) / 3 * 4;
365 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
366 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000368
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 /* Store the length */
370 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000371
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
373 /* Shift the data (or padding) into our buffer */
374 if ( bin_len > 0 ) /* Data */
375 leftchar = (leftchar << 8) | *bin_data;
376 else /* Padding */
377 leftchar <<= 8;
378 leftbits += 8;
Jack Jansen72781191995-08-07 14:34:15 +0000379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 /* See if there are 6-bit groups ready */
381 while ( leftbits >= 6 ) {
382 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
383 leftbits -= 6;
384 *ascii_data++ = this_ch + ' ';
385 }
386 }
387 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000388
Victor Stinnereaaaf132015-10-13 10:51:47 +0200389 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen72781191995-08-07 14:34:15 +0000390}
391
Guido van Rossum2db4f471999-10-19 19:05:14 +0000392
393static int
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200394binascii_find_valid(const unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000395{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* Finds & returns the (num+1)th
397 ** valid character for base64, or -1 if none.
398 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 int ret = -1;
401 unsigned char c, b64val;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000402
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000403 while ((slen > 0) && (ret == -1)) {
404 c = *s;
405 b64val = table_a2b_base64[c & 0x7f];
406 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
407 if (num == 0)
408 ret = *s;
409 num--;
410 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000411
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000412 s++;
413 slen--;
414 }
415 return ret;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000416}
417
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200418/*[clinic input]
419binascii.a2b_base64
420
Serhiy Storchaka12785612014-01-25 11:49:49 +0200421 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200422 /
423
424Decode a line of base64 data.
425[clinic start generated code]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000426
427static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300428binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
429/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000430{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200431 const unsigned char *ascii_data;
432 unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000433 int leftbits = 0;
434 unsigned char this_ch;
435 unsigned int leftchar = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000436 Py_ssize_t ascii_len, bin_len;
437 int quad_pos = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200438 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000439
Serhiy Storchaka12785612014-01-25 11:49:49 +0200440 ascii_data = data->buf;
441 ascii_len = data->len;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 assert(ascii_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000444
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200445 if (ascii_len > PY_SSIZE_T_MAX - 3)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000447
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000448 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000449
Victor Stinnereaaaf132015-10-13 10:51:47 +0200450 _PyBytesWriter_Init(&writer);
451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 /* Allocate the buffer */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200453 bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
454 if (bin_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000455 return NULL;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000456
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000457 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
458 this_ch = *ascii_data;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000460 if (this_ch > 0x7f ||
461 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
462 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 /* Check for pad sequences and ignore
465 ** the invalid ones.
466 */
467 if (this_ch == BASE64_PAD) {
468 if ( (quad_pos < 2) ||
469 ((quad_pos == 2) &&
470 (binascii_find_valid(ascii_data, ascii_len, 1)
471 != BASE64_PAD)) )
472 {
473 continue;
474 }
475 else {
476 /* A pad sequence means no more input.
477 ** We've already interpreted the data
478 ** from the quad at this point.
479 */
480 leftbits = 0;
481 break;
482 }
483 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000484
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 this_ch = table_a2b_base64[*ascii_data];
486 if ( this_ch == (unsigned char) -1 )
487 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000488
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000489 /*
490 ** Shift it in on the low end, and see if there's
491 ** a byte ready for output.
492 */
493 quad_pos = (quad_pos + 1) & 0x03;
494 leftchar = (leftchar << 6) | (this_ch);
495 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000496
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000497 if ( leftbits >= 8 ) {
498 leftbits -= 8;
499 *bin_data++ = (leftchar >> leftbits) & 0xff;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 leftchar &= ((1 << leftbits) - 1);
501 }
502 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 if (leftbits != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505 PyErr_SetString(Error, "Incorrect padding");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200506 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000507 return NULL;
508 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000509
Victor Stinnereaaaf132015-10-13 10:51:47 +0200510 return _PyBytesWriter_Finish(&writer, bin_data);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000511}
512
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200513
514/*[clinic input]
515binascii.b2a_base64
516
517 data: Py_buffer
Xiang Zhang1374dbb2017-05-01 13:12:07 +0800518 /
Victor Stinnere84c9762015-10-11 11:01:02 +0200519 *
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200520 newline: bool(accept={int}) = True
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200521
522Base64-code line of data.
523[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000524
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000525static PyObject *
Serhiy Storchaka2954f832016-07-07 18:20:03 +0300526binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
Xiang Zhang1374dbb2017-05-01 13:12:07 +0800527/*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000528{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200529 unsigned char *ascii_data;
530 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 int leftbits = 0;
532 unsigned char this_ch;
533 unsigned int leftchar = 0;
Victor Stinnere84c9762015-10-11 11:01:02 +0200534 Py_ssize_t bin_len, out_len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200535 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000536
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200537 bin_data = data->buf;
538 bin_len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200539 _PyBytesWriter_Init(&writer);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000541 assert(bin_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 if ( bin_len > BASE64_MAXBIN ) {
544 PyErr_SetString(Error, "Too much data for base64 line");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000545 return NULL;
546 }
Tim Peters934c1a12002-07-02 22:24:50 +0000547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000548 /* We're lazy and allocate too much (fixed up later).
Victor Stinnere84c9762015-10-11 11:01:02 +0200549 "+2" leaves room for up to two pad characters.
550 Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
551 out_len = bin_len*2 + 2;
552 if (newline)
553 out_len++;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200554 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
555 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000556 return NULL;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000557
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000558 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
559 /* Shift the data into our buffer */
560 leftchar = (leftchar << 8) | *bin_data;
561 leftbits += 8;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000563 /* See if there are 6-bit groups ready */
564 while ( leftbits >= 6 ) {
565 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
566 leftbits -= 6;
567 *ascii_data++ = table_b2a_base64[this_ch];
568 }
569 }
570 if ( leftbits == 2 ) {
571 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
572 *ascii_data++ = BASE64_PAD;
573 *ascii_data++ = BASE64_PAD;
574 } else if ( leftbits == 4 ) {
575 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
576 *ascii_data++ = BASE64_PAD;
577 }
Victor Stinnere84c9762015-10-11 11:01:02 +0200578 if (newline)
579 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000580
Victor Stinnereaaaf132015-10-13 10:51:47 +0200581 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000582}
583
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200584/*[clinic input]
585binascii.a2b_hqx
586
Serhiy Storchaka12785612014-01-25 11:49:49 +0200587 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200588 /
589
590Decode .hqx coding.
591[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000592
593static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300594binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data)
595/*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/
Jack Jansen72781191995-08-07 14:34:15 +0000596{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200597 const unsigned char *ascii_data;
598 unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000599 int leftbits = 0;
600 unsigned char this_ch;
601 unsigned int leftchar = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200602 PyObject *res;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000603 Py_ssize_t len;
604 int done = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200605 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000606
Serhiy Storchaka12785612014-01-25 11:49:49 +0200607 ascii_data = data->buf;
608 len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200609 _PyBytesWriter_Init(&writer);
Jack Jansen72781191995-08-07 14:34:15 +0000610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000611 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000612
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200613 if (len > PY_SSIZE_T_MAX - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000615
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000616 /* Allocate a string that is too big (fixed later)
617 Add two to the initial length to prevent interning which
618 would preclude subsequent resizing. */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200619 bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
620 if (bin_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000621 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000622
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000623 for( ; len > 0 ; len--, ascii_data++ ) {
624 /* Get the byte and look it up */
625 this_ch = table_a2b_hqx[*ascii_data];
626 if ( this_ch == SKIP )
627 continue;
628 if ( this_ch == FAIL ) {
629 PyErr_SetString(Error, "Illegal char");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200630 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000631 return NULL;
632 }
633 if ( this_ch == DONE ) {
634 /* The terminating colon */
635 done = 1;
636 break;
637 }
Jack Jansen72781191995-08-07 14:34:15 +0000638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000639 /* Shift it into the buffer and see if any bytes are ready */
640 leftchar = (leftchar << 6) | (this_ch);
641 leftbits += 6;
642 if ( leftbits >= 8 ) {
643 leftbits -= 8;
644 *bin_data++ = (leftchar >> leftbits) & 0xff;
645 leftchar &= ((1 << leftbits) - 1);
646 }
647 }
Tim Peters934c1a12002-07-02 22:24:50 +0000648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000649 if ( leftbits && !done ) {
650 PyErr_SetString(Incomplete,
651 "String has incomplete number of bytes");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200652 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000653 return NULL;
654 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000655
Victor Stinnereaaaf132015-10-13 10:51:47 +0200656 res = _PyBytesWriter_Finish(&writer, bin_data);
657 if (res == NULL)
658 return NULL;
659 return Py_BuildValue("Ni", res, done);
Jack Jansen72781191995-08-07 14:34:15 +0000660}
661
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200662
663/*[clinic input]
664binascii.rlecode_hqx
665
666 data: Py_buffer
667 /
668
669Binhex RLE-code binary data.
670[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000671
672static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300673binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data)
674/*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/
Jack Jansen72781191995-08-07 14:34:15 +0000675{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200676 const unsigned char *in_data;
677 unsigned char *out_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 unsigned char ch;
679 Py_ssize_t in, inend, len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200680 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000681
Victor Stinnereaaaf132015-10-13 10:51:47 +0200682 _PyBytesWriter_Init(&writer);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200683 in_data = data->buf;
684 len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000686 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000687
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200688 if (len > PY_SSIZE_T_MAX / 2 - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000689 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000691 /* Worst case: output is twice as big as input (fixed later) */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200692 out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
693 if (out_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000694 return NULL;
Tim Peters934c1a12002-07-02 22:24:50 +0000695
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000696 for( in=0; in<len; in++) {
697 ch = in_data[in];
698 if ( ch == RUNCHAR ) {
699 /* RUNCHAR. Escape it. */
700 *out_data++ = RUNCHAR;
701 *out_data++ = 0;
702 } else {
703 /* Check how many following are the same */
704 for(inend=in+1;
705 inend<len && in_data[inend] == ch &&
706 inend < in+255;
707 inend++) ;
708 if ( inend - in > 3 ) {
709 /* More than 3 in a row. Output RLE. */
710 *out_data++ = ch;
711 *out_data++ = RUNCHAR;
Antoine Pitrou40455752010-08-15 18:51:10 +0000712 *out_data++ = (unsigned char) (inend-in);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713 in = inend-1;
714 } else {
715 /* Less than 3. Output the byte itself */
716 *out_data++ = ch;
717 }
718 }
719 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200720
721 return _PyBytesWriter_Finish(&writer, out_data);
Jack Jansen72781191995-08-07 14:34:15 +0000722}
723
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200724
725/*[clinic input]
726binascii.b2a_hqx
727
728 data: Py_buffer
729 /
730
731Encode .hqx data.
732[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000733
Jack Jansen72781191995-08-07 14:34:15 +0000734static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300735binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data)
736/*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/
Jack Jansen72781191995-08-07 14:34:15 +0000737{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200738 unsigned char *ascii_data;
739 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000740 int leftbits = 0;
741 unsigned char this_ch;
742 unsigned int leftchar = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000743 Py_ssize_t len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200744 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000745
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200746 bin_data = data->buf;
747 len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200748 _PyBytesWriter_Init(&writer);
Jack Jansen72781191995-08-07 14:34:15 +0000749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000750 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000751
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200752 if (len > PY_SSIZE_T_MAX / 2 - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000753 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000755 /* Allocate a buffer that is at least large enough */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200756 ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
757 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000758 return NULL;
Tim Peters934c1a12002-07-02 22:24:50 +0000759
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000760 for( ; len > 0 ; len--, bin_data++ ) {
761 /* Shift into our buffer, and output any 6bits ready */
762 leftchar = (leftchar << 8) | *bin_data;
763 leftbits += 8;
764 while ( leftbits >= 6 ) {
765 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
766 leftbits -= 6;
767 *ascii_data++ = table_b2a_hqx[this_ch];
768 }
769 }
770 /* Output a possible runt byte */
771 if ( leftbits ) {
772 leftchar <<= (6-leftbits);
773 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
774 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200775
776 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen72781191995-08-07 14:34:15 +0000777}
778
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200779
780/*[clinic input]
781binascii.rledecode_hqx
782
783 data: Py_buffer
784 /
785
786Decode hexbin RLE-coded string.
787[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000788
Jack Jansen72781191995-08-07 14:34:15 +0000789static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300790binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data)
791/*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/
Jack Jansen72781191995-08-07 14:34:15 +0000792{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200793 const unsigned char *in_data;
794 unsigned char *out_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000795 unsigned char in_byte, in_repeat;
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200796 Py_ssize_t in_len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200797 _PyBytesWriter writer;
Jack Jansen72781191995-08-07 14:34:15 +0000798
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200799 in_data = data->buf;
800 in_len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200801 _PyBytesWriter_Init(&writer);
Jack Jansen72781191995-08-07 14:34:15 +0000802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 assert(in_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000805 /* Empty string is a special case */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200806 if ( in_len == 0 )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 return PyBytes_FromStringAndSize("", 0);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200808 else if (in_len > PY_SSIZE_T_MAX / 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000809 return PyErr_NoMemory();
Jack Jansen72781191995-08-07 14:34:15 +0000810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 /* Allocate a buffer of reasonable size. Resized when needed */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200812 out_data = _PyBytesWriter_Alloc(&writer, in_len);
Victor Stinnereaaaf132015-10-13 10:51:47 +0200813 if (out_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000814 return NULL;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200815
816 /* Use overallocation */
817 writer.overallocate = 1;
Jack Jansen72781191995-08-07 14:34:15 +0000818
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000819 /*
820 ** We need two macros here to get/put bytes and handle
821 ** end-of-buffer for input and output strings.
822 */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200823#define INBYTE(b) \
824 do { \
825 if ( --in_len < 0 ) { \
826 PyErr_SetString(Incomplete, ""); \
827 goto error; \
828 } \
829 b = *in_data++; \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000830 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000831
Victor Stinnereaaaf132015-10-13 10:51:47 +0200832 /*
833 ** Handle first byte separately (since we have to get angry
834 ** in case of an orphaned RLE code).
835 */
836 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000837
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000838 if (in_byte == RUNCHAR) {
839 INBYTE(in_repeat);
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200840 /* only 1 byte will be written, but 2 bytes were preallocated:
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700841 subtract 1 byte to prevent overallocation */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200842 writer.min_size--;
843
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000844 if (in_repeat != 0) {
845 /* Note Error, not Incomplete (which is at the end
846 ** of the string only). This is a programmer error.
847 */
848 PyErr_SetString(Error, "Orphaned RLE code at start");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200849 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000850 }
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200851 *out_data++ = RUNCHAR;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000852 } else {
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200853 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000854 }
Tim Peters934c1a12002-07-02 22:24:50 +0000855
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 while( in_len > 0 ) {
857 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000858
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000859 if (in_byte == RUNCHAR) {
860 INBYTE(in_repeat);
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200861 /* only 1 byte will be written, but 2 bytes were preallocated:
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700862 subtract 1 byte to prevent overallocation */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200863 writer.min_size--;
864
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000865 if ( in_repeat == 0 ) {
866 /* Just an escaped RUNCHAR value */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200867 *out_data++ = RUNCHAR;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 } else {
869 /* Pick up value and output a sequence of it */
870 in_byte = out_data[-1];
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200871
872 /* enlarge the buffer if needed */
873 if (in_repeat > 1) {
874 /* -1 because we already preallocated 1 byte */
875 out_data = _PyBytesWriter_Prepare(&writer, out_data,
876 in_repeat - 1);
877 if (out_data == NULL)
878 goto error;
879 }
880
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000881 while ( --in_repeat > 0 )
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200882 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000883 }
884 } else {
885 /* Normal byte */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200886 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000887 }
888 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200889 return _PyBytesWriter_Finish(&writer, out_data);
890
891error:
892 _PyBytesWriter_Dealloc(&writer);
893 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000894}
895
Jack Jansen72781191995-08-07 14:34:15 +0000896
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200897/*[clinic input]
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300898binascii.crc_hqx -> unsigned_int
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200899
900 data: Py_buffer
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300901 crc: unsigned_int(bitwise=True)
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200902 /
903
Martin Panter3310e142016-12-24 07:36:44 +0000904Compute CRC-CCITT incrementally.
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200905[clinic start generated code]*/
906
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300907static unsigned int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300908binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
Martin Panter3310e142016-12-24 07:36:44 +0000909/*[clinic end generated code: output=8ec2a78590d19170 input=f18240ff8c705b79]*/
Jack Jansen72781191995-08-07 14:34:15 +0000910{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200911 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000913
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300914 crc &= 0xffff;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200915 bin_data = data->buf;
916 len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000917
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000918 while(len-- > 0) {
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300919 crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000920 }
Jack Jansen72781191995-08-07 14:34:15 +0000921
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300922 return crc;
Jack Jansen72781191995-08-07 14:34:15 +0000923}
924
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200925#ifndef USE_ZLIB_CRC32
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000926/* Crc - 32 BIT ANSI X3.66 CRC checksum files
927 Also known as: ISO 3307
928**********************************************************************|
929* *|
930* Demonstration program to compute the 32-bit CRC used as the frame *|
931* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
932* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
933* protocol). The 32-bit FCS was added via the Federal Register, *|
934* 1 June 1982, p.23798. I presume but don't know for certain that *|
935* this polynomial is or will be included in CCITT V.41, which *|
936* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
937* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
938* errors by a factor of 10^-5 over 16-bit FCS. *|
939* *|
940**********************************************************************|
941
942 Copyright (C) 1986 Gary S. Brown. You may use this program, or
943 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000944
Tim Peters934c1a12002-07-02 22:24:50 +0000945 First, the polynomial itself and its table of feedback terms. The
946 polynomial is
947 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
948 Note that we take it "backwards" and put the highest-order term in
949 the lowest-order bit. The X^32 term is "implied"; the LSB is the
950 X^31 term, etc. The X^0 term (usually shown as "+1") results in
951 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000952
Tim Peters934c1a12002-07-02 22:24:50 +0000953 Note that the usual hardware shift register implementation, which
954 is what we're using (we're merely optimizing it by doing eight-bit
955 chunks at a time) shifts bits into the lowest-order term. In our
956 implementation, that means shifting towards the right. Why do we
957 do it this way? Because the calculated CRC must be transmitted in
958 order from highest-order term to lowest-order term. UARTs transmit
959 characters in order from LSB to MSB. By storing the CRC this way,
960 we hand it to the UART in the order low-byte to high-byte; the UART
961 sends each low-bit to high-bit; and the result is transmission bit
962 by bit from highest- to lowest-order term without requiring any bit
963 shuffling on our part. Reception works similarly.
964
965 The feedback terms table consists of 256, 32-bit entries. Notes:
966
967 1. The table can be generated at runtime if desired; code to do so
968 is shown later. It might not be obvious, but the feedback
969 terms simply represent the results of eight shift/xor opera-
970 tions for all combinations of data and CRC register values.
971
972 2. The CRC accumulation logic is the same for all CRC polynomials,
973 be they sixteen or thirty-two bits wide. You simply choose the
974 appropriate table. Alternatively, because the table can be
975 generated at runtime, you can start by generating the table for
976 the polynomial in question and use exactly the same "updcrc",
977 if your application needn't simultaneously handle two CRC
978 polynomials. (Note, however, that XMODEM is strange.)
979
980 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
981 of course, 32-bit entries work OK if the high 16 bits are zero.
982
983 4. The values must be right-shifted by eight bits by the "updcrc"
984 logic; the shift must be unsigned (bring in zeroes). On some
985 hardware you could probably optimize the shift in assembler by
986 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000987********************************************************************/
988
/* CRC-32 feedback terms, indexed by (crc ^ input byte) & 0xff.
 * Entry i equals i pushed through eight right-shift/xor steps with the
 * bit-reversed polynomial 0xedb88320 (which is why entry 128 is that
 * constant).  The leading comment on each row is the index of its first
 * entry.
 */
static const unsigned int crc_32_tab[256] = {
    /*   0 */ 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU,
    /*   4 */ 0x076dc419U, 0x706af48fU, 0xe963a535U, 0x9e6495a3U,
    /*   8 */ 0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U,
    /*  12 */ 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U,
    /*  16 */ 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
    /*  20 */ 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U,
    /*  24 */ 0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU,
    /*  28 */ 0x14015c4fU, 0x63066cd9U, 0xfa0f3d63U, 0x8d080df5U,
    /*  32 */ 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U,
    /*  36 */ 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
    /*  40 */ 0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U,
    /*  44 */ 0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U,
    /*  48 */ 0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U,
    /*  52 */ 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U, 0xb8bda50fU,
    /*  56 */ 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
    /*  60 */ 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU,
    /*  64 */ 0x76dc4190U, 0x01db7106U, 0x98d220bcU, 0xefd5102aU,
    /*  68 */ 0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U,
    /*  72 */ 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U,
    /*  76 */ 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
    /*  80 */ 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU,
    /*  84 */ 0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U,
    /*  88 */ 0x65b0d9c6U, 0x12b7e950U, 0x8bbeb8eaU, 0xfcb9887cU,
    /*  92 */ 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U,
    /*  96 */ 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
    /* 100 */ 0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU,
    /* 104 */ 0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U,
    /* 108 */ 0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U,
    /* 112 */ 0x5005713cU, 0x270241aaU, 0xbe0b1010U, 0xc90c2086U,
    /* 116 */ 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
    /* 120 */ 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U,
    /* 124 */ 0x59b33d17U, 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU,
    /* 128 */ 0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU,
    /* 132 */ 0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U,
    /* 136 */ 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
    /* 140 */ 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U,
    /* 144 */ 0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU,
    /* 148 */ 0xf762575dU, 0x806567cbU, 0x196c3671U, 0x6e6b06e7U,
    /* 152 */ 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU,
    /* 156 */ 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
    /* 160 */ 0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U,
    /* 164 */ 0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU,
    /* 168 */ 0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U,
    /* 172 */ 0xdf60efc3U, 0xa867df55U, 0x316e8eefU, 0x4669be79U,
    /* 176 */ 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
    /* 180 */ 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU,
    /* 184 */ 0xc5ba3bbeU, 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U,
    /* 188 */ 0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU,
    /* 192 */ 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU,
    /* 196 */ 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
    /* 200 */ 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U,
    /* 204 */ 0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U,
    /* 208 */ 0x86d3d2d4U, 0xf1d4e242U, 0x68ddb3f8U, 0x1fda836eU,
    /* 212 */ 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U,
    /* 216 */ 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
    /* 220 */ 0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U,
    /* 224 */ 0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U,
    /* 228 */ 0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU,
    /* 232 */ 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U, 0x37d83bf0U,
    /* 236 */ 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
    /* 240 */ 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U,
    /* 244 */ 0xbad03605U, 0xcdd70693U, 0x54de5729U, 0x23d967bfU,
    /* 248 */ 0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U,
    /* 252 */ 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU
};
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001043#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001044
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001045/*[clinic input]
1046binascii.crc32 -> unsigned_int
1047
1048 data: Py_buffer
1049 crc: unsigned_int(bitwise=True) = 0
1050 /
1051
1052Compute CRC-32 incrementally.
1053[clinic start generated code]*/
1054
1055static unsigned int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001056binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
1057/*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001058
1059#ifdef USE_ZLIB_CRC32
1060/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
1061{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001062 const Byte *buf;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001063 Py_ssize_t len;
1064 int signed_val;
1065
1066 buf = (Byte*)data->buf;
1067 len = data->len;
1068 signed_val = crc32(crc, buf, len);
1069 return (unsigned int)signed_val & 0xffffffffU;
1070}
1071#else /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001072{ /* By Jim Ahlstrom; All rights transferred to CNRI */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001073 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001074 Py_ssize_t len;
1075 unsigned int result;
Tim Peters934c1a12002-07-02 22:24:50 +00001076
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001077 bin_data = data->buf;
1078 len = data->len;
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 crc = ~ crc;
1081 while (len-- > 0) {
1082 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1083 /* Note: (crc >> 8) MUST zero fill on left */
1084 }
Tim Petersa98011c2002-07-02 20:20:08 +00001085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 result = (crc ^ 0xFFFFFFFF);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001087 return result & 0xffffffff;
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001088}
Christian Heimes1dc54002008-03-24 02:19:29 +00001089#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001090
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001091/*[clinic input]
1092binascii.b2a_hex
1093
1094 data: Py_buffer
1095 /
1096
1097Hexadecimal representation of binary data.
1098
1099The return value is a bytes object. This function is also
1100available as "hexlify()".
1101[clinic start generated code]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001102
1103static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001104binascii_b2a_hex_impl(PyObject *module, Py_buffer *data)
1105/*[clinic end generated code: output=92fec1a95c9897a0 input=96423cfa299ff3b1]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001106{
Gregory P. Smith9c6b9162015-04-26 00:42:13 +00001107 return _Py_strhex_bytes((const char *)data->buf, data->len);
Barry Warsawe977c212000-08-15 06:07:13 +00001108}
1109
Zachary Wareb176d402015-01-20 13:59:46 -06001110/*[clinic input]
1111binascii.hexlify = binascii.b2a_hex
1112
1113Hexadecimal representation of binary data.
1114
1115The return value is a bytes object.
1116[clinic start generated code]*/
1117
1118static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001119binascii_hexlify_impl(PyObject *module, Py_buffer *data)
1120/*[clinic end generated code: output=749e95e53c14880c input=2e3afae7f083f061]*/
Zachary Wareb176d402015-01-20 13:59:46 -06001121{
Gregory P. Smith9c6b9162015-04-26 00:42:13 +00001122 return _Py_strhex_bytes((const char *)data->buf, data->len);
Zachary Wareb176d402015-01-20 13:59:46 -06001123}
Barry Warsawe977c212000-08-15 06:07:13 +00001124
/* Return the numeric value (0-15) of the hexadecimal digit `c`, accepting
 * both upper- and lower-case letters, or -1 when `c` is not a hex digit.
 */
static int
to_int(int c)
{
    int folded;

    if (Py_ISDIGIT(c)) {
        return c - '0';
    }

    /* Fold upper case onto lower case so one range check covers both. */
    folded = Py_ISUPPER(c) ? Py_TOLOWER(c) : c;
    if (folded < 'a' || folded > 'f') {
        return -1;
    }
    return folded - 'a' + 10;
}
1138
1139
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001140/*[clinic input]
1141binascii.a2b_hex
1142
1143 hexstr: ascii_buffer
1144 /
1145
1146Binary data of hexadecimal representation.
1147
1148hexstr must contain an even number of hex digits (upper or lower case).
1149This function is also available as "unhexlify()".
1150[clinic start generated code]*/
1151
Barry Warsawe977c212000-08-15 06:07:13 +00001152static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001153binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
1154/*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001155{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001156 const char* argbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 Py_ssize_t arglen;
1158 PyObject *retval;
1159 char* retbuf;
1160 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +00001161
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001162 argbuf = hexstr->buf;
1163 arglen = hexstr->len;
Barry Warsawe977c212000-08-15 06:07:13 +00001164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 assert(arglen >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 /* XXX What should we do about strings with an odd length? Should
1168 * we add an implicit leading zero, or a trailing zero? For now,
1169 * raise an exception.
1170 */
1171 if (arglen % 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001172 PyErr_SetString(Error, "Odd-length string");
1173 return NULL;
1174 }
Barry Warsawe977c212000-08-15 06:07:13 +00001175
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001176 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001177 if (!retval)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001178 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 for (i=j=0; i < arglen; i += 2) {
1182 int top = to_int(Py_CHARMASK(argbuf[i]));
1183 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1184 if (top == -1 || bot == -1) {
1185 PyErr_SetString(Error,
1186 "Non-hexadecimal digit found");
1187 goto finally;
1188 }
1189 retbuf[j++] = (top << 4) + bot;
1190 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +00001192
1193 finally:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001194 Py_DECREF(retval);
1195 return NULL;
Barry Warsawe977c212000-08-15 06:07:13 +00001196}
1197
Zachary Wareb176d402015-01-20 13:59:46 -06001198/*[clinic input]
1199binascii.unhexlify = binascii.a2b_hex
1200
1201Binary data of hexadecimal representation.
1202
1203hexstr must contain an even number of hex digits (upper or lower case).
1204[clinic start generated code]*/
1205
1206static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001207binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
1208/*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
Zachary Wareb176d402015-01-20 13:59:46 -06001209{
1210 return binascii_a2b_hex_impl(module, hexstr);
1211}
1212
/* Value of each 7-bit character code as a hexadecimal digit, or -1 when
 * the character is not one.  Eight entries per row; the leading comment
 * is the code of the first entry in the row.
 */
static const int table_hex[128] = {
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x08 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x18 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x28 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,
    /* 0x38 */  8,  9, -1, -1, -1, -1, -1, -1,
    /* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1,
    /* 0x48 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x58 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1,
    /* 0x68 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x78 */ -1, -1, -1, -1, -1, -1, -1, -1
};

/* Look up a character in table_hex.  No range check is done here: the
   argument must be below 128, since the table has only 128 entries. */
#define hexval(c) (table_hex[(unsigned int)(c)])

#define MAXLINESIZE 76
1227
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001228
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001229/*[clinic input]
1230binascii.a2b_qp
1231
Serhiy Storchaka12785612014-01-25 11:49:49 +02001232 data: ascii_buffer
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001233 header: bool(accept={int}) = False
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001234
1235Decode a string of qp-encoded data.
1236[clinic start generated code]*/
1237
1238static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001239binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001240/*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001241{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 Py_ssize_t in, out;
1243 char ch;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001244 const unsigned char *ascii_data;
1245 unsigned char *odata;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001246 Py_ssize_t datalen = 0;
1247 PyObject *rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001248
Serhiy Storchaka12785612014-01-25 11:49:49 +02001249 ascii_data = data->buf;
1250 datalen = data->len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001252 /* We allocate the output same size as input, this is overkill.
1253 * The previous implementation used calloc() so we'll zero out the
1254 * memory here too, since PyMem_Malloc() does not guarantee that.
1255 */
1256 odata = (unsigned char *) PyMem_Malloc(datalen);
1257 if (odata == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001258 PyErr_NoMemory();
1259 return NULL;
1260 }
1261 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001262
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001263 in = out = 0;
1264 while (in < datalen) {
Serhiy Storchaka12785612014-01-25 11:49:49 +02001265 if (ascii_data[in] == '=') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001266 in++;
1267 if (in >= datalen) break;
1268 /* Soft line breaks */
Serhiy Storchaka12785612014-01-25 11:49:49 +02001269 if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
1270 if (ascii_data[in] != '\n') {
1271 while (in < datalen && ascii_data[in] != '\n') in++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272 }
1273 if (in < datalen) in++;
1274 }
Serhiy Storchaka12785612014-01-25 11:49:49 +02001275 else if (ascii_data[in] == '=') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 /* broken case from broken python qp */
1277 odata[out++] = '=';
1278 in++;
1279 }
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001280 else if ((in + 1 < datalen) &&
1281 ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
Serhiy Storchaka12785612014-01-25 11:49:49 +02001282 (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1283 (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1284 ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1285 (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1286 (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001287 /* hexval */
Serhiy Storchaka12785612014-01-25 11:49:49 +02001288 ch = hexval(ascii_data[in]) << 4;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001289 in++;
Serhiy Storchaka12785612014-01-25 11:49:49 +02001290 ch |= hexval(ascii_data[in]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001291 in++;
1292 odata[out++] = ch;
1293 }
1294 else {
1295 odata[out++] = '=';
1296 }
1297 }
Serhiy Storchaka12785612014-01-25 11:49:49 +02001298 else if (header && ascii_data[in] == '_') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 odata[out++] = ' ';
1300 in++;
1301 }
1302 else {
Serhiy Storchaka12785612014-01-25 11:49:49 +02001303 odata[out] = ascii_data[in];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001304 in++;
1305 out++;
1306 }
1307 }
1308 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 PyMem_Free(odata);
1310 return NULL;
1311 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 PyMem_Free(odata);
1313 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001314}
1315
/* Store the two-character uppercase hexadecimal form of `ch` in `s`.
 *
 * s[0] receives the digit for the high nibble and s[1] the digit for the
 * low nibble.  Exactly two bytes are written and no terminating NUL is
 * added, so the caller must supply at least two writable bytes.
 *
 * Always returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char hexdigits[] = "0123456789ABCDEF";

    s[0] = hexdigits[(ch >> 4) & 0x0F];
    s[1] = hexdigits[ch & 0x0F];
    return 0;
}
1326
/* XXX: This is ridiculously complicated to be backward compatible
 * (mostly) with the quopri module.  It doesn't re-create the quopri
 * module bug where text ending in CRLF has the CR encoded */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001330
1331/*[clinic input]
1332binascii.b2a_qp
1333
1334 data: Py_buffer
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001335 quotetabs: bool(accept={int}) = False
1336 istext: bool(accept={int}) = True
1337 header: bool(accept={int}) = False
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001338
1339Encode a string using quoted-printable encoding.
1340
1341On encoding, when istext is set, newlines are not encoded, and white
1342space at end of lines is. When istext is not set, \r and \n (CR/LF)
1343are both encoded. When quotetabs is set, space and tabs are encoded.
1344[clinic start generated code]*/
1345
1346static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001347binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
Larry Hastings89964c42015-04-14 18:07:59 -04001348 int istext, int header)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001349/*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001350{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 Py_ssize_t in, out;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001352 const unsigned char *databuf;
1353 unsigned char *odata;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001354 Py_ssize_t datalen = 0, odatalen = 0;
1355 PyObject *rv;
1356 unsigned int linelen = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001357 unsigned char ch;
1358 int crlf = 0;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001359 const unsigned char *p;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001360
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001361 databuf = data->buf;
1362 datalen = data->len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001363
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 /* See if this string is using CRLF line ends */
1365 /* XXX: this function has the side effect of converting all of
1366 * the end of lines to be the same depending on this detection
1367 * here */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001368 p = (const unsigned char *) memchr(databuf, '\n', datalen);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001369 if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 crlf = 1;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001371
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 /* First, scan to see how many characters need to be encoded */
1373 in = 0;
1374 while (in < datalen) {
Benjamin Peterson4f976512016-08-13 18:33:33 -07001375 Py_ssize_t delta = 0;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001376 if ((databuf[in] > 126) ||
1377 (databuf[in] == '=') ||
1378 (header && databuf[in] == '_') ||
1379 ((databuf[in] == '.') && (linelen == 0) &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001380 (in + 1 == datalen || databuf[in+1] == '\n' ||
1381 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001382 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1383 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1384 ((databuf[in] < 33) &&
1385 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1386 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 {
1388 if ((linelen + 3) >= MAXLINESIZE) {
1389 linelen = 0;
1390 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001391 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001393 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 }
1395 linelen += 3;
Benjamin Peterson4f976512016-08-13 18:33:33 -07001396 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 in++;
1398 }
1399 else {
1400 if (istext &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001401 ((databuf[in] == '\n') ||
1402 ((in+1 < datalen) && (databuf[in] == '\r') &&
1403 (databuf[in+1] == '\n'))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 {
1405 linelen = 0;
1406 /* Protect against whitespace on end of line */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001407 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
Benjamin Peterson4f976512016-08-13 18:33:33 -07001408 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001409 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001410 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001411 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001412 delta += 1;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001413 if (databuf[in] == '\r')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 in += 2;
1415 else
1416 in++;
1417 }
1418 else {
1419 if ((in + 1 != datalen) &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001420 (databuf[in+1] != '\n') &&
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001421 (linelen + 1) >= MAXLINESIZE) {
1422 linelen = 0;
1423 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001424 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001426 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 }
1428 linelen++;
Benjamin Peterson4f976512016-08-13 18:33:33 -07001429 delta++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 in++;
1431 }
1432 }
Benjamin Peterson4f976512016-08-13 18:33:33 -07001433 if (PY_SSIZE_T_MAX - delta < odatalen) {
Benjamin Peterson4f976512016-08-13 18:33:33 -07001434 PyErr_NoMemory();
1435 return NULL;
1436 }
1437 odatalen += delta;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 }
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001439
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 /* We allocate the output same size as input, this is overkill.
1441 * The previous implementation used calloc() so we'll zero out the
1442 * memory here too, since PyMem_Malloc() does not guarantee that.
1443 */
1444 odata = (unsigned char *) PyMem_Malloc(odatalen);
1445 if (odata == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001446 PyErr_NoMemory();
1447 return NULL;
1448 }
1449 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 in = out = linelen = 0;
1452 while (in < datalen) {
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001453 if ((databuf[in] > 126) ||
1454 (databuf[in] == '=') ||
1455 (header && databuf[in] == '_') ||
1456 ((databuf[in] == '.') && (linelen == 0) &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001457 (in + 1 == datalen || databuf[in+1] == '\n' ||
1458 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001459 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1460 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1461 ((databuf[in] < 33) &&
1462 (databuf[in] != '\r') && (databuf[in] != '\n') &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001463 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001464 {
1465 if ((linelen + 3 )>= MAXLINESIZE) {
1466 odata[out++] = '=';
1467 if (crlf) odata[out++] = '\r';
1468 odata[out++] = '\n';
1469 linelen = 0;
1470 }
1471 odata[out++] = '=';
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001472 to_hex(databuf[in], &odata[out]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001473 out += 2;
1474 in++;
1475 linelen += 3;
1476 }
1477 else {
1478 if (istext &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001479 ((databuf[in] == '\n') ||
1480 ((in+1 < datalen) && (databuf[in] == '\r') &&
1481 (databuf[in+1] == '\n'))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001482 {
1483 linelen = 0;
1484 /* Protect against whitespace on end of line */
1485 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1486 ch = odata[out-1];
1487 odata[out-1] = '=';
1488 to_hex(ch, &odata[out]);
1489 out += 2;
1490 }
Tim Peters934c1a12002-07-02 22:24:50 +00001491
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 if (crlf) odata[out++] = '\r';
1493 odata[out++] = '\n';
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001494 if (databuf[in] == '\r')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 in += 2;
1496 else
1497 in++;
1498 }
1499 else {
1500 if ((in + 1 != datalen) &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001501 (databuf[in+1] != '\n') &&
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 (linelen + 1) >= MAXLINESIZE) {
1503 odata[out++] = '=';
1504 if (crlf) odata[out++] = '\r';
1505 odata[out++] = '\n';
1506 linelen = 0;
1507 }
1508 linelen++;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001509 if (header && databuf[in] == ' ') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 odata[out++] = '_';
1511 in++;
1512 }
1513 else {
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001514 odata[out++] = databuf[in++];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 }
1516 }
1517 }
1518 }
1519 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001520 PyMem_Free(odata);
1521 return NULL;
1522 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001523 PyMem_Free(odata);
1524 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001525}
Barry Warsawe977c212000-08-15 06:07:13 +00001526
Jack Jansen72781191995-08-07 14:34:15 +00001527/* List of functions defined in the module */
1528
1529static struct PyMethodDef binascii_module_methods[] = {
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001530 BINASCII_A2B_UU_METHODDEF
1531 BINASCII_B2A_UU_METHODDEF
1532 BINASCII_A2B_BASE64_METHODDEF
1533 BINASCII_B2A_BASE64_METHODDEF
1534 BINASCII_A2B_HQX_METHODDEF
1535 BINASCII_B2A_HQX_METHODDEF
1536 BINASCII_A2B_HEX_METHODDEF
1537 BINASCII_B2A_HEX_METHODDEF
Zachary Wareb176d402015-01-20 13:59:46 -06001538 BINASCII_HEXLIFY_METHODDEF
1539 BINASCII_UNHEXLIFY_METHODDEF
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001540 BINASCII_RLECODE_HQX_METHODDEF
1541 BINASCII_RLEDECODE_HQX_METHODDEF
1542 BINASCII_CRC_HQX_METHODDEF
1543 BINASCII_CRC32_METHODDEF
1544 BINASCII_A2B_QP_METHODDEF
1545 BINASCII_B2A_QP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001547};
1548
1549
Martin v. Löwis1a214512008-06-11 05:26:20 +00001550/* Initialization function for the module (*must* be called PyInit_binascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001551PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001552
Martin v. Löwis1a214512008-06-11 05:26:20 +00001553
1554static struct PyModuleDef binasciimodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001555 PyModuleDef_HEAD_INIT,
1556 "binascii",
1557 doc_binascii,
1558 -1,
1559 binascii_module_methods,
1560 NULL,
1561 NULL,
1562 NULL,
1563 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001564};
1565
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001566PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001567PyInit_binascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001568{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 PyObject *m, *d;
Jack Jansen72781191995-08-07 14:34:15 +00001570
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001571 /* Create the module and add the functions */
1572 m = PyModule_Create(&binasciimodule);
1573 if (m == NULL)
1574 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +00001575
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001576 d = PyModule_GetDict(m);
Jack Jansen72781191995-08-07 14:34:15 +00001577
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001578 Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1579 PyDict_SetItemString(d, "Error", Error);
1580 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1581 PyDict_SetItemString(d, "Incomplete", Incomplete);
1582 if (PyErr_Occurred()) {
1583 Py_DECREF(m);
1584 m = NULL;
1585 }
1586 return m;
Jack Jansen72781191995-08-07 14:34:15 +00001587}