blob: 1f3248b6049b31e98328e8962d21b95b33c98e22 [file] [log] [blame]
/*
** Routines to represent binary data in ASCII and vice-versa
**
** This module currently supports the following encodings:
** uuencode:
**      each line encodes 45 bytes (except possibly the last)
**      First char encodes (binary) length, rest data
**      each char encodes 6 bits, as follows:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**      short binary data is zero-extended (so the bits are always in the
**      right place), this does *not* reflect in the length.
** base64:
**      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
**      is done via a table.
**      Short binary data is filled (in ASCII) with '='.
** hqx:
**      File starts with introductory text, real data starts and ends
**      with colons.
**      Data consists of three similar parts: info, datafork, resourcefork.
**      Each part is protected (at the end) with a 16-bit crc
**      The binary data is run-length encoded, and then ascii-fied:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding is table-driven, see the code.
**      Short binary data results in the runt ascii-byte being output with
**      the bits in the right place.
**
** While I was reading dozens of programs that encode or decode the formats
** here (documentation? hihi:-) I have formulated Jansen's Observation:
**
**      Programs that encode binary data in ASCII are written in
**      such a style that they are as unreadable as possible. Devices used
**      include unnecessary global variables, burying important tables
**      in unrelated sourcefiles, putting functions in include files,
**      using seemingly-descriptive variable names for different purposes,
**      calls to empty subroutines and a host of others.
**
** I have attempted to break with this tradition, but I guess that that
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on rfc 1521 et al
** quoted-printable encoding specifies that non printable characters (anything
** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
** of the character.  It also specifies some other behavior to enable 8bit data
** in a mail message with little difficulty (maximum line sizes, protecting
** some cases of whitespace, etc).
**
** Brandon Long, September 2001.
*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
Gregory P. Smith9c6b9162015-04-26 00:42:13 +000059#include "pystrhex.h"
Christian Heimes1dc54002008-03-24 02:19:29 +000060#ifdef USE_ZLIB_CRC32
61#include "zlib.h"
62#endif
Jack Jansen72781191995-08-07 14:34:15 +000063
Marcel Plch33e71e02019-05-22 13:51:26 +020064typedef struct binascii_state {
65 PyObject *Error;
66 PyObject *Incomplete;
67} binascii_state;
Jack Jansen72781191995-08-07 14:34:15 +000068
Hai Shiaa0c0802020-03-12 00:50:52 +080069static binascii_state *
70get_binascii_state(PyObject *module)
71{
72 return (binascii_state *)PyModule_GetState(module);
73}
74
/*
** hqx lookup table, ascii->binary.
**
** Indexed by the input byte.  Entries 0x00..0x3F are the 6-bit value the
** character stands for; the three marker values below are returned for
** everything else.
*/

#define RUNCHAR 0x90

#define DONE 0x7F   /* ':' -- the terminating colon */
#define SKIP 0x7E   /* '\n' and '\r' -- silently ignored */
#define FAIL 0x7D   /* any other byte -- not part of the alphabet */

static const unsigned char table_a2b_hqx[256] = {
    /* 0x00-0x07:  ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G  */
                   FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x08-0x0F:  BS    TAB   LF    ^K    ^L    CR    ^N    ^O  */
                   FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
    /* 0x10-0x17:  ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W  */
                   FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x18-0x1F:  ^X    ^Y    ^Z    ESC   FS    GS    RS    US  */
                   FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x20-0x27:  SP    !     "     #     $     %     &     '   */
                   FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    /* 0x28-0x2F:  (     )     star  +     ,     -     .     slash */
                   0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
    /* 0x30-0x37:  0     1     2     3     4     5     6     7   */
                   0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
    /* 0x38-0x3F:  8     9     :     ;     <     =     >     ?   */
                   0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x40-0x47:  @     A     B     C     D     E     F     G   */
                   0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
    /* 0x48-0x4F:  H     I     J     K     L     M     N     O   */
                   0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
    /* 0x50-0x57:  P     Q     R     S     T     U     V     W   */
                   0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
    /* 0x58-0x5F:  X     Y     Z     [     bksl  ]     ^     _   */
                   0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
    /* 0x60-0x67:  `     a     b     c     d     e     f     g   */
                   0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
    /* 0x68-0x6F:  h     i     j     k     l     m     n     o   */
                   0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
    /* 0x70-0x77:  p     q     r     s     t     u     v     w   */
                   0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x78-0x7F:  x     y     z     {     |     }     ~     DEL */
                   FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,

    /* 0x80-0xFF: no byte with the high bit set is valid. */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
135
/* hqx alphabet, binary->ascii: entry i is the character that stands for
   the 6-bit value i (64 characters plus the literal's terminating NUL). */
static const unsigned char table_b2a_hqx[] =
    "!\"#$%&'()*+,-012"     /* values 0x00-0x0F */
    "345689@ABCDEFGHI"      /* values 0x10-0x1F */
    "JKLMNPQRSTUVXYZ["      /* values 0x20-0x2F */
    "`abcdefhijklmpqr";     /* values 0x30-0x3F */
Jack Jansen72781191995-08-07 14:34:15 +0000138
/* base64 lookup table, ascii->binary, indexed by the input byte.
 *
 * Alphabet characters map to their 6-bit value.  Every other entry is
 * written as -1, which is stored as 255 in an unsigned char; the decoder
 * treats any value >= 64 as "not a base64 character".  The pad character
 * '=' deliberately maps to 0.
 */
static const unsigned char table_a2b_base64[] = {
    /* 0x00-0x0F */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x10-0x1F */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x20-0x2F: '+' -> 62, '/' -> 63 */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    /* 0x30-0x3F: '0'..'9' -> 52..61; note PAD ('=') -> 0 */
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1,
    /* 0x40-0x4F: 'A'..'O' -> 0..14 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    /* 0x50-0x5F: 'P'..'Z' -> 15..25 */
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    /* 0x60-0x6F: 'a'..'o' -> 26..40 */
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    /* 0x70-0x7F: 'p'..'z' -> 41..51 */
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,

    /* 0x80-0xFF: no byte with the high bit set is valid. */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};
158
/* Character used to pad a short final base64 group. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/* base64 alphabet, binary->ascii: entry i is the character that stands for
   the 6-bit value i (64 characters plus the literal's terminating NUL). */
static const unsigned char table_b2a_base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"    /* values  0-25 */
    "abcdefghijklmnopqrstuvwxyz"    /* values 26-51 */
    "0123456789"                    /* values 52-61 */
    "+/";                           /* values 62-63 */
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000166
167
168
/* 16-bit CRC lookup table for the hqx format.
 *
 * Entry i is the result of pushing the byte i, most significant bit first,
 * through a 16-bit shift register with generator polynomial 0x1021
 * (so crctab_hqx[1] == 0x1021).
 */
static const unsigned short crctab_hqx[256] = {
    /* 0x00 */ 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    /* 0x08 */ 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    /* 0x10 */ 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    /* 0x18 */ 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    /* 0x20 */ 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    /* 0x28 */ 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    /* 0x30 */ 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    /* 0x38 */ 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    /* 0x40 */ 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    /* 0x48 */ 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    /* 0x50 */ 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    /* 0x58 */ 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    /* 0x60 */ 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    /* 0x68 */ 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    /* 0x70 */ 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    /* 0x78 */ 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    /* 0x80 */ 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    /* 0x88 */ 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    /* 0x90 */ 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    /* 0x98 */ 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    /* 0xa0 */ 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    /* 0xa8 */ 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    /* 0xb0 */ 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    /* 0xb8 */ 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    /* 0xc0 */ 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    /* 0xc8 */ 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    /* 0xd0 */ 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    /* 0xd8 */ 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    /* 0xe0 */ 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    /* 0xe8 */ 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    /* 0xf0 */ 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    /* 0xf8 */ 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
203
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200204/*[clinic input]
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200205module binascii
206[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300207/*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200208
209/*[python input]
210
211class ascii_buffer_converter(CConverter):
212 type = 'Py_buffer'
213 converter = 'ascii_buffer_converter'
214 impl_by_reference = True
Benjamin Petersonb62deac2014-01-26 10:41:58 -0500215 c_default = "{NULL, NULL}"
216
217 def cleanup(self):
218 name = self.name
219 return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"])
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200220
221[python start generated code]*/
Larry Hastings581ee362014-01-28 05:00:08 -0800222/*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200223
Antoine Pitrou08316762011-12-20 13:58:41 +0100224static int
225ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
226{
227 if (arg == NULL) {
228 PyBuffer_Release(buf);
229 return 1;
230 }
231 if (PyUnicode_Check(arg)) {
232 if (PyUnicode_READY(arg) < 0)
233 return 0;
234 if (!PyUnicode_IS_ASCII(arg)) {
235 PyErr_SetString(PyExc_ValueError,
236 "string argument should contain only ASCII characters");
237 return 0;
238 }
239 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
240 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
241 buf->len = PyUnicode_GET_LENGTH(arg);
242 buf->obj = NULL;
243 return 1;
244 }
245 if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
246 PyErr_Format(PyExc_TypeError,
247 "argument should be bytes, buffer or ASCII string, "
Berker Peksag3cd30c22015-02-15 00:31:00 +0200248 "not '%.100s'", Py_TYPE(arg)->tp_name);
Antoine Pitrou08316762011-12-20 13:58:41 +0100249 return 0;
250 }
251 if (!PyBuffer_IsContiguous(buf, 'C')) {
252 PyErr_Format(PyExc_TypeError,
253 "argument should be a contiguous buffer, "
Berker Peksag3cd30c22015-02-15 00:31:00 +0200254 "not '%.100s'", Py_TYPE(arg)->tp_name);
Antoine Pitrou08316762011-12-20 13:58:41 +0100255 PyBuffer_Release(buf);
256 return 0;
257 }
258 return Py_CLEANUP_SUPPORTED;
259}
260
Larry Hastingsf256c222014-01-25 21:30:37 -0800261#include "clinic/binascii.c.h"
Antoine Pitrou08316762011-12-20 13:58:41 +0100262
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200263/*[clinic input]
264binascii.a2b_uu
265
Serhiy Storchaka12785612014-01-25 11:49:49 +0200266 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200267 /
268
269Decode a line of uuencoded data.
270[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000271
272static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300273binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
274/*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
Jack Jansen72781191995-08-07 14:34:15 +0000275{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200276 const unsigned char *ascii_data;
277 unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000278 int leftbits = 0;
279 unsigned char this_ch;
280 unsigned int leftchar = 0;
281 PyObject *rv;
282 Py_ssize_t ascii_len, bin_len;
Marcel Plch33e71e02019-05-22 13:51:26 +0200283 binascii_state *state;
Tim Peters934c1a12002-07-02 22:24:50 +0000284
Serhiy Storchaka12785612014-01-25 11:49:49 +0200285 ascii_data = data->buf;
286 ascii_len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000288 assert(ascii_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000289
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000290 /* First byte: binary data length (in bytes) */
291 bin_len = (*ascii_data++ - ' ') & 077;
292 ascii_len--;
Jack Jansen72781191995-08-07 14:34:15 +0000293
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 /* Allocate the buffer */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200295 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000297 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000298
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
300 /* XXX is it really best to add NULs if there's no more data */
301 this_ch = (ascii_len > 0) ? *ascii_data : 0;
302 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
303 /*
304 ** Whitespace. Assume some spaces got eaten at
305 ** end-of-line. (We check this later)
306 */
307 this_ch = 0;
308 } else {
309 /* Check the character for legality
310 ** The 64 in stead of the expected 63 is because
311 ** there are a few uuencodes out there that use
312 ** '`' as zero instead of space.
313 */
314 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200315 state = PyModule_GetState(module);
316 if (state == NULL) {
317 return NULL;
318 }
319 PyErr_SetString(state->Error, "Illegal char");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 Py_DECREF(rv);
321 return NULL;
322 }
323 this_ch = (this_ch - ' ') & 077;
324 }
325 /*
326 ** Shift it in on the low end, and see if there's
327 ** a byte ready for output.
328 */
329 leftchar = (leftchar << 6) | (this_ch);
330 leftbits += 6;
331 if ( leftbits >= 8 ) {
332 leftbits -= 8;
333 *bin_data++ = (leftchar >> leftbits) & 0xff;
334 leftchar &= ((1 << leftbits) - 1);
335 bin_len--;
336 }
337 }
338 /*
339 ** Finally, check that if there's anything left on the line
340 ** that it's whitespace only.
341 */
342 while( ascii_len-- > 0 ) {
343 this_ch = *ascii_data++;
344 /* Extra '`' may be written as padding in some cases */
345 if ( this_ch != ' ' && this_ch != ' '+64 &&
346 this_ch != '\n' && this_ch != '\r' ) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200347 state = PyModule_GetState(module);
348 if (state == NULL) {
349 return NULL;
350 }
351 PyErr_SetString(state->Error, "Trailing garbage");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 Py_DECREF(rv);
353 return NULL;
354 }
355 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000356 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000357}
358
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200359/*[clinic input]
360binascii.b2a_uu
361
362 data: Py_buffer
363 /
Xiang Zhang13f1f422017-05-03 11:16:21 +0800364 *
365 backtick: bool(accept={int}) = False
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200366
367Uuencode line of data.
368[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000369
Jack Jansen72781191995-08-07 14:34:15 +0000370static PyObject *
Xiang Zhang13f1f422017-05-03 11:16:21 +0800371binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
372/*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
Jack Jansen72781191995-08-07 14:34:15 +0000373{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200374 unsigned char *ascii_data;
375 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 int leftbits = 0;
377 unsigned char this_ch;
378 unsigned int leftchar = 0;
Marcel Plch33e71e02019-05-22 13:51:26 +0200379 binascii_state *state;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200380 Py_ssize_t bin_len, out_len;
381 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000382
Victor Stinnereaaaf132015-10-13 10:51:47 +0200383 _PyBytesWriter_Init(&writer);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200384 bin_data = data->buf;
385 bin_len = data->len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000386 if ( bin_len > 45 ) {
387 /* The 45 is a limit that appears in all uuencode's */
Marcel Plch33e71e02019-05-22 13:51:26 +0200388 state = PyModule_GetState(module);
389 if (state == NULL) {
390 return NULL;
391 }
392 PyErr_SetString(state->Error, "At most 45 bytes at once");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 return NULL;
394 }
Jack Jansen72781191995-08-07 14:34:15 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* We're lazy and allocate to much (fixed up later) */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200397 out_len = 2 + (bin_len + 2) / 3 * 4;
398 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
399 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 /* Store the length */
Xiang Zhang13f1f422017-05-03 11:16:21 +0800403 if (backtick && !bin_len)
404 *ascii_data++ = '`';
405 else
Segev Finer679b5662017-07-27 01:17:57 +0300406 *ascii_data++ = ' ' + (unsigned char)bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
409 /* Shift the data (or padding) into our buffer */
410 if ( bin_len > 0 ) /* Data */
411 leftchar = (leftchar << 8) | *bin_data;
412 else /* Padding */
413 leftchar <<= 8;
414 leftbits += 8;
Jack Jansen72781191995-08-07 14:34:15 +0000415
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000416 /* See if there are 6-bit groups ready */
417 while ( leftbits >= 6 ) {
418 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
419 leftbits -= 6;
Xiang Zhang13f1f422017-05-03 11:16:21 +0800420 if (backtick && !this_ch)
421 *ascii_data++ = '`';
422 else
423 *ascii_data++ = this_ch + ' ';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000424 }
425 }
426 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000427
Victor Stinnereaaaf132015-10-13 10:51:47 +0200428 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen72781191995-08-07 14:34:15 +0000429}
430
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200431/*[clinic input]
432binascii.a2b_base64
433
Serhiy Storchaka12785612014-01-25 11:49:49 +0200434 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200435 /
436
437Decode a line of base64 data.
438[clinic start generated code]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000439
440static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300441binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
442/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000443{
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500444 assert(data->len >= 0);
Tim Peters934c1a12002-07-02 22:24:50 +0000445
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500446 const unsigned char *ascii_data = data->buf;
447 size_t ascii_len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 /* Allocate the buffer */
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500450 Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
451 _PyBytesWriter writer;
452 _PyBytesWriter_Init(&writer);
453 unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
Victor Stinnereaaaf132015-10-13 10:51:47 +0200454 if (bin_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000455 return NULL;
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500456 unsigned char *bin_data_start = bin_data;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000457
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500458 int quad_pos = 0;
459 unsigned char leftchar = 0;
460 int pads = 0;
461 for (size_t i = 0; i < ascii_len; i++) {
462 unsigned char this_ch = ascii_data[i];
Guido van Rossum2db4f471999-10-19 19:05:14 +0000463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 /* Check for pad sequences and ignore
465 ** the invalid ones.
466 */
467 if (this_ch == BASE64_PAD) {
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500468 if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000469 /* A pad sequence means no more input.
470 ** We've already interpreted the data
471 ** from the quad at this point.
472 */
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500473 goto done;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 }
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500475 continue;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000476 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000477
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500478 this_ch = table_a2b_base64[this_ch];
479 if (this_ch >= 64) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000480 continue;
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500481 }
482 pads = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000483
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500484 switch (quad_pos) {
485 case 0:
486 quad_pos = 1;
487 leftchar = this_ch;
488 break;
489 case 1:
490 quad_pos = 2;
491 *bin_data++ = (leftchar << 2) | (this_ch >> 4);
492 leftchar = this_ch & 0x0f;
493 break;
494 case 2:
495 quad_pos = 3;
496 *bin_data++ = (leftchar << 4) | (this_ch >> 2);
497 leftchar = this_ch & 0x03;
498 break;
499 case 3:
500 quad_pos = 0;
501 *bin_data++ = (leftchar << 6) | (this_ch);
502 leftchar = 0;
503 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 }
505 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000506
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500507 if (quad_pos != 0) {
508 binascii_state *state = PyModule_GetState(module);
Marcel Plch33e71e02019-05-22 13:51:26 +0200509 if (state == NULL) {
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500510 /* error already set, from PyModule_GetState */
511 } else if (quad_pos == 1) {
Tal Einat1b85c712018-06-10 10:01:50 +0300512 /*
513 ** There is exactly one extra valid, non-padding, base64 character.
514 ** This is an invalid length, as there is no possible input that
515 ** could encoded into such a base64 string.
516 */
Marcel Plch33e71e02019-05-22 13:51:26 +0200517 PyErr_Format(state->Error,
Tal Einat1fba2ff2018-09-28 08:57:22 +0300518 "Invalid base64-encoded string: "
Serhiy Storchakad53fe5f2019-03-13 22:59:55 +0200519 "number of data characters (%zd) cannot be 1 more "
Tal Einat1fba2ff2018-09-28 08:57:22 +0300520 "than a multiple of 4",
521 (bin_data - bin_data_start) / 3 * 4 + 1);
Tal Einat1b85c712018-06-10 10:01:50 +0300522 } else {
Marcel Plch33e71e02019-05-22 13:51:26 +0200523 PyErr_SetString(state->Error, "Incorrect padding");
Tal Einat1b85c712018-06-10 10:01:50 +0300524 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200525 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000526 return NULL;
527 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000528
Sergey Fedoseev1c5e68e2019-07-14 17:15:32 +0500529done:
Victor Stinnereaaaf132015-10-13 10:51:47 +0200530 return _PyBytesWriter_Finish(&writer, bin_data);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000531}
532
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200533
534/*[clinic input]
535binascii.b2a_base64
536
537 data: Py_buffer
Xiang Zhang1374dbb2017-05-01 13:12:07 +0800538 /
Victor Stinnere84c9762015-10-11 11:01:02 +0200539 *
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200540 newline: bool(accept={int}) = True
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200541
542Base64-code line of data.
543[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000544
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000545static PyObject *
Serhiy Storchaka2954f832016-07-07 18:20:03 +0300546binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
Xiang Zhang1374dbb2017-05-01 13:12:07 +0800547/*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000548{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200549 unsigned char *ascii_data;
550 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 int leftbits = 0;
552 unsigned char this_ch;
553 unsigned int leftchar = 0;
Victor Stinnere84c9762015-10-11 11:01:02 +0200554 Py_ssize_t bin_len, out_len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200555 _PyBytesWriter writer;
Marcel Plch33e71e02019-05-22 13:51:26 +0200556 binascii_state *state;
Tim Peters934c1a12002-07-02 22:24:50 +0000557
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200558 bin_data = data->buf;
559 bin_len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200560 _PyBytesWriter_Init(&writer);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000561
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000562 assert(bin_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000563
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000564 if ( bin_len > BASE64_MAXBIN ) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200565 state = PyModule_GetState(module);
566 if (state == NULL) {
567 return NULL;
568 }
569 PyErr_SetString(state->Error, "Too much data for base64 line");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 return NULL;
571 }
Tim Peters934c1a12002-07-02 22:24:50 +0000572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000573 /* We're lazy and allocate too much (fixed up later).
Victor Stinnere84c9762015-10-11 11:01:02 +0200574 "+2" leaves room for up to two pad characters.
575 Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
576 out_len = bin_len*2 + 2;
577 if (newline)
578 out_len++;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200579 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
580 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000581 return NULL;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000582
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000583 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
584 /* Shift the data into our buffer */
585 leftchar = (leftchar << 8) | *bin_data;
586 leftbits += 8;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000588 /* See if there are 6-bit groups ready */
589 while ( leftbits >= 6 ) {
590 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
591 leftbits -= 6;
592 *ascii_data++ = table_b2a_base64[this_ch];
593 }
594 }
595 if ( leftbits == 2 ) {
596 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
597 *ascii_data++ = BASE64_PAD;
598 *ascii_data++ = BASE64_PAD;
599 } else if ( leftbits == 4 ) {
600 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
601 *ascii_data++ = BASE64_PAD;
602 }
Victor Stinnere84c9762015-10-11 11:01:02 +0200603 if (newline)
604 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000605
Victor Stinnereaaaf132015-10-13 10:51:47 +0200606 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000607}
608
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200609/*[clinic input]
610binascii.a2b_hqx
611
Serhiy Storchaka12785612014-01-25 11:49:49 +0200612 data: ascii_buffer
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200613 /
614
615Decode .hqx coding.
616[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000617
618static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300619binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data)
620/*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/
Jack Jansen72781191995-08-07 14:34:15 +0000621{
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100622 if (PyErr_WarnEx(PyExc_DeprecationWarning,
623 "binascii.a2b_hqx() is deprecated", 1) < 0) {
624 return NULL;
625 }
626
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200627 const unsigned char *ascii_data;
628 unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000629 int leftbits = 0;
630 unsigned char this_ch;
631 unsigned int leftchar = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200632 PyObject *res;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000633 Py_ssize_t len;
634 int done = 0;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200635 _PyBytesWriter writer;
Marcel Plch33e71e02019-05-22 13:51:26 +0200636 binascii_state *state;
Tim Peters934c1a12002-07-02 22:24:50 +0000637
Serhiy Storchaka12785612014-01-25 11:49:49 +0200638 ascii_data = data->buf;
639 len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200640 _PyBytesWriter_Init(&writer);
Jack Jansen72781191995-08-07 14:34:15 +0000641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000643
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200644 if (len > PY_SSIZE_T_MAX - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000645 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000647 /* Allocate a string that is too big (fixed later)
648 Add two to the initial length to prevent interning which
649 would preclude subsequent resizing. */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200650 bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
651 if (bin_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000654 for( ; len > 0 ; len--, ascii_data++ ) {
655 /* Get the byte and look it up */
656 this_ch = table_a2b_hqx[*ascii_data];
657 if ( this_ch == SKIP )
658 continue;
659 if ( this_ch == FAIL ) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200660 state = PyModule_GetState(module);
661 if (state == NULL) {
662 return NULL;
663 }
664 PyErr_SetString(state->Error, "Illegal char");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200665 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000666 return NULL;
667 }
668 if ( this_ch == DONE ) {
669 /* The terminating colon */
670 done = 1;
671 break;
672 }
Jack Jansen72781191995-08-07 14:34:15 +0000673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 /* Shift it into the buffer and see if any bytes are ready */
675 leftchar = (leftchar << 6) | (this_ch);
676 leftbits += 6;
677 if ( leftbits >= 8 ) {
678 leftbits -= 8;
679 *bin_data++ = (leftchar >> leftbits) & 0xff;
680 leftchar &= ((1 << leftbits) - 1);
681 }
682 }
Tim Peters934c1a12002-07-02 22:24:50 +0000683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000684 if ( leftbits && !done ) {
Marcel Plch33e71e02019-05-22 13:51:26 +0200685 state = PyModule_GetState(module);
686 if (state == NULL) {
687 return NULL;
688 }
689 PyErr_SetString(state->Incomplete,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 "String has incomplete number of bytes");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200691 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000692 return NULL;
693 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000694
Victor Stinnereaaaf132015-10-13 10:51:47 +0200695 res = _PyBytesWriter_Finish(&writer, bin_data);
696 if (res == NULL)
697 return NULL;
698 return Py_BuildValue("Ni", res, done);
Jack Jansen72781191995-08-07 14:34:15 +0000699}
700
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200701
702/*[clinic input]
703binascii.rlecode_hqx
704
705 data: Py_buffer
706 /
707
708Binhex RLE-code binary data.
709[clinic start generated code]*/
Jack Jansen72781191995-08-07 14:34:15 +0000710
711static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300712binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data)
713/*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/
Jack Jansen72781191995-08-07 14:34:15 +0000714{
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100715 if (PyErr_WarnEx(PyExc_DeprecationWarning,
716 "binascii.rlecode_hqx() is deprecated", 1) < 0) {
717 return NULL;
718 }
719
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200720 const unsigned char *in_data;
721 unsigned char *out_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000722 unsigned char ch;
723 Py_ssize_t in, inend, len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200724 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000725
Victor Stinnereaaaf132015-10-13 10:51:47 +0200726 _PyBytesWriter_Init(&writer);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200727 in_data = data->buf;
728 len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000729
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000730 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000731
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200732 if (len > PY_SSIZE_T_MAX / 2 - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000733 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000735 /* Worst case: output is twice as big as input (fixed later) */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200736 out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
737 if (out_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000738 return NULL;
Tim Peters934c1a12002-07-02 22:24:50 +0000739
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000740 for( in=0; in<len; in++) {
741 ch = in_data[in];
742 if ( ch == RUNCHAR ) {
743 /* RUNCHAR. Escape it. */
744 *out_data++ = RUNCHAR;
745 *out_data++ = 0;
746 } else {
747 /* Check how many following are the same */
748 for(inend=in+1;
749 inend<len && in_data[inend] == ch &&
750 inend < in+255;
751 inend++) ;
752 if ( inend - in > 3 ) {
753 /* More than 3 in a row. Output RLE. */
754 *out_data++ = ch;
755 *out_data++ = RUNCHAR;
Antoine Pitrou40455752010-08-15 18:51:10 +0000756 *out_data++ = (unsigned char) (inend-in);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000757 in = inend-1;
758 } else {
759 /* Less than 3. Output the byte itself */
760 *out_data++ = ch;
761 }
762 }
763 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200764
765 return _PyBytesWriter_Finish(&writer, out_data);
Jack Jansen72781191995-08-07 14:34:15 +0000766}
767
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200768
769/*[clinic input]
770binascii.b2a_hqx
771
772 data: Py_buffer
773 /
774
775Encode .hqx data.
776[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000777
Jack Jansen72781191995-08-07 14:34:15 +0000778static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300779binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data)
780/*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/
Jack Jansen72781191995-08-07 14:34:15 +0000781{
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100782 if (PyErr_WarnEx(PyExc_DeprecationWarning,
783 "binascii.b2a_hqx() is deprecated", 1) < 0) {
784 return NULL;
785 }
786
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200787 unsigned char *ascii_data;
788 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000789 int leftbits = 0;
790 unsigned char this_ch;
791 unsigned int leftchar = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000792 Py_ssize_t len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200793 _PyBytesWriter writer;
Tim Peters934c1a12002-07-02 22:24:50 +0000794
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200795 bin_data = data->buf;
796 len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200797 _PyBytesWriter_Init(&writer);
Jack Jansen72781191995-08-07 14:34:15 +0000798
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000799 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000800
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200801 if (len > PY_SSIZE_T_MAX / 2 - 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000802 return PyErr_NoMemory();
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000804 /* Allocate a buffer that is at least large enough */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200805 ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
806 if (ascii_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 return NULL;
Tim Peters934c1a12002-07-02 22:24:50 +0000808
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000809 for( ; len > 0 ; len--, bin_data++ ) {
810 /* Shift into our buffer, and output any 6bits ready */
811 leftchar = (leftchar << 8) | *bin_data;
812 leftbits += 8;
813 while ( leftbits >= 6 ) {
814 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
815 leftbits -= 6;
816 *ascii_data++ = table_b2a_hqx[this_ch];
817 }
818 }
819 /* Output a possible runt byte */
820 if ( leftbits ) {
821 leftchar <<= (6-leftbits);
822 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
823 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200824
825 return _PyBytesWriter_Finish(&writer, ascii_data);
Jack Jansen72781191995-08-07 14:34:15 +0000826}
827
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200828
829/*[clinic input]
830binascii.rledecode_hqx
831
832 data: Py_buffer
833 /
834
835Decode hexbin RLE-coded string.
836[clinic start generated code]*/
Tim Peters934c1a12002-07-02 22:24:50 +0000837
Jack Jansen72781191995-08-07 14:34:15 +0000838static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300839binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data)
840/*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/
Jack Jansen72781191995-08-07 14:34:15 +0000841{
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100842 if (PyErr_WarnEx(PyExc_DeprecationWarning,
843 "binascii.rledecode_hqx() is deprecated", 1) < 0) {
844 return NULL;
845 }
846
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200847 const unsigned char *in_data;
848 unsigned char *out_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000849 unsigned char in_byte, in_repeat;
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200850 Py_ssize_t in_len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200851 _PyBytesWriter writer;
Jack Jansen72781191995-08-07 14:34:15 +0000852
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200853 in_data = data->buf;
854 in_len = data->len;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200855 _PyBytesWriter_Init(&writer);
Marcel Plch33e71e02019-05-22 13:51:26 +0200856 binascii_state *state;
Jack Jansen72781191995-08-07 14:34:15 +0000857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000858 assert(in_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000859
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000860 /* Empty string is a special case */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200861 if ( in_len == 0 )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 return PyBytes_FromStringAndSize("", 0);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200863 else if (in_len > PY_SSIZE_T_MAX / 2)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000864 return PyErr_NoMemory();
Jack Jansen72781191995-08-07 14:34:15 +0000865
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000866 /* Allocate a buffer of reasonable size. Resized when needed */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200867 out_data = _PyBytesWriter_Alloc(&writer, in_len);
Victor Stinnereaaaf132015-10-13 10:51:47 +0200868 if (out_data == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000869 return NULL;
Victor Stinnereaaaf132015-10-13 10:51:47 +0200870
871 /* Use overallocation */
872 writer.overallocate = 1;
Jack Jansen72781191995-08-07 14:34:15 +0000873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 /*
875 ** We need two macros here to get/put bytes and handle
876 ** end-of-buffer for input and output strings.
877 */
Victor Stinnereaaaf132015-10-13 10:51:47 +0200878#define INBYTE(b) \
879 do { \
880 if ( --in_len < 0 ) { \
Marcel Plch33e71e02019-05-22 13:51:26 +0200881 state = PyModule_GetState(module); \
882 if (state == NULL) { \
883 return NULL; \
884 } \
885 PyErr_SetString(state->Incomplete, ""); \
Victor Stinnereaaaf132015-10-13 10:51:47 +0200886 goto error; \
887 } \
888 b = *in_data++; \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000889 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000890
Victor Stinnereaaaf132015-10-13 10:51:47 +0200891 /*
892 ** Handle first byte separately (since we have to get angry
893 ** in case of an orphaned RLE code).
894 */
895 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000896
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 if (in_byte == RUNCHAR) {
898 INBYTE(in_repeat);
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200899 /* only 1 byte will be written, but 2 bytes were preallocated:
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700900 subtract 1 byte to prevent overallocation */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200901 writer.min_size--;
902
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 if (in_repeat != 0) {
904 /* Note Error, not Incomplete (which is at the end
905 ** of the string only). This is a programmer error.
906 */
Marcel Plch33e71e02019-05-22 13:51:26 +0200907 state = PyModule_GetState(module);
908 if (state == NULL) {
909 return NULL;
910 }
911 PyErr_SetString(state->Error, "Orphaned RLE code at start");
Victor Stinnereaaaf132015-10-13 10:51:47 +0200912 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000913 }
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200914 *out_data++ = RUNCHAR;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000915 } else {
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200916 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 }
Tim Peters934c1a12002-07-02 22:24:50 +0000918
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000919 while( in_len > 0 ) {
920 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000921
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000922 if (in_byte == RUNCHAR) {
923 INBYTE(in_repeat);
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200924 /* only 1 byte will be written, but 2 bytes were preallocated:
Raymond Hettinger15f44ab2016-08-30 10:47:49 -0700925 subtract 1 byte to prevent overallocation */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200926 writer.min_size--;
927
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000928 if ( in_repeat == 0 ) {
929 /* Just an escaped RUNCHAR value */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200930 *out_data++ = RUNCHAR;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000931 } else {
932 /* Pick up value and output a sequence of it */
933 in_byte = out_data[-1];
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200934
935 /* enlarge the buffer if needed */
936 if (in_repeat > 1) {
937 /* -1 because we already preallocated 1 byte */
938 out_data = _PyBytesWriter_Prepare(&writer, out_data,
939 in_repeat - 1);
940 if (out_data == NULL)
941 goto error;
942 }
943
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000944 while ( --in_repeat > 0 )
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200945 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000946 }
947 } else {
948 /* Normal byte */
Victor Stinnerf9c9a3f2015-10-14 15:20:07 +0200949 *out_data++ = in_byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000950 }
951 }
Victor Stinnereaaaf132015-10-13 10:51:47 +0200952 return _PyBytesWriter_Finish(&writer, out_data);
953
954error:
955 _PyBytesWriter_Dealloc(&writer);
956 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000957}
958
Jack Jansen72781191995-08-07 14:34:15 +0000959
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200960/*[clinic input]
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100961binascii.crc_hqx
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200962
963 data: Py_buffer
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300964 crc: unsigned_int(bitwise=True)
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200965 /
966
Martin Panter3310e142016-12-24 07:36:44 +0000967Compute CRC-CCITT incrementally.
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200968[clinic start generated code]*/
969
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100970static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300971binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100972/*[clinic end generated code: output=2fde213d0f547a98 input=56237755370a951c]*/
Jack Jansen72781191995-08-07 14:34:15 +0000973{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200974 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000975 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000976
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300977 crc &= 0xffff;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200978 bin_data = data->buf;
979 len = data->len;
Jack Jansen72781191995-08-07 14:34:15 +0000980
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000981 while(len-- > 0) {
Serhiy Storchaka2ef7c472015-04-20 09:26:49 +0300982 crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000983 }
Jack Jansen72781191995-08-07 14:34:15 +0000984
Victor Stinnerbeea26b2020-01-22 20:44:22 +0100985 return PyLong_FromUnsignedLong(crc);
Jack Jansen72781191995-08-07 14:34:15 +0000986}
987
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +0200988#ifndef USE_ZLIB_CRC32
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000989/* Crc - 32 BIT ANSI X3.66 CRC checksum files
990 Also known as: ISO 3307
991**********************************************************************|
992* *|
993* Demonstration program to compute the 32-bit CRC used as the frame *|
994* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
995* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
996* protocol). The 32-bit FCS was added via the Federal Register, *|
997* 1 June 1982, p.23798. I presume but don't know for certain that *|
998* this polynomial is or will be included in CCITT V.41, which *|
999* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
1000* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
1001* errors by a factor of 10^-5 over 16-bit FCS. *|
1002* *|
1003**********************************************************************|
1004
1005 Copyright (C) 1986 Gary S. Brown. You may use this program, or
1006 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001007
Tim Peters934c1a12002-07-02 22:24:50 +00001008 First, the polynomial itself and its table of feedback terms. The
1009 polynomial is
1010 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
1011 Note that we take it "backwards" and put the highest-order term in
1012 the lowest-order bit. The X^32 term is "implied"; the LSB is the
1013 X^31 term, etc. The X^0 term (usually shown as "+1") results in
1014 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001015
Tim Peters934c1a12002-07-02 22:24:50 +00001016 Note that the usual hardware shift register implementation, which
1017 is what we're using (we're merely optimizing it by doing eight-bit
1018 chunks at a time) shifts bits into the lowest-order term. In our
1019 implementation, that means shifting towards the right. Why do we
1020 do it this way? Because the calculated CRC must be transmitted in
1021 order from highest-order term to lowest-order term. UARTs transmit
1022 characters in order from LSB to MSB. By storing the CRC this way,
1023 we hand it to the UART in the order low-byte to high-byte; the UART
1024 sends each low-bit to high-bit; and the result is transmission bit
1025 by bit from highest- to lowest-order term without requiring any bit
1026 shuffling on our part. Reception works similarly.
1027
1028 The feedback terms table consists of 256, 32-bit entries. Notes:
1029
1030 1. The table can be generated at runtime if desired; code to do so
1031 is shown later. It might not be obvious, but the feedback
1032 terms simply represent the results of eight shift/xor opera-
1033 tions for all combinations of data and CRC register values.
1034
1035 2. The CRC accumulation logic is the same for all CRC polynomials,
1036 be they sixteen or thirty-two bits wide. You simply choose the
1037 appropriate table. Alternatively, because the table can be
1038 generated at runtime, you can start by generating the table for
1039 the polynomial in question and use exactly the same "updcrc",
1040 if your application needn't simultaneously handle two CRC
1041 polynomials. (Note, however, that XMODEM is strange.)
1042
1043 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
1044 of course, 32-bit entries work OK if the high 16 bits are zero.
1045
1046 4. The values must be right-shifted by eight bits by the "updcrc"
1047 logic; the shift must be unsigned (bring in zeroes). On some
1048 hardware you could probably optimize the shift in assembler by
1049 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001050********************************************************************/
1051
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001052static const unsigned int crc_32_tab[256] = {
Gregory P. Smith3c0e4d22008-03-25 07:51:12 +000010530x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
10540x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
10550xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
10560x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
10570x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
10580x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
10590xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
10600xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
10610x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
10620x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
10630xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
10640xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
10650x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
10660x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
10670x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
10680xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
10690x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
10700x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
10710x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
10720xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
10730x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
10740x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
10750xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
10760xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
10770x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
10780x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
10790x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
10800x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
10810xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
10820x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
10830x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
10840x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
10850xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
10860xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
10870x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
10880x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
10890xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
10900xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
10910x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
10920x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
10930x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
10940xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
10950x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
10960x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
10970x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
10980xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
10990x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
11000x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
11010xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
11020xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
11030x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
11040x2d02ef8dU
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001105};
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001106#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001107
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001108/*[clinic input]
1109binascii.crc32 -> unsigned_int
1110
1111 data: Py_buffer
1112 crc: unsigned_int(bitwise=True) = 0
1113 /
1114
1115Compute CRC-32 incrementally.
1116[clinic start generated code]*/
1117
1118static unsigned int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001119binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
1120/*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001121
1122#ifdef USE_ZLIB_CRC32
1123/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
1124{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001125 const Byte *buf;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001126 Py_ssize_t len;
1127 int signed_val;
1128
1129 buf = (Byte*)data->buf;
1130 len = data->len;
1131 signed_val = crc32(crc, buf, len);
1132 return (unsigned int)signed_val & 0xffffffffU;
1133}
1134#else /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001135{ /* By Jim Ahlstrom; All rights transferred to CNRI */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001136 const unsigned char *bin_data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 Py_ssize_t len;
1138 unsigned int result;
Tim Peters934c1a12002-07-02 22:24:50 +00001139
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001140 bin_data = data->buf;
1141 len = data->len;
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 crc = ~ crc;
1144 while (len-- > 0) {
1145 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1146 /* Note: (crc >> 8) MUST zero fill on left */
1147 }
Tim Petersa98011c2002-07-02 20:20:08 +00001148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 result = (crc ^ 0xFFFFFFFF);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001150 return result & 0xffffffff;
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001151}
Christian Heimes1dc54002008-03-24 02:19:29 +00001152#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001153
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001154/*[clinic input]
1155binascii.b2a_hex
1156
1157 data: Py_buffer
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001158 sep: object = NULL
1159 An optional single character or byte to separate hex bytes.
1160 bytes_per_sep: int = 1
1161 How many bytes between separators. Positive values count from the
1162 right, negative values count from the left.
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001163
1164Hexadecimal representation of binary data.
1165
1166The return value is a bytes object. This function is also
1167available as "hexlify()".
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001168
1169Example:
1170>>> binascii.b2a_hex(b'\xb9\x01\xef')
1171b'b901ef'
1172>>> binascii.hexlify(b'\xb9\x01\xef', ':')
1173b'b9:01:ef'
1174>>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
1175b'b9_01ef'
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001176[clinic start generated code]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001177
1178static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001179binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1180 int bytes_per_sep)
1181/*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001182{
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001183 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1184 sep, bytes_per_sep);
Barry Warsawe977c212000-08-15 06:07:13 +00001185}
1186
Zachary Wareb176d402015-01-20 13:59:46 -06001187/*[clinic input]
1188binascii.hexlify = binascii.b2a_hex
1189
1190Hexadecimal representation of binary data.
1191
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001192The return value is a bytes object. This function is also
1193available as "b2a_hex()".
Zachary Wareb176d402015-01-20 13:59:46 -06001194[clinic start generated code]*/
1195
1196static PyObject *
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001197binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1198 int bytes_per_sep)
1199/*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
Zachary Wareb176d402015-01-20 13:59:46 -06001200{
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07001201 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1202 sep, bytes_per_sep);
Zachary Wareb176d402015-01-20 13:59:46 -06001203}
Barry Warsawe977c212000-08-15 06:07:13 +00001204
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001205/*[clinic input]
1206binascii.a2b_hex
1207
1208 hexstr: ascii_buffer
1209 /
1210
1211Binary data of hexadecimal representation.
1212
1213hexstr must contain an even number of hex digits (upper or lower case).
1214This function is also available as "unhexlify()".
1215[clinic start generated code]*/
1216
Barry Warsawe977c212000-08-15 06:07:13 +00001217static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001218binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
1219/*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
Barry Warsawe977c212000-08-15 06:07:13 +00001220{
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001221 const char* argbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 Py_ssize_t arglen;
1223 PyObject *retval;
1224 char* retbuf;
1225 Py_ssize_t i, j;
Marcel Plch33e71e02019-05-22 13:51:26 +02001226 binascii_state *state;
Barry Warsawe977c212000-08-15 06:07:13 +00001227
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001228 argbuf = hexstr->buf;
1229 arglen = hexstr->len;
Barry Warsawe977c212000-08-15 06:07:13 +00001230
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 assert(arglen >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001232
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 /* XXX What should we do about strings with an odd length? Should
1234 * we add an implicit leading zero, or a trailing zero? For now,
1235 * raise an exception.
1236 */
1237 if (arglen % 2) {
Marcel Plch33e71e02019-05-22 13:51:26 +02001238 state = PyModule_GetState(module);
1239 if (state == NULL) {
1240 return NULL;
1241 }
1242 PyErr_SetString(state->Error, "Odd-length string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 return NULL;
1244 }
Barry Warsawe977c212000-08-15 06:07:13 +00001245
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001246 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001247 if (!retval)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001248 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001250
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001251 for (i=j=0; i < arglen; i += 2) {
Sergey Fedoseev6b5df902018-02-27 01:35:41 +05001252 unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
1253 unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
1254 if (top >= 16 || bot >= 16) {
Marcel Plch33e71e02019-05-22 13:51:26 +02001255 state = PyModule_GetState(module);
1256 if (state == NULL) {
1257 return NULL;
1258 }
1259 PyErr_SetString(state->Error,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001260 "Non-hexadecimal digit found");
1261 goto finally;
1262 }
1263 retbuf[j++] = (top << 4) + bot;
1264 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001265 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +00001266
1267 finally:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001268 Py_DECREF(retval);
1269 return NULL;
Barry Warsawe977c212000-08-15 06:07:13 +00001270}
1271
Zachary Wareb176d402015-01-20 13:59:46 -06001272/*[clinic input]
1273binascii.unhexlify = binascii.a2b_hex
1274
1275Binary data of hexadecimal representation.
1276
1277hexstr must contain an even number of hex digits (upper or lower case).
1278[clinic start generated code]*/
1279
1280static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001281binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
1282/*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
Zachary Wareb176d402015-01-20 13:59:46 -06001283{
1284 return binascii_a2b_hex_impl(module, hexstr);
1285}
1286
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001287#define MAXLINESIZE 76
1288
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001289
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001290/*[clinic input]
1291binascii.a2b_qp
1292
Serhiy Storchaka12785612014-01-25 11:49:49 +02001293 data: ascii_buffer
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001294 header: bool(accept={int}) = False
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001295
1296Decode a string of qp-encoded data.
1297[clinic start generated code]*/
1298
1299static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001300binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001301/*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001302{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001303 Py_ssize_t in, out;
1304 char ch;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001305 const unsigned char *ascii_data;
1306 unsigned char *odata;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001307 Py_ssize_t datalen = 0;
1308 PyObject *rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001309
Serhiy Storchaka12785612014-01-25 11:49:49 +02001310 ascii_data = data->buf;
1311 datalen = data->len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001312
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001313 /* We allocate the output same size as input, this is overkill.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 */
Andy Lester7668a8b2020-03-24 23:26:44 -05001315 odata = (unsigned char *) PyMem_Calloc(1, datalen);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001316 if (odata == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 PyErr_NoMemory();
1318 return NULL;
1319 }
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001320
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001321 in = out = 0;
1322 while (in < datalen) {
Serhiy Storchaka12785612014-01-25 11:49:49 +02001323 if (ascii_data[in] == '=') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001324 in++;
1325 if (in >= datalen) break;
1326 /* Soft line breaks */
Serhiy Storchaka12785612014-01-25 11:49:49 +02001327 if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
1328 if (ascii_data[in] != '\n') {
1329 while (in < datalen && ascii_data[in] != '\n') in++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001330 }
1331 if (in < datalen) in++;
1332 }
Serhiy Storchaka12785612014-01-25 11:49:49 +02001333 else if (ascii_data[in] == '=') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001334 /* broken case from broken python qp */
1335 odata[out++] = '=';
1336 in++;
1337 }
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001338 else if ((in + 1 < datalen) &&
1339 ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
Serhiy Storchaka12785612014-01-25 11:49:49 +02001340 (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1341 (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1342 ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1343 (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1344 (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001345 /* hexval */
Sergey Fedoseev6b5df902018-02-27 01:35:41 +05001346 ch = _PyLong_DigitValue[ascii_data[in]] << 4;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 in++;
Sergey Fedoseev6b5df902018-02-27 01:35:41 +05001348 ch |= _PyLong_DigitValue[ascii_data[in]];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 in++;
1350 odata[out++] = ch;
1351 }
1352 else {
1353 odata[out++] = '=';
1354 }
1355 }
Serhiy Storchaka12785612014-01-25 11:49:49 +02001356 else if (header && ascii_data[in] == '_') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001357 odata[out++] = ' ';
1358 in++;
1359 }
1360 else {
Serhiy Storchaka12785612014-01-25 11:49:49 +02001361 odata[out] = ascii_data[in];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001362 in++;
1363 out++;
1364 }
1365 }
1366 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367 PyMem_Free(odata);
1368 return NULL;
1369 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 PyMem_Free(odata);
1371 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001372}
1373
/* Write the two uppercase hexadecimal digits of `ch` to s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char hexdigits[] = "0123456789ABCDEF";

    s[0] = hexdigits[(ch >> 4) & 0x0F];
    s[1] = hexdigits[ch & 0x0F];
    return 0;
}
1384
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001385/* XXX: This is ridiculously complicated to be backward compatible
1386 * (mostly) with the quopri module. It doesn't re-create the quopri
1387 * module bug where text ending in CRLF has the CR encoded */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001388
1389/*[clinic input]
1390binascii.b2a_qp
1391
1392 data: Py_buffer
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001393 quotetabs: bool(accept={int}) = False
1394 istext: bool(accept={int}) = True
1395 header: bool(accept={int}) = False
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001396
1397Encode a string using quoted-printable encoding.
1398
1399On encoding, when istext is set, newlines are not encoded, and white
1400space at end of lines is. When istext is not set, \r and \n (CR/LF)
1401are both encoded. When quotetabs is set, space and tabs are encoded.
1402[clinic start generated code]*/
1403
1404static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001405binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
Larry Hastings89964c42015-04-14 18:07:59 -04001406 int istext, int header)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001407/*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001408{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001409 Py_ssize_t in, out;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001410 const unsigned char *databuf;
1411 unsigned char *odata;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001412 Py_ssize_t datalen = 0, odatalen = 0;
1413 PyObject *rv;
1414 unsigned int linelen = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 unsigned char ch;
1416 int crlf = 0;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001417 const unsigned char *p;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001418
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001419 databuf = data->buf;
1420 datalen = data->len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 /* See if this string is using CRLF line ends */
1423 /* XXX: this function has the side effect of converting all of
1424 * the end of lines to be the same depending on this detection
1425 * here */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001426 p = (const unsigned char *) memchr(databuf, '\n', datalen);
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001427 if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 crlf = 1;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001429
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 /* First, scan to see how many characters need to be encoded */
1431 in = 0;
1432 while (in < datalen) {
Benjamin Peterson4f976512016-08-13 18:33:33 -07001433 Py_ssize_t delta = 0;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001434 if ((databuf[in] > 126) ||
1435 (databuf[in] == '=') ||
1436 (header && databuf[in] == '_') ||
1437 ((databuf[in] == '.') && (linelen == 0) &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001438 (in + 1 == datalen || databuf[in+1] == '\n' ||
1439 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001440 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1441 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1442 ((databuf[in] < 33) &&
1443 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1444 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 {
1446 if ((linelen + 3) >= MAXLINESIZE) {
1447 linelen = 0;
1448 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001449 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001451 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001452 }
1453 linelen += 3;
Benjamin Peterson4f976512016-08-13 18:33:33 -07001454 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 in++;
1456 }
1457 else {
1458 if (istext &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001459 ((databuf[in] == '\n') ||
1460 ((in+1 < datalen) && (databuf[in] == '\r') &&
1461 (databuf[in+1] == '\n'))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 {
1463 linelen = 0;
1464 /* Protect against whitespace on end of line */
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001465 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
Benjamin Peterson4f976512016-08-13 18:33:33 -07001466 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001468 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001470 delta += 1;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001471 if (databuf[in] == '\r')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001472 in += 2;
1473 else
1474 in++;
1475 }
1476 else {
1477 if ((in + 1 != datalen) &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001478 (databuf[in+1] != '\n') &&
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001479 (linelen + 1) >= MAXLINESIZE) {
1480 linelen = 0;
1481 if (crlf)
Benjamin Peterson4f976512016-08-13 18:33:33 -07001482 delta += 3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001483 else
Benjamin Peterson4f976512016-08-13 18:33:33 -07001484 delta += 2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 }
1486 linelen++;
Benjamin Peterson4f976512016-08-13 18:33:33 -07001487 delta++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001488 in++;
1489 }
1490 }
Benjamin Peterson4f976512016-08-13 18:33:33 -07001491 if (PY_SSIZE_T_MAX - delta < odatalen) {
Benjamin Peterson4f976512016-08-13 18:33:33 -07001492 PyErr_NoMemory();
1493 return NULL;
1494 }
1495 odatalen += delta;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 }
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001497
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001498 /* We allocate the output same size as input, this is overkill.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 */
Andy Lester7668a8b2020-03-24 23:26:44 -05001500 odata = (unsigned char *) PyMem_Calloc(1, odatalen);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 if (odata == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 PyErr_NoMemory();
1503 return NULL;
1504 }
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001506 in = out = linelen = 0;
1507 while (in < datalen) {
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001508 if ((databuf[in] > 126) ||
1509 (databuf[in] == '=') ||
1510 (header && databuf[in] == '_') ||
1511 ((databuf[in] == '.') && (linelen == 0) &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001512 (in + 1 == datalen || databuf[in+1] == '\n' ||
1513 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001514 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1515 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1516 ((databuf[in] < 33) &&
1517 (databuf[in] != '\r') && (databuf[in] != '\n') &&
Serhiy Storchakae6265e92016-09-14 16:34:37 +03001518 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001519 {
1520 if ((linelen + 3 )>= MAXLINESIZE) {
1521 odata[out++] = '=';
1522 if (crlf) odata[out++] = '\r';
1523 odata[out++] = '\n';
1524 linelen = 0;
1525 }
1526 odata[out++] = '=';
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001527 to_hex(databuf[in], &odata[out]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001528 out += 2;
1529 in++;
1530 linelen += 3;
1531 }
1532 else {
1533 if (istext &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001534 ((databuf[in] == '\n') ||
1535 ((in+1 < datalen) && (databuf[in] == '\r') &&
1536 (databuf[in+1] == '\n'))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001537 {
1538 linelen = 0;
1539 /* Protect against whitespace on end of line */
1540 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1541 ch = odata[out-1];
1542 odata[out-1] = '=';
1543 to_hex(ch, &odata[out]);
1544 out += 2;
1545 }
Tim Peters934c1a12002-07-02 22:24:50 +00001546
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001547 if (crlf) odata[out++] = '\r';
1548 odata[out++] = '\n';
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001549 if (databuf[in] == '\r')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001550 in += 2;
1551 else
1552 in++;
1553 }
1554 else {
1555 if ((in + 1 != datalen) &&
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001556 (databuf[in+1] != '\n') &&
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001557 (linelen + 1) >= MAXLINESIZE) {
1558 odata[out++] = '=';
1559 if (crlf) odata[out++] = '\r';
1560 odata[out++] = '\n';
1561 linelen = 0;
1562 }
1563 linelen++;
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001564 if (header && databuf[in] == ' ') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 odata[out++] = '_';
1566 in++;
1567 }
1568 else {
Serhiy Storchaka3ffd9132014-01-25 11:21:23 +02001569 odata[out++] = databuf[in++];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001570 }
1571 }
1572 }
1573 }
1574 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 PyMem_Free(odata);
1576 return NULL;
1577 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001578 PyMem_Free(odata);
1579 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001580}
Barry Warsawe977c212000-08-15 06:07:13 +00001581
/* List of functions defined in the module */

/* Method table handed to the module definition.  Each BINASCII_*_METHODDEF
 * macro supplies one entry (presumably generated by Argument Clinic,
 * including the trailing comma -- confirm in the clinic header).  The
 * table is terminated by the all-NULL sentinel entry. */
static struct PyMethodDef binascii_module_methods[] = {
    BINASCII_A2B_UU_METHODDEF
    BINASCII_B2A_UU_METHODDEF
    BINASCII_A2B_BASE64_METHODDEF
    BINASCII_B2A_BASE64_METHODDEF
    BINASCII_A2B_HQX_METHODDEF
    BINASCII_B2A_HQX_METHODDEF
    BINASCII_A2B_HEX_METHODDEF
    BINASCII_B2A_HEX_METHODDEF
    BINASCII_HEXLIFY_METHODDEF
    BINASCII_UNHEXLIFY_METHODDEF
    BINASCII_RLECODE_HQX_METHODDEF
    BINASCII_RLEDECODE_HQX_METHODDEF
    BINASCII_CRC_HQX_METHODDEF
    BINASCII_CRC32_METHODDEF
    BINASCII_A2B_QP_METHODDEF
    BINASCII_B2A_QP_METHODDEF
    {NULL, NULL} /* sentinel */
};
1603
1604
/* Module docstring, referenced from the module definition below.
 * The initialization function for the module *must* be called
 * PyInit_binascii. */
PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001607
Marcel Plch33e71e02019-05-22 13:51:26 +02001608static int
Hai Shiaa0c0802020-03-12 00:50:52 +08001609binascii_exec(PyObject *module) {
Marcel Plch33e71e02019-05-22 13:51:26 +02001610 int result;
Hai Shiaa0c0802020-03-12 00:50:52 +08001611 binascii_state *state = PyModule_GetState(module);
Marcel Plch33e71e02019-05-22 13:51:26 +02001612 if (state == NULL) {
1613 return -1;
1614 }
1615
1616 state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1617 if (state->Error == NULL) {
1618 return -1;
1619 }
Hai Shiaa0c0802020-03-12 00:50:52 +08001620 Py_INCREF(state->Error);
1621 result = PyModule_AddObject(module, "Error", state->Error);
Marcel Plch33e71e02019-05-22 13:51:26 +02001622 if (result == -1) {
Hai Shiaa0c0802020-03-12 00:50:52 +08001623 Py_DECREF(state->Error);
Marcel Plch33e71e02019-05-22 13:51:26 +02001624 return -1;
1625 }
1626
1627 state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1628 if (state->Incomplete == NULL) {
1629 return -1;
1630 }
Hai Shiaa0c0802020-03-12 00:50:52 +08001631 Py_INCREF(state->Incomplete);
1632 result = PyModule_AddObject(module, "Incomplete", state->Incomplete);
Marcel Plch33e71e02019-05-22 13:51:26 +02001633 if (result == -1) {
Hai Shiaa0c0802020-03-12 00:50:52 +08001634 Py_DECREF(state->Incomplete);
Marcel Plch33e71e02019-05-22 13:51:26 +02001635 return -1;
1636 }
1637
1638 return 0;
1639}
1640
1641static PyModuleDef_Slot binascii_slots[] = {
1642 {Py_mod_exec, binascii_exec},
1643 {0, NULL}
1644};
Martin v. Löwis1a214512008-06-11 05:26:20 +00001645
Hai Shiaa0c0802020-03-12 00:50:52 +08001646static int
1647binascii_traverse(PyObject *module, visitproc visit, void *arg)
1648{
1649 binascii_state *state = get_binascii_state(module);
Victor Stinner5b1ef202020-03-17 18:09:46 +01001650 Py_VISIT(state->Error);
1651 Py_VISIT(state->Incomplete);
Hai Shiaa0c0802020-03-12 00:50:52 +08001652 return 0;
1653}
1654
1655static int
1656binascii_clear(PyObject *module)
1657{
1658 binascii_state *state = get_binascii_state(module);
Victor Stinner5b1ef202020-03-17 18:09:46 +01001659 Py_CLEAR(state->Error);
1660 Py_CLEAR(state->Incomplete);
Hai Shiaa0c0802020-03-12 00:50:52 +08001661 return 0;
1662}
1663
1664static void
1665binascii_free(void *module)
1666{
1667 binascii_clear((PyObject *)module);
1668}
1669
Martin v. Löwis1a214512008-06-11 05:26:20 +00001670static struct PyModuleDef binasciimodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 PyModuleDef_HEAD_INIT,
1672 "binascii",
1673 doc_binascii,
Marcel Plch33e71e02019-05-22 13:51:26 +02001674 sizeof(binascii_state),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 binascii_module_methods,
Marcel Plch33e71e02019-05-22 13:51:26 +02001676 binascii_slots,
Hai Shiaa0c0802020-03-12 00:50:52 +08001677 binascii_traverse,
1678 binascii_clear,
Victor Stinner5b1ef202020-03-17 18:09:46 +01001679 binascii_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001680};
1681
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001682PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001683PyInit_binascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001684{
Marcel Plch33e71e02019-05-22 13:51:26 +02001685 return PyModuleDef_Init(&binasciimodule);
Jack Jansen72781191995-08-07 14:34:15 +00001686}