blob: 386c2deefd530a1e95989aa8fc3d19655fcf70cb [file] [log] [blame]
/*
** Routines to represent binary data in ASCII and vice-versa
**
** This module currently supports the following encodings:
** uuencode:
**      each line encodes 45 bytes (except possibly the last)
**      First char encodes (binary) length, rest data
**      each char encodes 6 bits, as follows:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**      short binary data is zero-extended (so the bits are always in the
**      right place), this does *not* reflect in the length.
** base64:
**      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
**      is done via a table.
**      Short binary data is filled (in ASCII) with '='.
** hqx:
**      File starts with introductory text, real data starts and ends
**      with colons.
**      Data consists of three similar parts: info, datafork, resourcefork.
**      Each part is protected (at the end) with a 16-bit crc
**      The binary data is run-length encoded, and then ascii-fied:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding is table-driven, see the code.
**      Short binary data results in the runt ascii-byte being output with
**      the bits in the right place.
**
** While I was reading dozens of programs that encode or decode the formats
** here (documentation? hihi:-) I have formulated Jansen's Observation:
**
**      Programs that encode binary data in ASCII are written in
**      such a style that they are as unreadable as possible. Devices used
**      include unnecessary global variables, burying important tables
**      in unrelated sourcefiles, putting functions in include files,
**      using seemingly-descriptive variable names for different purposes,
**      calls to empty subroutines and a host of others.
**
** I have attempted to break with this tradition, but I guess that that
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on rfc 1521 et al
** quoted-printable encoding specifies that non printable characters (anything
** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
** of the character.  It also specifies some other behavior to enable 8bit data
** in a mail message with little difficulty (maximum line sizes, protecting
** some cases of whitespace, etc).
**
** Brandon Long, September 2001.
*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
Christian Heimes1dc54002008-03-24 02:19:29 +000059#ifdef USE_ZLIB_CRC32
60#include "zlib.h"
61#endif
Jack Jansen72781191995-08-07 14:34:15 +000062
63static PyObject *Error;
64static PyObject *Incomplete;
65
/*
** hqx lookup tables.
**
** table_a2b_hqx maps an input byte to its 6-bit value (0x00..0x3F) or to one
** of the marker codes DONE/SKIP/FAIL defined below.  table_b2a_hqx is the
** inverse: it maps a 6-bit value to its ASCII character.  Neither table is
** ever written, so both are const.
*/

#define RUNCHAR 0x90    /* marker byte of the hqx run-length coding */

#define DONE 0x7F       /* ':'  -- the terminating colon */
#define SKIP 0x7E       /* '\n' and '\r' -- silently ignored */
#define FAIL 0x7D       /* byte that is not part of the hqx alphabet */

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/* Bytes 0x80..0xFF are never valid hqx characters. */
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};

/* 6-bit value -> hqx character (64 characters plus the implicit NUL). */
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000129
/*
** base64 lookup tables.
**
** table_a2b_base64 maps a 7-bit ASCII byte to its 6-bit value; -1 marks a
** byte outside the base64 alphabet.  Callers compare the looked-up value
** against (unsigned char)-1 after storing it in an unsigned char.  The pad
** character '=' maps to 0.  The table is read-only, hence const.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory.  The encoder
   allocates 2*len + 3 bytes, so this bound keeps that size computation
   within Py_ssize_t. */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/* 6-bit value -> base64 character (64 characters plus the implicit NUL). */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000148
149
150
/*
** CRC lookup table for the hqx format: entry i is the 16-bit CRC of the
** single byte i, computed most-significant-bit first with polynomial 0x1021.
** The table is read-only, hence const.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
185
Antoine Pitrou08316762011-12-20 13:58:41 +0100186static int
187ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
188{
189 if (arg == NULL) {
190 PyBuffer_Release(buf);
191 return 1;
192 }
193 if (PyUnicode_Check(arg)) {
194 if (PyUnicode_READY(arg) < 0)
195 return 0;
196 if (!PyUnicode_IS_ASCII(arg)) {
197 PyErr_SetString(PyExc_ValueError,
198 "string argument should contain only ASCII characters");
199 return 0;
200 }
201 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
202 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
203 buf->len = PyUnicode_GET_LENGTH(arg);
204 buf->obj = NULL;
205 return 1;
206 }
207 if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
208 PyErr_Format(PyExc_TypeError,
209 "argument should be bytes, buffer or ASCII string, "
210 "not %R", Py_TYPE(arg));
211 return 0;
212 }
213 if (!PyBuffer_IsContiguous(buf, 'C')) {
214 PyErr_Format(PyExc_TypeError,
215 "argument should be a contiguous buffer, "
216 "not %R", Py_TYPE(arg));
217 PyBuffer_Release(buf);
218 return 0;
219 }
220 return Py_CLEANUP_SUPPORTED;
221}
222
223
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000224PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
Jack Jansen72781191995-08-07 14:34:15 +0000225
226static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000227binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000228{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000229 Py_buffer pascii;
230 unsigned char *ascii_data, *bin_data;
231 int leftbits = 0;
232 unsigned char this_ch;
233 unsigned int leftchar = 0;
234 PyObject *rv;
235 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000236
Antoine Pitrou08316762011-12-20 13:58:41 +0100237 if ( !PyArg_ParseTuple(args, "O&:a2b_uu", ascii_buffer_converter, &pascii) )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000238 return NULL;
239 ascii_data = pascii.buf;
240 ascii_len = pascii.len;
Jack Jansen72781191995-08-07 14:34:15 +0000241
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000242 assert(ascii_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 /* First byte: binary data length (in bytes) */
245 bin_len = (*ascii_data++ - ' ') & 077;
246 ascii_len--;
Jack Jansen72781191995-08-07 14:34:15 +0000247
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 /* Allocate the buffer */
249 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) {
250 PyBuffer_Release(&pascii);
251 return NULL;
252 }
253 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
256 /* XXX is it really best to add NULs if there's no more data */
257 this_ch = (ascii_len > 0) ? *ascii_data : 0;
258 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
259 /*
260 ** Whitespace. Assume some spaces got eaten at
261 ** end-of-line. (We check this later)
262 */
263 this_ch = 0;
264 } else {
265 /* Check the character for legality
266 ** The 64 in stead of the expected 63 is because
267 ** there are a few uuencodes out there that use
268 ** '`' as zero instead of space.
269 */
270 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
271 PyErr_SetString(Error, "Illegal char");
272 PyBuffer_Release(&pascii);
273 Py_DECREF(rv);
274 return NULL;
275 }
276 this_ch = (this_ch - ' ') & 077;
277 }
278 /*
279 ** Shift it in on the low end, and see if there's
280 ** a byte ready for output.
281 */
282 leftchar = (leftchar << 6) | (this_ch);
283 leftbits += 6;
284 if ( leftbits >= 8 ) {
285 leftbits -= 8;
286 *bin_data++ = (leftchar >> leftbits) & 0xff;
287 leftchar &= ((1 << leftbits) - 1);
288 bin_len--;
289 }
290 }
291 /*
292 ** Finally, check that if there's anything left on the line
293 ** that it's whitespace only.
294 */
295 while( ascii_len-- > 0 ) {
296 this_ch = *ascii_data++;
297 /* Extra '`' may be written as padding in some cases */
298 if ( this_ch != ' ' && this_ch != ' '+64 &&
299 this_ch != '\n' && this_ch != '\r' ) {
300 PyErr_SetString(Error, "Trailing garbage");
301 PyBuffer_Release(&pascii);
302 Py_DECREF(rv);
303 return NULL;
304 }
305 }
306 PyBuffer_Release(&pascii);
307 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000308}
309
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000310PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000311
Jack Jansen72781191995-08-07 14:34:15 +0000312static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000313binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000314{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000315 Py_buffer pbin;
316 unsigned char *ascii_data, *bin_data;
317 int leftbits = 0;
318 unsigned char this_ch;
319 unsigned int leftchar = 0;
320 PyObject *rv;
321 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000322
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000323 if ( !PyArg_ParseTuple(args, "y*:b2a_uu", &pbin) )
324 return NULL;
325 bin_data = pbin.buf;
326 bin_len = pbin.len;
327 if ( bin_len > 45 ) {
328 /* The 45 is a limit that appears in all uuencode's */
329 PyErr_SetString(Error, "At most 45 bytes at once");
330 PyBuffer_Release(&pbin);
331 return NULL;
332 }
Jack Jansen72781191995-08-07 14:34:15 +0000333
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 /* We're lazy and allocate to much (fixed up later) */
335 if ( (rv=PyBytes_FromStringAndSize(NULL, 2 + (bin_len+2)/3*4)) == NULL ) {
336 PyBuffer_Release(&pbin);
337 return NULL;
338 }
339 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000340
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 /* Store the length */
342 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
345 /* Shift the data (or padding) into our buffer */
346 if ( bin_len > 0 ) /* Data */
347 leftchar = (leftchar << 8) | *bin_data;
348 else /* Padding */
349 leftchar <<= 8;
350 leftbits += 8;
Jack Jansen72781191995-08-07 14:34:15 +0000351
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 /* See if there are 6-bit groups ready */
353 while ( leftbits >= 6 ) {
354 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
355 leftbits -= 6;
356 *ascii_data++ = this_ch + ' ';
357 }
358 }
359 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 if (_PyBytes_Resize(&rv,
362 (ascii_data -
363 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Victor Stinner79799262013-07-09 00:35:22 +0200364 Py_CLEAR(rv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 }
366 PyBuffer_Release(&pbin);
367 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000368}
369
Guido van Rossum2db4f471999-10-19 19:05:14 +0000370
371static int
Thomas Woutersf98db652006-03-01 21:37:32 +0000372binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000373{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 /* Finds & returns the (num+1)th
375 ** valid character for base64, or -1 if none.
376 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 int ret = -1;
379 unsigned char c, b64val;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000380
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 while ((slen > 0) && (ret == -1)) {
382 c = *s;
383 b64val = table_a2b_base64[c & 0x7f];
384 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
385 if (num == 0)
386 ret = *s;
387 num--;
388 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 s++;
391 slen--;
392 }
393 return ret;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000394}
395
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000396PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000397
398static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000399binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000400{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000401 Py_buffer pascii;
402 unsigned char *ascii_data, *bin_data;
403 int leftbits = 0;
404 unsigned char this_ch;
405 unsigned int leftchar = 0;
406 PyObject *rv;
407 Py_ssize_t ascii_len, bin_len;
408 int quad_pos = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000409
Antoine Pitrou08316762011-12-20 13:58:41 +0100410 if ( !PyArg_ParseTuple(args, "O&:a2b_base64", ascii_buffer_converter, &pascii) )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 return NULL;
412 ascii_data = pascii.buf;
413 ascii_len = pascii.len;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 assert(ascii_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417 if (ascii_len > PY_SSIZE_T_MAX - 3) {
418 PyBuffer_Release(&pascii);
419 return PyErr_NoMemory();
420 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000422 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000424 /* Allocate the buffer */
425 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) {
426 PyBuffer_Release(&pascii);
427 return NULL;
428 }
429 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
430 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000432 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
433 this_ch = *ascii_data;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000434
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000435 if (this_ch > 0x7f ||
436 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
437 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000438
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000439 /* Check for pad sequences and ignore
440 ** the invalid ones.
441 */
442 if (this_ch == BASE64_PAD) {
443 if ( (quad_pos < 2) ||
444 ((quad_pos == 2) &&
445 (binascii_find_valid(ascii_data, ascii_len, 1)
446 != BASE64_PAD)) )
447 {
448 continue;
449 }
450 else {
451 /* A pad sequence means no more input.
452 ** We've already interpreted the data
453 ** from the quad at this point.
454 */
455 leftbits = 0;
456 break;
457 }
458 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000460 this_ch = table_a2b_base64[*ascii_data];
461 if ( this_ch == (unsigned char) -1 )
462 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 /*
465 ** Shift it in on the low end, and see if there's
466 ** a byte ready for output.
467 */
468 quad_pos = (quad_pos + 1) & 0x03;
469 leftchar = (leftchar << 6) | (this_ch);
470 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000471
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 if ( leftbits >= 8 ) {
473 leftbits -= 8;
474 *bin_data++ = (leftchar >> leftbits) & 0xff;
475 bin_len++;
476 leftchar &= ((1 << leftbits) - 1);
477 }
478 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000479
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000480 if (leftbits != 0) {
481 PyBuffer_Release(&pascii);
482 PyErr_SetString(Error, "Incorrect padding");
483 Py_DECREF(rv);
484 return NULL;
485 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000486
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 /* And set string size correctly. If the result string is empty
488 ** (because the input was all invalid) return the shared empty
489 ** string instead; _PyBytes_Resize() won't do this for us.
490 */
491 if (bin_len > 0) {
492 if (_PyBytes_Resize(&rv, bin_len) < 0) {
Victor Stinner79799262013-07-09 00:35:22 +0200493 Py_CLEAR(rv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 }
495 }
496 else {
497 Py_DECREF(rv);
498 rv = PyBytes_FromStringAndSize("", 0);
499 }
500 PyBuffer_Release(&pascii);
501 return rv;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000502}
503
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000504PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000505
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000506static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000507binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000508{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000509 Py_buffer pbuf;
510 unsigned char *ascii_data, *bin_data;
511 int leftbits = 0;
512 unsigned char this_ch;
513 unsigned int leftchar = 0;
514 PyObject *rv;
515 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000516
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000517 if ( !PyArg_ParseTuple(args, "y*:b2a_base64", &pbuf) )
518 return NULL;
519 bin_data = pbuf.buf;
520 bin_len = pbuf.len;
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000521
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000522 assert(bin_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 if ( bin_len > BASE64_MAXBIN ) {
525 PyErr_SetString(Error, "Too much data for base64 line");
526 PyBuffer_Release(&pbuf);
527 return NULL;
528 }
Tim Peters934c1a12002-07-02 22:24:50 +0000529
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000530 /* We're lazy and allocate too much (fixed up later).
531 "+3" leaves room for up to two pad characters and a trailing
532 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
533 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL ) {
534 PyBuffer_Release(&pbuf);
535 return NULL;
536 }
537 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
540 /* Shift the data into our buffer */
541 leftchar = (leftchar << 8) | *bin_data;
542 leftbits += 8;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000543
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000544 /* See if there are 6-bit groups ready */
545 while ( leftbits >= 6 ) {
546 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
547 leftbits -= 6;
548 *ascii_data++ = table_b2a_base64[this_ch];
549 }
550 }
551 if ( leftbits == 2 ) {
552 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
553 *ascii_data++ = BASE64_PAD;
554 *ascii_data++ = BASE64_PAD;
555 } else if ( leftbits == 4 ) {
556 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
557 *ascii_data++ = BASE64_PAD;
558 }
559 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000560
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000561 if (_PyBytes_Resize(&rv,
562 (ascii_data -
563 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Victor Stinner79799262013-07-09 00:35:22 +0200564 Py_CLEAR(rv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000565 }
566 PyBuffer_Release(&pbuf);
567 return rv;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000568}
569
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000570PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
Jack Jansen72781191995-08-07 14:34:15 +0000571
572static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000573binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000574{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 Py_buffer pascii;
576 unsigned char *ascii_data, *bin_data;
577 int leftbits = 0;
578 unsigned char this_ch;
579 unsigned int leftchar = 0;
580 PyObject *rv;
581 Py_ssize_t len;
582 int done = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000583
Antoine Pitrou08316762011-12-20 13:58:41 +0100584 if ( !PyArg_ParseTuple(args, "O&:a2b_hqx", ascii_buffer_converter, &pascii) )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000585 return NULL;
586 ascii_data = pascii.buf;
587 len = pascii.len;
Jack Jansen72781191995-08-07 14:34:15 +0000588
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000589 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000590
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000591 if (len > PY_SSIZE_T_MAX - 2) {
592 PyBuffer_Release(&pascii);
593 return PyErr_NoMemory();
594 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000595
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000596 /* Allocate a string that is too big (fixed later)
597 Add two to the initial length to prevent interning which
598 would preclude subsequent resizing. */
599 if ( (rv=PyBytes_FromStringAndSize(NULL, len+2)) == NULL ) {
600 PyBuffer_Release(&pascii);
601 return NULL;
602 }
603 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 for( ; len > 0 ; len--, ascii_data++ ) {
606 /* Get the byte and look it up */
607 this_ch = table_a2b_hqx[*ascii_data];
608 if ( this_ch == SKIP )
609 continue;
610 if ( this_ch == FAIL ) {
611 PyErr_SetString(Error, "Illegal char");
612 PyBuffer_Release(&pascii);
613 Py_DECREF(rv);
614 return NULL;
615 }
616 if ( this_ch == DONE ) {
617 /* The terminating colon */
618 done = 1;
619 break;
620 }
Jack Jansen72781191995-08-07 14:34:15 +0000621
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000622 /* Shift it into the buffer and see if any bytes are ready */
623 leftchar = (leftchar << 6) | (this_ch);
624 leftbits += 6;
625 if ( leftbits >= 8 ) {
626 leftbits -= 8;
627 *bin_data++ = (leftchar >> leftbits) & 0xff;
628 leftchar &= ((1 << leftbits) - 1);
629 }
630 }
Tim Peters934c1a12002-07-02 22:24:50 +0000631
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000632 if ( leftbits && !done ) {
633 PyErr_SetString(Incomplete,
634 "String has incomplete number of bytes");
635 PyBuffer_Release(&pascii);
636 Py_DECREF(rv);
637 return NULL;
638 }
639 if (_PyBytes_Resize(&rv,
640 (bin_data -
641 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Victor Stinner79799262013-07-09 00:35:22 +0200642 Py_CLEAR(rv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000643 }
644 if (rv) {
645 PyObject *rrv = Py_BuildValue("Oi", rv, done);
646 PyBuffer_Release(&pascii);
647 Py_DECREF(rv);
648 return rrv;
649 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 PyBuffer_Release(&pascii);
652 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000653}
654
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000655PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
Jack Jansen72781191995-08-07 14:34:15 +0000656
657static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000658binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000659{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000660 Py_buffer pbuf;
661 unsigned char *in_data, *out_data;
662 PyObject *rv;
663 unsigned char ch;
664 Py_ssize_t in, inend, len;
Tim Peters934c1a12002-07-02 22:24:50 +0000665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000666 if ( !PyArg_ParseTuple(args, "y*:rlecode_hqx", &pbuf) )
667 return NULL;
668 in_data = pbuf.buf;
669 len = pbuf.len;
Jack Jansen72781191995-08-07 14:34:15 +0000670
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000671 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000673 if (len > PY_SSIZE_T_MAX / 2 - 2) {
674 PyBuffer_Release(&pbuf);
675 return PyErr_NoMemory();
676 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 /* Worst case: output is twice as big as input (fixed later) */
679 if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) {
680 PyBuffer_Release(&pbuf);
681 return NULL;
682 }
683 out_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000685 for( in=0; in<len; in++) {
686 ch = in_data[in];
687 if ( ch == RUNCHAR ) {
688 /* RUNCHAR. Escape it. */
689 *out_data++ = RUNCHAR;
690 *out_data++ = 0;
691 } else {
692 /* Check how many following are the same */
693 for(inend=in+1;
694 inend<len && in_data[inend] == ch &&
695 inend < in+255;
696 inend++) ;
697 if ( inend - in > 3 ) {
698 /* More than 3 in a row. Output RLE. */
699 *out_data++ = ch;
700 *out_data++ = RUNCHAR;
Antoine Pitrou40455752010-08-15 18:51:10 +0000701 *out_data++ = (unsigned char) (inend-in);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000702 in = inend-1;
703 } else {
704 /* Less than 3. Output the byte itself */
705 *out_data++ = ch;
706 }
707 }
708 }
709 if (_PyBytes_Resize(&rv,
710 (out_data -
711 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Victor Stinner79799262013-07-09 00:35:22 +0200712 Py_CLEAR(rv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713 }
714 PyBuffer_Release(&pbuf);
715 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000716}
717
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000718PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
Tim Peters934c1a12002-07-02 22:24:50 +0000719
Jack Jansen72781191995-08-07 14:34:15 +0000720static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000721binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000722{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 Py_buffer pbin;
724 unsigned char *ascii_data, *bin_data;
725 int leftbits = 0;
726 unsigned char this_ch;
727 unsigned int leftchar = 0;
728 PyObject *rv;
729 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 if ( !PyArg_ParseTuple(args, "y*:b2a_hqx", &pbin) )
732 return NULL;
733 bin_data = pbin.buf;
734 len = pbin.len;
Jack Jansen72781191995-08-07 14:34:15 +0000735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000736 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000737
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000738 if (len > PY_SSIZE_T_MAX / 2 - 2) {
739 PyBuffer_Release(&pbin);
740 return PyErr_NoMemory();
741 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000743 /* Allocate a buffer that is at least large enough */
744 if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) {
745 PyBuffer_Release(&pbin);
746 return NULL;
747 }
748 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000750 for( ; len > 0 ; len--, bin_data++ ) {
751 /* Shift into our buffer, and output any 6bits ready */
752 leftchar = (leftchar << 8) | *bin_data;
753 leftbits += 8;
754 while ( leftbits >= 6 ) {
755 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
756 leftbits -= 6;
757 *ascii_data++ = table_b2a_hqx[this_ch];
758 }
759 }
760 /* Output a possible runt byte */
761 if ( leftbits ) {
762 leftchar <<= (6-leftbits);
763 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
764 }
765 if (_PyBytes_Resize(&rv,
766 (ascii_data -
767 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Victor Stinner79799262013-07-09 00:35:22 +0200768 Py_CLEAR(rv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000769 }
770 PyBuffer_Release(&pbin);
771 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000772}
773
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000774PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
Tim Peters934c1a12002-07-02 22:24:50 +0000775
Jack Jansen72781191995-08-07 14:34:15 +0000776static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000777binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000778{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000779 Py_buffer pin;
780 unsigned char *in_data, *out_data;
781 unsigned char in_byte, in_repeat;
782 PyObject *rv;
783 Py_ssize_t in_len, out_len, out_len_left;
Jack Jansen72781191995-08-07 14:34:15 +0000784
Florent Xiclunaf1046ca2010-07-27 21:20:15 +0000785 if ( !PyArg_ParseTuple(args, "y*:rledecode_hqx", &pin) )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000786 return NULL;
787 in_data = pin.buf;
788 in_len = pin.len;
Jack Jansen72781191995-08-07 14:34:15 +0000789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000790 assert(in_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000792 /* Empty string is a special case */
793 if ( in_len == 0 ) {
794 PyBuffer_Release(&pin);
795 return PyBytes_FromStringAndSize("", 0);
796 }
797 else if (in_len > PY_SSIZE_T_MAX / 2) {
798 PyBuffer_Release(&pin);
799 return PyErr_NoMemory();
800 }
Jack Jansen72781191995-08-07 14:34:15 +0000801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000802 /* Allocate a buffer of reasonable size. Resized when needed */
803 out_len = in_len*2;
804 if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL ) {
805 PyBuffer_Release(&pin);
806 return NULL;
807 }
808 out_len_left = out_len;
809 out_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 /*
812 ** We need two macros here to get/put bytes and handle
813 ** end-of-buffer for input and output strings.
814 */
Jack Jansen72781191995-08-07 14:34:15 +0000815#define INBYTE(b) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000816 do { \
817 if ( --in_len < 0 ) { \
818 PyErr_SetString(Incomplete, ""); \
819 Py_DECREF(rv); \
820 PyBuffer_Release(&pin); \
821 return NULL; \
822 } \
823 b = *in_data++; \
824 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000825
Jack Jansen72781191995-08-07 14:34:15 +0000826#define OUTBYTE(b) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000827 do { \
828 if ( --out_len_left < 0 ) { \
829 if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
830 if (_PyBytes_Resize(&rv, 2*out_len) < 0) \
Victor Stinner79799262013-07-09 00:35:22 +0200831 { Py_XDECREF(rv); PyBuffer_Release(&pin); return NULL; } \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000832 out_data = (unsigned char *)PyBytes_AS_STRING(rv) \
833 + out_len; \
834 out_len_left = out_len-1; \
835 out_len = out_len * 2; \
836 } \
837 *out_data++ = b; \
838 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000840 /*
841 ** Handle first byte separately (since we have to get angry
842 ** in case of an orphaned RLE code).
843 */
844 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000845
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000846 if (in_byte == RUNCHAR) {
847 INBYTE(in_repeat);
848 if (in_repeat != 0) {
849 /* Note Error, not Incomplete (which is at the end
850 ** of the string only). This is a programmer error.
851 */
852 PyErr_SetString(Error, "Orphaned RLE code at start");
853 PyBuffer_Release(&pin);
854 Py_DECREF(rv);
855 return NULL;
856 }
857 OUTBYTE(RUNCHAR);
858 } else {
859 OUTBYTE(in_byte);
860 }
Tim Peters934c1a12002-07-02 22:24:50 +0000861
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 while( in_len > 0 ) {
863 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000864
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000865 if (in_byte == RUNCHAR) {
866 INBYTE(in_repeat);
867 if ( in_repeat == 0 ) {
868 /* Just an escaped RUNCHAR value */
869 OUTBYTE(RUNCHAR);
870 } else {
871 /* Pick up value and output a sequence of it */
872 in_byte = out_data[-1];
873 while ( --in_repeat > 0 )
874 OUTBYTE(in_byte);
875 }
876 } else {
877 /* Normal byte */
878 OUTBYTE(in_byte);
879 }
880 }
881 if (_PyBytes_Resize(&rv,
882 (out_data -
883 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Victor Stinner79799262013-07-09 00:35:22 +0200884 Py_CLEAR(rv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 }
886 PyBuffer_Release(&pin);
887 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000888}
889
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000890PyDoc_STRVAR(doc_crc_hqx,
891"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
Jack Jansen72781191995-08-07 14:34:15 +0000892
893static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000894binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000895{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000896 Py_buffer pin;
897 unsigned char *bin_data;
898 unsigned int crc;
899 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000900
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000901 if ( !PyArg_ParseTuple(args, "y*i:crc_hqx", &pin, &crc) )
902 return NULL;
903 bin_data = pin.buf;
904 len = pin.len;
Jack Jansen72781191995-08-07 14:34:15 +0000905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000906 while(len-- > 0) {
907 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
908 }
Jack Jansen72781191995-08-07 14:34:15 +0000909
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000910 PyBuffer_Release(&pin);
911 return Py_BuildValue("i", crc);
Jack Jansen72781191995-08-07 14:34:15 +0000912}
913
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000914PyDoc_STRVAR(doc_crc32,
915"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000916
Christian Heimes1dc54002008-03-24 02:19:29 +0000917#ifdef USE_ZLIB_CRC32
918/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
919static PyObject *
920binascii_crc32(PyObject *self, PyObject *args)
921{
Christian Heimescc47b052008-03-25 14:56:36 +0000922 unsigned int crc32val = 0; /* crc32(0L, Z_NULL, 0) */
Martin v. Löwis423be952008-08-13 15:53:07 +0000923 Py_buffer pbuf;
Christian Heimes1dc54002008-03-24 02:19:29 +0000924 Byte *buf;
Neal Norwitz4027bf82008-03-24 04:59:05 +0000925 Py_ssize_t len;
Christian Heimescc47b052008-03-25 14:56:36 +0000926 int signed_val;
927
Martin v. Löwis15b16a32008-12-02 06:00:15 +0000928 if (!PyArg_ParseTuple(args, "y*|I:crc32", &pbuf, &crc32val))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000930 buf = (Byte*)pbuf.buf;
931 len = pbuf.len;
Christian Heimescc47b052008-03-25 14:56:36 +0000932 signed_val = crc32(crc32val, buf, len);
Martin v. Löwis423be952008-08-13 15:53:07 +0000933 PyBuffer_Release(&pbuf);
Christian Heimescc47b052008-03-25 14:56:36 +0000934 return PyLong_FromUnsignedLong(signed_val & 0xffffffffU);
Christian Heimes1dc54002008-03-24 02:19:29 +0000935}
936#else /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000937/* Crc - 32 BIT ANSI X3.66 CRC checksum files
938 Also known as: ISO 3307
939**********************************************************************|
940* *|
941* Demonstration program to compute the 32-bit CRC used as the frame *|
942* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
943* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
944* protocol). The 32-bit FCS was added via the Federal Register, *|
945* 1 June 1982, p.23798. I presume but don't know for certain that *|
946* this polynomial is or will be included in CCITT V.41, which *|
947* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
948* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
949* errors by a factor of 10^-5 over 16-bit FCS. *|
950* *|
951**********************************************************************|
952
953 Copyright (C) 1986 Gary S. Brown. You may use this program, or
954 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000955
Tim Peters934c1a12002-07-02 22:24:50 +0000956 First, the polynomial itself and its table of feedback terms. The
957 polynomial is
958 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
959 Note that we take it "backwards" and put the highest-order term in
960 the lowest-order bit. The X^32 term is "implied"; the LSB is the
961 X^31 term, etc. The X^0 term (usually shown as "+1") results in
962 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000963
Tim Peters934c1a12002-07-02 22:24:50 +0000964 Note that the usual hardware shift register implementation, which
965 is what we're using (we're merely optimizing it by doing eight-bit
966 chunks at a time) shifts bits into the lowest-order term. In our
967 implementation, that means shifting towards the right. Why do we
968 do it this way? Because the calculated CRC must be transmitted in
969 order from highest-order term to lowest-order term. UARTs transmit
970 characters in order from LSB to MSB. By storing the CRC this way,
971 we hand it to the UART in the order low-byte to high-byte; the UART
972 sends each low-bit to high-bit; and the result is transmission bit
973 by bit from highest- to lowest-order term without requiring any bit
974 shuffling on our part. Reception works similarly.
975
976 The feedback terms table consists of 256, 32-bit entries. Notes:
977
978 1. The table can be generated at runtime if desired; code to do so
979 is shown later. It might not be obvious, but the feedback
980 terms simply represent the results of eight shift/xor opera-
981 tions for all combinations of data and CRC register values.
982
983 2. The CRC accumulation logic is the same for all CRC polynomials,
984 be they sixteen or thirty-two bits wide. You simply choose the
985 appropriate table. Alternatively, because the table can be
986 generated at runtime, you can start by generating the table for
987 the polynomial in question and use exactly the same "updcrc",
988 if your application needn't simultaneously handle two CRC
989 polynomials. (Note, however, that XMODEM is strange.)
990
991 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
992 of course, 32-bit entries work OK if the high 16 bits are zero.
993
994 4. The values must be right-shifted by eight bits by the "updcrc"
995 logic; the shift must be unsigned (bring in zeroes). On some
996 hardware you could probably optimize the shift in assembler by
997 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000998********************************************************************/
999
/* CRC-32 feedback terms: 256 entries, indexed by
   (crc ^ input byte) & 0xff.  The table is only ever read, so it is
   declared const. */
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
1054
1055static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +00001056binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001057{ /* By Jim Ahlstrom; All rights transferred to CNRI */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 Py_buffer pbin;
1059 unsigned char *bin_data;
1060 unsigned int crc = 0; /* initial value of CRC */
1061 Py_ssize_t len;
1062 unsigned int result;
Tim Peters934c1a12002-07-02 22:24:50 +00001063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 if ( !PyArg_ParseTuple(args, "y*|I:crc32", &pbin, &crc) )
1065 return NULL;
1066 bin_data = pbin.buf;
1067 len = pbin.len;
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 crc = ~ crc;
1070 while (len-- > 0) {
1071 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1072 /* Note: (crc >> 8) MUST zero fill on left */
1073 }
Tim Petersa98011c2002-07-02 20:20:08 +00001074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 result = (crc ^ 0xFFFFFFFF);
1076 PyBuffer_Release(&pbin);
1077 return PyLong_FromUnsignedLong(result & 0xffffffff);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001078}
Christian Heimes1dc54002008-03-24 02:19:29 +00001079#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001080
Barry Warsawe977c212000-08-15 06:07:13 +00001081
1082static PyObject *
1083binascii_hexlify(PyObject *self, PyObject *args)
1084{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 Py_buffer parg;
1086 char* argbuf;
1087 Py_ssize_t arglen;
1088 PyObject *retval;
1089 char* retbuf;
1090 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +00001091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 if (!PyArg_ParseTuple(args, "y*:b2a_hex", &parg))
1093 return NULL;
1094 argbuf = parg.buf;
1095 arglen = parg.len;
Barry Warsawe977c212000-08-15 06:07:13 +00001096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 assert(arglen >= 0);
1098 if (arglen > PY_SSIZE_T_MAX / 2) {
1099 PyBuffer_Release(&parg);
1100 return PyErr_NoMemory();
1101 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 retval = PyBytes_FromStringAndSize(NULL, arglen*2);
1104 if (!retval) {
1105 PyBuffer_Release(&parg);
1106 return NULL;
1107 }
1108 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 /* make hex version of string, taken from shamodule.c */
1111 for (i=j=0; i < arglen; i++) {
Victor Stinnerf5cff562011-10-14 02:13:11 +02001112 unsigned char c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 c = (argbuf[i] >> 4) & 0xf;
Victor Stinnerf5cff562011-10-14 02:13:11 +02001114 retbuf[j++] = Py_hexdigits[c];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 c = argbuf[i] & 0xf;
Victor Stinnerf5cff562011-10-14 02:13:11 +02001116 retbuf[j++] = Py_hexdigits[c];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 }
1118 PyBuffer_Release(&parg);
1119 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +00001120}
1121
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001122PyDoc_STRVAR(doc_hexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001123"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
1124\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001125This function is also available as \"hexlify()\".");
Barry Warsawe977c212000-08-15 06:07:13 +00001126
1127
/* Map one hexadecimal digit character (either case) to its value 0..15.
   Returns -1 if c is not a hexadecimal digit. */
static int
to_int(int c)
{
    if (Py_ISDIGIT(c))
        return c - '0';

    /* Fold upper case onto lower case so a single range test suffices. */
    if (Py_ISUPPER(c))
        c = Py_TOLOWER(c);

    return (c >= 'a' && c <= 'f') ? c - 'a' + 10 : -1;
}
1141
1142
1143static PyObject *
1144binascii_unhexlify(PyObject *self, PyObject *args)
1145{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 Py_buffer parg;
1147 char* argbuf;
1148 Py_ssize_t arglen;
1149 PyObject *retval;
1150 char* retbuf;
1151 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +00001152
Antoine Pitrou08316762011-12-20 13:58:41 +01001153 if (!PyArg_ParseTuple(args, "O&:a2b_hex", ascii_buffer_converter, &parg))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001154 return NULL;
1155 argbuf = parg.buf;
1156 arglen = parg.len;
Barry Warsawe977c212000-08-15 06:07:13 +00001157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 assert(arglen >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 /* XXX What should we do about strings with an odd length? Should
1161 * we add an implicit leading zero, or a trailing zero? For now,
1162 * raise an exception.
1163 */
1164 if (arglen % 2) {
1165 PyBuffer_Release(&parg);
1166 PyErr_SetString(Error, "Odd-length string");
1167 return NULL;
1168 }
Barry Warsawe977c212000-08-15 06:07:13 +00001169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
1171 if (!retval) {
1172 PyBuffer_Release(&parg);
1173 return NULL;
1174 }
1175 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001176
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001177 for (i=j=0; i < arglen; i += 2) {
1178 int top = to_int(Py_CHARMASK(argbuf[i]));
1179 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1180 if (top == -1 || bot == -1) {
1181 PyErr_SetString(Error,
1182 "Non-hexadecimal digit found");
1183 goto finally;
1184 }
1185 retbuf[j++] = (top << 4) + bot;
1186 }
1187 PyBuffer_Release(&parg);
1188 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +00001189
1190 finally:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 PyBuffer_Release(&parg);
1192 Py_DECREF(retval);
1193 return NULL;
Barry Warsawe977c212000-08-15 06:07:13 +00001194}
1195
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001196PyDoc_STRVAR(doc_unhexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001197"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1198\n\
1199hexstr must contain an even number of hex digits (upper or lower case).\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001200This function is also available as \"unhexlify()\"");
Barry Warsawe977c212000-08-15 06:07:13 +00001201
/* Value of each 7-bit ASCII character as a hexadecimal digit, or -1 if the
   character is not one.  Both 'A'-'F' and 'a'-'f' map to 10-15.  The table
   is only ever read, so it is declared const. */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up a hex digit value.  The table has 128 entries, so c must be a
   7-bit character code; callers are expected to have checked that. */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Longest line, in characters, that the quoted-printable encoder emits. */
#define MAXLINESIZE 76
1216
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001217PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001218
Tim Peters934c1a12002-07-02 22:24:50 +00001219static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001220binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1221{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 Py_ssize_t in, out;
1223 char ch;
1224 Py_buffer pdata;
1225 unsigned char *data, *odata;
1226 Py_ssize_t datalen = 0;
1227 PyObject *rv;
1228 static char *kwlist[] = {"data", "header", NULL};
1229 int header = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001230
Antoine Pitrou08316762011-12-20 13:58:41 +01001231 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|i:a2b_qp", kwlist,
1232 ascii_buffer_converter, &pdata, &header))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 return NULL;
1234 data = pdata.buf;
1235 datalen = pdata.len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001237 /* We allocate the output same size as input, this is overkill.
1238 * The previous implementation used calloc() so we'll zero out the
1239 * memory here too, since PyMem_Malloc() does not guarantee that.
1240 */
1241 odata = (unsigned char *) PyMem_Malloc(datalen);
1242 if (odata == NULL) {
1243 PyBuffer_Release(&pdata);
1244 PyErr_NoMemory();
1245 return NULL;
1246 }
1247 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001248
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 in = out = 0;
1250 while (in < datalen) {
1251 if (data[in] == '=') {
1252 in++;
1253 if (in >= datalen) break;
1254 /* Soft line breaks */
1255 if ((data[in] == '\n') || (data[in] == '\r')) {
1256 if (data[in] != '\n') {
1257 while (in < datalen && data[in] != '\n') in++;
1258 }
1259 if (in < datalen) in++;
1260 }
1261 else if (data[in] == '=') {
1262 /* broken case from broken python qp */
1263 odata[out++] = '=';
1264 in++;
1265 }
1266 else if (((data[in] >= 'A' && data[in] <= 'F') ||
1267 (data[in] >= 'a' && data[in] <= 'f') ||
1268 (data[in] >= '0' && data[in] <= '9')) &&
1269 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1270 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1271 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1272 /* hexval */
1273 ch = hexval(data[in]) << 4;
1274 in++;
1275 ch |= hexval(data[in]);
1276 in++;
1277 odata[out++] = ch;
1278 }
1279 else {
1280 odata[out++] = '=';
1281 }
1282 }
1283 else if (header && data[in] == '_') {
1284 odata[out++] = ' ';
1285 in++;
1286 }
1287 else {
1288 odata[out] = data[in];
1289 in++;
1290 out++;
1291 }
1292 }
1293 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1294 PyBuffer_Release(&pdata);
1295 PyMem_Free(odata);
1296 return NULL;
1297 }
1298 PyBuffer_Release(&pdata);
1299 PyMem_Free(odata);
1300 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001301}
1302
/* Write the two upper-case hexadecimal digits of ch to s[0] (high nibble)
   and s[1] (low nibble).  No terminator is written.  Always returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[ch >> 4];
    s[1] = digits[ch & 0x0f];
    return 0;
}
1313
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001314PyDoc_STRVAR(doc_b2a_qp,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001315"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1316 Encode a string using quoted-printable encoding. \n\
1317\n\
1318On encoding, when istext is set, newlines are not encoded, and white \n\
1319space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001320both encoded. When quotetabs is set, space and tabs are encoded.");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001321
1322/* XXX: This is ridiculously complicated to be backward compatible
1323 * (mostly) with the quopri module. It doesn't re-create the quopri
1324 * module bug where text ending in CRLF has the CR encoded */
Tim Peters934c1a12002-07-02 22:24:50 +00001325static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001326binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1327{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 Py_ssize_t in, out;
1329 Py_buffer pdata;
1330 unsigned char *data, *odata;
1331 Py_ssize_t datalen = 0, odatalen = 0;
1332 PyObject *rv;
1333 unsigned int linelen = 0;
1334 static char *kwlist[] = {"data", "quotetabs", "istext",
1335 "header", NULL};
1336 int istext = 1;
1337 int quotetabs = 0;
1338 int header = 0;
1339 unsigned char ch;
1340 int crlf = 0;
1341 unsigned char *p;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001342
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001343 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|iii", kwlist, &pdata,
1344 &quotetabs, &istext, &header))
1345 return NULL;
1346 data = pdata.buf;
1347 datalen = pdata.len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 /* See if this string is using CRLF line ends */
1350 /* XXX: this function has the side effect of converting all of
1351 * the end of lines to be the same depending on this detection
1352 * here */
1353 p = (unsigned char *) memchr(data, '\n', datalen);
1354 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1355 crlf = 1;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001356
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001357 /* First, scan to see how many characters need to be encoded */
1358 in = 0;
1359 while (in < datalen) {
1360 if ((data[in] > 126) ||
1361 (data[in] == '=') ||
1362 (header && data[in] == '_') ||
1363 ((data[in] == '.') && (linelen == 0) &&
1364 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1365 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1366 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1367 ((data[in] < 33) &&
1368 (data[in] != '\r') && (data[in] != '\n') &&
Senthil Kumaran922e9042010-11-09 09:59:13 +00001369 (quotetabs || ((data[in] != '\t') && (data[in] != ' ')))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 {
1371 if ((linelen + 3) >= MAXLINESIZE) {
1372 linelen = 0;
1373 if (crlf)
1374 odatalen += 3;
1375 else
1376 odatalen += 2;
1377 }
1378 linelen += 3;
1379 odatalen += 3;
1380 in++;
1381 }
1382 else {
1383 if (istext &&
1384 ((data[in] == '\n') ||
1385 ((in+1 < datalen) && (data[in] == '\r') &&
1386 (data[in+1] == '\n'))))
1387 {
1388 linelen = 0;
1389 /* Protect against whitespace on end of line */
1390 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1391 odatalen += 2;
1392 if (crlf)
1393 odatalen += 2;
1394 else
1395 odatalen += 1;
1396 if (data[in] == '\r')
1397 in += 2;
1398 else
1399 in++;
1400 }
1401 else {
1402 if ((in + 1 != datalen) &&
1403 (data[in+1] != '\n') &&
1404 (linelen + 1) >= MAXLINESIZE) {
1405 linelen = 0;
1406 if (crlf)
1407 odatalen += 3;
1408 else
1409 odatalen += 2;
1410 }
1411 linelen++;
1412 odatalen++;
1413 in++;
1414 }
1415 }
1416 }
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 /* We allocate the output same size as input, this is overkill.
1419 * The previous implementation used calloc() so we'll zero out the
1420 * memory here too, since PyMem_Malloc() does not guarantee that.
1421 */
1422 odata = (unsigned char *) PyMem_Malloc(odatalen);
1423 if (odata == NULL) {
1424 PyBuffer_Release(&pdata);
1425 PyErr_NoMemory();
1426 return NULL;
1427 }
1428 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001429
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 in = out = linelen = 0;
1431 while (in < datalen) {
1432 if ((data[in] > 126) ||
1433 (data[in] == '=') ||
1434 (header && data[in] == '_') ||
1435 ((data[in] == '.') && (linelen == 0) &&
1436 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1437 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1438 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1439 ((data[in] < 33) &&
1440 (data[in] != '\r') && (data[in] != '\n') &&
1441 (quotetabs ||
1442 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1443 {
1444 if ((linelen + 3 )>= MAXLINESIZE) {
1445 odata[out++] = '=';
1446 if (crlf) odata[out++] = '\r';
1447 odata[out++] = '\n';
1448 linelen = 0;
1449 }
1450 odata[out++] = '=';
1451 to_hex(data[in], &odata[out]);
1452 out += 2;
1453 in++;
1454 linelen += 3;
1455 }
1456 else {
1457 if (istext &&
1458 ((data[in] == '\n') ||
1459 ((in+1 < datalen) && (data[in] == '\r') &&
1460 (data[in+1] == '\n'))))
1461 {
1462 linelen = 0;
1463 /* Protect against whitespace on end of line */
1464 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1465 ch = odata[out-1];
1466 odata[out-1] = '=';
1467 to_hex(ch, &odata[out]);
1468 out += 2;
1469 }
Tim Peters934c1a12002-07-02 22:24:50 +00001470
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 if (crlf) odata[out++] = '\r';
1472 odata[out++] = '\n';
1473 if (data[in] == '\r')
1474 in += 2;
1475 else
1476 in++;
1477 }
1478 else {
1479 if ((in + 1 != datalen) &&
1480 (data[in+1] != '\n') &&
1481 (linelen + 1) >= MAXLINESIZE) {
1482 odata[out++] = '=';
1483 if (crlf) odata[out++] = '\r';
1484 odata[out++] = '\n';
1485 linelen = 0;
1486 }
1487 linelen++;
1488 if (header && data[in] == ' ') {
1489 odata[out++] = '_';
1490 in++;
1491 }
1492 else {
1493 odata[out++] = data[in++];
1494 }
1495 }
1496 }
1497 }
1498 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1499 PyBuffer_Release(&pdata);
1500 PyMem_Free(odata);
1501 return NULL;
1502 }
1503 PyBuffer_Release(&pdata);
1504 PyMem_Free(odata);
1505 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001506}
Barry Warsawe977c212000-08-15 06:07:13 +00001507
Jack Jansen72781191995-08-07 14:34:15 +00001508/* List of functions defined in the module */
1509
1510static struct PyMethodDef binascii_module_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1512 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1513 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1514 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1515 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1516 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1517 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1518 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1519 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1520 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1521 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1522 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1523 doc_rledecode_hqx},
1524 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1525 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
1526 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1527 doc_a2b_qp},
1528 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1529 doc_b2a_qp},
1530 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001531};
1532
1533
Martin v. Löwis1a214512008-06-11 05:26:20 +00001534/* Initialization function for the module (*must* be called PyInit_binascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001535PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001536
Martin v. Löwis1a214512008-06-11 05:26:20 +00001537
1538static struct PyModuleDef binasciimodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 PyModuleDef_HEAD_INIT,
1540 "binascii",
1541 doc_binascii,
1542 -1,
1543 binascii_module_methods,
1544 NULL,
1545 NULL,
1546 NULL,
1547 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001548};
1549
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001550PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001551PyInit_binascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001552{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001553 PyObject *m, *d;
Jack Jansen72781191995-08-07 14:34:15 +00001554
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001555 /* Create the module and add the functions */
1556 m = PyModule_Create(&binasciimodule);
1557 if (m == NULL)
1558 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +00001559
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 d = PyModule_GetDict(m);
Jack Jansen72781191995-08-07 14:34:15 +00001561
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001562 Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1563 PyDict_SetItemString(d, "Error", Error);
1564 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1565 PyDict_SetItemString(d, "Incomplete", Incomplete);
1566 if (PyErr_Occurred()) {
1567 Py_DECREF(m);
1568 m = NULL;
1569 }
1570 return m;
Jack Jansen72781191995-08-07 14:34:15 +00001571}