blob: a84badc7c934f13605f6d54e45f248ef38ad3ef2 [file] [log] [blame]
/*
** Routines to represent binary data in ASCII and vice-versa
**
** This module currently supports the following encodings:
** uuencode:
**      each line encodes 45 bytes (except possibly the last)
**      First char encodes (binary) length, rest data
**      each char encodes 6 bits, as follows:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**      short binary data is zero-extended (so the bits are always in the
**      right place), this does *not* reflect in the length.
** base64:
**      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
**      is done via a table.
**      Short binary data is filled (in ASCII) with '='.
** hqx:
**      File starts with introductory text, real data starts and ends
**      with colons.
**      Data consists of three similar parts: info, datafork, resourcefork.
**      Each part is protected (at the end) with a 16-bit crc
**      The binary data is run-length encoded, and then ascii-fied:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding is table-driven, see the code.
**      Short binary data results in the runt ascii-byte being output with
**      the bits in the right place.
**
** While I was reading dozens of programs that encode or decode the formats
** here (documentation? hihi:-) I have formulated Jansen's Observation:
**
**      Programs that encode binary data in ASCII are written in
**      such a style that they are as unreadable as possible. Devices used
**      include unnecessary global variables, burying important tables
**      in unrelated sourcefiles, putting functions in include files,
**      using seemingly-descriptive variable names for different purposes,
**      calls to empty subroutines and a host of others.
**
** I have attempted to break with this tradition, but I guess that that
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on rfc 1521 et al
** quoted-printable encoding specifies that non printable characters (anything
** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
** of the character.  It also specifies some other behavior to enable 8bit data
** in a mail message with little difficulty (maximum line sizes, protecting
** some cases of whitespace, etc).
**
** Brandon Long, September 2001.
*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
Christian Heimes1dc54002008-03-24 02:19:29 +000059#ifdef USE_ZLIB_CRC32
60#include "zlib.h"
61#endif
Jack Jansen72781191995-08-07 14:34:15 +000062
63static PyObject *Error;
64static PyObject *Incomplete;
65
/*
** hqx lookup table, ascii->binary.
**
** Indexed by the input byte.  Entries 0x00..0x3F are the 6-bit value the
** character stands for; the three sentinels below mark characters that
** carry no data.  The table is only ever read, hence const.
*/

#define RUNCHAR 0x90    /* run-length marker byte used by the hqx RLE coder */

#define DONE 0x7F       /* ':'  - terminates the hqx data */
#define SKIP 0x7E       /* '\n' and '\r' - silently ignored */
#define FAIL 0x7D       /* any other character - not valid in hqx data */

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/* Bytes 0x80..0xFF never occur in hqx data. */
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
126
/*
** hqx lookup table, binary->ascii: entry i is the character that encodes
** the 6-bit value i.  Read-only, hence const.
*/
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000129
/*
** base64 lookup table, ascii->binary, indexed by a 7-bit character code.
** -1 marks characters outside the base64 alphabet; the pad character '='
** maps to 0.  Read-only, hence const.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
140
/* Character appended to the final base64 quad when the input runs short */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/*
** base64 lookup table, binary->ascii: entry i is the character that encodes
** the 6-bit value i.  Read-only, hence const.
*/
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000148
149
150
/*
** 16-bit CRC lookup table, one entry per byte value.  Entry i equals the
** result of feeding the byte i (placed in the high half of a 16-bit
** register) through eight MSB-first shift steps with polynomial 0x1021.
** Read-only, hence const.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
185
Antoine Pitrou08316762011-12-20 13:58:41 +0100186static int
187ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
188{
189 if (arg == NULL) {
190 PyBuffer_Release(buf);
191 return 1;
192 }
193 if (PyUnicode_Check(arg)) {
194 if (PyUnicode_READY(arg) < 0)
195 return 0;
196 if (!PyUnicode_IS_ASCII(arg)) {
197 PyErr_SetString(PyExc_ValueError,
198 "string argument should contain only ASCII characters");
199 return 0;
200 }
201 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
202 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
203 buf->len = PyUnicode_GET_LENGTH(arg);
204 buf->obj = NULL;
205 return 1;
206 }
207 if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
208 PyErr_Format(PyExc_TypeError,
209 "argument should be bytes, buffer or ASCII string, "
210 "not %R", Py_TYPE(arg));
211 return 0;
212 }
213 if (!PyBuffer_IsContiguous(buf, 'C')) {
214 PyErr_Format(PyExc_TypeError,
215 "argument should be a contiguous buffer, "
216 "not %R", Py_TYPE(arg));
217 PyBuffer_Release(buf);
218 return 0;
219 }
220 return Py_CLEANUP_SUPPORTED;
221}
222
223
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000224PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
Jack Jansen72781191995-08-07 14:34:15 +0000225
226static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000227binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000228{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000229 Py_buffer pascii;
230 unsigned char *ascii_data, *bin_data;
231 int leftbits = 0;
232 unsigned char this_ch;
233 unsigned int leftchar = 0;
234 PyObject *rv;
235 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000236
Antoine Pitrou08316762011-12-20 13:58:41 +0100237 if ( !PyArg_ParseTuple(args, "O&:a2b_uu", ascii_buffer_converter, &pascii) )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000238 return NULL;
239 ascii_data = pascii.buf;
240 ascii_len = pascii.len;
Jack Jansen72781191995-08-07 14:34:15 +0000241
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000242 assert(ascii_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 /* First byte: binary data length (in bytes) */
245 bin_len = (*ascii_data++ - ' ') & 077;
246 ascii_len--;
Jack Jansen72781191995-08-07 14:34:15 +0000247
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 /* Allocate the buffer */
249 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) {
250 PyBuffer_Release(&pascii);
251 return NULL;
252 }
253 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
256 /* XXX is it really best to add NULs if there's no more data */
257 this_ch = (ascii_len > 0) ? *ascii_data : 0;
258 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
259 /*
260 ** Whitespace. Assume some spaces got eaten at
261 ** end-of-line. (We check this later)
262 */
263 this_ch = 0;
264 } else {
265 /* Check the character for legality
266 ** The 64 in stead of the expected 63 is because
267 ** there are a few uuencodes out there that use
268 ** '`' as zero instead of space.
269 */
270 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
271 PyErr_SetString(Error, "Illegal char");
272 PyBuffer_Release(&pascii);
273 Py_DECREF(rv);
274 return NULL;
275 }
276 this_ch = (this_ch - ' ') & 077;
277 }
278 /*
279 ** Shift it in on the low end, and see if there's
280 ** a byte ready for output.
281 */
282 leftchar = (leftchar << 6) | (this_ch);
283 leftbits += 6;
284 if ( leftbits >= 8 ) {
285 leftbits -= 8;
286 *bin_data++ = (leftchar >> leftbits) & 0xff;
287 leftchar &= ((1 << leftbits) - 1);
288 bin_len--;
289 }
290 }
291 /*
292 ** Finally, check that if there's anything left on the line
293 ** that it's whitespace only.
294 */
295 while( ascii_len-- > 0 ) {
296 this_ch = *ascii_data++;
297 /* Extra '`' may be written as padding in some cases */
298 if ( this_ch != ' ' && this_ch != ' '+64 &&
299 this_ch != '\n' && this_ch != '\r' ) {
300 PyErr_SetString(Error, "Trailing garbage");
301 PyBuffer_Release(&pascii);
302 Py_DECREF(rv);
303 return NULL;
304 }
305 }
306 PyBuffer_Release(&pascii);
307 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000308}
309
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000310PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000311
Jack Jansen72781191995-08-07 14:34:15 +0000312static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000313binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000314{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000315 Py_buffer pbin;
316 unsigned char *ascii_data, *bin_data;
317 int leftbits = 0;
318 unsigned char this_ch;
319 unsigned int leftchar = 0;
320 PyObject *rv;
321 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000322
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000323 if ( !PyArg_ParseTuple(args, "y*:b2a_uu", &pbin) )
324 return NULL;
325 bin_data = pbin.buf;
326 bin_len = pbin.len;
327 if ( bin_len > 45 ) {
328 /* The 45 is a limit that appears in all uuencode's */
329 PyErr_SetString(Error, "At most 45 bytes at once");
330 PyBuffer_Release(&pbin);
331 return NULL;
332 }
Jack Jansen72781191995-08-07 14:34:15 +0000333
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 /* We're lazy and allocate to much (fixed up later) */
335 if ( (rv=PyBytes_FromStringAndSize(NULL, 2 + (bin_len+2)/3*4)) == NULL ) {
336 PyBuffer_Release(&pbin);
337 return NULL;
338 }
339 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000340
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 /* Store the length */
342 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
345 /* Shift the data (or padding) into our buffer */
346 if ( bin_len > 0 ) /* Data */
347 leftchar = (leftchar << 8) | *bin_data;
348 else /* Padding */
349 leftchar <<= 8;
350 leftbits += 8;
Jack Jansen72781191995-08-07 14:34:15 +0000351
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 /* See if there are 6-bit groups ready */
353 while ( leftbits >= 6 ) {
354 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
355 leftbits -= 6;
356 *ascii_data++ = this_ch + ' ';
357 }
358 }
359 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 if (_PyBytes_Resize(&rv,
362 (ascii_data -
363 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
364 Py_DECREF(rv);
365 rv = NULL;
366 }
367 PyBuffer_Release(&pbin);
368 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000369}
370
Guido van Rossum2db4f471999-10-19 19:05:14 +0000371
372static int
Thomas Woutersf98db652006-03-01 21:37:32 +0000373binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000374{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000375 /* Finds & returns the (num+1)th
376 ** valid character for base64, or -1 if none.
377 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000378
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 int ret = -1;
380 unsigned char c, b64val;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000382 while ((slen > 0) && (ret == -1)) {
383 c = *s;
384 b64val = table_a2b_base64[c & 0x7f];
385 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
386 if (num == 0)
387 ret = *s;
388 num--;
389 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000390
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000391 s++;
392 slen--;
393 }
394 return ret;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000395}
396
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000397PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000398
399static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000400binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000401{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 Py_buffer pascii;
403 unsigned char *ascii_data, *bin_data;
404 int leftbits = 0;
405 unsigned char this_ch;
406 unsigned int leftchar = 0;
407 PyObject *rv;
408 Py_ssize_t ascii_len, bin_len;
409 int quad_pos = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000410
Antoine Pitrou08316762011-12-20 13:58:41 +0100411 if ( !PyArg_ParseTuple(args, "O&:a2b_base64", ascii_buffer_converter, &pascii) )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000412 return NULL;
413 ascii_data = pascii.buf;
414 ascii_len = pascii.len;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000415
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000416 assert(ascii_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 if (ascii_len > PY_SSIZE_T_MAX - 3) {
419 PyBuffer_Release(&pascii);
420 return PyErr_NoMemory();
421 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000423 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000424
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000425 /* Allocate the buffer */
426 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) {
427 PyBuffer_Release(&pascii);
428 return NULL;
429 }
430 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
431 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000432
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000433 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
434 this_ch = *ascii_data;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000435
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000436 if (this_ch > 0x7f ||
437 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
438 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000439
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000440 /* Check for pad sequences and ignore
441 ** the invalid ones.
442 */
443 if (this_ch == BASE64_PAD) {
444 if ( (quad_pos < 2) ||
445 ((quad_pos == 2) &&
446 (binascii_find_valid(ascii_data, ascii_len, 1)
447 != BASE64_PAD)) )
448 {
449 continue;
450 }
451 else {
452 /* A pad sequence means no more input.
453 ** We've already interpreted the data
454 ** from the quad at this point.
455 */
456 leftbits = 0;
457 break;
458 }
459 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000460
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 this_ch = table_a2b_base64[*ascii_data];
462 if ( this_ch == (unsigned char) -1 )
463 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000464
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 /*
466 ** Shift it in on the low end, and see if there's
467 ** a byte ready for output.
468 */
469 quad_pos = (quad_pos + 1) & 0x03;
470 leftchar = (leftchar << 6) | (this_ch);
471 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000472
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000473 if ( leftbits >= 8 ) {
474 leftbits -= 8;
475 *bin_data++ = (leftchar >> leftbits) & 0xff;
476 bin_len++;
477 leftchar &= ((1 << leftbits) - 1);
478 }
479 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000480
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000481 if (leftbits != 0) {
482 PyBuffer_Release(&pascii);
483 PyErr_SetString(Error, "Incorrect padding");
484 Py_DECREF(rv);
485 return NULL;
486 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000488 /* And set string size correctly. If the result string is empty
489 ** (because the input was all invalid) return the shared empty
490 ** string instead; _PyBytes_Resize() won't do this for us.
491 */
492 if (bin_len > 0) {
493 if (_PyBytes_Resize(&rv, bin_len) < 0) {
494 Py_DECREF(rv);
495 rv = NULL;
496 }
497 }
498 else {
499 Py_DECREF(rv);
500 rv = PyBytes_FromStringAndSize("", 0);
501 }
502 PyBuffer_Release(&pascii);
503 return rv;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000504}
505
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000506PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000507
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000508static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000509binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000510{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000511 Py_buffer pbuf;
512 unsigned char *ascii_data, *bin_data;
513 int leftbits = 0;
514 unsigned char this_ch;
515 unsigned int leftchar = 0;
516 PyObject *rv;
517 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000518
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 if ( !PyArg_ParseTuple(args, "y*:b2a_base64", &pbuf) )
520 return NULL;
521 bin_data = pbuf.buf;
522 bin_len = pbuf.len;
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 assert(bin_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000525
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000526 if ( bin_len > BASE64_MAXBIN ) {
527 PyErr_SetString(Error, "Too much data for base64 line");
528 PyBuffer_Release(&pbuf);
529 return NULL;
530 }
Tim Peters934c1a12002-07-02 22:24:50 +0000531
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000532 /* We're lazy and allocate too much (fixed up later).
533 "+3" leaves room for up to two pad characters and a trailing
534 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
535 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL ) {
536 PyBuffer_Release(&pbuf);
537 return NULL;
538 }
539 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000541 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
542 /* Shift the data into our buffer */
543 leftchar = (leftchar << 8) | *bin_data;
544 leftbits += 8;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000546 /* See if there are 6-bit groups ready */
547 while ( leftbits >= 6 ) {
548 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
549 leftbits -= 6;
550 *ascii_data++ = table_b2a_base64[this_ch];
551 }
552 }
553 if ( leftbits == 2 ) {
554 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
555 *ascii_data++ = BASE64_PAD;
556 *ascii_data++ = BASE64_PAD;
557 } else if ( leftbits == 4 ) {
558 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
559 *ascii_data++ = BASE64_PAD;
560 }
561 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000563 if (_PyBytes_Resize(&rv,
564 (ascii_data -
565 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
566 Py_DECREF(rv);
567 rv = NULL;
568 }
569 PyBuffer_Release(&pbuf);
570 return rv;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000571}
572
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000573PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
Jack Jansen72781191995-08-07 14:34:15 +0000574
575static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000576binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000577{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000578 Py_buffer pascii;
579 unsigned char *ascii_data, *bin_data;
580 int leftbits = 0;
581 unsigned char this_ch;
582 unsigned int leftchar = 0;
583 PyObject *rv;
584 Py_ssize_t len;
585 int done = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000586
Antoine Pitrou08316762011-12-20 13:58:41 +0100587 if ( !PyArg_ParseTuple(args, "O&:a2b_hqx", ascii_buffer_converter, &pascii) )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000588 return NULL;
589 ascii_data = pascii.buf;
590 len = pascii.len;
Jack Jansen72781191995-08-07 14:34:15 +0000591
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000593
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000594 if (len > PY_SSIZE_T_MAX - 2) {
595 PyBuffer_Release(&pascii);
596 return PyErr_NoMemory();
597 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000598
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000599 /* Allocate a string that is too big (fixed later)
600 Add two to the initial length to prevent interning which
601 would preclude subsequent resizing. */
602 if ( (rv=PyBytes_FromStringAndSize(NULL, len+2)) == NULL ) {
603 PyBuffer_Release(&pascii);
604 return NULL;
605 }
606 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000608 for( ; len > 0 ; len--, ascii_data++ ) {
609 /* Get the byte and look it up */
610 this_ch = table_a2b_hqx[*ascii_data];
611 if ( this_ch == SKIP )
612 continue;
613 if ( this_ch == FAIL ) {
614 PyErr_SetString(Error, "Illegal char");
615 PyBuffer_Release(&pascii);
616 Py_DECREF(rv);
617 return NULL;
618 }
619 if ( this_ch == DONE ) {
620 /* The terminating colon */
621 done = 1;
622 break;
623 }
Jack Jansen72781191995-08-07 14:34:15 +0000624
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000625 /* Shift it into the buffer and see if any bytes are ready */
626 leftchar = (leftchar << 6) | (this_ch);
627 leftbits += 6;
628 if ( leftbits >= 8 ) {
629 leftbits -= 8;
630 *bin_data++ = (leftchar >> leftbits) & 0xff;
631 leftchar &= ((1 << leftbits) - 1);
632 }
633 }
Tim Peters934c1a12002-07-02 22:24:50 +0000634
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000635 if ( leftbits && !done ) {
636 PyErr_SetString(Incomplete,
637 "String has incomplete number of bytes");
638 PyBuffer_Release(&pascii);
639 Py_DECREF(rv);
640 return NULL;
641 }
642 if (_PyBytes_Resize(&rv,
643 (bin_data -
644 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
645 Py_DECREF(rv);
646 rv = NULL;
647 }
648 if (rv) {
649 PyObject *rrv = Py_BuildValue("Oi", rv, done);
650 PyBuffer_Release(&pascii);
651 Py_DECREF(rv);
652 return rrv;
653 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000654
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000655 PyBuffer_Release(&pascii);
656 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000657}
658
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000659PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
Jack Jansen72781191995-08-07 14:34:15 +0000660
661static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000662binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000663{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000664 Py_buffer pbuf;
665 unsigned char *in_data, *out_data;
666 PyObject *rv;
667 unsigned char ch;
668 Py_ssize_t in, inend, len;
Tim Peters934c1a12002-07-02 22:24:50 +0000669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 if ( !PyArg_ParseTuple(args, "y*:rlecode_hqx", &pbuf) )
671 return NULL;
672 in_data = pbuf.buf;
673 len = pbuf.len;
Jack Jansen72781191995-08-07 14:34:15 +0000674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000675 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000677 if (len > PY_SSIZE_T_MAX / 2 - 2) {
678 PyBuffer_Release(&pbuf);
679 return PyErr_NoMemory();
680 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000682 /* Worst case: output is twice as big as input (fixed later) */
683 if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) {
684 PyBuffer_Release(&pbuf);
685 return NULL;
686 }
687 out_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000688
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000689 for( in=0; in<len; in++) {
690 ch = in_data[in];
691 if ( ch == RUNCHAR ) {
692 /* RUNCHAR. Escape it. */
693 *out_data++ = RUNCHAR;
694 *out_data++ = 0;
695 } else {
696 /* Check how many following are the same */
697 for(inend=in+1;
698 inend<len && in_data[inend] == ch &&
699 inend < in+255;
700 inend++) ;
701 if ( inend - in > 3 ) {
702 /* More than 3 in a row. Output RLE. */
703 *out_data++ = ch;
704 *out_data++ = RUNCHAR;
Antoine Pitrou40455752010-08-15 18:51:10 +0000705 *out_data++ = (unsigned char) (inend-in);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000706 in = inend-1;
707 } else {
708 /* Less than 3. Output the byte itself */
709 *out_data++ = ch;
710 }
711 }
712 }
713 if (_PyBytes_Resize(&rv,
714 (out_data -
715 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
716 Py_DECREF(rv);
717 rv = NULL;
718 }
719 PyBuffer_Release(&pbuf);
720 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000721}
722
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000723PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
Tim Peters934c1a12002-07-02 22:24:50 +0000724
Jack Jansen72781191995-08-07 14:34:15 +0000725static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000726binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000727{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000728 Py_buffer pbin;
729 unsigned char *ascii_data, *bin_data;
730 int leftbits = 0;
731 unsigned char this_ch;
732 unsigned int leftchar = 0;
733 PyObject *rv;
734 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000736 if ( !PyArg_ParseTuple(args, "y*:b2a_hqx", &pbin) )
737 return NULL;
738 bin_data = pbin.buf;
739 len = pbin.len;
Jack Jansen72781191995-08-07 14:34:15 +0000740
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000741 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000743 if (len > PY_SSIZE_T_MAX / 2 - 2) {
744 PyBuffer_Release(&pbin);
745 return PyErr_NoMemory();
746 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000747
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000748 /* Allocate a buffer that is at least large enough */
749 if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) {
750 PyBuffer_Release(&pbin);
751 return NULL;
752 }
753 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000755 for( ; len > 0 ; len--, bin_data++ ) {
756 /* Shift into our buffer, and output any 6bits ready */
757 leftchar = (leftchar << 8) | *bin_data;
758 leftbits += 8;
759 while ( leftbits >= 6 ) {
760 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
761 leftbits -= 6;
762 *ascii_data++ = table_b2a_hqx[this_ch];
763 }
764 }
765 /* Output a possible runt byte */
766 if ( leftbits ) {
767 leftchar <<= (6-leftbits);
768 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
769 }
770 if (_PyBytes_Resize(&rv,
771 (ascii_data -
772 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
773 Py_DECREF(rv);
774 rv = NULL;
775 }
776 PyBuffer_Release(&pbin);
777 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000778}
779
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000780PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
Tim Peters934c1a12002-07-02 22:24:50 +0000781
Jack Jansen72781191995-08-07 14:34:15 +0000782static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000783binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000784{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000785 Py_buffer pin;
786 unsigned char *in_data, *out_data;
787 unsigned char in_byte, in_repeat;
788 PyObject *rv;
789 Py_ssize_t in_len, out_len, out_len_left;
Jack Jansen72781191995-08-07 14:34:15 +0000790
Florent Xiclunaf1046ca2010-07-27 21:20:15 +0000791 if ( !PyArg_ParseTuple(args, "y*:rledecode_hqx", &pin) )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000792 return NULL;
793 in_data = pin.buf;
794 in_len = pin.len;
Jack Jansen72781191995-08-07 14:34:15 +0000795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000796 assert(in_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 /* Empty string is a special case */
799 if ( in_len == 0 ) {
800 PyBuffer_Release(&pin);
801 return PyBytes_FromStringAndSize("", 0);
802 }
803 else if (in_len > PY_SSIZE_T_MAX / 2) {
804 PyBuffer_Release(&pin);
805 return PyErr_NoMemory();
806 }
Jack Jansen72781191995-08-07 14:34:15 +0000807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 /* Allocate a buffer of reasonable size. Resized when needed */
809 out_len = in_len*2;
810 if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL ) {
811 PyBuffer_Release(&pin);
812 return NULL;
813 }
814 out_len_left = out_len;
815 out_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000816
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000817 /*
818 ** We need two macros here to get/put bytes and handle
819 ** end-of-buffer for input and output strings.
820 */
Jack Jansen72781191995-08-07 14:34:15 +0000821#define INBYTE(b) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000822 do { \
823 if ( --in_len < 0 ) { \
824 PyErr_SetString(Incomplete, ""); \
825 Py_DECREF(rv); \
826 PyBuffer_Release(&pin); \
827 return NULL; \
828 } \
829 b = *in_data++; \
830 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000831
Jack Jansen72781191995-08-07 14:34:15 +0000832#define OUTBYTE(b) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000833 do { \
834 if ( --out_len_left < 0 ) { \
835 if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
836 if (_PyBytes_Resize(&rv, 2*out_len) < 0) \
837 { Py_DECREF(rv); PyBuffer_Release(&pin); return NULL; } \
838 out_data = (unsigned char *)PyBytes_AS_STRING(rv) \
839 + out_len; \
840 out_len_left = out_len-1; \
841 out_len = out_len * 2; \
842 } \
843 *out_data++ = b; \
844 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000845
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000846 /*
847 ** Handle first byte separately (since we have to get angry
848 ** in case of an orphaned RLE code).
849 */
850 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000851
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000852 if (in_byte == RUNCHAR) {
853 INBYTE(in_repeat);
854 if (in_repeat != 0) {
855 /* Note Error, not Incomplete (which is at the end
856 ** of the string only). This is a programmer error.
857 */
858 PyErr_SetString(Error, "Orphaned RLE code at start");
859 PyBuffer_Release(&pin);
860 Py_DECREF(rv);
861 return NULL;
862 }
863 OUTBYTE(RUNCHAR);
864 } else {
865 OUTBYTE(in_byte);
866 }
Tim Peters934c1a12002-07-02 22:24:50 +0000867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 while( in_len > 0 ) {
869 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000870
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000871 if (in_byte == RUNCHAR) {
872 INBYTE(in_repeat);
873 if ( in_repeat == 0 ) {
874 /* Just an escaped RUNCHAR value */
875 OUTBYTE(RUNCHAR);
876 } else {
877 /* Pick up value and output a sequence of it */
878 in_byte = out_data[-1];
879 while ( --in_repeat > 0 )
880 OUTBYTE(in_byte);
881 }
882 } else {
883 /* Normal byte */
884 OUTBYTE(in_byte);
885 }
886 }
887 if (_PyBytes_Resize(&rv,
888 (out_data -
889 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
890 Py_DECREF(rv);
891 rv = NULL;
892 }
893 PyBuffer_Release(&pin);
894 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000895}
896
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000897PyDoc_STRVAR(doc_crc_hqx,
898"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
Jack Jansen72781191995-08-07 14:34:15 +0000899
900static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000901binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000902{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 Py_buffer pin;
904 unsigned char *bin_data;
905 unsigned int crc;
906 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000907
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000908 if ( !PyArg_ParseTuple(args, "y*i:crc_hqx", &pin, &crc) )
909 return NULL;
910 bin_data = pin.buf;
911 len = pin.len;
Jack Jansen72781191995-08-07 14:34:15 +0000912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000913 while(len-- > 0) {
914 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
915 }
Jack Jansen72781191995-08-07 14:34:15 +0000916
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 PyBuffer_Release(&pin);
918 return Py_BuildValue("i", crc);
Jack Jansen72781191995-08-07 14:34:15 +0000919}
920
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000921PyDoc_STRVAR(doc_crc32,
922"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000923
Christian Heimes1dc54002008-03-24 02:19:29 +0000924#ifdef USE_ZLIB_CRC32
925/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
926static PyObject *
927binascii_crc32(PyObject *self, PyObject *args)
928{
Christian Heimescc47b052008-03-25 14:56:36 +0000929 unsigned int crc32val = 0; /* crc32(0L, Z_NULL, 0) */
Martin v. Löwis423be952008-08-13 15:53:07 +0000930 Py_buffer pbuf;
Christian Heimes1dc54002008-03-24 02:19:29 +0000931 Byte *buf;
Neal Norwitz4027bf82008-03-24 04:59:05 +0000932 Py_ssize_t len;
Christian Heimescc47b052008-03-25 14:56:36 +0000933 int signed_val;
934
Martin v. Löwis15b16a32008-12-02 06:00:15 +0000935 if (!PyArg_ParseTuple(args, "y*|I:crc32", &pbuf, &crc32val))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000936 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000937 buf = (Byte*)pbuf.buf;
938 len = pbuf.len;
Christian Heimescc47b052008-03-25 14:56:36 +0000939 signed_val = crc32(crc32val, buf, len);
Martin v. Löwis423be952008-08-13 15:53:07 +0000940 PyBuffer_Release(&pbuf);
Christian Heimescc47b052008-03-25 14:56:36 +0000941 return PyLong_FromUnsignedLong(signed_val & 0xffffffffU);
Christian Heimes1dc54002008-03-24 02:19:29 +0000942}
943#else /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000944/* Crc - 32 BIT ANSI X3.66 CRC checksum files
945 Also known as: ISO 3307
946**********************************************************************|
947* *|
948* Demonstration program to compute the 32-bit CRC used as the frame *|
949* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
950* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
951* protocol). The 32-bit FCS was added via the Federal Register, *|
952* 1 June 1982, p.23798. I presume but don't know for certain that *|
953* this polynomial is or will be included in CCITT V.41, which *|
954* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
955* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
956* errors by a factor of 10^-5 over 16-bit FCS. *|
957* *|
958**********************************************************************|
959
960 Copyright (C) 1986 Gary S. Brown. You may use this program, or
961 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000962
Tim Peters934c1a12002-07-02 22:24:50 +0000963 First, the polynomial itself and its table of feedback terms. The
964 polynomial is
965 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
966 Note that we take it "backwards" and put the highest-order term in
967 the lowest-order bit. The X^32 term is "implied"; the LSB is the
968 X^31 term, etc. The X^0 term (usually shown as "+1") results in
969 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000970
Tim Peters934c1a12002-07-02 22:24:50 +0000971 Note that the usual hardware shift register implementation, which
972 is what we're using (we're merely optimizing it by doing eight-bit
973 chunks at a time) shifts bits into the lowest-order term. In our
974 implementation, that means shifting towards the right. Why do we
975 do it this way? Because the calculated CRC must be transmitted in
976 order from highest-order term to lowest-order term. UARTs transmit
977 characters in order from LSB to MSB. By storing the CRC this way,
978 we hand it to the UART in the order low-byte to high-byte; the UART
  sends each low-bit to high-bit; and the result is transmission bit
980 by bit from highest- to lowest-order term without requiring any bit
981 shuffling on our part. Reception works similarly.
982
983 The feedback terms table consists of 256, 32-bit entries. Notes:
984
985 1. The table can be generated at runtime if desired; code to do so
986 is shown later. It might not be obvious, but the feedback
987 terms simply represent the results of eight shift/xor opera-
988 tions for all combinations of data and CRC register values.
989
990 2. The CRC accumulation logic is the same for all CRC polynomials,
991 be they sixteen or thirty-two bits wide. You simply choose the
992 appropriate table. Alternatively, because the table can be
993 generated at runtime, you can start by generating the table for
994 the polynomial in question and use exactly the same "updcrc",
995 if your application needn't simultaneously handle two CRC
996 polynomials. (Note, however, that XMODEM is strange.)
997
998 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
999 of course, 32-bit entries work OK if the high 16 bits are zero.
1000
1001 4. The values must be right-shifted by eight bits by the "updcrc"
1002 logic; the shift must be unsigned (bring in zeroes). On some
1003 hardware you could probably optimize the shift in assembler by
1004 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001005********************************************************************/
1006
/* Feedback-term table for the reflected CRC-32 polynomial described in the
 * comment above: one 32-bit entry per possible byte value.  The table is
 * only ever read, so it is declared const. */
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
1061
1062static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +00001063binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001064{ /* By Jim Ahlstrom; All rights transferred to CNRI */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001065 Py_buffer pbin;
1066 unsigned char *bin_data;
1067 unsigned int crc = 0; /* initial value of CRC */
1068 Py_ssize_t len;
1069 unsigned int result;
Tim Peters934c1a12002-07-02 22:24:50 +00001070
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001071 if ( !PyArg_ParseTuple(args, "y*|I:crc32", &pbin, &crc) )
1072 return NULL;
1073 bin_data = pbin.buf;
1074 len = pbin.len;
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001075
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001076 crc = ~ crc;
1077 while (len-- > 0) {
1078 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1079 /* Note: (crc >> 8) MUST zero fill on left */
1080 }
Tim Petersa98011c2002-07-02 20:20:08 +00001081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 result = (crc ^ 0xFFFFFFFF);
1083 PyBuffer_Release(&pbin);
1084 return PyLong_FromUnsignedLong(result & 0xffffffff);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001085}
Christian Heimes1dc54002008-03-24 02:19:29 +00001086#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001087
Barry Warsawe977c212000-08-15 06:07:13 +00001088
1089static PyObject *
1090binascii_hexlify(PyObject *self, PyObject *args)
1091{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 Py_buffer parg;
1093 char* argbuf;
1094 Py_ssize_t arglen;
1095 PyObject *retval;
1096 char* retbuf;
1097 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +00001098
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 if (!PyArg_ParseTuple(args, "y*:b2a_hex", &parg))
1100 return NULL;
1101 argbuf = parg.buf;
1102 arglen = parg.len;
Barry Warsawe977c212000-08-15 06:07:13 +00001103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 assert(arglen >= 0);
1105 if (arglen > PY_SSIZE_T_MAX / 2) {
1106 PyBuffer_Release(&parg);
1107 return PyErr_NoMemory();
1108 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 retval = PyBytes_FromStringAndSize(NULL, arglen*2);
1111 if (!retval) {
1112 PyBuffer_Release(&parg);
1113 return NULL;
1114 }
1115 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001116
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 /* make hex version of string, taken from shamodule.c */
1118 for (i=j=0; i < arglen; i++) {
Victor Stinnerf5cff562011-10-14 02:13:11 +02001119 unsigned char c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001120 c = (argbuf[i] >> 4) & 0xf;
Victor Stinnerf5cff562011-10-14 02:13:11 +02001121 retbuf[j++] = Py_hexdigits[c];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 c = argbuf[i] & 0xf;
Victor Stinnerf5cff562011-10-14 02:13:11 +02001123 retbuf[j++] = Py_hexdigits[c];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001124 }
1125 PyBuffer_Release(&parg);
1126 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +00001127}
1128
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001129PyDoc_STRVAR(doc_hexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001130"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
1131\n\
R David Murray5fdb64b2013-11-03 13:21:38 -05001132The return value is a bytes object. This function is also\n\
1133available as \"hexlify()\".");
Barry Warsawe977c212000-08-15 06:07:13 +00001134
1135
/* Map one hexadecimal digit character (either case) to its value 0..15.
 * Returns -1 when `c` is not a hexadecimal digit. */
static int
to_int(int c)
{
    int lowered;

    if (Py_ISDIGIT(c))
        return c - '0';

    /* Fold upper case onto lower case so a single range test suffices. */
    lowered = Py_ISUPPER(c) ? Py_TOLOWER(c) : c;
    if (lowered < 'a' || lowered > 'f')
        return -1;
    return lowered - 'a' + 10;
}
1149
1150
1151static PyObject *
1152binascii_unhexlify(PyObject *self, PyObject *args)
1153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001154 Py_buffer parg;
1155 char* argbuf;
1156 Py_ssize_t arglen;
1157 PyObject *retval;
1158 char* retbuf;
1159 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +00001160
Antoine Pitrou08316762011-12-20 13:58:41 +01001161 if (!PyArg_ParseTuple(args, "O&:a2b_hex", ascii_buffer_converter, &parg))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 return NULL;
1163 argbuf = parg.buf;
1164 arglen = parg.len;
Barry Warsawe977c212000-08-15 06:07:13 +00001165
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001166 assert(arglen >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 /* XXX What should we do about strings with an odd length? Should
1169 * we add an implicit leading zero, or a trailing zero? For now,
1170 * raise an exception.
1171 */
1172 if (arglen % 2) {
1173 PyBuffer_Release(&parg);
1174 PyErr_SetString(Error, "Odd-length string");
1175 return NULL;
1176 }
Barry Warsawe977c212000-08-15 06:07:13 +00001177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001178 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
1179 if (!retval) {
1180 PyBuffer_Release(&parg);
1181 return NULL;
1182 }
1183 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 for (i=j=0; i < arglen; i += 2) {
1186 int top = to_int(Py_CHARMASK(argbuf[i]));
1187 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1188 if (top == -1 || bot == -1) {
1189 PyErr_SetString(Error,
1190 "Non-hexadecimal digit found");
1191 goto finally;
1192 }
1193 retbuf[j++] = (top << 4) + bot;
1194 }
1195 PyBuffer_Release(&parg);
1196 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +00001197
1198 finally:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 PyBuffer_Release(&parg);
1200 Py_DECREF(retval);
1201 return NULL;
Barry Warsawe977c212000-08-15 06:07:13 +00001202}
1203
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001204PyDoc_STRVAR(doc_unhexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001205"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1206\n\
1207hexstr must contain an even number of hex digits (upper or lower case).\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001208This function is also available as \"unhexlify()\"");
Barry Warsawe977c212000-08-15 06:07:13 +00001209
/* Value of each 7-bit ASCII character when read as a hexadecimal digit,
 * or -1 when the character is not one.  Read-only, hence const. */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up a character in table_hex.  The table has only 128 entries, so
 * the argument must be below 128. */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Line-length limit applied by the quoted-printable encoder. */
#define MAXLINESIZE 76
1224
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001225PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001226
Tim Peters934c1a12002-07-02 22:24:50 +00001227static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001228binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1229{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 Py_ssize_t in, out;
1231 char ch;
1232 Py_buffer pdata;
1233 unsigned char *data, *odata;
1234 Py_ssize_t datalen = 0;
1235 PyObject *rv;
1236 static char *kwlist[] = {"data", "header", NULL};
1237 int header = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001238
Antoine Pitrou08316762011-12-20 13:58:41 +01001239 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|i:a2b_qp", kwlist,
1240 ascii_buffer_converter, &pdata, &header))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 return NULL;
1242 data = pdata.buf;
1243 datalen = pdata.len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001244
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001245 /* We allocate the output same size as input, this is overkill.
1246 * The previous implementation used calloc() so we'll zero out the
1247 * memory here too, since PyMem_Malloc() does not guarantee that.
1248 */
1249 odata = (unsigned char *) PyMem_Malloc(datalen);
1250 if (odata == NULL) {
1251 PyBuffer_Release(&pdata);
1252 PyErr_NoMemory();
1253 return NULL;
1254 }
1255 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001257 in = out = 0;
1258 while (in < datalen) {
1259 if (data[in] == '=') {
1260 in++;
1261 if (in >= datalen) break;
1262 /* Soft line breaks */
1263 if ((data[in] == '\n') || (data[in] == '\r')) {
1264 if (data[in] != '\n') {
1265 while (in < datalen && data[in] != '\n') in++;
1266 }
1267 if (in < datalen) in++;
1268 }
1269 else if (data[in] == '=') {
1270 /* broken case from broken python qp */
1271 odata[out++] = '=';
1272 in++;
1273 }
1274 else if (((data[in] >= 'A' && data[in] <= 'F') ||
1275 (data[in] >= 'a' && data[in] <= 'f') ||
1276 (data[in] >= '0' && data[in] <= '9')) &&
1277 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1278 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1279 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1280 /* hexval */
1281 ch = hexval(data[in]) << 4;
1282 in++;
1283 ch |= hexval(data[in]);
1284 in++;
1285 odata[out++] = ch;
1286 }
1287 else {
1288 odata[out++] = '=';
1289 }
1290 }
1291 else if (header && data[in] == '_') {
1292 odata[out++] = ' ';
1293 in++;
1294 }
1295 else {
1296 odata[out] = data[in];
1297 in++;
1298 out++;
1299 }
1300 }
1301 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1302 PyBuffer_Release(&pdata);
1303 PyMem_Free(odata);
1304 return NULL;
1305 }
1306 PyBuffer_Release(&pdata);
1307 PyMem_Free(odata);
1308 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001309}
1310
/* Write the two upper-case hexadecimal digits of `ch` to s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.  Always
 * returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0f];
    s[1] = digits[ch & 0x0f];
    return 0;
}
1321
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001322PyDoc_STRVAR(doc_b2a_qp,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001323"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1324 Encode a string using quoted-printable encoding. \n\
1325\n\
1326On encoding, when istext is set, newlines are not encoded, and white \n\
1327space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001328both encoded. When quotetabs is set, space and tabs are encoded.");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001329
1330/* XXX: This is ridiculously complicated to be backward compatible
1331 * (mostly) with the quopri module. It doesn't re-create the quopri
1332 * module bug where text ending in CRLF has the CR encoded */
Tim Peters934c1a12002-07-02 22:24:50 +00001333static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001334binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1335{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 Py_ssize_t in, out;
1337 Py_buffer pdata;
1338 unsigned char *data, *odata;
1339 Py_ssize_t datalen = 0, odatalen = 0;
1340 PyObject *rv;
1341 unsigned int linelen = 0;
1342 static char *kwlist[] = {"data", "quotetabs", "istext",
1343 "header", NULL};
1344 int istext = 1;
1345 int quotetabs = 0;
1346 int header = 0;
1347 unsigned char ch;
1348 int crlf = 0;
1349 unsigned char *p;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001350
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|iii", kwlist, &pdata,
1352 &quotetabs, &istext, &header))
1353 return NULL;
1354 data = pdata.buf;
1355 datalen = pdata.len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001356
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001357 /* See if this string is using CRLF line ends */
1358 /* XXX: this function has the side effect of converting all of
1359 * the end of lines to be the same depending on this detection
1360 * here */
1361 p = (unsigned char *) memchr(data, '\n', datalen);
1362 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1363 crlf = 1;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365 /* First, scan to see how many characters need to be encoded */
1366 in = 0;
1367 while (in < datalen) {
1368 if ((data[in] > 126) ||
1369 (data[in] == '=') ||
1370 (header && data[in] == '_') ||
1371 ((data[in] == '.') && (linelen == 0) &&
1372 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1373 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1374 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1375 ((data[in] < 33) &&
1376 (data[in] != '\r') && (data[in] != '\n') &&
Senthil Kumaran922e9042010-11-09 09:59:13 +00001377 (quotetabs || ((data[in] != '\t') && (data[in] != ' ')))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 {
1379 if ((linelen + 3) >= MAXLINESIZE) {
1380 linelen = 0;
1381 if (crlf)
1382 odatalen += 3;
1383 else
1384 odatalen += 2;
1385 }
1386 linelen += 3;
1387 odatalen += 3;
1388 in++;
1389 }
1390 else {
1391 if (istext &&
1392 ((data[in] == '\n') ||
1393 ((in+1 < datalen) && (data[in] == '\r') &&
1394 (data[in+1] == '\n'))))
1395 {
1396 linelen = 0;
1397 /* Protect against whitespace on end of line */
1398 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1399 odatalen += 2;
1400 if (crlf)
1401 odatalen += 2;
1402 else
1403 odatalen += 1;
1404 if (data[in] == '\r')
1405 in += 2;
1406 else
1407 in++;
1408 }
1409 else {
1410 if ((in + 1 != datalen) &&
1411 (data[in+1] != '\n') &&
1412 (linelen + 1) >= MAXLINESIZE) {
1413 linelen = 0;
1414 if (crlf)
1415 odatalen += 3;
1416 else
1417 odatalen += 2;
1418 }
1419 linelen++;
1420 odatalen++;
1421 in++;
1422 }
1423 }
1424 }
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001425
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 /* We allocate the output same size as input, this is overkill.
1427 * The previous implementation used calloc() so we'll zero out the
1428 * memory here too, since PyMem_Malloc() does not guarantee that.
1429 */
1430 odata = (unsigned char *) PyMem_Malloc(odatalen);
1431 if (odata == NULL) {
1432 PyBuffer_Release(&pdata);
1433 PyErr_NoMemory();
1434 return NULL;
1435 }
1436 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 in = out = linelen = 0;
1439 while (in < datalen) {
1440 if ((data[in] > 126) ||
1441 (data[in] == '=') ||
1442 (header && data[in] == '_') ||
1443 ((data[in] == '.') && (linelen == 0) &&
1444 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1445 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1446 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1447 ((data[in] < 33) &&
1448 (data[in] != '\r') && (data[in] != '\n') &&
1449 (quotetabs ||
1450 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1451 {
1452 if ((linelen + 3 )>= MAXLINESIZE) {
1453 odata[out++] = '=';
1454 if (crlf) odata[out++] = '\r';
1455 odata[out++] = '\n';
1456 linelen = 0;
1457 }
1458 odata[out++] = '=';
1459 to_hex(data[in], &odata[out]);
1460 out += 2;
1461 in++;
1462 linelen += 3;
1463 }
1464 else {
1465 if (istext &&
1466 ((data[in] == '\n') ||
1467 ((in+1 < datalen) && (data[in] == '\r') &&
1468 (data[in+1] == '\n'))))
1469 {
1470 linelen = 0;
1471 /* Protect against whitespace on end of line */
1472 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1473 ch = odata[out-1];
1474 odata[out-1] = '=';
1475 to_hex(ch, &odata[out]);
1476 out += 2;
1477 }
Tim Peters934c1a12002-07-02 22:24:50 +00001478
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001479 if (crlf) odata[out++] = '\r';
1480 odata[out++] = '\n';
1481 if (data[in] == '\r')
1482 in += 2;
1483 else
1484 in++;
1485 }
1486 else {
1487 if ((in + 1 != datalen) &&
1488 (data[in+1] != '\n') &&
1489 (linelen + 1) >= MAXLINESIZE) {
1490 odata[out++] = '=';
1491 if (crlf) odata[out++] = '\r';
1492 odata[out++] = '\n';
1493 linelen = 0;
1494 }
1495 linelen++;
1496 if (header && data[in] == ' ') {
1497 odata[out++] = '_';
1498 in++;
1499 }
1500 else {
1501 odata[out++] = data[in++];
1502 }
1503 }
1504 }
1505 }
1506 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1507 PyBuffer_Release(&pdata);
1508 PyMem_Free(odata);
1509 return NULL;
1510 }
1511 PyBuffer_Release(&pdata);
1512 PyMem_Free(odata);
1513 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001514}
Barry Warsawe977c212000-08-15 06:07:13 +00001515
Jack Jansen72781191995-08-07 14:34:15 +00001516/* List of functions defined in the module */
1517
1518static struct PyMethodDef binascii_module_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001519 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1520 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1521 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1522 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1523 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1524 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1525 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1526 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1527 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1528 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1529 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1530 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1531 doc_rledecode_hqx},
1532 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1533 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
1534 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1535 doc_a2b_qp},
1536 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1537 doc_b2a_qp},
1538 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001539};
1540
1541
Martin v. Löwis1a214512008-06-11 05:26:20 +00001542/* Initialization function for the module (*must* be called PyInit_binascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001543PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001544
Martin v. Löwis1a214512008-06-11 05:26:20 +00001545
1546static struct PyModuleDef binasciimodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001547 PyModuleDef_HEAD_INIT,
1548 "binascii",
1549 doc_binascii,
1550 -1,
1551 binascii_module_methods,
1552 NULL,
1553 NULL,
1554 NULL,
1555 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001556};
1557
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001558PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001559PyInit_binascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001560{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001561 PyObject *m, *d;
Jack Jansen72781191995-08-07 14:34:15 +00001562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001563 /* Create the module and add the functions */
1564 m = PyModule_Create(&binasciimodule);
1565 if (m == NULL)
1566 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +00001567
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001568 d = PyModule_GetDict(m);
Jack Jansen72781191995-08-07 14:34:15 +00001569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001570 Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1571 PyDict_SetItemString(d, "Error", Error);
1572 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1573 PyDict_SetItemString(d, "Incomplete", Incomplete);
1574 if (PyErr_Occurred()) {
1575 Py_DECREF(m);
1576 m = NULL;
1577 }
1578 return m;
Jack Jansen72781191995-08-07 14:34:15 +00001579}