/*
** Routines to represent binary data in ASCII and vice-versa
**
** This module currently supports the following encodings:
** uuencode:
**      each line encodes 45 bytes (except possibly the last)
**      First char encodes (binary) length, rest data
**      each char encodes 6 bits, as follows:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**      short binary data is zero-extended (so the bits are always in the
**      right place), this does *not* reflect in the length.
** base64:
**      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in similar order as uuencode/hqx. Encoding
**      is done via a table.
**      Short binary data is filled (in ASCII) with '='.
** hqx:
**      File starts with introductory text, real data starts and ends
**      with colons.
**      Data consists of three similar parts: info, datafork, resourcefork.
**      Each part is protected (at the end) with a 16-bit crc
**      The binary data is run-length encoded, and then ascii-fied:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding is table-driven, see the code.
**      Short binary data results in the runt ascii-byte being output with
**      the bits in the right place.
**
** While I was reading dozens of programs that encode or decode the formats
** here (documentation? hihi:-) I have formulated Jansen's Observation:
**
**      Programs that encode binary data in ASCII are written in
**      such a style that they are as unreadable as possible. Devices used
**      include unnecessary global variables, burying important tables
**      in unrelated sourcefiles, putting functions in include files,
**      using seemingly-descriptive variable names for different purposes,
**      calls to empty subroutines and a host of others.
**
** I have attempted to break with this tradition, but I guess that that
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on rfc 1521 et al
** quoted-printable encoding specifies that non-printable characters (anything
** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
** of the character.  It also specifies some other behavior to enable 8bit data
** in a mail message with little difficulty (maximum line sizes, protecting
** some cases of whitespace, etc).
**
** Brandon Long, September 2001.
*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
Christian Heimes1dc54002008-03-24 02:19:29 +000059#ifdef USE_ZLIB_CRC32
60#include "zlib.h"
61#endif
Jack Jansen72781191995-08-07 14:34:15 +000062
63static PyObject *Error;
64static PyObject *Incomplete;
65
66/*
67** hqx lookup table, ascii->binary.
68*/
69
/* Byte that introduces a run-length sequence in binhex RLE data. */
#define RUNCHAR 0x90

/*
** Sentinel values stored in table_a2b_hqx.  Real 6-bit values are
** 0x00..0x3F, so these cannot collide with decoded data.
*/
#define DONE 0x7F   /* ':' -- terminates the data */
#define SKIP 0x7E   /* '\n' and '\r' -- silently ignored */
#define FAIL 0x7D   /* any other character -- illegal in hqx data */

/*
** Indexed by input byte; yields the 6-bit value of that hqx character,
** or one of DONE/SKIP/FAIL.  The table is read-only.
*/
static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*      space  !     "     #     $     %     &     '    */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*       (     )     *     +     ,     -     .     /    */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*       0     1     2     3     4     5     6     7    */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*       8     9     :     ;     <     =     >     ?    */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       @     A     B     C     D     E     F     G    */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*       H     I     J     K     L     M     N     O    */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*       P     Q     R     S     T     U     V     W    */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*       X     Y     Z     [     \     ]     ^     _    */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*       `     a     b     c     d     e     f     g    */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*       h     i     j     k     l     m     n     o    */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*       p     q     r     s     t     u     v     w    */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       x     y     z     {     |     }     ~    ^?    */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};

/*
** hqx lookup table, binary->ascii: entry i is the character that
** encodes the 6-bit value i (64 characters plus the string's NUL).
*/
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000129
/*
** base64 lookup table, ascii->binary, indexed by a 7-bit character.
** -1 marks characters outside the base64 alphabet.  The element type is
** explicitly signed so that -1 is stored as -1 on every platform (plain
** char may be unsigned); callers compare against (unsigned char)-1 after
** converting the entry to unsigned char, which works either way.
*/
static const signed char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/*
** base64 lookup table, binary->ascii: entry i is the character that
** encodes the 6-bit value i (64 characters plus the string's NUL).
*/
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000148
149
150
/*
** Byte-wise lookup table for a 16-bit CRC: entry i is the result of
** pushing the byte i through eight shift/xor steps with polynomial
** 0x1021, most significant bit first.  The table is read-only.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
185
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000186PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
Jack Jansen72781191995-08-07 14:34:15 +0000187
188static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000189binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000190{
191 unsigned char *ascii_data, *bin_data;
192 int leftbits = 0;
193 unsigned char this_ch;
194 unsigned int leftchar = 0;
195 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000196 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000197
Guido van Rossum43713e52000-02-29 13:59:29 +0000198 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000199 return NULL;
200
201 /* First byte: binary data length (in bytes) */
202 bin_len = (*ascii_data++ - ' ') & 077;
203 ascii_len--;
204
205 /* Allocate the buffer */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000206 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000207 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000208 bin_data = (unsigned char *)PyString_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000209
Jack Jansen72781191995-08-07 14:34:15 +0000210 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
Neal Norwitzfe92eef2004-06-06 20:13:10 +0000211 /* XXX is it really best to add NULs if there's no more data */
212 this_ch = (ascii_len > 0) ? *ascii_data : 0;
Jack Jansen72781191995-08-07 14:34:15 +0000213 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
214 /*
215 ** Whitespace. Assume some spaces got eaten at
216 ** end-of-line. (We check this later)
217 */
218 this_ch = 0;
219 } else {
Jack Jansen5d957971995-11-14 10:35:19 +0000220 /* Check the character for legality
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000221 ** The 64 in stead of the expected 63 is because
222 ** there are a few uuencodes out there that use
Guido van Rossum92d89172001-01-09 02:11:57 +0000223 ** '`' as zero instead of space.
Jack Jansen5d957971995-11-14 10:35:19 +0000224 */
225 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Jack Jansen72781191995-08-07 14:34:15 +0000226 PyErr_SetString(Error, "Illegal char");
227 Py_DECREF(rv);
228 return NULL;
229 }
230 this_ch = (this_ch - ' ') & 077;
231 }
232 /*
233 ** Shift it in on the low end, and see if there's
234 ** a byte ready for output.
235 */
236 leftchar = (leftchar << 6) | (this_ch);
237 leftbits += 6;
238 if ( leftbits >= 8 ) {
239 leftbits -= 8;
240 *bin_data++ = (leftchar >> leftbits) & 0xff;
241 leftchar &= ((1 << leftbits) - 1);
242 bin_len--;
243 }
244 }
245 /*
246 ** Finally, check that if there's anything left on the line
247 ** that it's whitespace only.
248 */
249 while( ascii_len-- > 0 ) {
250 this_ch = *ascii_data++;
Guido van Rossum92d89172001-01-09 02:11:57 +0000251 /* Extra '`' may be written as padding in some cases */
252 if ( this_ch != ' ' && this_ch != ' '+64 &&
Guido van Rossum1243ae71997-07-11 18:36:28 +0000253 this_ch != '\n' && this_ch != '\r' ) {
Jack Jansen72781191995-08-07 14:34:15 +0000254 PyErr_SetString(Error, "Trailing garbage");
255 Py_DECREF(rv);
256 return NULL;
257 }
258 }
259 return rv;
260}
261
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000262PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000263
Jack Jansen72781191995-08-07 14:34:15 +0000264static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000265binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000266{
267 unsigned char *ascii_data, *bin_data;
268 int leftbits = 0;
269 unsigned char this_ch;
270 unsigned int leftchar = 0;
271 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000272 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000273
Guido van Rossum43713e52000-02-29 13:59:29 +0000274 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000275 return NULL;
276 if ( bin_len > 45 ) {
277 /* The 45 is a limit that appears in all uuencode's */
278 PyErr_SetString(Error, "At most 45 bytes at once");
279 return NULL;
280 }
281
282 /* We're lazy and allocate to much (fixed up later) */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000283 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000284 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000285 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000286
287 /* Store the length */
288 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000289
Jack Jansen72781191995-08-07 14:34:15 +0000290 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
291 /* Shift the data (or padding) into our buffer */
292 if ( bin_len > 0 ) /* Data */
293 leftchar = (leftchar << 8) | *bin_data;
294 else /* Padding */
295 leftchar <<= 8;
296 leftbits += 8;
297
298 /* See if there are 6-bit groups ready */
299 while ( leftbits >= 6 ) {
300 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
301 leftbits -= 6;
302 *ascii_data++ = this_ch + ' ';
303 }
304 }
305 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000306
Guido van Rossum98297ee2007-11-06 21:34:58 +0000307 if (_PyString_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000308 (ascii_data -
Guido van Rossum98297ee2007-11-06 21:34:58 +0000309 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000310 Py_DECREF(rv);
311 rv = NULL;
312 }
Jack Jansen72781191995-08-07 14:34:15 +0000313 return rv;
314}
315
Guido van Rossum2db4f471999-10-19 19:05:14 +0000316
317static int
Thomas Woutersf98db652006-03-01 21:37:32 +0000318binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000319{
Tim Peters934c1a12002-07-02 22:24:50 +0000320 /* Finds & returns the (num+1)th
Guido van Rossum2db4f471999-10-19 19:05:14 +0000321 ** valid character for base64, or -1 if none.
322 */
323
324 int ret = -1;
325 unsigned char c, b64val;
326
327 while ((slen > 0) && (ret == -1)) {
328 c = *s;
329 b64val = table_a2b_base64[c & 0x7f];
330 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
331 if (num == 0)
332 ret = *s;
333 num--;
334 }
335
336 s++;
337 slen--;
338 }
339 return ret;
340}
341
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000342PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000343
344static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000345binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000346{
347 unsigned char *ascii_data, *bin_data;
348 int leftbits = 0;
349 unsigned char this_ch;
350 unsigned int leftchar = 0;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000351 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000352 Py_ssize_t ascii_len, bin_len;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000353 int quad_pos = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000354
Guido van Rossum43713e52000-02-29 13:59:29 +0000355 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000356 return NULL;
357
358 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
359
360 /* Allocate the buffer */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000361 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000362 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000363 bin_data = (unsigned char *)PyString_AS_STRING(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000364 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000365
366 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
367 this_ch = *ascii_data;
368
369 if (this_ch > 0x7f ||
370 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
Jack Jansenba1de3b1996-01-22 10:47:15 +0000371 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000372
373 /* Check for pad sequences and ignore
374 ** the invalid ones.
375 */
376 if (this_ch == BASE64_PAD) {
377 if ( (quad_pos < 2) ||
378 ((quad_pos == 2) &&
379 (binascii_find_valid(ascii_data, ascii_len, 1)
380 != BASE64_PAD)) )
381 {
382 continue;
383 }
384 else {
385 /* A pad sequence means no more input.
386 ** We've already interpreted the data
387 ** from the quad at this point.
388 */
389 leftbits = 0;
390 break;
391 }
392 }
393
394 this_ch = table_a2b_base64[*ascii_data];
395 if ( this_ch == (unsigned char) -1 )
396 continue;
397
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000398 /*
399 ** Shift it in on the low end, and see if there's
400 ** a byte ready for output.
401 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000402 quad_pos = (quad_pos + 1) & 0x03;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000403 leftchar = (leftchar << 6) | (this_ch);
404 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000405
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000406 if ( leftbits >= 8 ) {
407 leftbits -= 8;
408 *bin_data++ = (leftchar >> leftbits) & 0xff;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000409 bin_len++;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000410 leftchar &= ((1 << leftbits) - 1);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000411 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000412 }
413
414 if (leftbits != 0) {
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000415 PyErr_SetString(Error, "Incorrect padding");
416 Py_DECREF(rv);
417 return NULL;
418 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000419
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000420 /* And set string size correctly. If the result string is empty
421 ** (because the input was all invalid) return the shared empty
Guido van Rossum98297ee2007-11-06 21:34:58 +0000422 ** string instead; _PyString_Resize() won't do this for us.
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000423 */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000424 if (bin_len > 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000425 if (_PyString_Resize(&rv, bin_len) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000426 Py_DECREF(rv);
427 rv = NULL;
428 }
429 }
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000430 else {
431 Py_DECREF(rv);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000432 rv = PyString_FromStringAndSize("", 0);
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000433 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000434 return rv;
435}
436
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000437PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000438
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000439static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000440binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000441{
442 unsigned char *ascii_data, *bin_data;
443 int leftbits = 0;
444 unsigned char this_ch;
445 unsigned int leftchar = 0;
446 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000447 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000448
Guido van Rossum43713e52000-02-29 13:59:29 +0000449 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000450 return NULL;
451 if ( bin_len > BASE64_MAXBIN ) {
452 PyErr_SetString(Error, "Too much data for base64 line");
453 return NULL;
454 }
Tim Peters934c1a12002-07-02 22:24:50 +0000455
Tim Peters1fbb5772001-12-19 04:41:35 +0000456 /* We're lazy and allocate too much (fixed up later).
457 "+3" leaves room for up to two pad characters and a trailing
458 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000459 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000460 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000461 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000462
463 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
464 /* Shift the data into our buffer */
465 leftchar = (leftchar << 8) | *bin_data;
466 leftbits += 8;
467
468 /* See if there are 6-bit groups ready */
469 while ( leftbits >= 6 ) {
470 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
471 leftbits -= 6;
472 *ascii_data++ = table_b2a_base64[this_ch];
473 }
474 }
475 if ( leftbits == 2 ) {
476 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
477 *ascii_data++ = BASE64_PAD;
478 *ascii_data++ = BASE64_PAD;
479 } else if ( leftbits == 4 ) {
480 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
481 *ascii_data++ = BASE64_PAD;
Tim Peters934c1a12002-07-02 22:24:50 +0000482 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000483 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000484
Guido van Rossum98297ee2007-11-06 21:34:58 +0000485 if (_PyString_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000486 (ascii_data -
Guido van Rossum98297ee2007-11-06 21:34:58 +0000487 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000488 Py_DECREF(rv);
489 rv = NULL;
490 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000491 return rv;
492}
493
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000494PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
Jack Jansen72781191995-08-07 14:34:15 +0000495
496static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000497binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000498{
499 unsigned char *ascii_data, *bin_data;
500 int leftbits = 0;
501 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000502 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000503 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000504 Py_ssize_t len;
Jack Jansen72781191995-08-07 14:34:15 +0000505 int done = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000506
Guido van Rossum43713e52000-02-29 13:59:29 +0000507 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000508 return NULL;
509
Raymond Hettinger658717e2004-09-06 22:58:37 +0000510 /* Allocate a string that is too big (fixed later)
511 Add two to the initial length to prevent interning which
512 would preclude subsequent resizing. */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000513 if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000514 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000515 bin_data = (unsigned char *)PyString_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000516
517 for( ; len > 0 ; len--, ascii_data++ ) {
518 /* Get the byte and look it up */
519 this_ch = table_a2b_hqx[*ascii_data];
520 if ( this_ch == SKIP )
521 continue;
522 if ( this_ch == FAIL ) {
523 PyErr_SetString(Error, "Illegal char");
524 Py_DECREF(rv);
525 return NULL;
526 }
527 if ( this_ch == DONE ) {
528 /* The terminating colon */
529 done = 1;
530 break;
531 }
532
533 /* Shift it into the buffer and see if any bytes are ready */
534 leftchar = (leftchar << 6) | (this_ch);
535 leftbits += 6;
536 if ( leftbits >= 8 ) {
537 leftbits -= 8;
538 *bin_data++ = (leftchar >> leftbits) & 0xff;
539 leftchar &= ((1 << leftbits) - 1);
540 }
541 }
Tim Peters934c1a12002-07-02 22:24:50 +0000542
Jack Jansen72781191995-08-07 14:34:15 +0000543 if ( leftbits && !done ) {
544 PyErr_SetString(Incomplete,
545 "String has incomplete number of bytes");
546 Py_DECREF(rv);
547 return NULL;
548 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000549 if (_PyString_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000550 (bin_data -
Guido van Rossum98297ee2007-11-06 21:34:58 +0000551 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000552 Py_DECREF(rv);
553 rv = NULL;
554 }
Guido van Rossum9c6ba5e1997-01-12 20:02:04 +0000555 if (rv) {
556 PyObject *rrv = Py_BuildValue("Oi", rv, done);
557 Py_DECREF(rv);
558 return rrv;
559 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000560
Jack Jansen72781191995-08-07 14:34:15 +0000561 return NULL;
562}
563
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000564PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
Jack Jansen72781191995-08-07 14:34:15 +0000565
566static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000567binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000568{
569 unsigned char *in_data, *out_data;
570 PyObject *rv;
571 unsigned char ch;
Thomas Woutersf98db652006-03-01 21:37:32 +0000572 Py_ssize_t in, inend, len;
Tim Peters934c1a12002-07-02 22:24:50 +0000573
Guido van Rossum43713e52000-02-29 13:59:29 +0000574 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000575 return NULL;
576
577 /* Worst case: output is twice as big as input (fixed later) */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000578 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000579 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000580 out_data = (unsigned char *)PyString_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000581
Jack Jansen72781191995-08-07 14:34:15 +0000582 for( in=0; in<len; in++) {
583 ch = in_data[in];
584 if ( ch == RUNCHAR ) {
585 /* RUNCHAR. Escape it. */
586 *out_data++ = RUNCHAR;
587 *out_data++ = 0;
588 } else {
589 /* Check how many following are the same */
590 for(inend=in+1;
591 inend<len && in_data[inend] == ch &&
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000592 inend < in+255;
Jack Jansen72781191995-08-07 14:34:15 +0000593 inend++) ;
Jack Jansen0223aa11995-08-31 13:44:23 +0000594 if ( inend - in > 3 ) {
595 /* More than 3 in a row. Output RLE. */
Jack Jansen72781191995-08-07 14:34:15 +0000596 *out_data++ = ch;
597 *out_data++ = RUNCHAR;
598 *out_data++ = inend-in;
599 in = inend-1;
600 } else {
601 /* Less than 3. Output the byte itself */
602 *out_data++ = ch;
603 }
604 }
605 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000606 if (_PyString_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000607 (out_data -
Guido van Rossum98297ee2007-11-06 21:34:58 +0000608 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000609 Py_DECREF(rv);
610 rv = NULL;
611 }
Jack Jansen72781191995-08-07 14:34:15 +0000612 return rv;
613}
614
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000615PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
Tim Peters934c1a12002-07-02 22:24:50 +0000616
Jack Jansen72781191995-08-07 14:34:15 +0000617static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000618binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000619{
620 unsigned char *ascii_data, *bin_data;
621 int leftbits = 0;
622 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000623 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000624 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000625 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000626
Guido van Rossum43713e52000-02-29 13:59:29 +0000627 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000628 return NULL;
629
630 /* Allocate a buffer that is at least large enough */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000631 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000632 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000633 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000634
Jack Jansen72781191995-08-07 14:34:15 +0000635 for( ; len > 0 ; len--, bin_data++ ) {
636 /* Shift into our buffer, and output any 6bits ready */
637 leftchar = (leftchar << 8) | *bin_data;
638 leftbits += 8;
639 while ( leftbits >= 6 ) {
640 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
641 leftbits -= 6;
642 *ascii_data++ = table_b2a_hqx[this_ch];
643 }
644 }
645 /* Output a possible runt byte */
646 if ( leftbits ) {
647 leftchar <<= (6-leftbits);
648 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
649 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000650 if (_PyString_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000651 (ascii_data -
Guido van Rossum98297ee2007-11-06 21:34:58 +0000652 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000653 Py_DECREF(rv);
654 rv = NULL;
655 }
Jack Jansen72781191995-08-07 14:34:15 +0000656 return rv;
657}
658
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000659PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
Tim Peters934c1a12002-07-02 22:24:50 +0000660
Jack Jansen72781191995-08-07 14:34:15 +0000661static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000662binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000663{
664 unsigned char *in_data, *out_data;
665 unsigned char in_byte, in_repeat;
666 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000667 Py_ssize_t in_len, out_len, out_len_left;
Jack Jansen72781191995-08-07 14:34:15 +0000668
Guido van Rossum43713e52000-02-29 13:59:29 +0000669 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000670 return NULL;
671
672 /* Empty string is a special case */
673 if ( in_len == 0 )
Guido van Rossum98297ee2007-11-06 21:34:58 +0000674 return PyString_FromStringAndSize("", 0);
Jack Jansen72781191995-08-07 14:34:15 +0000675
676 /* Allocate a buffer of reasonable size. Resized when needed */
677 out_len = in_len*2;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000678 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000679 return NULL;
680 out_len_left = out_len;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000681 out_data = (unsigned char *)PyString_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000682
683 /*
684 ** We need two macros here to get/put bytes and handle
685 ** end-of-buffer for input and output strings.
686 */
687#define INBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000688 do { \
689 if ( --in_len < 0 ) { \
690 PyErr_SetString(Incomplete, ""); \
691 Py_DECREF(rv); \
692 return NULL; \
693 } \
694 b = *in_data++; \
695 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000696
Jack Jansen72781191995-08-07 14:34:15 +0000697#define OUTBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000698 do { \
699 if ( --out_len_left < 0 ) { \
Guido van Rossum98297ee2007-11-06 21:34:58 +0000700 if (_PyString_Resize(&rv, 2*out_len) < 0) \
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000701 { Py_DECREF(rv); return NULL; } \
Guido van Rossum98297ee2007-11-06 21:34:58 +0000702 out_data = (unsigned char *)PyString_AS_STRING(rv) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000703 + out_len; \
704 out_len_left = out_len-1; \
705 out_len = out_len * 2; \
706 } \
707 *out_data++ = b; \
708 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000709
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000710 /*
711 ** Handle first byte separately (since we have to get angry
712 ** in case of an orphaned RLE code).
713 */
714 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000715
716 if (in_byte == RUNCHAR) {
717 INBYTE(in_repeat);
718 if (in_repeat != 0) {
719 /* Note Error, not Incomplete (which is at the end
720 ** of the string only). This is a programmer error.
721 */
722 PyErr_SetString(Error, "Orphaned RLE code at start");
723 Py_DECREF(rv);
724 return NULL;
725 }
726 OUTBYTE(RUNCHAR);
727 } else {
728 OUTBYTE(in_byte);
729 }
Tim Peters934c1a12002-07-02 22:24:50 +0000730
Jack Jansen72781191995-08-07 14:34:15 +0000731 while( in_len > 0 ) {
732 INBYTE(in_byte);
733
734 if (in_byte == RUNCHAR) {
735 INBYTE(in_repeat);
736 if ( in_repeat == 0 ) {
737 /* Just an escaped RUNCHAR value */
738 OUTBYTE(RUNCHAR);
739 } else {
740 /* Pick up value and output a sequence of it */
741 in_byte = out_data[-1];
742 while ( --in_repeat > 0 )
743 OUTBYTE(in_byte);
744 }
745 } else {
746 /* Normal byte */
747 OUTBYTE(in_byte);
748 }
749 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000750 if (_PyString_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000751 (out_data -
Guido van Rossum98297ee2007-11-06 21:34:58 +0000752 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000753 Py_DECREF(rv);
754 rv = NULL;
755 }
Jack Jansen72781191995-08-07 14:34:15 +0000756 return rv;
757}
758
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000759PyDoc_STRVAR(doc_crc_hqx,
760"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
Jack Jansen72781191995-08-07 14:34:15 +0000761
762static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000763binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000764{
765 unsigned char *bin_data;
766 unsigned int crc;
Thomas Woutersf98db652006-03-01 21:37:32 +0000767 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000768
Guido van Rossum43713e52000-02-29 13:59:29 +0000769 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
Jack Jansen72781191995-08-07 14:34:15 +0000770 return NULL;
771
772 while(len--) {
773 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
774 }
775
776 return Py_BuildValue("i", crc);
777}
778
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000779PyDoc_STRVAR(doc_crc32,
780"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000781
Christian Heimes1dc54002008-03-24 02:19:29 +0000782#ifdef USE_ZLIB_CRC32
783/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
784static PyObject *
785binascii_crc32(PyObject *self, PyObject *args)
786{
787 uLong crc32val = 0; /* crc32(0L, Z_NULL, 0) */
788 Byte *buf;
Neal Norwitz4027bf82008-03-24 04:59:05 +0000789 Py_ssize_t len;
Christian Heimes1dc54002008-03-24 02:19:29 +0000790 if (!PyArg_ParseTuple(args, "s#|I:crc32", &buf, &len, &crc32val))
791 return NULL;
792 crc32val = crc32(crc32val, buf, len);
793 return PyLong_FromUnsignedLong(crc32val & 0xffffffffU);
794}
795#else /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000796/* Crc - 32 BIT ANSI X3.66 CRC checksum files
797 Also known as: ISO 3307
798**********************************************************************|
799* *|
800* Demonstration program to compute the 32-bit CRC used as the frame *|
801* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
802* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
803* protocol). The 32-bit FCS was added via the Federal Register, *|
804* 1 June 1982, p.23798. I presume but don't know for certain that *|
805* this polynomial is or will be included in CCITT V.41, which *|
806* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
807* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
808* errors by a factor of 10^-5 over 16-bit FCS. *|
809* *|
810**********************************************************************|
811
812 Copyright (C) 1986 Gary S. Brown. You may use this program, or
813 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000814
Tim Peters934c1a12002-07-02 22:24:50 +0000815 First, the polynomial itself and its table of feedback terms. The
816 polynomial is
817 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
818 Note that we take it "backwards" and put the highest-order term in
819 the lowest-order bit. The X^32 term is "implied"; the LSB is the
820 X^31 term, etc. The X^0 term (usually shown as "+1") results in
821 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000822
Tim Peters934c1a12002-07-02 22:24:50 +0000823 Note that the usual hardware shift register implementation, which
824 is what we're using (we're merely optimizing it by doing eight-bit
825 chunks at a time) shifts bits into the lowest-order term. In our
826 implementation, that means shifting towards the right. Why do we
827 do it this way? Because the calculated CRC must be transmitted in
828 order from highest-order term to lowest-order term. UARTs transmit
829 characters in order from LSB to MSB. By storing the CRC this way,
830 we hand it to the UART in the order low-byte to high-byte; the UART
831 sends each low-bit to high-bit; and the result is transmission bit
832 by bit from highest- to lowest-order term without requiring any bit
833 shuffling on our part. Reception works similarly.
834
835 The feedback terms table consists of 256, 32-bit entries. Notes:
836
837 1. The table can be generated at runtime if desired; code to do so
838 is shown later. It might not be obvious, but the feedback
839 terms simply represent the results of eight shift/xor opera-
840 tions for all combinations of data and CRC register values.
841
842 2. The CRC accumulation logic is the same for all CRC polynomials,
843 be they sixteen or thirty-two bits wide. You simply choose the
844 appropriate table. Alternatively, because the table can be
845 generated at runtime, you can start by generating the table for
846 the polynomial in question and use exactly the same "updcrc",
847 if your application needn't simultaneously handle two CRC
848 polynomials. (Note, however, that XMODEM is strange.)
849
850 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
851 of course, 32-bit entries work OK if the high 16 bits are zero.
852
853 4. The values must be right-shifted by eight bits by the "updcrc"
854 logic; the shift must be unsigned (bring in zeroes). On some
855 hardware you could probably optimize the shift in assembler by
856 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000857********************************************************************/
858
/* CRC-32 feedback terms, one entry per possible byte value.  Entry i is
 * the result of eight shift/xor steps on i with the reflected polynomial
 * 0xedb88320.  The table is never written to, hence const.
 */
static const unsigned int crc_32_tab[256] = {
    0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
    0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
    0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
    0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
    0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
    0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
    0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
    0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
    0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
    0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
    0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
    0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
    0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
    0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
    0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
    0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
    0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
    0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
    0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
    0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
    0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
    0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
    0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
    0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
    0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
    0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
    0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
    0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
    0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
    0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
    0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
    0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
    0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
    0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
    0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
    0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
    0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
    0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
    0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
    0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
    0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
    0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
    0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
    0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
    0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
    0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
    0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
    0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
    0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
    0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
    0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
    0x2d02ef8dU
};
913
914static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000915binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000916{ /* By Jim Ahlstrom; All rights transferred to CNRI */
917 unsigned char *bin_data;
Gregory P. Smith27275032008-03-20 06:20:09 +0000918 unsigned int crc = 0; /* initial value of CRC */
Thomas Woutersf98db652006-03-01 21:37:32 +0000919 Py_ssize_t len;
Gregory P. Smith27275032008-03-20 06:20:09 +0000920 unsigned int result;
Tim Peters934c1a12002-07-02 22:24:50 +0000921
Gregory P. Smith27275032008-03-20 06:20:09 +0000922 if ( !PyArg_ParseTuple(args, "s#|I:crc32", &bin_data, &len, &crc) )
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000923 return NULL;
924
Tim Peters934c1a12002-07-02 22:24:50 +0000925 crc = ~ crc;
Gregory P. Smith27275032008-03-20 06:20:09 +0000926 while (len--) {
927 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000928 /* Note: (crc >> 8) MUST zero fill on left */
Gregory P. Smith27275032008-03-20 06:20:09 +0000929 }
Tim Petersa98011c2002-07-02 20:20:08 +0000930
Gregory P. Smith27275032008-03-20 06:20:09 +0000931 result = (crc ^ 0xFFFFFFFF);
932 return PyLong_FromUnsignedLong(result & 0xffffffff);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000933}
Christian Heimes1dc54002008-03-24 02:19:29 +0000934#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000935
Barry Warsawe977c212000-08-15 06:07:13 +0000936
937static PyObject *
938binascii_hexlify(PyObject *self, PyObject *args)
939{
940 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000941 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000942 PyObject *retval;
943 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000944 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000945
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000946 if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
Barry Warsawe977c212000-08-15 06:07:13 +0000947 return NULL;
948
Guido van Rossum98297ee2007-11-06 21:34:58 +0000949 retval = PyString_FromStringAndSize(NULL, arglen*2);
Barry Warsawe977c212000-08-15 06:07:13 +0000950 if (!retval)
951 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000952 retbuf = PyString_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +0000953
954 /* make hex version of string, taken from shamodule.c */
955 for (i=j=0; i < arglen; i++) {
956 char c;
957 c = (argbuf[i] >> 4) & 0xf;
958 c = (c>9) ? c+'a'-10 : c + '0';
959 retbuf[j++] = c;
960 c = argbuf[i] & 0xf;
961 c = (c>9) ? c+'a'-10 : c + '0';
962 retbuf[j++] = c;
963 }
964 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +0000965}
966
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000967PyDoc_STRVAR(doc_hexlify,
Barry Warsawe977c212000-08-15 06:07:13 +0000968"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
969\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000970This function is also available as \"hexlify()\".");
Barry Warsawe977c212000-08-15 06:07:13 +0000971
972
/* Return the value (0..15) of the ASCII hexadecimal digit c, accepting
 * both upper and lower case letters, or -1 if c is not a hex digit.
 *
 * Plain range comparisons are used instead of <ctype.h>, so the result
 * never depends on the process locale.
 */
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
986
987
988static PyObject *
989binascii_unhexlify(PyObject *self, PyObject *args)
990{
991 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000992 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000993 PyObject *retval;
994 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000995 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000996
997 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
998 return NULL;
999
Barry Warsaw16168472000-08-15 06:59:58 +00001000 /* XXX What should we do about strings with an odd length? Should
1001 * we add an implicit leading zero, or a trailing zero? For now,
1002 * raise an exception.
Barry Warsawe977c212000-08-15 06:07:13 +00001003 */
1004 if (arglen % 2) {
Guido van Rossum4581ae52007-05-22 21:56:47 +00001005 PyErr_SetString(Error, "Odd-length string");
Barry Warsawe977c212000-08-15 06:07:13 +00001006 return NULL;
1007 }
1008
Guido van Rossum98297ee2007-11-06 21:34:58 +00001009 retval = PyString_FromStringAndSize(NULL, (arglen/2));
Barry Warsawe977c212000-08-15 06:07:13 +00001010 if (!retval)
1011 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001012 retbuf = PyString_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001013
1014 for (i=j=0; i < arglen; i += 2) {
1015 int top = to_int(Py_CHARMASK(argbuf[i]));
1016 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1017 if (top == -1 || bot == -1) {
Guido van Rossum4581ae52007-05-22 21:56:47 +00001018 PyErr_SetString(Error,
Barry Warsaw16168472000-08-15 06:59:58 +00001019 "Non-hexadecimal digit found");
Barry Warsawe977c212000-08-15 06:07:13 +00001020 goto finally;
1021 }
1022 retbuf[j++] = (top << 4) + bot;
1023 }
1024 return retval;
1025
1026 finally:
1027 Py_DECREF(retval);
1028 return NULL;
1029}
1030
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001031PyDoc_STRVAR(doc_unhexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001032"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1033\n\
1034hexstr must contain an even number of hex digits (upper or lower case).\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001035This function is also available as \"unhexlify()\"");
Barry Warsawe977c212000-08-15 06:07:13 +00001036
/* Value of each 7-bit ASCII character as a hexadecimal digit, or -1 if
 * the character is not one.  Only indices 0..127 exist, so callers must
 * range-check the character before using hexval().
 */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up the hex value of c; c must be below 128. */
#define hexval(c) (table_hex[(unsigned int)(c)])

/* Line-length limit applied by the quoted-printable encoder. */
#define MAXLINESIZE 76
1051
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001052PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001053
Tim Peters934c1a12002-07-02 22:24:50 +00001054static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001055binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1056{
Thomas Wouters7087f782006-03-01 23:10:05 +00001057 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001058 char ch;
1059 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001060 Py_ssize_t datalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001061 PyObject *rv;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001062 static char *kwlist[] = {"data", "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001063 int header = 0;
1064
Tim Peters934c1a12002-07-02 22:24:50 +00001065 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001066 &datalen, &header))
1067 return NULL;
1068
Barry Warsaw23164a52004-05-11 02:05:11 +00001069 /* We allocate the output same size as input, this is overkill.
1070 * The previous implementation used calloc() so we'll zero out the
1071 * memory here too, since PyMem_Malloc() does not guarantee that.
1072 */
1073 odata = (unsigned char *) PyMem_Malloc(datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001074 if (odata == NULL) {
1075 PyErr_NoMemory();
1076 return NULL;
1077 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001078 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001079
1080 in = out = 0;
1081 while (in < datalen) {
1082 if (data[in] == '=') {
1083 in++;
1084 if (in >= datalen) break;
1085 /* Soft line breaks */
Thomas Wouters89f507f2006-12-13 04:49:30 +00001086 if ((data[in] == '\n') || (data[in] == '\r')) {
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001087 if (data[in] != '\n') {
1088 while (in < datalen && data[in] != '\n') in++;
1089 }
1090 if (in < datalen) in++;
1091 }
1092 else if (data[in] == '=') {
1093 /* broken case from broken python qp */
1094 odata[out++] = '=';
1095 in++;
1096 }
Tim Peters934c1a12002-07-02 22:24:50 +00001097 else if (((data[in] >= 'A' && data[in] <= 'F') ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001098 (data[in] >= 'a' && data[in] <= 'f') ||
1099 (data[in] >= '0' && data[in] <= '9')) &&
1100 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1101 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1102 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1103 /* hexval */
1104 ch = hexval(data[in]) << 4;
1105 in++;
1106 ch |= hexval(data[in]);
1107 in++;
1108 odata[out++] = ch;
1109 }
1110 else {
1111 odata[out++] = '=';
1112 }
1113 }
1114 else if (header && data[in] == '_') {
1115 odata[out++] = ' ';
1116 in++;
1117 }
1118 else {
1119 odata[out] = data[in];
1120 in++;
1121 out++;
1122 }
1123 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001124 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001125 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001126 return NULL;
1127 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001128 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001129 return rv;
1130}
1131
/* Store the two uppercase hexadecimal digits of ch in s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.
 * Always returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0f];
    s[1] = digits[ch & 0x0f];
    return 0;
}
1142
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001143PyDoc_STRVAR(doc_b2a_qp,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001144"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1145 Encode a string using quoted-printable encoding. \n\
1146\n\
1147On encoding, when istext is set, newlines are not encoded, and white \n\
1148space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001149both encoded. When quotetabs is set, space and tabs are encoded.");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001150
1151/* XXX: This is ridiculously complicated to be backward compatible
1152 * (mostly) with the quopri module. It doesn't re-create the quopri
1153 * module bug where text ending in CRLF has the CR encoded */
Tim Peters934c1a12002-07-02 22:24:50 +00001154static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001155binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1156{
Thomas Wouters7087f782006-03-01 23:10:05 +00001157 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001158 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001159 Py_ssize_t datalen = 0, odatalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001160 PyObject *rv;
1161 unsigned int linelen = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001162 static char *kwlist[] = {"data", "quotetabs", "istext",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001163 "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001164 int istext = 1;
1165 int quotetabs = 0;
1166 int header = 0;
1167 unsigned char ch;
1168 int crlf = 0;
1169 unsigned char *p;
1170
Tim Peters934c1a12002-07-02 22:24:50 +00001171 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001172 &datalen, &quotetabs, &istext, &header))
1173 return NULL;
1174
1175 /* See if this string is using CRLF line ends */
1176 /* XXX: this function has the side effect of converting all of
1177 * the end of lines to be the same depending on this detection
1178 * here */
Walter Dörwald0925e412007-05-09 18:23:50 +00001179 p = (unsigned char *) memchr(data, '\n', datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001180 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1181 crlf = 1;
1182
1183 /* First, scan to see how many characters need to be encoded */
1184 in = 0;
1185 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001186 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001187 (data[in] == '=') ||
1188 (header && data[in] == '_') ||
Guido van Rossumd8faa362007-04-27 19:54:29 +00001189 ((data[in] == '.') && (linelen == 0) &&
1190 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001191 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1192 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001193 ((data[in] < 33) &&
1194 (data[in] != '\r') && (data[in] != '\n') &&
Guido van Rossumd8faa362007-04-27 19:54:29 +00001195 (quotetabs ||
1196 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001197 {
1198 if ((linelen + 3) >= MAXLINESIZE) {
1199 linelen = 0;
1200 if (crlf)
1201 odatalen += 3;
1202 else
1203 odatalen += 2;
1204 }
1205 linelen += 3;
1206 odatalen += 3;
1207 in++;
1208 }
1209 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001210 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001211 ((data[in] == '\n') ||
1212 ((in+1 < datalen) && (data[in] == '\r') &&
1213 (data[in+1] == '\n'))))
1214 {
1215 linelen = 0;
1216 /* Protect against whitespace on end of line */
1217 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1218 odatalen += 2;
1219 if (crlf)
1220 odatalen += 2;
1221 else
1222 odatalen += 1;
1223 if (data[in] == '\r')
1224 in += 2;
1225 else
1226 in++;
1227 }
1228 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001229 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001230 (data[in+1] != '\n') &&
1231 (linelen + 1) >= MAXLINESIZE) {
1232 linelen = 0;
1233 if (crlf)
1234 odatalen += 3;
1235 else
1236 odatalen += 2;
1237 }
1238 linelen++;
1239 odatalen++;
1240 in++;
1241 }
1242 }
1243 }
1244
Barry Warsaw23164a52004-05-11 02:05:11 +00001245 /* We allocate the output same size as input, this is overkill.
1246 * The previous implementation used calloc() so we'll zero out the
1247 * memory here too, since PyMem_Malloc() does not guarantee that.
1248 */
1249 odata = (unsigned char *) PyMem_Malloc(odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001250 if (odata == NULL) {
1251 PyErr_NoMemory();
1252 return NULL;
1253 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001254 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001255
1256 in = out = linelen = 0;
1257 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001258 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001259 (data[in] == '=') ||
1260 (header && data[in] == '_') ||
Guido van Rossumd8faa362007-04-27 19:54:29 +00001261 ((data[in] == '.') && (linelen == 0) &&
1262 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001263 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1264 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001265 ((data[in] < 33) &&
1266 (data[in] != '\r') && (data[in] != '\n') &&
Guido van Rossumd8faa362007-04-27 19:54:29 +00001267 (quotetabs ||
1268 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001269 {
1270 if ((linelen + 3 )>= MAXLINESIZE) {
1271 odata[out++] = '=';
1272 if (crlf) odata[out++] = '\r';
1273 odata[out++] = '\n';
1274 linelen = 0;
1275 }
1276 odata[out++] = '=';
1277 to_hex(data[in], &odata[out]);
1278 out += 2;
1279 in++;
1280 linelen += 3;
1281 }
1282 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001283 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001284 ((data[in] == '\n') ||
1285 ((in+1 < datalen) && (data[in] == '\r') &&
1286 (data[in+1] == '\n'))))
1287 {
1288 linelen = 0;
1289 /* Protect against whitespace on end of line */
1290 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1291 ch = odata[out-1];
1292 odata[out-1] = '=';
1293 to_hex(ch, &odata[out]);
1294 out += 2;
1295 }
Tim Peters934c1a12002-07-02 22:24:50 +00001296
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001297 if (crlf) odata[out++] = '\r';
1298 odata[out++] = '\n';
1299 if (data[in] == '\r')
1300 in += 2;
1301 else
1302 in++;
1303 }
1304 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001305 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001306 (data[in+1] != '\n') &&
1307 (linelen + 1) >= MAXLINESIZE) {
1308 odata[out++] = '=';
1309 if (crlf) odata[out++] = '\r';
1310 odata[out++] = '\n';
1311 linelen = 0;
1312 }
1313 linelen++;
1314 if (header && data[in] == ' ') {
1315 odata[out++] = '_';
1316 in++;
1317 }
1318 else {
1319 odata[out++] = data[in++];
1320 }
1321 }
1322 }
1323 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001324 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001325 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001326 return NULL;
1327 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001328 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001329 return rv;
1330}
Barry Warsawe977c212000-08-15 06:07:13 +00001331
Jack Jansen72781191995-08-07 14:34:15 +00001332/* List of functions defined in the module */
1333
1334static struct PyMethodDef binascii_module_methods[] = {
Barry Warsawe977c212000-08-15 06:07:13 +00001335 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1336 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1337 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1338 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1339 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1340 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1341 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1342 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1343 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1344 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1345 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1346 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1347 doc_rledecode_hqx},
1348 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1349 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
Tim Peters934c1a12002-07-02 22:24:50 +00001350 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001351 doc_a2b_qp},
Tim Peters934c1a12002-07-02 22:24:50 +00001352 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001353 doc_b2a_qp},
Barry Warsawe977c212000-08-15 06:07:13 +00001354 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001355};
1356
1357
1358/* Initialization function for the module (*must* be called initbinascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001359PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001360
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001361PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001362initbinascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001363{
Guido van Rossumfe096d22007-08-23 18:31:24 +00001364 PyObject *m, *d;
Jack Jansen72781191995-08-07 14:34:15 +00001365
1366 /* Create the module and add the functions */
Neal Norwitz56dc9552007-08-23 17:55:33 +00001367 m = Py_InitModule3("binascii", binascii_module_methods, doc_binascii);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001368 if (m == NULL)
1369 return;
Jack Jansen72781191995-08-07 14:34:15 +00001370
1371 d = PyModule_GetDict(m);
Jack Jansen72781191995-08-07 14:34:15 +00001372
Guido van Rossum4581ae52007-05-22 21:56:47 +00001373 Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001374 PyDict_SetItemString(d, "Error", Error);
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001375 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001376 PyDict_SetItemString(d, "Incomplete", Incomplete);
Jack Jansen72781191995-08-07 14:34:15 +00001377}