blob: a374dc7cbfc44257e48bb587c4d8821da3443035 [file] [log] [blame]
Jack Jansen72781191995-08-07 14:34:15 +00001/*
2** Routines to represent binary data in ASCII and vice-versa
3**
4** This module currently supports the following encodings:
5** uuencode:
6** each line encodes 45 bytes (except possibly the last)
7** First char encodes (binary) length, rest data
8** each char encodes 6 bits, as follows:
9** binary: 01234567 abcdefgh ijklmnop
10** ascii: 012345 67abcd efghij klmnop
11** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12** short binary data is zero-extended (so the bits are always in the
13** right place), this does *not* reflect in the length.
Jack Jansen84bbc2e1995-10-04 16:38:44 +000014** base64:
15** Line breaks are insignificant, but lines are at most 76 chars
16** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17** is done via a table.
18** Short binary data is filled (in ASCII) with '='.
Jack Jansen72781191995-08-07 14:34:15 +000019** hqx:
20** File starts with introductory text, real data starts and ends
21** with colons.
22** Data consists of three similar parts: info, datafork, resourcefork.
23** Each part is protected (at the end) with a 16-bit crc
24** The binary data is run-length encoded, and then ascii-fied:
25** binary: 01234567 abcdefgh ijklmnop
26** ascii: 012345 67abcd efghij klmnop
27** ASCII encoding is table-driven, see the code.
28** Short binary data results in the runt ascii-byte being output with
29** the bits in the right place.
30**
31** While I was reading dozens of programs that encode or decode the formats
32** here (documentation? hihi:-) I have formulated Jansen's Observation:
33**
34** Programs that encode binary data in ASCII are written in
35** such a style that they are as unreadable as possible. Devices used
36** include unnecessary global variables, burying important tables
37** in unrelated sourcefiles, putting functions in include files,
38** using seemingly-descriptive variable names for different purposes,
39** calls to empty subroutines and a host of others.
40**
41** I have attempted to break with this tradition, but I guess that that
42** does make the performance sub-optimal. Oh well, too bad...
43**
44** Jack Jansen, CWI, July 1995.
Tim Peters934c1a12002-07-02 22:24:50 +000045**
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000046** Added support for quoted-printable encoding, based on rfc 1521 et al
Tim Peters934c1a12002-07-02 22:24:50 +000047** quoted-printable encoding specifies that non printable characters (anything
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000048** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49** of the character. It also specifies some other behavior to enable 8bit data
Tim Peters934c1a12002-07-02 22:24:50 +000050** in a mail message with little difficulty (maximum line sizes, protecting
51** some cases of whitespace, etc).
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000052**
53** Brandon Long, September 2001.
Jack Jansen72781191995-08-07 14:34:15 +000054*/
55
56
57#include "Python.h"
58
59static PyObject *Error;
60static PyObject *Incomplete;
61
/*
** hqx decoding table: maps an ASCII byte to its 6-bit value, or to one
** of the out-of-band markers below.
*/

/* Byte that introduces a run in the hqx run-length coding */
#define RUNCHAR 0x90

/* Out-of-band table entries; all are larger than any 6-bit value */
#define DONE 0x7F   /* ':', the end-of-data marker */
#define SKIP 0x7E   /* newline and carriage return, silently ignored */
#define FAIL 0x7D   /* byte is not legal in hqx data */

static unsigned char table_a2b_hqx[256] = {
    /* 0x00 - 0x07: control characters */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x08 - 0x0F: newline (0x0A) and carriage return (0x0D) are skipped */
    FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
    /* 0x10 - 0x17: control characters */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x18 - 0x1F: control characters */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x20 - 0x27: space ! " # $ % & ' */
    FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    /* 0x28 - 0x2F: ( ) * + , - . / */
    0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
    /* 0x30 - 0x37: 0 1 2 3 4 5 6 7 */
    0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
    /* 0x38 - 0x3F: 8 9 : ; < = > ? */
    0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x40 - 0x47: @ A B C D E F G */
    0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
    /* 0x48 - 0x4F: H I J K L M N O */
    0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
    /* 0x50 - 0x57: P Q R S T U V W */
    0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
    /* 0x58 - 0x5F: X Y Z [ backslash ] ^ _ */
    0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
    /* 0x60 - 0x67: ` a b c d e f g */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
    /* 0x68 - 0x6F: h i j k l m n o */
    0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
    /* 0x70 - 0x77: p q r s t u v w */
    0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x78 - 0x7F: x y z { | } ~ DEL */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x80 - 0xFF: nothing with the high bit set is legal */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
122
/*
** hqx encoding table: the character for each 6-bit value, given here in
** four groups of sixteen.
*/
static unsigned char table_b2a_hqx[] =
    "!\"#$%&'()*+,-012"
    "345689@ABCDEFGHI"
    "JKLMNPQRSTUVXYZ["
    "`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000125
/*
** base64 decoding table for the 128 7-bit characters: the 6-bit value of
** each alphabet character, -1 for everything else.  The pad character '='
** maps to 0 rather than -1.
*/
static char table_a2b_base64[] = {
    /* 0x00 - 0x0F */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x10 - 0x1F */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x20 - 0x2F: '+' is 62, '/' is 63 */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    /* 0x30 - 0x3F: digits are 52..61; '=' (pad) is 0 */
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1,
    /* 0x40 - 0x4F: 'A'..'O' are 0..14 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    /* 0x50 - 0x5F: 'P'..'Z' are 15..25 */
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    /* 0x60 - 0x6F: 'a'..'o' are 26..40 */
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    /* 0x70 - 0x7F: 'p'..'z' are 41..51 */
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
136
/* Character used to pad base64 output up to a multiple of four */
#define BASE64_PAD '='

/*
** Largest binary chunk accepted by b2a_base64.  The encoder allocates
** bin_len*2 + 3 bytes of string data, so this keeps that product, plus
** the string object header, within INT_MAX; beyond that the only limit
** is available memory.
*/
#define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000141
/*
** base64 encoding table: the character for each 6-bit value
** (upper case, lower case, digits, then '+' and '/').
*/
static unsigned char table_b2a_base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000144
145
146
/*
** Byte-at-a-time lookup table for the 16-bit hqx CRC.  Entry i is the
** CRC of the single byte i, computed most-significant-bit first with
** the polynomial 0x1021 (entry 1 is the polynomial itself).
*/
static unsigned short crctab_hqx[256] = {
    /* 0x00 */
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    /* 0x10 */
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    /* 0x20 */
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    /* 0x30 */
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    /* 0x40 */
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    /* 0x50 */
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    /* 0x60 */
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    /* 0x70 */
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    /* 0x80 */
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    /* 0x90 */
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    /* 0xa0 */
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    /* 0xb0 */
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    /* 0xc0 */
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    /* 0xd0 */
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    /* 0xe0 */
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    /* 0xf0 */
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
181
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000182PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
Jack Jansen72781191995-08-07 14:34:15 +0000183
184static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000185binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000186{
187 unsigned char *ascii_data, *bin_data;
188 int leftbits = 0;
189 unsigned char this_ch;
190 unsigned int leftchar = 0;
191 PyObject *rv;
192 int ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000193
Guido van Rossum43713e52000-02-29 13:59:29 +0000194 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000195 return NULL;
196
197 /* First byte: binary data length (in bytes) */
198 bin_len = (*ascii_data++ - ' ') & 077;
199 ascii_len--;
200
201 /* Allocate the buffer */
202 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
203 return NULL;
204 bin_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000205
Jack Jansen72781191995-08-07 14:34:15 +0000206 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
Neal Norwitzfe92eef2004-06-06 20:13:10 +0000207 /* XXX is it really best to add NULs if there's no more data */
208 this_ch = (ascii_len > 0) ? *ascii_data : 0;
Jack Jansen72781191995-08-07 14:34:15 +0000209 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
210 /*
211 ** Whitespace. Assume some spaces got eaten at
212 ** end-of-line. (We check this later)
213 */
214 this_ch = 0;
215 } else {
Jack Jansen5d957971995-11-14 10:35:19 +0000216 /* Check the character for legality
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000217 ** The 64 in stead of the expected 63 is because
218 ** there are a few uuencodes out there that use
Guido van Rossum92d89172001-01-09 02:11:57 +0000219 ** '`' as zero instead of space.
Jack Jansen5d957971995-11-14 10:35:19 +0000220 */
221 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Jack Jansen72781191995-08-07 14:34:15 +0000222 PyErr_SetString(Error, "Illegal char");
223 Py_DECREF(rv);
224 return NULL;
225 }
226 this_ch = (this_ch - ' ') & 077;
227 }
228 /*
229 ** Shift it in on the low end, and see if there's
230 ** a byte ready for output.
231 */
232 leftchar = (leftchar << 6) | (this_ch);
233 leftbits += 6;
234 if ( leftbits >= 8 ) {
235 leftbits -= 8;
236 *bin_data++ = (leftchar >> leftbits) & 0xff;
237 leftchar &= ((1 << leftbits) - 1);
238 bin_len--;
239 }
240 }
241 /*
242 ** Finally, check that if there's anything left on the line
243 ** that it's whitespace only.
244 */
245 while( ascii_len-- > 0 ) {
246 this_ch = *ascii_data++;
Guido van Rossum92d89172001-01-09 02:11:57 +0000247 /* Extra '`' may be written as padding in some cases */
248 if ( this_ch != ' ' && this_ch != ' '+64 &&
Guido van Rossum1243ae71997-07-11 18:36:28 +0000249 this_ch != '\n' && this_ch != '\r' ) {
Jack Jansen72781191995-08-07 14:34:15 +0000250 PyErr_SetString(Error, "Trailing garbage");
251 Py_DECREF(rv);
252 return NULL;
253 }
254 }
255 return rv;
256}
257
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000258PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000259
Jack Jansen72781191995-08-07 14:34:15 +0000260static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000261binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000262{
263 unsigned char *ascii_data, *bin_data;
264 int leftbits = 0;
265 unsigned char this_ch;
266 unsigned int leftchar = 0;
267 PyObject *rv;
268 int bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000269
Guido van Rossum43713e52000-02-29 13:59:29 +0000270 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000271 return NULL;
272 if ( bin_len > 45 ) {
273 /* The 45 is a limit that appears in all uuencode's */
274 PyErr_SetString(Error, "At most 45 bytes at once");
275 return NULL;
276 }
277
278 /* We're lazy and allocate to much (fixed up later) */
Raymond Hettinger658717e2004-09-06 22:58:37 +0000279 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000280 return NULL;
281 ascii_data = (unsigned char *)PyString_AsString(rv);
282
283 /* Store the length */
284 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000285
Jack Jansen72781191995-08-07 14:34:15 +0000286 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
287 /* Shift the data (or padding) into our buffer */
288 if ( bin_len > 0 ) /* Data */
289 leftchar = (leftchar << 8) | *bin_data;
290 else /* Padding */
291 leftchar <<= 8;
292 leftbits += 8;
293
294 /* See if there are 6-bit groups ready */
295 while ( leftbits >= 6 ) {
296 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
297 leftbits -= 6;
298 *ascii_data++ = this_ch + ' ';
299 }
300 }
301 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000302
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000303 _PyString_Resize(&rv, (ascii_data -
304 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000305 return rv;
306}
307
Guido van Rossum2db4f471999-10-19 19:05:14 +0000308
309static int
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000310binascii_find_valid(unsigned char *s, int slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000311{
Tim Peters934c1a12002-07-02 22:24:50 +0000312 /* Finds & returns the (num+1)th
Guido van Rossum2db4f471999-10-19 19:05:14 +0000313 ** valid character for base64, or -1 if none.
314 */
315
316 int ret = -1;
317 unsigned char c, b64val;
318
319 while ((slen > 0) && (ret == -1)) {
320 c = *s;
321 b64val = table_a2b_base64[c & 0x7f];
322 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
323 if (num == 0)
324 ret = *s;
325 num--;
326 }
327
328 s++;
329 slen--;
330 }
331 return ret;
332}
333
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000334PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000335
336static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000337binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000338{
339 unsigned char *ascii_data, *bin_data;
340 int leftbits = 0;
341 unsigned char this_ch;
342 unsigned int leftchar = 0;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000343 PyObject *rv;
344 int ascii_len, bin_len;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000345 int quad_pos = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000346
Guido van Rossum43713e52000-02-29 13:59:29 +0000347 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000348 return NULL;
349
350 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
351
352 /* Allocate the buffer */
353 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
354 return NULL;
355 bin_data = (unsigned char *)PyString_AsString(rv);
356 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000357
358 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
359 this_ch = *ascii_data;
360
361 if (this_ch > 0x7f ||
362 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
Jack Jansenba1de3b1996-01-22 10:47:15 +0000363 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000364
365 /* Check for pad sequences and ignore
366 ** the invalid ones.
367 */
368 if (this_ch == BASE64_PAD) {
369 if ( (quad_pos < 2) ||
370 ((quad_pos == 2) &&
371 (binascii_find_valid(ascii_data, ascii_len, 1)
372 != BASE64_PAD)) )
373 {
374 continue;
375 }
376 else {
377 /* A pad sequence means no more input.
378 ** We've already interpreted the data
379 ** from the quad at this point.
380 */
381 leftbits = 0;
382 break;
383 }
384 }
385
386 this_ch = table_a2b_base64[*ascii_data];
387 if ( this_ch == (unsigned char) -1 )
388 continue;
389
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000390 /*
391 ** Shift it in on the low end, and see if there's
392 ** a byte ready for output.
393 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000394 quad_pos = (quad_pos + 1) & 0x03;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000395 leftchar = (leftchar << 6) | (this_ch);
396 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000397
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000398 if ( leftbits >= 8 ) {
399 leftbits -= 8;
400 *bin_data++ = (leftchar >> leftbits) & 0xff;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000401 bin_len++;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000402 leftchar &= ((1 << leftbits) - 1);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000403 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000404 }
405
406 if (leftbits != 0) {
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000407 PyErr_SetString(Error, "Incorrect padding");
408 Py_DECREF(rv);
409 return NULL;
410 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000411
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000412 /* And set string size correctly. If the result string is empty
413 ** (because the input was all invalid) return the shared empty
414 ** string instead; _PyString_Resize() won't do this for us.
415 */
Barry Warsaw0a51b582002-08-15 22:14:24 +0000416 if (bin_len > 0)
417 _PyString_Resize(&rv, bin_len);
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000418 else {
419 Py_DECREF(rv);
420 rv = PyString_FromString("");
421 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000422 return rv;
423}
424
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000425PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000426
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000427static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000428binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000429{
430 unsigned char *ascii_data, *bin_data;
431 int leftbits = 0;
432 unsigned char this_ch;
433 unsigned int leftchar = 0;
434 PyObject *rv;
435 int bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000436
Guido van Rossum43713e52000-02-29 13:59:29 +0000437 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000438 return NULL;
439 if ( bin_len > BASE64_MAXBIN ) {
440 PyErr_SetString(Error, "Too much data for base64 line");
441 return NULL;
442 }
Tim Peters934c1a12002-07-02 22:24:50 +0000443
Tim Peters1fbb5772001-12-19 04:41:35 +0000444 /* We're lazy and allocate too much (fixed up later).
445 "+3" leaves room for up to two pad characters and a trailing
446 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
447 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000448 return NULL;
449 ascii_data = (unsigned char *)PyString_AsString(rv);
450
451 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
452 /* Shift the data into our buffer */
453 leftchar = (leftchar << 8) | *bin_data;
454 leftbits += 8;
455
456 /* See if there are 6-bit groups ready */
457 while ( leftbits >= 6 ) {
458 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
459 leftbits -= 6;
460 *ascii_data++ = table_b2a_base64[this_ch];
461 }
462 }
463 if ( leftbits == 2 ) {
464 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
465 *ascii_data++ = BASE64_PAD;
466 *ascii_data++ = BASE64_PAD;
467 } else if ( leftbits == 4 ) {
468 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
469 *ascii_data++ = BASE64_PAD;
Tim Peters934c1a12002-07-02 22:24:50 +0000470 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000471 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000472
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000473 _PyString_Resize(&rv, (ascii_data -
474 (unsigned char *)PyString_AsString(rv)));
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000475 return rv;
476}
477
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000478PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
Jack Jansen72781191995-08-07 14:34:15 +0000479
480static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000481binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000482{
483 unsigned char *ascii_data, *bin_data;
484 int leftbits = 0;
485 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000486 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000487 PyObject *rv;
488 int len;
489 int done = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000490
Guido van Rossum43713e52000-02-29 13:59:29 +0000491 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000492 return NULL;
493
Raymond Hettinger658717e2004-09-06 22:58:37 +0000494 /* Allocate a string that is too big (fixed later)
495 Add two to the initial length to prevent interning which
496 would preclude subsequent resizing. */
497 if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000498 return NULL;
499 bin_data = (unsigned char *)PyString_AsString(rv);
500
501 for( ; len > 0 ; len--, ascii_data++ ) {
502 /* Get the byte and look it up */
503 this_ch = table_a2b_hqx[*ascii_data];
504 if ( this_ch == SKIP )
505 continue;
506 if ( this_ch == FAIL ) {
507 PyErr_SetString(Error, "Illegal char");
508 Py_DECREF(rv);
509 return NULL;
510 }
511 if ( this_ch == DONE ) {
512 /* The terminating colon */
513 done = 1;
514 break;
515 }
516
517 /* Shift it into the buffer and see if any bytes are ready */
518 leftchar = (leftchar << 6) | (this_ch);
519 leftbits += 6;
520 if ( leftbits >= 8 ) {
521 leftbits -= 8;
522 *bin_data++ = (leftchar >> leftbits) & 0xff;
523 leftchar &= ((1 << leftbits) - 1);
524 }
525 }
Tim Peters934c1a12002-07-02 22:24:50 +0000526
Jack Jansen72781191995-08-07 14:34:15 +0000527 if ( leftbits && !done ) {
528 PyErr_SetString(Incomplete,
529 "String has incomplete number of bytes");
530 Py_DECREF(rv);
531 return NULL;
532 }
Raymond Hettinger658717e2004-09-06 22:58:37 +0000533
534
535 assert(PyString_Check(rv));
536 assert((bin_data - (unsigned char *)PyString_AsString(rv)) >= 0);
537 assert(!PyString_CHECK_INTERNED(rv));
538
539 assert(rv->ob_refcnt == 1);
540
541
Guido van Rossum9c6ba5e1997-01-12 20:02:04 +0000542 _PyString_Resize(
543 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
544 if (rv) {
545 PyObject *rrv = Py_BuildValue("Oi", rv, done);
546 Py_DECREF(rv);
547 return rrv;
548 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000549
Jack Jansen72781191995-08-07 14:34:15 +0000550 return NULL;
551}
552
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000553PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
Jack Jansen72781191995-08-07 14:34:15 +0000554
555static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000556binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000557{
558 unsigned char *in_data, *out_data;
559 PyObject *rv;
560 unsigned char ch;
561 int in, inend, len;
Tim Peters934c1a12002-07-02 22:24:50 +0000562
Guido van Rossum43713e52000-02-29 13:59:29 +0000563 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000564 return NULL;
565
566 /* Worst case: output is twice as big as input (fixed later) */
Raymond Hettinger658717e2004-09-06 22:58:37 +0000567 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000568 return NULL;
569 out_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000570
Jack Jansen72781191995-08-07 14:34:15 +0000571 for( in=0; in<len; in++) {
572 ch = in_data[in];
573 if ( ch == RUNCHAR ) {
574 /* RUNCHAR. Escape it. */
575 *out_data++ = RUNCHAR;
576 *out_data++ = 0;
577 } else {
578 /* Check how many following are the same */
579 for(inend=in+1;
580 inend<len && in_data[inend] == ch &&
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000581 inend < in+255;
Jack Jansen72781191995-08-07 14:34:15 +0000582 inend++) ;
Jack Jansen0223aa11995-08-31 13:44:23 +0000583 if ( inend - in > 3 ) {
584 /* More than 3 in a row. Output RLE. */
Jack Jansen72781191995-08-07 14:34:15 +0000585 *out_data++ = ch;
586 *out_data++ = RUNCHAR;
587 *out_data++ = inend-in;
588 in = inend-1;
589 } else {
590 /* Less than 3. Output the byte itself */
591 *out_data++ = ch;
592 }
593 }
594 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000595 _PyString_Resize(&rv, (out_data -
596 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000597 return rv;
598}
599
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000600PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
Tim Peters934c1a12002-07-02 22:24:50 +0000601
Jack Jansen72781191995-08-07 14:34:15 +0000602static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000603binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000604{
605 unsigned char *ascii_data, *bin_data;
606 int leftbits = 0;
607 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000608 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000609 PyObject *rv;
610 int len;
Tim Peters934c1a12002-07-02 22:24:50 +0000611
Guido van Rossum43713e52000-02-29 13:59:29 +0000612 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000613 return NULL;
614
615 /* Allocate a buffer that is at least large enough */
Raymond Hettinger658717e2004-09-06 22:58:37 +0000616 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000617 return NULL;
618 ascii_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000619
Jack Jansen72781191995-08-07 14:34:15 +0000620 for( ; len > 0 ; len--, bin_data++ ) {
621 /* Shift into our buffer, and output any 6bits ready */
622 leftchar = (leftchar << 8) | *bin_data;
623 leftbits += 8;
624 while ( leftbits >= 6 ) {
625 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
626 leftbits -= 6;
627 *ascii_data++ = table_b2a_hqx[this_ch];
628 }
629 }
630 /* Output a possible runt byte */
631 if ( leftbits ) {
632 leftchar <<= (6-leftbits);
633 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
634 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000635 _PyString_Resize(&rv, (ascii_data -
636 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000637 return rv;
638}
639
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000640PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
Tim Peters934c1a12002-07-02 22:24:50 +0000641
Jack Jansen72781191995-08-07 14:34:15 +0000642static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000643binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000644{
645 unsigned char *in_data, *out_data;
646 unsigned char in_byte, in_repeat;
647 PyObject *rv;
648 int in_len, out_len, out_len_left;
649
Guido van Rossum43713e52000-02-29 13:59:29 +0000650 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000651 return NULL;
652
653 /* Empty string is a special case */
654 if ( in_len == 0 )
655 return Py_BuildValue("s", "");
656
657 /* Allocate a buffer of reasonable size. Resized when needed */
658 out_len = in_len*2;
659 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
660 return NULL;
661 out_len_left = out_len;
662 out_data = (unsigned char *)PyString_AsString(rv);
663
664 /*
665 ** We need two macros here to get/put bytes and handle
666 ** end-of-buffer for input and output strings.
667 */
668#define INBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000669 do { \
670 if ( --in_len < 0 ) { \
671 PyErr_SetString(Incomplete, ""); \
672 Py_DECREF(rv); \
673 return NULL; \
674 } \
675 b = *in_data++; \
676 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000677
Jack Jansen72781191995-08-07 14:34:15 +0000678#define OUTBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000679 do { \
680 if ( --out_len_left < 0 ) { \
681 _PyString_Resize(&rv, 2*out_len); \
682 if ( rv == NULL ) return NULL; \
683 out_data = (unsigned char *)PyString_AsString(rv) \
684 + out_len; \
685 out_len_left = out_len-1; \
686 out_len = out_len * 2; \
687 } \
688 *out_data++ = b; \
689 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000690
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000691 /*
692 ** Handle first byte separately (since we have to get angry
693 ** in case of an orphaned RLE code).
694 */
695 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000696
697 if (in_byte == RUNCHAR) {
698 INBYTE(in_repeat);
699 if (in_repeat != 0) {
700 /* Note Error, not Incomplete (which is at the end
701 ** of the string only). This is a programmer error.
702 */
703 PyErr_SetString(Error, "Orphaned RLE code at start");
704 Py_DECREF(rv);
705 return NULL;
706 }
707 OUTBYTE(RUNCHAR);
708 } else {
709 OUTBYTE(in_byte);
710 }
Tim Peters934c1a12002-07-02 22:24:50 +0000711
Jack Jansen72781191995-08-07 14:34:15 +0000712 while( in_len > 0 ) {
713 INBYTE(in_byte);
714
715 if (in_byte == RUNCHAR) {
716 INBYTE(in_repeat);
717 if ( in_repeat == 0 ) {
718 /* Just an escaped RUNCHAR value */
719 OUTBYTE(RUNCHAR);
720 } else {
721 /* Pick up value and output a sequence of it */
722 in_byte = out_data[-1];
723 while ( --in_repeat > 0 )
724 OUTBYTE(in_byte);
725 }
726 } else {
727 /* Normal byte */
728 OUTBYTE(in_byte);
729 }
730 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000731 _PyString_Resize(&rv, (out_data -
732 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000733 return rv;
734}
735
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000736PyDoc_STRVAR(doc_crc_hqx,
737"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
Jack Jansen72781191995-08-07 14:34:15 +0000738
739static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000740binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000741{
742 unsigned char *bin_data;
743 unsigned int crc;
744 int len;
Tim Peters934c1a12002-07-02 22:24:50 +0000745
Guido van Rossum43713e52000-02-29 13:59:29 +0000746 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
Jack Jansen72781191995-08-07 14:34:15 +0000747 return NULL;
748
749 while(len--) {
750 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
751 }
752
753 return Py_BuildValue("i", crc);
754}
755
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000756PyDoc_STRVAR(doc_crc32,
757"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000758
759/* Crc - 32 BIT ANSI X3.66 CRC checksum files
760 Also known as: ISO 3307
761**********************************************************************|
762* *|
763* Demonstration program to compute the 32-bit CRC used as the frame *|
764* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
765* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
766* protocol). The 32-bit FCS was added via the Federal Register, *|
767* 1 June 1982, p.23798. I presume but don't know for certain that *|
768* this polynomial is or will be included in CCITT V.41, which *|
769* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
770* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
771* errors by a factor of 10^-5 over 16-bit FCS. *|
772* *|
773**********************************************************************|
774
775 Copyright (C) 1986 Gary S. Brown. You may use this program, or
776 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000777
Tim Peters934c1a12002-07-02 22:24:50 +0000778 First, the polynomial itself and its table of feedback terms. The
779 polynomial is
780 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
781 Note that we take it "backwards" and put the highest-order term in
782 the lowest-order bit. The X^32 term is "implied"; the LSB is the
783 X^31 term, etc. The X^0 term (usually shown as "+1") results in
784 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000785
Tim Peters934c1a12002-07-02 22:24:50 +0000786 Note that the usual hardware shift register implementation, which
787 is what we're using (we're merely optimizing it by doing eight-bit
788 chunks at a time) shifts bits into the lowest-order term. In our
789 implementation, that means shifting towards the right. Why do we
790 do it this way? Because the calculated CRC must be transmitted in
791 order from highest-order term to lowest-order term. UARTs transmit
792 characters in order from LSB to MSB. By storing the CRC this way,
793 we hand it to the UART in the order low-byte to high-byte; the UART
794 sends each low-bit to high-bit; and the result is transmission bit
795 by bit from highest- to lowest-order term without requiring any bit
796 shuffling on our part. Reception works similarly.
797
798 The feedback terms table consists of 256, 32-bit entries. Notes:
799
800 1. The table can be generated at runtime if desired; code to do so
801 is shown later. It might not be obvious, but the feedback
802 terms simply represent the results of eight shift/xor opera-
803 tions for all combinations of data and CRC register values.
804
805 2. The CRC accumulation logic is the same for all CRC polynomials,
806 be they sixteen or thirty-two bits wide. You simply choose the
807 appropriate table. Alternatively, because the table can be
808 generated at runtime, you can start by generating the table for
809 the polynomial in question and use exactly the same "updcrc",
810 if your application needn't simultaneously handle two CRC
811 polynomials. (Note, however, that XMODEM is strange.)
812
813 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
814 of course, 32-bit entries work OK if the high 16 bits are zero.
815
816 4. The values must be right-shifted by eight bits by the "updcrc"
817 logic; the shift must be unsigned (bring in zeroes). On some
818 hardware you could probably optimize the shift in assembler by
819 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000820********************************************************************/
821
/* CRC-32 feedback-term table for the reflected polynomial 0xedb88320:
 * entry n is the result of eight shift/xor steps applied to the byte n.
 * The table is only ever read, so it is declared const to keep it out of
 * writable data and to let the compiler reject accidental stores.
 */
static const unsigned long crc_32_tab[256] = {
0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
0x2d02ef8dUL
};
876
877static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000878binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000879{ /* By Jim Ahlstrom; All rights transferred to CNRI */
880 unsigned char *bin_data;
881 unsigned long crc = 0UL; /* initial value of CRC */
882 int len;
Tim Petersa98011c2002-07-02 20:20:08 +0000883 long result;
Tim Peters934c1a12002-07-02 22:24:50 +0000884
Guido van Rossum43713e52000-02-29 13:59:29 +0000885 if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000886 return NULL;
887
Tim Peters934c1a12002-07-02 22:24:50 +0000888 crc = ~ crc;
889#if SIZEOF_LONG > 4
890 /* only want the trailing 32 bits */
891 crc &= 0xFFFFFFFFUL;
892#endif
893 while (len--)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000894 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
895 /* Note: (crc >> 8) MUST zero fill on left */
Tim Petersa98011c2002-07-02 20:20:08 +0000896
897 result = (long)(crc ^ 0xFFFFFFFFUL);
Tim Peters934c1a12002-07-02 22:24:50 +0000898#if SIZEOF_LONG > 4
899 /* Extend the sign bit. This is one way to ensure the result is the
900 * same across platforms. The other way would be to return an
901 * unbounded unsigned long, but the evidence suggests that lots of
902 * code outside this treats the result as if it were a signed 4-byte
903 * integer.
Tim Petersa98011c2002-07-02 20:20:08 +0000904 */
905 result |= -(result & (1L << 31));
Tim Peters934c1a12002-07-02 22:24:50 +0000906#endif
Tim Petersa98011c2002-07-02 20:20:08 +0000907 return PyInt_FromLong(result);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000908}
909
Barry Warsawe977c212000-08-15 06:07:13 +0000910
911static PyObject *
912binascii_hexlify(PyObject *self, PyObject *args)
913{
914 char* argbuf;
915 int arglen;
916 PyObject *retval;
917 char* retbuf;
918 int i, j;
919
920 if (!PyArg_ParseTuple(args, "t#:b2a_hex", &argbuf, &arglen))
921 return NULL;
922
923 retval = PyString_FromStringAndSize(NULL, arglen*2);
924 if (!retval)
925 return NULL;
926 retbuf = PyString_AsString(retval);
927 if (!retbuf)
928 goto finally;
929
930 /* make hex version of string, taken from shamodule.c */
931 for (i=j=0; i < arglen; i++) {
932 char c;
933 c = (argbuf[i] >> 4) & 0xf;
934 c = (c>9) ? c+'a'-10 : c + '0';
935 retbuf[j++] = c;
936 c = argbuf[i] & 0xf;
937 c = (c>9) ? c+'a'-10 : c + '0';
938 retbuf[j++] = c;
939 }
940 return retval;
941
942 finally:
943 Py_DECREF(retval);
944 return NULL;
945}
946
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000947PyDoc_STRVAR(doc_hexlify,
Barry Warsawe977c212000-08-15 06:07:13 +0000948"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
949\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000950This function is also available as \"hexlify()\".");
Barry Warsawe977c212000-08-15 06:07:13 +0000951
952
/* Convert one hexadecimal digit character to its value.
 *
 * c is the character code to classify; any int is accepted.
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for everything
 * else.
 *
 * Plain range comparisons are used instead of isdigit()/isupper()/
 * tolower(): those are only defined for unsigned char values and EOF,
 * and some C libraries let the current locale classify extra characters
 * as digits, which would make "c - '0'" return a value outside 0..9.
 */
static int
to_int(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
966
967
968static PyObject *
969binascii_unhexlify(PyObject *self, PyObject *args)
970{
971 char* argbuf;
972 int arglen;
973 PyObject *retval;
974 char* retbuf;
975 int i, j;
976
977 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
978 return NULL;
979
Barry Warsaw16168472000-08-15 06:59:58 +0000980 /* XXX What should we do about strings with an odd length? Should
981 * we add an implicit leading zero, or a trailing zero? For now,
982 * raise an exception.
Barry Warsawe977c212000-08-15 06:07:13 +0000983 */
984 if (arglen % 2) {
Barry Warsaw16168472000-08-15 06:59:58 +0000985 PyErr_SetString(PyExc_TypeError, "Odd-length string");
Barry Warsawe977c212000-08-15 06:07:13 +0000986 return NULL;
987 }
988
989 retval = PyString_FromStringAndSize(NULL, (arglen/2));
990 if (!retval)
991 return NULL;
992 retbuf = PyString_AsString(retval);
993 if (!retbuf)
994 goto finally;
995
996 for (i=j=0; i < arglen; i += 2) {
997 int top = to_int(Py_CHARMASK(argbuf[i]));
998 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
999 if (top == -1 || bot == -1) {
1000 PyErr_SetString(PyExc_TypeError,
Barry Warsaw16168472000-08-15 06:59:58 +00001001 "Non-hexadecimal digit found");
Barry Warsawe977c212000-08-15 06:07:13 +00001002 goto finally;
1003 }
1004 retbuf[j++] = (top << 4) + bot;
1005 }
1006 return retval;
1007
1008 finally:
1009 Py_DECREF(retval);
1010 return NULL;
1011}
1012
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001013PyDoc_STRVAR(doc_unhexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001014"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1015\n\
1016hexstr must contain an even number of hex digits (upper or lower case).\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001017This function is also available as \"unhexlify()\"");
Barry Warsawe977c212000-08-15 06:07:13 +00001018
/* Value of each 7-bit character code as a hexadecimal digit, or -1 when
 * the character is not one.  '0'-'9' map to 0-9; 'A'-'F' and 'a'-'f' both
 * map to 10-15.  The table is never written, hence const.
 */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up the hex value of c.  The table has only 128 entries and the
 * macro does no range check, so c must be a character code below 128. */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Line-length limit used by the quoted-printable encoder. */
#define MAXLINESIZE 76
1033
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001034PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001035
Tim Peters934c1a12002-07-02 22:24:50 +00001036static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001037binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1038{
1039 unsigned int in, out;
1040 char ch;
1041 unsigned char *data, *odata;
1042 unsigned int datalen = 0;
1043 PyObject *rv;
1044 static char *kwlist[] = {"data", "header", NULL};
1045 int header = 0;
1046
Tim Peters934c1a12002-07-02 22:24:50 +00001047 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001048 &datalen, &header))
1049 return NULL;
1050
Barry Warsaw23164a52004-05-11 02:05:11 +00001051 /* We allocate the output same size as input, this is overkill.
1052 * The previous implementation used calloc() so we'll zero out the
1053 * memory here too, since PyMem_Malloc() does not guarantee that.
1054 */
1055 odata = (unsigned char *) PyMem_Malloc(datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001056 if (odata == NULL) {
1057 PyErr_NoMemory();
1058 return NULL;
1059 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001060 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001061
1062 in = out = 0;
1063 while (in < datalen) {
1064 if (data[in] == '=') {
1065 in++;
1066 if (in >= datalen) break;
1067 /* Soft line breaks */
Tim Peters934c1a12002-07-02 22:24:50 +00001068 if ((data[in] == '\n') || (data[in] == '\r') ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001069 (data[in] == ' ') || (data[in] == '\t')) {
1070 if (data[in] != '\n') {
1071 while (in < datalen && data[in] != '\n') in++;
1072 }
1073 if (in < datalen) in++;
1074 }
1075 else if (data[in] == '=') {
1076 /* broken case from broken python qp */
1077 odata[out++] = '=';
1078 in++;
1079 }
Tim Peters934c1a12002-07-02 22:24:50 +00001080 else if (((data[in] >= 'A' && data[in] <= 'F') ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001081 (data[in] >= 'a' && data[in] <= 'f') ||
1082 (data[in] >= '0' && data[in] <= '9')) &&
1083 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1084 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1085 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1086 /* hexval */
1087 ch = hexval(data[in]) << 4;
1088 in++;
1089 ch |= hexval(data[in]);
1090 in++;
1091 odata[out++] = ch;
1092 }
1093 else {
1094 odata[out++] = '=';
1095 }
1096 }
1097 else if (header && data[in] == '_') {
1098 odata[out++] = ' ';
1099 in++;
1100 }
1101 else {
1102 odata[out] = data[in];
1103 in++;
1104 out++;
1105 }
1106 }
Greg Warda645b302001-10-04 14:54:53 +00001107 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001108 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001109 return NULL;
1110 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001111 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001112 return rv;
1113}
1114
/* Write the two uppercase hexadecimal digits of ch into s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.  Always
 * returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
	static const char digits[] = "0123456789ABCDEF";

	s[0] = digits[(ch >> 4) & 0x0f];
	s[1] = digits[ch & 0x0f];
	return 0;
}
1125
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001126PyDoc_STRVAR(doc_b2a_qp,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001127"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1128 Encode a string using quoted-printable encoding. \n\
1129\n\
1130On encoding, when istext is set, newlines are not encoded, and white \n\
1131space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001132both encoded. When quotetabs is set, space and tabs are encoded.");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001133
1134/* XXX: This is ridiculously complicated to be backward compatible
1135 * (mostly) with the quopri module. It doesn't re-create the quopri
1136 * module bug where text ending in CRLF has the CR encoded */
Tim Peters934c1a12002-07-02 22:24:50 +00001137static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001138binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1139{
1140 unsigned int in, out;
1141 unsigned char *data, *odata;
1142 unsigned int datalen = 0, odatalen = 0;
1143 PyObject *rv;
1144 unsigned int linelen = 0;
1145 static char *kwlist[] = {"data", "quotetabs", "istext", "header", NULL};
1146 int istext = 1;
1147 int quotetabs = 0;
1148 int header = 0;
1149 unsigned char ch;
1150 int crlf = 0;
1151 unsigned char *p;
1152
Tim Peters934c1a12002-07-02 22:24:50 +00001153 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001154 &datalen, &quotetabs, &istext, &header))
1155 return NULL;
1156
1157 /* See if this string is using CRLF line ends */
1158 /* XXX: this function has the side effect of converting all of
1159 * the end of lines to be the same depending on this detection
1160 * here */
Greg Warda645b302001-10-04 14:54:53 +00001161 p = (unsigned char *) strchr((char *)data, '\n');
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001162 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1163 crlf = 1;
1164
1165 /* First, scan to see how many characters need to be encoded */
1166 in = 0;
1167 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001168 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001169 (data[in] == '=') ||
1170 (header && data[in] == '_') ||
1171 ((data[in] == '.') && (linelen == 1)) ||
1172 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1173 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001174 ((data[in] < 33) &&
1175 (data[in] != '\r') && (data[in] != '\n') &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001176 (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1177 {
1178 if ((linelen + 3) >= MAXLINESIZE) {
1179 linelen = 0;
1180 if (crlf)
1181 odatalen += 3;
1182 else
1183 odatalen += 2;
1184 }
1185 linelen += 3;
1186 odatalen += 3;
1187 in++;
1188 }
1189 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001190 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001191 ((data[in] == '\n') ||
1192 ((in+1 < datalen) && (data[in] == '\r') &&
1193 (data[in+1] == '\n'))))
1194 {
1195 linelen = 0;
1196 /* Protect against whitespace on end of line */
1197 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1198 odatalen += 2;
1199 if (crlf)
1200 odatalen += 2;
1201 else
1202 odatalen += 1;
1203 if (data[in] == '\r')
1204 in += 2;
1205 else
1206 in++;
1207 }
1208 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001209 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001210 (data[in+1] != '\n') &&
1211 (linelen + 1) >= MAXLINESIZE) {
1212 linelen = 0;
1213 if (crlf)
1214 odatalen += 3;
1215 else
1216 odatalen += 2;
1217 }
1218 linelen++;
1219 odatalen++;
1220 in++;
1221 }
1222 }
1223 }
1224
Barry Warsaw23164a52004-05-11 02:05:11 +00001225 /* We allocate the output same size as input, this is overkill.
1226 * The previous implementation used calloc() so we'll zero out the
1227 * memory here too, since PyMem_Malloc() does not guarantee that.
1228 */
1229 odata = (unsigned char *) PyMem_Malloc(odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001230 if (odata == NULL) {
1231 PyErr_NoMemory();
1232 return NULL;
1233 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001234 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001235
1236 in = out = linelen = 0;
1237 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001238 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001239 (data[in] == '=') ||
1240 (header && data[in] == '_') ||
1241 ((data[in] == '.') && (linelen == 1)) ||
1242 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1243 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001244 ((data[in] < 33) &&
1245 (data[in] != '\r') && (data[in] != '\n') &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001246 (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1247 {
1248 if ((linelen + 3 )>= MAXLINESIZE) {
1249 odata[out++] = '=';
1250 if (crlf) odata[out++] = '\r';
1251 odata[out++] = '\n';
1252 linelen = 0;
1253 }
1254 odata[out++] = '=';
1255 to_hex(data[in], &odata[out]);
1256 out += 2;
1257 in++;
1258 linelen += 3;
1259 }
1260 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001261 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001262 ((data[in] == '\n') ||
1263 ((in+1 < datalen) && (data[in] == '\r') &&
1264 (data[in+1] == '\n'))))
1265 {
1266 linelen = 0;
1267 /* Protect against whitespace on end of line */
1268 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1269 ch = odata[out-1];
1270 odata[out-1] = '=';
1271 to_hex(ch, &odata[out]);
1272 out += 2;
1273 }
Tim Peters934c1a12002-07-02 22:24:50 +00001274
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001275 if (crlf) odata[out++] = '\r';
1276 odata[out++] = '\n';
1277 if (data[in] == '\r')
1278 in += 2;
1279 else
1280 in++;
1281 }
1282 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001283 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001284 (data[in+1] != '\n') &&
1285 (linelen + 1) >= MAXLINESIZE) {
1286 odata[out++] = '=';
1287 if (crlf) odata[out++] = '\r';
1288 odata[out++] = '\n';
1289 linelen = 0;
1290 }
1291 linelen++;
1292 if (header && data[in] == ' ') {
1293 odata[out++] = '_';
1294 in++;
1295 }
1296 else {
1297 odata[out++] = data[in++];
1298 }
1299 }
1300 }
1301 }
Greg Warda645b302001-10-04 14:54:53 +00001302 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001303 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001304 return NULL;
1305 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001306 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001307 return rv;
1308}
Barry Warsawe977c212000-08-15 06:07:13 +00001309
Jack Jansen72781191995-08-07 14:34:15 +00001310/* List of functions defined in the module */
1311
1312static struct PyMethodDef binascii_module_methods[] = {
Barry Warsawe977c212000-08-15 06:07:13 +00001313 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1314 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1315 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1316 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1317 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1318 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1319 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1320 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1321 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1322 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1323 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1324 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1325 doc_rledecode_hqx},
1326 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1327 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
Tim Peters934c1a12002-07-02 22:24:50 +00001328 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001329 doc_a2b_qp},
Tim Peters934c1a12002-07-02 22:24:50 +00001330 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001331 doc_b2a_qp},
Barry Warsawe977c212000-08-15 06:07:13 +00001332 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001333};
1334
1335
1336/* Initialization function for the module (*must* be called initbinascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001337PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001338
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001339PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001340initbinascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001341{
1342 PyObject *m, *d, *x;
1343
1344 /* Create the module and add the functions */
1345 m = Py_InitModule("binascii", binascii_module_methods);
1346
1347 d = PyModule_GetDict(m);
1348 x = PyString_FromString(doc_binascii);
1349 PyDict_SetItemString(d, "__doc__", x);
Guido van Rossum5c159bd1997-08-04 23:55:25 +00001350 Py_XDECREF(x);
Jack Jansen72781191995-08-07 14:34:15 +00001351
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001352 Error = PyErr_NewException("binascii.Error", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001353 PyDict_SetItemString(d, "Error", Error);
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001354 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001355 PyDict_SetItemString(d, "Incomplete", Incomplete);
Jack Jansen72781191995-08-07 14:34:15 +00001356}