blob: 643450cf329f28bbdbe45cf7ada15b0b3775fff4 [file] [log] [blame]
Jack Jansen72781191995-08-07 14:34:15 +00001/*
2** Routines to represent binary data in ASCII and vice-versa
3**
4** This module currently supports the following encodings:
5** uuencode:
6** each line encodes 45 bytes (except possibly the last)
7** First char encodes (binary) length, rest data
8** each char encodes 6 bits, as follows:
9** binary: 01234567 abcdefgh ijklmnop
10** ascii: 012345 67abcd efghij klmnop
11** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12** short binary data is zero-extended (so the bits are always in the
13** right place), this does *not* reflect in the length.
Jack Jansen84bbc2e1995-10-04 16:38:44 +000014** base64:
15** Line breaks are insignificant, but lines are at most 76 chars
16** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17** is done via a table.
18** Short binary data is filled (in ASCII) with '='.
Jack Jansen72781191995-08-07 14:34:15 +000019** hqx:
20** File starts with introductory text, real data starts and ends
21** with colons.
22** Data consists of three similar parts: info, datafork, resourcefork.
23** Each part is protected (at the end) with a 16-bit crc
24** The binary data is run-length encoded, and then ascii-fied:
25** binary: 01234567 abcdefgh ijklmnop
26** ascii: 012345 67abcd efghij klmnop
27** ASCII encoding is table-driven, see the code.
28** Short binary data results in the runt ascii-byte being output with
29** the bits in the right place.
30**
31** While I was reading dozens of programs that encode or decode the formats
32** here (documentation? hihi:-) I have formulated Jansen's Observation:
33**
34** Programs that encode binary data in ASCII are written in
35** such a style that they are as unreadable as possible. Devices used
36** include unnecessary global variables, burying important tables
37** in unrelated sourcefiles, putting functions in include files,
38** using seemingly-descriptive variable names for different purposes,
39** calls to empty subroutines and a host of others.
40**
41** I have attempted to break with this tradition, but I guess that that
42** does make the performance sub-optimal. Oh well, too bad...
43**
44** Jack Jansen, CWI, July 1995.
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000045**
46** Added support for quoted-printable encoding, based on rfc 1521 et al
47** quoted-printable encoding specifies that non printable characters (anything
48** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49** of the character. It also specifies some other behavior to enable 8bit data
50** in a mail message with little difficulty (maximum line sizes, protecting
51** some cases of whitespace, etc).
52**
53** Brandon Long, September 2001.
Jack Jansen72781191995-08-07 14:34:15 +000054*/
55
56
57#include "Python.h"
58
59static PyObject *Error;
60static PyObject *Incomplete;
61
/*
** hqx lookup tables.
**
** table_a2b_hqx maps an input character to its 6-bit value, or to one
** of three marker values that the decoder tests for explicitly:
**   DONE - the terminating colon,
**   SKIP - a line terminator (\n or \r) that is silently ignored,
**   FAIL - a character that is not part of the hqx alphabet.
** table_b2a_hqx is the inverse: 6-bit value -> character.
**
** Both tables are only ever read, so they are declared const.
*/

#define RUNCHAR 0x90	/* Marker byte of the hqx run-length coding */

#define DONE 0x7F
#define SKIP 0x7E
#define FAIL 0x7D

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/	FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/	FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/	0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/	0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/	0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/	0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/	0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/	0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/	0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/	0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/	0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};

/* 6-bit value -> hqx character (64 entries plus the literal's NUL) */
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000125
/*
** base64 lookup tables.
**
** table_a2b_base64 maps a 7-bit character to its 6-bit value; -1 marks
** a character outside the base64 alphabet.  The pad character '=' maps
** to 0 rather than -1.  The table has 128 entries, so it must only be
** indexed with values 0..0x7f.
**
** Both tables are only ever read, so they are declared const.
*/
static const char table_a2b_base64[] = {
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
	52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
	-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
	15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
	-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
	41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject))

/* 6-bit value -> base64 character (64 entries plus the literal's NUL) */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000144
145
146
/*
** CRC table used by crc_hqx: entry i is the 16-bit remainder obtained by
** running the byte i, most significant bit first, through the
** polynomial 0x1021.  The table is read-only, hence const.
*/
static const unsigned short crctab_hqx[256] = {
	0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
	0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
	0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
	0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
	0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
	0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
	0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
	0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
	0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
	0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
	0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
	0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
	0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
	0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
	0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
	0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
	0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
	0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
	0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
	0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
	0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
	0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
	0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
	0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
	0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
	0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
	0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
	0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
	0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
	0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
	0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
	0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
181
182static char doc_a2b_uu[] = "(ascii) -> bin. Decode a line of uuencoded data";
183
184static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000185binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000186{
187 unsigned char *ascii_data, *bin_data;
188 int leftbits = 0;
189 unsigned char this_ch;
190 unsigned int leftchar = 0;
191 PyObject *rv;
192 int ascii_len, bin_len;
193
Guido van Rossum43713e52000-02-29 13:59:29 +0000194 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000195 return NULL;
196
197 /* First byte: binary data length (in bytes) */
198 bin_len = (*ascii_data++ - ' ') & 077;
199 ascii_len--;
200
201 /* Allocate the buffer */
202 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
203 return NULL;
204 bin_data = (unsigned char *)PyString_AsString(rv);
205
206 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
207 this_ch = *ascii_data;
208 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
209 /*
210 ** Whitespace. Assume some spaces got eaten at
211 ** end-of-line. (We check this later)
212 */
213 this_ch = 0;
214 } else {
Jack Jansen5d957971995-11-14 10:35:19 +0000215 /* Check the character for legality
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000216 ** The 64 in stead of the expected 63 is because
217 ** there are a few uuencodes out there that use
Guido van Rossum92d89172001-01-09 02:11:57 +0000218 ** '`' as zero instead of space.
Jack Jansen5d957971995-11-14 10:35:19 +0000219 */
220 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Jack Jansen72781191995-08-07 14:34:15 +0000221 PyErr_SetString(Error, "Illegal char");
222 Py_DECREF(rv);
223 return NULL;
224 }
225 this_ch = (this_ch - ' ') & 077;
226 }
227 /*
228 ** Shift it in on the low end, and see if there's
229 ** a byte ready for output.
230 */
231 leftchar = (leftchar << 6) | (this_ch);
232 leftbits += 6;
233 if ( leftbits >= 8 ) {
234 leftbits -= 8;
235 *bin_data++ = (leftchar >> leftbits) & 0xff;
236 leftchar &= ((1 << leftbits) - 1);
237 bin_len--;
238 }
239 }
240 /*
241 ** Finally, check that if there's anything left on the line
242 ** that it's whitespace only.
243 */
244 while( ascii_len-- > 0 ) {
245 this_ch = *ascii_data++;
Guido van Rossum92d89172001-01-09 02:11:57 +0000246 /* Extra '`' may be written as padding in some cases */
247 if ( this_ch != ' ' && this_ch != ' '+64 &&
Guido van Rossum1243ae71997-07-11 18:36:28 +0000248 this_ch != '\n' && this_ch != '\r' ) {
Jack Jansen72781191995-08-07 14:34:15 +0000249 PyErr_SetString(Error, "Trailing garbage");
250 Py_DECREF(rv);
251 return NULL;
252 }
253 }
254 return rv;
255}
256
257static char doc_b2a_uu[] = "(bin) -> ascii. Uuencode line of data";
258
259static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000260binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000261{
262 unsigned char *ascii_data, *bin_data;
263 int leftbits = 0;
264 unsigned char this_ch;
265 unsigned int leftchar = 0;
266 PyObject *rv;
267 int bin_len;
268
Guido van Rossum43713e52000-02-29 13:59:29 +0000269 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000270 return NULL;
271 if ( bin_len > 45 ) {
272 /* The 45 is a limit that appears in all uuencode's */
273 PyErr_SetString(Error, "At most 45 bytes at once");
274 return NULL;
275 }
276
277 /* We're lazy and allocate to much (fixed up later) */
278 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2)) == NULL )
279 return NULL;
280 ascii_data = (unsigned char *)PyString_AsString(rv);
281
282 /* Store the length */
283 *ascii_data++ = ' ' + (bin_len & 077);
284
285 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
286 /* Shift the data (or padding) into our buffer */
287 if ( bin_len > 0 ) /* Data */
288 leftchar = (leftchar << 8) | *bin_data;
289 else /* Padding */
290 leftchar <<= 8;
291 leftbits += 8;
292
293 /* See if there are 6-bit groups ready */
294 while ( leftbits >= 6 ) {
295 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
296 leftbits -= 6;
297 *ascii_data++ = this_ch + ' ';
298 }
299 }
300 *ascii_data++ = '\n'; /* Append a courtesy newline */
301
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000302 _PyString_Resize(&rv, (ascii_data -
303 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000304 return rv;
305}
306
Guido van Rossum2db4f471999-10-19 19:05:14 +0000307
308static int
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000309binascii_find_valid(unsigned char *s, int slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000310{
311 /* Finds & returns the (num+1)th
312 ** valid character for base64, or -1 if none.
313 */
314
315 int ret = -1;
316 unsigned char c, b64val;
317
318 while ((slen > 0) && (ret == -1)) {
319 c = *s;
320 b64val = table_a2b_base64[c & 0x7f];
321 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
322 if (num == 0)
323 ret = *s;
324 num--;
325 }
326
327 s++;
328 slen--;
329 }
330 return ret;
331}
332
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000333static char doc_a2b_base64[] = "(ascii) -> bin. Decode a line of base64 data";
334
335static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000336binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000337{
338 unsigned char *ascii_data, *bin_data;
339 int leftbits = 0;
340 unsigned char this_ch;
341 unsigned int leftchar = 0;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000342 PyObject *rv;
343 int ascii_len, bin_len;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000344 int quad_pos = 0;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000345
Guido van Rossum43713e52000-02-29 13:59:29 +0000346 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000347 return NULL;
348
Peter Schneider-Kampd895b202001-06-07 05:51:36 +0000349 if ( ascii_len == 0) {
350 PyErr_SetString(Error, "Cannot decode empty input");
351 return NULL;
352 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000353 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
354
355 /* Allocate the buffer */
356 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
357 return NULL;
358 bin_data = (unsigned char *)PyString_AsString(rv);
359 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000360
361 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
362 this_ch = *ascii_data;
363
364 if (this_ch > 0x7f ||
365 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
Jack Jansenba1de3b1996-01-22 10:47:15 +0000366 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000367
368 /* Check for pad sequences and ignore
369 ** the invalid ones.
370 */
371 if (this_ch == BASE64_PAD) {
372 if ( (quad_pos < 2) ||
373 ((quad_pos == 2) &&
374 (binascii_find_valid(ascii_data, ascii_len, 1)
375 != BASE64_PAD)) )
376 {
377 continue;
378 }
379 else {
380 /* A pad sequence means no more input.
381 ** We've already interpreted the data
382 ** from the quad at this point.
383 */
384 leftbits = 0;
385 break;
386 }
387 }
388
389 this_ch = table_a2b_base64[*ascii_data];
390 if ( this_ch == (unsigned char) -1 )
391 continue;
392
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000393 /*
394 ** Shift it in on the low end, and see if there's
395 ** a byte ready for output.
396 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000397 quad_pos = (quad_pos + 1) & 0x03;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000398 leftchar = (leftchar << 6) | (this_ch);
399 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000400
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000401 if ( leftbits >= 8 ) {
402 leftbits -= 8;
403 *bin_data++ = (leftchar >> leftbits) & 0xff;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000404 bin_len++;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000405 leftchar &= ((1 << leftbits) - 1);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000406 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000407 }
408
409 if (leftbits != 0) {
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000410 PyErr_SetString(Error, "Incorrect padding");
411 Py_DECREF(rv);
412 return NULL;
413 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000414
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000415 /* and set string size correctly */
416 _PyString_Resize(&rv, bin_len);
417 return rv;
418}
419
420static char doc_b2a_base64[] = "(bin) -> ascii. Base64-code line of data";
421
422static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000423binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000424{
425 unsigned char *ascii_data, *bin_data;
426 int leftbits = 0;
427 unsigned char this_ch;
428 unsigned int leftchar = 0;
429 PyObject *rv;
430 int bin_len;
431
Guido van Rossum43713e52000-02-29 13:59:29 +0000432 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000433 return NULL;
434 if ( bin_len > BASE64_MAXBIN ) {
435 PyErr_SetString(Error, "Too much data for base64 line");
436 return NULL;
437 }
438
439 /* We're lazy and allocate to much (fixed up later) */
440 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2)) == NULL )
441 return NULL;
442 ascii_data = (unsigned char *)PyString_AsString(rv);
443
444 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
445 /* Shift the data into our buffer */
446 leftchar = (leftchar << 8) | *bin_data;
447 leftbits += 8;
448
449 /* See if there are 6-bit groups ready */
450 while ( leftbits >= 6 ) {
451 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
452 leftbits -= 6;
453 *ascii_data++ = table_b2a_base64[this_ch];
454 }
455 }
456 if ( leftbits == 2 ) {
457 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
458 *ascii_data++ = BASE64_PAD;
459 *ascii_data++ = BASE64_PAD;
460 } else if ( leftbits == 4 ) {
461 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
462 *ascii_data++ = BASE64_PAD;
463 }
464 *ascii_data++ = '\n'; /* Append a courtesy newline */
465
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000466 _PyString_Resize(&rv, (ascii_data -
467 (unsigned char *)PyString_AsString(rv)));
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000468 return rv;
469}
470
Jack Jansen72781191995-08-07 14:34:15 +0000471static char doc_a2b_hqx[] = "ascii -> bin, done. Decode .hqx coding";
472
473static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000474binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000475{
476 unsigned char *ascii_data, *bin_data;
477 int leftbits = 0;
478 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000479 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000480 PyObject *rv;
481 int len;
482 int done = 0;
483
Guido van Rossum43713e52000-02-29 13:59:29 +0000484 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000485 return NULL;
486
487 /* Allocate a string that is too big (fixed later) */
488 if ( (rv=PyString_FromStringAndSize(NULL, len)) == NULL )
489 return NULL;
490 bin_data = (unsigned char *)PyString_AsString(rv);
491
492 for( ; len > 0 ; len--, ascii_data++ ) {
493 /* Get the byte and look it up */
494 this_ch = table_a2b_hqx[*ascii_data];
495 if ( this_ch == SKIP )
496 continue;
497 if ( this_ch == FAIL ) {
498 PyErr_SetString(Error, "Illegal char");
499 Py_DECREF(rv);
500 return NULL;
501 }
502 if ( this_ch == DONE ) {
503 /* The terminating colon */
504 done = 1;
505 break;
506 }
507
508 /* Shift it into the buffer and see if any bytes are ready */
509 leftchar = (leftchar << 6) | (this_ch);
510 leftbits += 6;
511 if ( leftbits >= 8 ) {
512 leftbits -= 8;
513 *bin_data++ = (leftchar >> leftbits) & 0xff;
514 leftchar &= ((1 << leftbits) - 1);
515 }
516 }
517
518 if ( leftbits && !done ) {
519 PyErr_SetString(Incomplete,
520 "String has incomplete number of bytes");
521 Py_DECREF(rv);
522 return NULL;
523 }
Guido van Rossum9c6ba5e1997-01-12 20:02:04 +0000524 _PyString_Resize(
525 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
526 if (rv) {
527 PyObject *rrv = Py_BuildValue("Oi", rv, done);
528 Py_DECREF(rv);
529 return rrv;
530 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000531
Jack Jansen72781191995-08-07 14:34:15 +0000532 return NULL;
533}
534
535static char doc_rlecode_hqx[] = "Binhex RLE-code binary data";
536
537static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000538binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000539{
540 unsigned char *in_data, *out_data;
541 PyObject *rv;
542 unsigned char ch;
543 int in, inend, len;
544
Guido van Rossum43713e52000-02-29 13:59:29 +0000545 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000546 return NULL;
547
548 /* Worst case: output is twice as big as input (fixed later) */
549 if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL )
550 return NULL;
551 out_data = (unsigned char *)PyString_AsString(rv);
552
553 for( in=0; in<len; in++) {
554 ch = in_data[in];
555 if ( ch == RUNCHAR ) {
556 /* RUNCHAR. Escape it. */
557 *out_data++ = RUNCHAR;
558 *out_data++ = 0;
559 } else {
560 /* Check how many following are the same */
561 for(inend=in+1;
562 inend<len && in_data[inend] == ch &&
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000563 inend < in+255;
Jack Jansen72781191995-08-07 14:34:15 +0000564 inend++) ;
Jack Jansen0223aa11995-08-31 13:44:23 +0000565 if ( inend - in > 3 ) {
566 /* More than 3 in a row. Output RLE. */
Jack Jansen72781191995-08-07 14:34:15 +0000567 *out_data++ = ch;
568 *out_data++ = RUNCHAR;
569 *out_data++ = inend-in;
570 in = inend-1;
571 } else {
572 /* Less than 3. Output the byte itself */
573 *out_data++ = ch;
574 }
575 }
576 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000577 _PyString_Resize(&rv, (out_data -
578 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000579 return rv;
580}
581
582static char doc_b2a_hqx[] = "Encode .hqx data";
583
584static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000585binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000586{
587 unsigned char *ascii_data, *bin_data;
588 int leftbits = 0;
589 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000590 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000591 PyObject *rv;
592 int len;
593
Guido van Rossum43713e52000-02-29 13:59:29 +0000594 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000595 return NULL;
596
597 /* Allocate a buffer that is at least large enough */
598 if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL )
599 return NULL;
600 ascii_data = (unsigned char *)PyString_AsString(rv);
601
602 for( ; len > 0 ; len--, bin_data++ ) {
603 /* Shift into our buffer, and output any 6bits ready */
604 leftchar = (leftchar << 8) | *bin_data;
605 leftbits += 8;
606 while ( leftbits >= 6 ) {
607 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
608 leftbits -= 6;
609 *ascii_data++ = table_b2a_hqx[this_ch];
610 }
611 }
612 /* Output a possible runt byte */
613 if ( leftbits ) {
614 leftchar <<= (6-leftbits);
615 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
616 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000617 _PyString_Resize(&rv, (ascii_data -
618 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000619 return rv;
620}
621
622static char doc_rledecode_hqx[] = "Decode hexbin RLE-coded string";
623
624static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000625binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000626{
627 unsigned char *in_data, *out_data;
628 unsigned char in_byte, in_repeat;
629 PyObject *rv;
630 int in_len, out_len, out_len_left;
631
Guido van Rossum43713e52000-02-29 13:59:29 +0000632 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000633 return NULL;
634
635 /* Empty string is a special case */
636 if ( in_len == 0 )
637 return Py_BuildValue("s", "");
638
639 /* Allocate a buffer of reasonable size. Resized when needed */
640 out_len = in_len*2;
641 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
642 return NULL;
643 out_len_left = out_len;
644 out_data = (unsigned char *)PyString_AsString(rv);
645
646 /*
647 ** We need two macros here to get/put bytes and handle
648 ** end-of-buffer for input and output strings.
649 */
650#define INBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000651 do { \
652 if ( --in_len < 0 ) { \
653 PyErr_SetString(Incomplete, ""); \
654 Py_DECREF(rv); \
655 return NULL; \
656 } \
657 b = *in_data++; \
658 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000659
660#define OUTBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000661 do { \
662 if ( --out_len_left < 0 ) { \
663 _PyString_Resize(&rv, 2*out_len); \
664 if ( rv == NULL ) return NULL; \
665 out_data = (unsigned char *)PyString_AsString(rv) \
666 + out_len; \
667 out_len_left = out_len-1; \
668 out_len = out_len * 2; \
669 } \
670 *out_data++ = b; \
671 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000672
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000673 /*
674 ** Handle first byte separately (since we have to get angry
675 ** in case of an orphaned RLE code).
676 */
677 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000678
679 if (in_byte == RUNCHAR) {
680 INBYTE(in_repeat);
681 if (in_repeat != 0) {
682 /* Note Error, not Incomplete (which is at the end
683 ** of the string only). This is a programmer error.
684 */
685 PyErr_SetString(Error, "Orphaned RLE code at start");
686 Py_DECREF(rv);
687 return NULL;
688 }
689 OUTBYTE(RUNCHAR);
690 } else {
691 OUTBYTE(in_byte);
692 }
693
694 while( in_len > 0 ) {
695 INBYTE(in_byte);
696
697 if (in_byte == RUNCHAR) {
698 INBYTE(in_repeat);
699 if ( in_repeat == 0 ) {
700 /* Just an escaped RUNCHAR value */
701 OUTBYTE(RUNCHAR);
702 } else {
703 /* Pick up value and output a sequence of it */
704 in_byte = out_data[-1];
705 while ( --in_repeat > 0 )
706 OUTBYTE(in_byte);
707 }
708 } else {
709 /* Normal byte */
710 OUTBYTE(in_byte);
711 }
712 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000713 _PyString_Resize(&rv, (out_data -
714 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000715 return rv;
716}
717
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000718static char doc_crc_hqx[] =
719"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally";
Jack Jansen72781191995-08-07 14:34:15 +0000720
721static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000722binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000723{
724 unsigned char *bin_data;
725 unsigned int crc;
726 int len;
727
Guido van Rossum43713e52000-02-29 13:59:29 +0000728 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
Jack Jansen72781191995-08-07 14:34:15 +0000729 return NULL;
730
731 while(len--) {
732 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
733 }
734
735 return Py_BuildValue("i", crc);
736}
737
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000738static char doc_crc32[] =
739"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally";
740
741/* Crc - 32 BIT ANSI X3.66 CRC checksum files
742 Also known as: ISO 3307
743**********************************************************************|
744* *|
745* Demonstration program to compute the 32-bit CRC used as the frame *|
746* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
747* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
748* protocol). The 32-bit FCS was added via the Federal Register, *|
749* 1 June 1982, p.23798. I presume but don't know for certain that *|
750* this polynomial is or will be included in CCITT V.41, which *|
751* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
752* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
753* errors by a factor of 10^-5 over 16-bit FCS. *|
754* *|
755**********************************************************************|
756
757 Copyright (C) 1986 Gary S. Brown. You may use this program, or
758 code or tables extracted from it, as desired without restriction.
759
760 First, the polynomial itself and its table of feedback terms. The
761 polynomial is
762 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
763 Note that we take it "backwards" and put the highest-order term in
764 the lowest-order bit. The X^32 term is "implied"; the LSB is the
765 X^31 term, etc. The X^0 term (usually shown as "+1") results in
766 the MSB being 1.
767
768 Note that the usual hardware shift register implementation, which
769 is what we're using (we're merely optimizing it by doing eight-bit
770 chunks at a time) shifts bits into the lowest-order term. In our
771 implementation, that means shifting towards the right. Why do we
772 do it this way? Because the calculated CRC must be transmitted in
773 order from highest-order term to lowest-order term. UARTs transmit
774 characters in order from LSB to MSB. By storing the CRC this way,
775 we hand it to the UART in the order low-byte to high-byte; the UART
 sends each low-bit to high-bit; and the result is transmission bit
777 by bit from highest- to lowest-order term without requiring any bit
778 shuffling on our part. Reception works similarly.
779
780 The feedback terms table consists of 256, 32-bit entries. Notes:
781
782 1. The table can be generated at runtime if desired; code to do so
783 is shown later. It might not be obvious, but the feedback
784 terms simply represent the results of eight shift/xor opera-
785 tions for all combinations of data and CRC register values.
786
787 2. The CRC accumulation logic is the same for all CRC polynomials,
788 be they sixteen or thirty-two bits wide. You simply choose the
789 appropriate table. Alternatively, because the table can be
790 generated at runtime, you can start by generating the table for
791 the polynomial in question and use exactly the same "updcrc",
792 if your application needn't simultaneously handle two CRC
793 polynomials. (Note, however, that XMODEM is strange.)
794
795 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
796 of course, 32-bit entries work OK if the high 16 bits are zero.
797
798 4. The values must be right-shifted by eight bits by the "updcrc"
799 logic; the shift must be unsigned (bring in zeroes). On some
800 hardware you could probably optimize the shift in assembler by
801 using byte-swap instructions.
802********************************************************************/
803
/* Feedback-term table for the byte-at-a-time CRC-32 update used by
 * binascii_crc32().  Entry i is the result of running byte value i through
 * eight shift/xor steps with the reflected polynomial 0xedb88320 (which is
 * also entry 128).  The table is never modified, hence const. */
static const unsigned long crc_32_tab[256] = {
0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
0x2d02ef8dUL
};
858
859static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000860binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000861{ /* By Jim Ahlstrom; All rights transferred to CNRI */
862 unsigned char *bin_data;
863 unsigned long crc = 0UL; /* initial value of CRC */
864 int len;
865
Guido van Rossum43713e52000-02-29 13:59:29 +0000866 if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000867 return NULL;
868
869 crc = crc ^ 0xFFFFFFFFUL;
870 while(len--)
871 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
872 /* Note: (crc >> 8) MUST zero fill on left */
873 return Py_BuildValue("l", crc ^ 0xFFFFFFFFUL);
874}
875
Barry Warsawe977c212000-08-15 06:07:13 +0000876
877static PyObject *
878binascii_hexlify(PyObject *self, PyObject *args)
879{
880 char* argbuf;
881 int arglen;
882 PyObject *retval;
883 char* retbuf;
884 int i, j;
885
886 if (!PyArg_ParseTuple(args, "t#:b2a_hex", &argbuf, &arglen))
887 return NULL;
888
889 retval = PyString_FromStringAndSize(NULL, arglen*2);
890 if (!retval)
891 return NULL;
892 retbuf = PyString_AsString(retval);
893 if (!retbuf)
894 goto finally;
895
896 /* make hex version of string, taken from shamodule.c */
897 for (i=j=0; i < arglen; i++) {
898 char c;
899 c = (argbuf[i] >> 4) & 0xf;
900 c = (c>9) ? c+'a'-10 : c + '0';
901 retbuf[j++] = c;
902 c = argbuf[i] & 0xf;
903 c = (c>9) ? c+'a'-10 : c + '0';
904 retbuf[j++] = c;
905 }
906 return retval;
907
908 finally:
909 Py_DECREF(retval);
910 return NULL;
911}
912
/* Docstring registered for both "b2a_hex" and "hexlify". */
static char doc_hexlify[] =
    "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n"
    "\n"
    "This function is also available as \"hexlify()\".";
917
918
/* Map one hexadecimal digit character (either case) to its value 0..15.
 * Returns -1 when `c` is not a hexadecimal digit. */
static int
to_int(int c)
{
    if (isdigit(c))
        return c - '0';
    if (isupper(c))
        c = tolower(c);
    return (c >= 'a' && c <= 'f') ? (c - 'a' + 10) : -1;
}
932
933
934static PyObject *
935binascii_unhexlify(PyObject *self, PyObject *args)
936{
937 char* argbuf;
938 int arglen;
939 PyObject *retval;
940 char* retbuf;
941 int i, j;
942
943 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
944 return NULL;
945
Barry Warsaw16168472000-08-15 06:59:58 +0000946 /* XXX What should we do about strings with an odd length? Should
947 * we add an implicit leading zero, or a trailing zero? For now,
948 * raise an exception.
Barry Warsawe977c212000-08-15 06:07:13 +0000949 */
950 if (arglen % 2) {
Barry Warsaw16168472000-08-15 06:59:58 +0000951 PyErr_SetString(PyExc_TypeError, "Odd-length string");
Barry Warsawe977c212000-08-15 06:07:13 +0000952 return NULL;
953 }
954
955 retval = PyString_FromStringAndSize(NULL, (arglen/2));
956 if (!retval)
957 return NULL;
958 retbuf = PyString_AsString(retval);
959 if (!retbuf)
960 goto finally;
961
962 for (i=j=0; i < arglen; i += 2) {
963 int top = to_int(Py_CHARMASK(argbuf[i]));
964 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
965 if (top == -1 || bot == -1) {
966 PyErr_SetString(PyExc_TypeError,
Barry Warsaw16168472000-08-15 06:59:58 +0000967 "Non-hexadecimal digit found");
Barry Warsawe977c212000-08-15 06:07:13 +0000968 goto finally;
969 }
970 retbuf[j++] = (top << 4) + bot;
971 }
972 return retval;
973
974 finally:
975 Py_DECREF(retval);
976 return NULL;
977}
978
/* Docstring registered for both "a2b_hex" and "unhexlify". */
static char doc_unhexlify[] =
    "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n"
    "\n"
    "hexstr must contain an even number of hex digits (upper or lower case).\n"
    "This function is also available as \"unhexlify()\"";
984
/* Value of each 7-bit character code as a hexadecimal digit, or -1 when the
 * character is not one.  '0'-'9' map to 0-9; 'A'-'F' and 'a'-'f' map to
 * 10-15.  The table is read-only, hence const. */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up a character in table_hex.  The table has only 128 entries, so
 * the argument must already be known to be below 128. */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Line-length limit applied by the quoted-printable encoder. */
#define MAXLINESIZE 76
999
1000static char doc_a2b_qp[] = "Decode a string of qp-encoded data";
1001
1002static PyObject*
1003binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1004{
1005 unsigned int in, out;
1006 char ch;
1007 unsigned char *data, *odata;
1008 unsigned int datalen = 0;
1009 PyObject *rv;
1010 static char *kwlist[] = {"data", "header", NULL};
1011 int header = 0;
1012
1013 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
1014 &datalen, &header))
1015 return NULL;
1016
1017 /* We allocate the output same size as input, this is overkill */
Greg Warda645b302001-10-04 14:54:53 +00001018 odata = (unsigned char *) calloc(1, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001019
1020 if (odata == NULL) {
1021 PyErr_NoMemory();
1022 return NULL;
1023 }
1024
1025 in = out = 0;
1026 while (in < datalen) {
1027 if (data[in] == '=') {
1028 in++;
1029 if (in >= datalen) break;
1030 /* Soft line breaks */
1031 if ((data[in] == '\n') || (data[in] == '\r') ||
1032 (data[in] == ' ') || (data[in] == '\t')) {
1033 if (data[in] != '\n') {
1034 while (in < datalen && data[in] != '\n') in++;
1035 }
1036 if (in < datalen) in++;
1037 }
1038 else if (data[in] == '=') {
1039 /* broken case from broken python qp */
1040 odata[out++] = '=';
1041 in++;
1042 }
1043 else if (((data[in] >= 'A' && data[in] <= 'F') ||
1044 (data[in] >= 'a' && data[in] <= 'f') ||
1045 (data[in] >= '0' && data[in] <= '9')) &&
1046 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1047 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1048 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1049 /* hexval */
1050 ch = hexval(data[in]) << 4;
1051 in++;
1052 ch |= hexval(data[in]);
1053 in++;
1054 odata[out++] = ch;
1055 }
1056 else {
1057 odata[out++] = '=';
1058 }
1059 }
1060 else if (header && data[in] == '_') {
1061 odata[out++] = ' ';
1062 in++;
1063 }
1064 else {
1065 odata[out] = data[in];
1066 in++;
1067 out++;
1068 }
1069 }
Greg Warda645b302001-10-04 14:54:53 +00001070 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001071 free (odata);
1072 return NULL;
1073 }
1074 free (odata);
1075 return rv;
1076}
1077
/* Store the two uppercase hexadecimal digits of `ch` into s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.  Always
 * returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0xf];
    s[1] = digits[ch & 0xf];
    return 0;
}
1088
/* Docstring registered for "b2a_qp". */
static char doc_b2a_qp[] =
    "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n"
    " Encode a string using quoted-printable encoding. \n"
    "\n"
    "On encoding, when istext is set, newlines are not encoded, and white \n"
    "space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n"
    "both encoded. When quotetabs is set, space and tabs are encoded.";
1096
1097/* XXX: This is ridiculously complicated to be backward compatible
1098 * (mostly) with the quopri module. It doesn't re-create the quopri
1099 * module bug where text ending in CRLF has the CR encoded */
1100static PyObject*
1101binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1102{
1103 unsigned int in, out;
1104 unsigned char *data, *odata;
1105 unsigned int datalen = 0, odatalen = 0;
1106 PyObject *rv;
1107 unsigned int linelen = 0;
1108 static char *kwlist[] = {"data", "quotetabs", "istext", "header", NULL};
1109 int istext = 1;
1110 int quotetabs = 0;
1111 int header = 0;
1112 unsigned char ch;
1113 int crlf = 0;
1114 unsigned char *p;
1115
1116 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
1117 &datalen, &quotetabs, &istext, &header))
1118 return NULL;
1119
1120 /* See if this string is using CRLF line ends */
1121 /* XXX: this function has the side effect of converting all of
1122 * the end of lines to be the same depending on this detection
1123 * here */
Greg Warda645b302001-10-04 14:54:53 +00001124 p = (unsigned char *) strchr((char *)data, '\n');
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001125 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1126 crlf = 1;
1127
1128 /* First, scan to see how many characters need to be encoded */
1129 in = 0;
1130 while (in < datalen) {
1131 if ((data[in] > 126) ||
1132 (data[in] == '=') ||
1133 (header && data[in] == '_') ||
1134 ((data[in] == '.') && (linelen == 1)) ||
1135 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1136 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1137 ((data[in] < 33) &&
1138 (data[in] != '\r') && (data[in] != '\n') &&
1139 (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1140 {
1141 if ((linelen + 3) >= MAXLINESIZE) {
1142 linelen = 0;
1143 if (crlf)
1144 odatalen += 3;
1145 else
1146 odatalen += 2;
1147 }
1148 linelen += 3;
1149 odatalen += 3;
1150 in++;
1151 }
1152 else {
1153 if (istext &&
1154 ((data[in] == '\n') ||
1155 ((in+1 < datalen) && (data[in] == '\r') &&
1156 (data[in+1] == '\n'))))
1157 {
1158 linelen = 0;
1159 /* Protect against whitespace on end of line */
1160 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1161 odatalen += 2;
1162 if (crlf)
1163 odatalen += 2;
1164 else
1165 odatalen += 1;
1166 if (data[in] == '\r')
1167 in += 2;
1168 else
1169 in++;
1170 }
1171 else {
1172 if ((in + 1 != datalen) &&
1173 (data[in+1] != '\n') &&
1174 (linelen + 1) >= MAXLINESIZE) {
1175 linelen = 0;
1176 if (crlf)
1177 odatalen += 3;
1178 else
1179 odatalen += 2;
1180 }
1181 linelen++;
1182 odatalen++;
1183 in++;
1184 }
1185 }
1186 }
1187
Greg Warda645b302001-10-04 14:54:53 +00001188 odata = (unsigned char *) calloc(1, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001189
1190 if (odata == NULL) {
1191 PyErr_NoMemory();
1192 return NULL;
1193 }
1194
1195 in = out = linelen = 0;
1196 while (in < datalen) {
1197 if ((data[in] > 126) ||
1198 (data[in] == '=') ||
1199 (header && data[in] == '_') ||
1200 ((data[in] == '.') && (linelen == 1)) ||
1201 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1202 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1203 ((data[in] < 33) &&
1204 (data[in] != '\r') && (data[in] != '\n') &&
1205 (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1206 {
1207 if ((linelen + 3 )>= MAXLINESIZE) {
1208 odata[out++] = '=';
1209 if (crlf) odata[out++] = '\r';
1210 odata[out++] = '\n';
1211 linelen = 0;
1212 }
1213 odata[out++] = '=';
1214 to_hex(data[in], &odata[out]);
1215 out += 2;
1216 in++;
1217 linelen += 3;
1218 }
1219 else {
1220 if (istext &&
1221 ((data[in] == '\n') ||
1222 ((in+1 < datalen) && (data[in] == '\r') &&
1223 (data[in+1] == '\n'))))
1224 {
1225 linelen = 0;
1226 /* Protect against whitespace on end of line */
1227 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1228 ch = odata[out-1];
1229 odata[out-1] = '=';
1230 to_hex(ch, &odata[out]);
1231 out += 2;
1232 }
1233
1234 if (crlf) odata[out++] = '\r';
1235 odata[out++] = '\n';
1236 if (data[in] == '\r')
1237 in += 2;
1238 else
1239 in++;
1240 }
1241 else {
1242 if ((in + 1 != datalen) &&
1243 (data[in+1] != '\n') &&
1244 (linelen + 1) >= MAXLINESIZE) {
1245 odata[out++] = '=';
1246 if (crlf) odata[out++] = '\r';
1247 odata[out++] = '\n';
1248 linelen = 0;
1249 }
1250 linelen++;
1251 if (header && data[in] == ' ') {
1252 odata[out++] = '_';
1253 in++;
1254 }
1255 else {
1256 odata[out++] = data[in++];
1257 }
1258 }
1259 }
1260 }
Greg Warda645b302001-10-04 14:54:53 +00001261 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001262 free (odata);
1263 return NULL;
1264 }
1265 free (odata);
1266 return rv;
1267}
Barry Warsawe977c212000-08-15 06:07:13 +00001268
Jack Jansen72781191995-08-07 14:34:15 +00001269/* List of functions defined in the module */
1270
1271static struct PyMethodDef binascii_module_methods[] = {
Barry Warsawe977c212000-08-15 06:07:13 +00001272 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1273 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1274 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1275 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1276 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1277 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1278 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1279 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1280 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1281 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1282 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1283 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1284 doc_rledecode_hqx},
1285 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1286 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001287 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1288 doc_a2b_qp},
1289 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1290 doc_b2a_qp},
Barry Warsawe977c212000-08-15 06:07:13 +00001291 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001292};
1293
1294
1295/* Initialization function for the module (*must* be called initbinascii) */
1296static char doc_binascii[] = "Conversion between binary data and ASCII";
1297
Guido van Rossum3886bb61998-12-04 18:50:17 +00001298DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001299initbinascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001300{
1301 PyObject *m, *d, *x;
1302
1303 /* Create the module and add the functions */
1304 m = Py_InitModule("binascii", binascii_module_methods);
1305
1306 d = PyModule_GetDict(m);
1307 x = PyString_FromString(doc_binascii);
1308 PyDict_SetItemString(d, "__doc__", x);
Guido van Rossum5c159bd1997-08-04 23:55:25 +00001309 Py_XDECREF(x);
Jack Jansen72781191995-08-07 14:34:15 +00001310
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001311 Error = PyErr_NewException("binascii.Error", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001312 PyDict_SetItemString(d, "Error", Error);
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001313 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001314 PyDict_SetItemString(d, "Incomplete", Incomplete);
Jack Jansen72781191995-08-07 14:34:15 +00001315}