/*
** Routines to represent binary data in ASCII and vice-versa
**
** This module currently supports the following encodings:
** uuencode:
**      each line encodes 45 bytes (except possibly the last)
**      First char encodes (binary) length, rest data
**      each char encodes 6 bits, as follows:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**      short binary data is zero-extended (so the bits are always in the
**      right place), this does *not* reflect in the length.
** base64:
**      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in similar order as uuencode/hqx. Encoding
**      is done via a table.
**      Short binary data is filled (in ASCII) with '='.
** hqx:
**      File starts with introductory text, real data starts and ends
**      with colons.
**      Data consists of three similar parts: info, datafork, resourcefork.
**      Each part is protected (at the end) with a 16-bit crc
**      The binary data is run-length encoded, and then ascii-fied:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding is table-driven, see the code.
**      Short binary data results in the runt ascii-byte being output with
**      the bits in the right place.
**
** While I was reading dozens of programs that encode or decode the formats
** here (documentation? hihi:-) I have formulated Jansen's Observation:
**
**      Programs that encode binary data in ASCII are written in
**      such a style that they are as unreadable as possible. Devices used
**      include unnecessary global variables, burying important tables
**      in unrelated sourcefiles, putting functions in include files,
**      using seemingly-descriptive variable names for different purposes,
**      calls to empty subroutines and a host of others.
**
** I have attempted to break with this tradition, but I guess that that
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on rfc 1521 et al
** quoted-printable encoding specifies that non printable characters (anything
** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
** of the character.  It also specifies some other behavior to enable 8bit data
** in a mail message with little difficulty (maximum line sizes, protecting
** some cases of whitespace, etc).
**
** Brandon Long, September 2001.
*/
55
56
57#include "Python.h"
58
59static PyObject *Error;
60static PyObject *Incomplete;
61
62/*
63** hqx lookup table, ascii->binary.
64*/
65
/* Byte that introduces a run-length sequence in hqx RLE data. */
#define RUNCHAR 0x90

/* Special (non-data) values stored in table_a2b_hqx. */
#define DONE 0x7F   /* the terminating ':' */
#define SKIP 0x7E   /* '\n' and '\r': silently ignored */
#define FAIL 0x7D   /* not a legal hqx character */

/*
** Indexed by input byte; yields the 6-bit value (0x00..0x3F) for the 64
** characters of the hqx alphabet, or one of DONE/SKIP/FAIL for anything
** else.  The leading comment on each row is the index of its first entry.
*/
static unsigned char table_a2b_hqx[256] = {
    /* 0x00 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x08 */ FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
    /* 0x10 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x18 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* ' '  */ FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    /* '('  */ 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
    /* '0'  */ 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
    /* '8'  */ 0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* '@'  */ 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
    /* 'H'  */ 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
    /* 'P'  */ 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
    /* 'X'  */ 0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
    /* '`'  */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
    /* 'h'  */ 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
    /* 'p'  */ 0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 'x'  */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x80 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x88 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x90 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x98 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0xA0 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0xA8 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0xB0 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0xB8 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0xC0 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0xC8 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0xD0 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0xD8 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0xE0 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0xE8 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0xF0 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0xF8 */ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
122
/*
** hqx alphabet, binary->ascii: entry i is the character that encodes the
** 6-bit value i.  The literal is split where the alphabet skips characters.
*/
static unsigned char table_b2a_hqx[] =
    "!\"#$%&'()*+,-"      /*  0..12 */
    "012345689"           /* 13..21 */
    "@ABCDEFGHIJKLMN"     /* 22..36 */
    "PQRSTUV"             /* 37..43 */
    "XYZ["                /* 44..47 */
    "`abcdef"             /* 48..54 */
    "hijklm"              /* 55..60 */
    "pqr";                /* 61..63 */
Jack Jansen72781191995-08-07 14:34:15 +0000125
/*
** base64 lookup table, ascii->binary, covering the 7-bit range only.
** Entry c is the 6-bit value of character c, or -1 when c is not part of
** the base64 alphabet.  Note that the pad character '=' maps to 0.
** The leading comment on each row is the index of its first entry.
*/
static char table_a2b_base64[] = {
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x08 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x18 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* ' '  */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* '('  */ -1, -1, -1, 62, -1, -1, -1, 63,
    /* '0'  */ 52, 53, 54, 55, 56, 57, 58, 59,
    /* '8'  */ 60, 61, -1, -1, -1,  0, -1, -1,
    /* '@'  */ -1,  0,  1,  2,  3,  4,  5,  6,
    /* 'H'  */  7,  8,  9, 10, 11, 12, 13, 14,
    /* 'P'  */ 15, 16, 17, 18, 19, 20, 21, 22,
    /* 'X'  */ 23, 24, 25, -1, -1, -1, -1, -1,
    /* '`'  */ -1, 26, 27, 28, 29, 30, 31, 32,
    /* 'h'  */ 33, 34, 35, 36, 37, 38, 39, 40,
    /* 'p'  */ 41, 42, 43, 44, 45, 46, 47, 48,
    /* 'x'  */ 49, 50, 51, -1, -1, -1, -1, -1
};

#define BASE64_PAD '='
#define BASE64_MAXBIN 57        /* Max binary chunk size (76 char line) */
139
/*
** base64 alphabet, binary->ascii: entry i is the character that encodes
** the 6-bit value i.
*/
static unsigned char table_b2a_base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"   /*  0..25 */
    "abcdefghijklmnopqrstuvwxyz"   /* 26..51 */
    "0123456789"                   /* 52..61 */
    "+/";                          /* 62..63 */
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000142
143
144
/*
** Byte-at-a-time lookup table for the 16-bit CRC used by hqx.
** The leading comment on each row is the index of its first entry.
*/
static unsigned short crctab_hqx[256] = {
    /* 0x00 */ 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    /* 0x08 */ 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    /* 0x10 */ 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    /* 0x18 */ 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    /* 0x20 */ 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    /* 0x28 */ 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    /* 0x30 */ 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    /* 0x38 */ 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    /* 0x40 */ 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    /* 0x48 */ 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    /* 0x50 */ 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    /* 0x58 */ 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    /* 0x60 */ 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    /* 0x68 */ 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    /* 0x70 */ 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    /* 0x78 */ 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    /* 0x80 */ 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    /* 0x88 */ 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    /* 0x90 */ 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    /* 0x98 */ 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    /* 0xA0 */ 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    /* 0xA8 */ 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    /* 0xB0 */ 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    /* 0xB8 */ 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    /* 0xC0 */ 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    /* 0xC8 */ 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    /* 0xD0 */ 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    /* 0xD8 */ 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    /* 0xE0 */ 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    /* 0xE8 */ 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    /* 0xF0 */ 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    /* 0xF8 */ 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
179
180static char doc_a2b_uu[] = "(ascii) -> bin. Decode a line of uuencoded data";
181
182static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000183binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000184{
185 unsigned char *ascii_data, *bin_data;
186 int leftbits = 0;
187 unsigned char this_ch;
188 unsigned int leftchar = 0;
189 PyObject *rv;
190 int ascii_len, bin_len;
191
Guido van Rossum43713e52000-02-29 13:59:29 +0000192 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000193 return NULL;
194
195 /* First byte: binary data length (in bytes) */
196 bin_len = (*ascii_data++ - ' ') & 077;
197 ascii_len--;
198
199 /* Allocate the buffer */
200 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
201 return NULL;
202 bin_data = (unsigned char *)PyString_AsString(rv);
203
204 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
205 this_ch = *ascii_data;
206 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
207 /*
208 ** Whitespace. Assume some spaces got eaten at
209 ** end-of-line. (We check this later)
210 */
211 this_ch = 0;
212 } else {
Jack Jansen5d957971995-11-14 10:35:19 +0000213 /* Check the character for legality
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000214 ** The 64 in stead of the expected 63 is because
215 ** there are a few uuencodes out there that use
Guido van Rossum92d89172001-01-09 02:11:57 +0000216 ** '`' as zero instead of space.
Jack Jansen5d957971995-11-14 10:35:19 +0000217 */
218 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Jack Jansen72781191995-08-07 14:34:15 +0000219 PyErr_SetString(Error, "Illegal char");
220 Py_DECREF(rv);
221 return NULL;
222 }
223 this_ch = (this_ch - ' ') & 077;
224 }
225 /*
226 ** Shift it in on the low end, and see if there's
227 ** a byte ready for output.
228 */
229 leftchar = (leftchar << 6) | (this_ch);
230 leftbits += 6;
231 if ( leftbits >= 8 ) {
232 leftbits -= 8;
233 *bin_data++ = (leftchar >> leftbits) & 0xff;
234 leftchar &= ((1 << leftbits) - 1);
235 bin_len--;
236 }
237 }
238 /*
239 ** Finally, check that if there's anything left on the line
240 ** that it's whitespace only.
241 */
242 while( ascii_len-- > 0 ) {
243 this_ch = *ascii_data++;
Guido van Rossum92d89172001-01-09 02:11:57 +0000244 /* Extra '`' may be written as padding in some cases */
245 if ( this_ch != ' ' && this_ch != ' '+64 &&
Guido van Rossum1243ae71997-07-11 18:36:28 +0000246 this_ch != '\n' && this_ch != '\r' ) {
Jack Jansen72781191995-08-07 14:34:15 +0000247 PyErr_SetString(Error, "Trailing garbage");
248 Py_DECREF(rv);
249 return NULL;
250 }
251 }
252 return rv;
253}
254
255static char doc_b2a_uu[] = "(bin) -> ascii. Uuencode line of data";
256
257static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000258binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000259{
260 unsigned char *ascii_data, *bin_data;
261 int leftbits = 0;
262 unsigned char this_ch;
263 unsigned int leftchar = 0;
264 PyObject *rv;
265 int bin_len;
266
Guido van Rossum43713e52000-02-29 13:59:29 +0000267 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000268 return NULL;
269 if ( bin_len > 45 ) {
270 /* The 45 is a limit that appears in all uuencode's */
271 PyErr_SetString(Error, "At most 45 bytes at once");
272 return NULL;
273 }
274
275 /* We're lazy and allocate to much (fixed up later) */
276 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2)) == NULL )
277 return NULL;
278 ascii_data = (unsigned char *)PyString_AsString(rv);
279
280 /* Store the length */
281 *ascii_data++ = ' ' + (bin_len & 077);
282
283 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
284 /* Shift the data (or padding) into our buffer */
285 if ( bin_len > 0 ) /* Data */
286 leftchar = (leftchar << 8) | *bin_data;
287 else /* Padding */
288 leftchar <<= 8;
289 leftbits += 8;
290
291 /* See if there are 6-bit groups ready */
292 while ( leftbits >= 6 ) {
293 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
294 leftbits -= 6;
295 *ascii_data++ = this_ch + ' ';
296 }
297 }
298 *ascii_data++ = '\n'; /* Append a courtesy newline */
299
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000300 _PyString_Resize(&rv, (ascii_data -
301 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000302 return rv;
303}
304
Guido van Rossum2db4f471999-10-19 19:05:14 +0000305
306static int
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000307binascii_find_valid(unsigned char *s, int slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000308{
309 /* Finds & returns the (num+1)th
310 ** valid character for base64, or -1 if none.
311 */
312
313 int ret = -1;
314 unsigned char c, b64val;
315
316 while ((slen > 0) && (ret == -1)) {
317 c = *s;
318 b64val = table_a2b_base64[c & 0x7f];
319 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
320 if (num == 0)
321 ret = *s;
322 num--;
323 }
324
325 s++;
326 slen--;
327 }
328 return ret;
329}
330
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000331static char doc_a2b_base64[] = "(ascii) -> bin. Decode a line of base64 data";
332
333static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000334binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000335{
336 unsigned char *ascii_data, *bin_data;
337 int leftbits = 0;
338 unsigned char this_ch;
339 unsigned int leftchar = 0;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000340 PyObject *rv;
341 int ascii_len, bin_len;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000342 int quad_pos = 0;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000343
Guido van Rossum43713e52000-02-29 13:59:29 +0000344 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000345 return NULL;
346
Peter Schneider-Kampd895b202001-06-07 05:51:36 +0000347 if ( ascii_len == 0) {
348 PyErr_SetString(Error, "Cannot decode empty input");
349 return NULL;
350 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000351 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
352
353 /* Allocate the buffer */
354 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
355 return NULL;
356 bin_data = (unsigned char *)PyString_AsString(rv);
357 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000358
359 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
360 this_ch = *ascii_data;
361
362 if (this_ch > 0x7f ||
363 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
Jack Jansenba1de3b1996-01-22 10:47:15 +0000364 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000365
366 /* Check for pad sequences and ignore
367 ** the invalid ones.
368 */
369 if (this_ch == BASE64_PAD) {
370 if ( (quad_pos < 2) ||
371 ((quad_pos == 2) &&
372 (binascii_find_valid(ascii_data, ascii_len, 1)
373 != BASE64_PAD)) )
374 {
375 continue;
376 }
377 else {
378 /* A pad sequence means no more input.
379 ** We've already interpreted the data
380 ** from the quad at this point.
381 */
382 leftbits = 0;
383 break;
384 }
385 }
386
387 this_ch = table_a2b_base64[*ascii_data];
388 if ( this_ch == (unsigned char) -1 )
389 continue;
390
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000391 /*
392 ** Shift it in on the low end, and see if there's
393 ** a byte ready for output.
394 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000395 quad_pos = (quad_pos + 1) & 0x03;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000396 leftchar = (leftchar << 6) | (this_ch);
397 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000398
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000399 if ( leftbits >= 8 ) {
400 leftbits -= 8;
401 *bin_data++ = (leftchar >> leftbits) & 0xff;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000402 bin_len++;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000403 leftchar &= ((1 << leftbits) - 1);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000404 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000405 }
406
407 if (leftbits != 0) {
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000408 PyErr_SetString(Error, "Incorrect padding");
409 Py_DECREF(rv);
410 return NULL;
411 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000412
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000413 /* and set string size correctly */
414 _PyString_Resize(&rv, bin_len);
415 return rv;
416}
417
418static char doc_b2a_base64[] = "(bin) -> ascii. Base64-code line of data";
419
420static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000421binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000422{
423 unsigned char *ascii_data, *bin_data;
424 int leftbits = 0;
425 unsigned char this_ch;
426 unsigned int leftchar = 0;
427 PyObject *rv;
428 int bin_len;
429
Guido van Rossum43713e52000-02-29 13:59:29 +0000430 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000431 return NULL;
432 if ( bin_len > BASE64_MAXBIN ) {
433 PyErr_SetString(Error, "Too much data for base64 line");
434 return NULL;
435 }
436
437 /* We're lazy and allocate to much (fixed up later) */
438 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2)) == NULL )
439 return NULL;
440 ascii_data = (unsigned char *)PyString_AsString(rv);
441
442 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
443 /* Shift the data into our buffer */
444 leftchar = (leftchar << 8) | *bin_data;
445 leftbits += 8;
446
447 /* See if there are 6-bit groups ready */
448 while ( leftbits >= 6 ) {
449 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
450 leftbits -= 6;
451 *ascii_data++ = table_b2a_base64[this_ch];
452 }
453 }
454 if ( leftbits == 2 ) {
455 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
456 *ascii_data++ = BASE64_PAD;
457 *ascii_data++ = BASE64_PAD;
458 } else if ( leftbits == 4 ) {
459 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
460 *ascii_data++ = BASE64_PAD;
461 }
462 *ascii_data++ = '\n'; /* Append a courtesy newline */
463
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000464 _PyString_Resize(&rv, (ascii_data -
465 (unsigned char *)PyString_AsString(rv)));
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000466 return rv;
467}
468
Jack Jansen72781191995-08-07 14:34:15 +0000469static char doc_a2b_hqx[] = "ascii -> bin, done. Decode .hqx coding";
470
471static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000472binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000473{
474 unsigned char *ascii_data, *bin_data;
475 int leftbits = 0;
476 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000477 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000478 PyObject *rv;
479 int len;
480 int done = 0;
481
Guido van Rossum43713e52000-02-29 13:59:29 +0000482 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000483 return NULL;
484
485 /* Allocate a string that is too big (fixed later) */
486 if ( (rv=PyString_FromStringAndSize(NULL, len)) == NULL )
487 return NULL;
488 bin_data = (unsigned char *)PyString_AsString(rv);
489
490 for( ; len > 0 ; len--, ascii_data++ ) {
491 /* Get the byte and look it up */
492 this_ch = table_a2b_hqx[*ascii_data];
493 if ( this_ch == SKIP )
494 continue;
495 if ( this_ch == FAIL ) {
496 PyErr_SetString(Error, "Illegal char");
497 Py_DECREF(rv);
498 return NULL;
499 }
500 if ( this_ch == DONE ) {
501 /* The terminating colon */
502 done = 1;
503 break;
504 }
505
506 /* Shift it into the buffer and see if any bytes are ready */
507 leftchar = (leftchar << 6) | (this_ch);
508 leftbits += 6;
509 if ( leftbits >= 8 ) {
510 leftbits -= 8;
511 *bin_data++ = (leftchar >> leftbits) & 0xff;
512 leftchar &= ((1 << leftbits) - 1);
513 }
514 }
515
516 if ( leftbits && !done ) {
517 PyErr_SetString(Incomplete,
518 "String has incomplete number of bytes");
519 Py_DECREF(rv);
520 return NULL;
521 }
Guido van Rossum9c6ba5e1997-01-12 20:02:04 +0000522 _PyString_Resize(
523 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
524 if (rv) {
525 PyObject *rrv = Py_BuildValue("Oi", rv, done);
526 Py_DECREF(rv);
527 return rrv;
528 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000529
Jack Jansen72781191995-08-07 14:34:15 +0000530 return NULL;
531}
532
533static char doc_rlecode_hqx[] = "Binhex RLE-code binary data";
534
535static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000536binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000537{
538 unsigned char *in_data, *out_data;
539 PyObject *rv;
540 unsigned char ch;
541 int in, inend, len;
542
Guido van Rossum43713e52000-02-29 13:59:29 +0000543 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000544 return NULL;
545
546 /* Worst case: output is twice as big as input (fixed later) */
547 if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL )
548 return NULL;
549 out_data = (unsigned char *)PyString_AsString(rv);
550
551 for( in=0; in<len; in++) {
552 ch = in_data[in];
553 if ( ch == RUNCHAR ) {
554 /* RUNCHAR. Escape it. */
555 *out_data++ = RUNCHAR;
556 *out_data++ = 0;
557 } else {
558 /* Check how many following are the same */
559 for(inend=in+1;
560 inend<len && in_data[inend] == ch &&
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000561 inend < in+255;
Jack Jansen72781191995-08-07 14:34:15 +0000562 inend++) ;
Jack Jansen0223aa11995-08-31 13:44:23 +0000563 if ( inend - in > 3 ) {
564 /* More than 3 in a row. Output RLE. */
Jack Jansen72781191995-08-07 14:34:15 +0000565 *out_data++ = ch;
566 *out_data++ = RUNCHAR;
567 *out_data++ = inend-in;
568 in = inend-1;
569 } else {
570 /* Less than 3. Output the byte itself */
571 *out_data++ = ch;
572 }
573 }
574 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000575 _PyString_Resize(&rv, (out_data -
576 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000577 return rv;
578}
579
580static char doc_b2a_hqx[] = "Encode .hqx data";
581
582static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000583binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000584{
585 unsigned char *ascii_data, *bin_data;
586 int leftbits = 0;
587 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000588 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000589 PyObject *rv;
590 int len;
591
Guido van Rossum43713e52000-02-29 13:59:29 +0000592 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000593 return NULL;
594
595 /* Allocate a buffer that is at least large enough */
596 if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL )
597 return NULL;
598 ascii_data = (unsigned char *)PyString_AsString(rv);
599
600 for( ; len > 0 ; len--, bin_data++ ) {
601 /* Shift into our buffer, and output any 6bits ready */
602 leftchar = (leftchar << 8) | *bin_data;
603 leftbits += 8;
604 while ( leftbits >= 6 ) {
605 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
606 leftbits -= 6;
607 *ascii_data++ = table_b2a_hqx[this_ch];
608 }
609 }
610 /* Output a possible runt byte */
611 if ( leftbits ) {
612 leftchar <<= (6-leftbits);
613 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
614 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000615 _PyString_Resize(&rv, (ascii_data -
616 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000617 return rv;
618}
619
620static char doc_rledecode_hqx[] = "Decode hexbin RLE-coded string";
621
622static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000623binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000624{
625 unsigned char *in_data, *out_data;
626 unsigned char in_byte, in_repeat;
627 PyObject *rv;
628 int in_len, out_len, out_len_left;
629
Guido van Rossum43713e52000-02-29 13:59:29 +0000630 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000631 return NULL;
632
633 /* Empty string is a special case */
634 if ( in_len == 0 )
635 return Py_BuildValue("s", "");
636
637 /* Allocate a buffer of reasonable size. Resized when needed */
638 out_len = in_len*2;
639 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
640 return NULL;
641 out_len_left = out_len;
642 out_data = (unsigned char *)PyString_AsString(rv);
643
644 /*
645 ** We need two macros here to get/put bytes and handle
646 ** end-of-buffer for input and output strings.
647 */
648#define INBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000649 do { \
650 if ( --in_len < 0 ) { \
651 PyErr_SetString(Incomplete, ""); \
652 Py_DECREF(rv); \
653 return NULL; \
654 } \
655 b = *in_data++; \
656 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000657
658#define OUTBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000659 do { \
660 if ( --out_len_left < 0 ) { \
661 _PyString_Resize(&rv, 2*out_len); \
662 if ( rv == NULL ) return NULL; \
663 out_data = (unsigned char *)PyString_AsString(rv) \
664 + out_len; \
665 out_len_left = out_len-1; \
666 out_len = out_len * 2; \
667 } \
668 *out_data++ = b; \
669 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000670
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000671 /*
672 ** Handle first byte separately (since we have to get angry
673 ** in case of an orphaned RLE code).
674 */
675 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000676
677 if (in_byte == RUNCHAR) {
678 INBYTE(in_repeat);
679 if (in_repeat != 0) {
680 /* Note Error, not Incomplete (which is at the end
681 ** of the string only). This is a programmer error.
682 */
683 PyErr_SetString(Error, "Orphaned RLE code at start");
684 Py_DECREF(rv);
685 return NULL;
686 }
687 OUTBYTE(RUNCHAR);
688 } else {
689 OUTBYTE(in_byte);
690 }
691
692 while( in_len > 0 ) {
693 INBYTE(in_byte);
694
695 if (in_byte == RUNCHAR) {
696 INBYTE(in_repeat);
697 if ( in_repeat == 0 ) {
698 /* Just an escaped RUNCHAR value */
699 OUTBYTE(RUNCHAR);
700 } else {
701 /* Pick up value and output a sequence of it */
702 in_byte = out_data[-1];
703 while ( --in_repeat > 0 )
704 OUTBYTE(in_byte);
705 }
706 } else {
707 /* Normal byte */
708 OUTBYTE(in_byte);
709 }
710 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000711 _PyString_Resize(&rv, (out_data -
712 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000713 return rv;
714}
715
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000716static char doc_crc_hqx[] =
717"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally";
Jack Jansen72781191995-08-07 14:34:15 +0000718
719static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000720binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000721{
722 unsigned char *bin_data;
723 unsigned int crc;
724 int len;
725
Guido van Rossum43713e52000-02-29 13:59:29 +0000726 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
Jack Jansen72781191995-08-07 14:34:15 +0000727 return NULL;
728
729 while(len--) {
730 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
731 }
732
733 return Py_BuildValue("i", crc);
734}
735
/* Documentation string for the crc32 function. */
static char doc_crc32[] =
    "(data, oldcrc = 0) -> newcrc. "
    "Compute CRC-32 incrementally";
738
739/* Crc - 32 BIT ANSI X3.66 CRC checksum files
740 Also known as: ISO 3307
741**********************************************************************|
742* *|
743* Demonstration program to compute the 32-bit CRC used as the frame *|
744* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
745* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
746* protocol). The 32-bit FCS was added via the Federal Register, *|
747* 1 June 1982, p.23798. I presume but don't know for certain that *|
748* this polynomial is or will be included in CCITT V.41, which *|
749* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
750* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
751* errors by a factor of 10^-5 over 16-bit FCS. *|
752* *|
753**********************************************************************|
754
755 Copyright (C) 1986 Gary S. Brown. You may use this program, or
756 code or tables extracted from it, as desired without restriction.
757
758 First, the polynomial itself and its table of feedback terms. The
759 polynomial is
760 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
761 Note that we take it "backwards" and put the highest-order term in
762 the lowest-order bit. The X^32 term is "implied"; the LSB is the
763 X^31 term, etc. The X^0 term (usually shown as "+1") results in
764 the MSB being 1.
765
766 Note that the usual hardware shift register implementation, which
767 is what we're using (we're merely optimizing it by doing eight-bit
768 chunks at a time) shifts bits into the lowest-order term. In our
769 implementation, that means shifting towards the right. Why do we
770 do it this way? Because the calculated CRC must be transmitted in
771 order from highest-order term to lowest-order term. UARTs transmit
772 characters in order from LSB to MSB. By storing the CRC this way,
773 we hand it to the UART in the order low-byte to high-byte; the UART
    sends each low-bit to high-bit; and the result is transmission bit
775 by bit from highest- to lowest-order term without requiring any bit
776 shuffling on our part. Reception works similarly.
777
778 The feedback terms table consists of 256, 32-bit entries. Notes:
779
780 1. The table can be generated at runtime if desired; code to do so
781 is shown later. It might not be obvious, but the feedback
782 terms simply represent the results of eight shift/xor opera-
783 tions for all combinations of data and CRC register values.
784
785 2. The CRC accumulation logic is the same for all CRC polynomials,
786 be they sixteen or thirty-two bits wide. You simply choose the
787 appropriate table. Alternatively, because the table can be
788 generated at runtime, you can start by generating the table for
789 the polynomial in question and use exactly the same "updcrc",
790 if your application needn't simultaneously handle two CRC
791 polynomials. (Note, however, that XMODEM is strange.)
792
793 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
794 of course, 32-bit entries work OK if the high 16 bits are zero.
795
796 4. The values must be right-shifted by eight bits by the "updcrc"
797 logic; the shift must be unsigned (bring in zeroes). On some
798 hardware you could probably optimize the shift in assembler by
799 using byte-swap instructions.
800********************************************************************/
801
/* CRC-32 feedback terms: 256 entries, one per possible byte value, used
 * as a lookup table by the byte-at-a-time CRC loop.  Each entry is the
 * result of eight shift/xor steps with the reflected polynomial
 * 0xedb88320 (which is also entry 128).  The table is never written to,
 * hence const. */
static const unsigned long crc_32_tab[256] = {
    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
    0x2d02ef8dUL
};
856
857static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000858binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000859{ /* By Jim Ahlstrom; All rights transferred to CNRI */
860 unsigned char *bin_data;
861 unsigned long crc = 0UL; /* initial value of CRC */
862 int len;
863
Guido van Rossum43713e52000-02-29 13:59:29 +0000864 if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000865 return NULL;
866
867 crc = crc ^ 0xFFFFFFFFUL;
868 while(len--)
869 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
870 /* Note: (crc >> 8) MUST zero fill on left */
871 return Py_BuildValue("l", crc ^ 0xFFFFFFFFUL);
872}
873
Barry Warsawe977c212000-08-15 06:07:13 +0000874
875static PyObject *
876binascii_hexlify(PyObject *self, PyObject *args)
877{
878 char* argbuf;
879 int arglen;
880 PyObject *retval;
881 char* retbuf;
882 int i, j;
883
884 if (!PyArg_ParseTuple(args, "t#:b2a_hex", &argbuf, &arglen))
885 return NULL;
886
887 retval = PyString_FromStringAndSize(NULL, arglen*2);
888 if (!retval)
889 return NULL;
890 retbuf = PyString_AsString(retval);
891 if (!retbuf)
892 goto finally;
893
894 /* make hex version of string, taken from shamodule.c */
895 for (i=j=0; i < arglen; i++) {
896 char c;
897 c = (argbuf[i] >> 4) & 0xf;
898 c = (c>9) ? c+'a'-10 : c + '0';
899 retbuf[j++] = c;
900 c = argbuf[i] & 0xf;
901 c = (c>9) ? c+'a'-10 : c + '0';
902 retbuf[j++] = c;
903 }
904 return retval;
905
906 finally:
907 Py_DECREF(retval);
908 return NULL;
909}
910
/* Docstring shared by the "b2a_hex" and "hexlify" method-table entries. */
static char doc_hexlify[] =
    "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n"
    "\n"
    "This function is also available as \"hexlify()\".";
915
916
/* Map one hexadecimal digit character to its value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for any other
 * value of c.  Explicit range comparisons are used instead of the
 * <ctype.h> classifiers so the result does not depend on the current
 * locale and is well defined for every int (the ctype functions are
 * undefined for values outside unsigned char / EOF). */
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
930
931
932static PyObject *
933binascii_unhexlify(PyObject *self, PyObject *args)
934{
935 char* argbuf;
936 int arglen;
937 PyObject *retval;
938 char* retbuf;
939 int i, j;
940
941 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
942 return NULL;
943
Barry Warsaw16168472000-08-15 06:59:58 +0000944 /* XXX What should we do about strings with an odd length? Should
945 * we add an implicit leading zero, or a trailing zero? For now,
946 * raise an exception.
Barry Warsawe977c212000-08-15 06:07:13 +0000947 */
948 if (arglen % 2) {
Barry Warsaw16168472000-08-15 06:59:58 +0000949 PyErr_SetString(PyExc_TypeError, "Odd-length string");
Barry Warsawe977c212000-08-15 06:07:13 +0000950 return NULL;
951 }
952
953 retval = PyString_FromStringAndSize(NULL, (arglen/2));
954 if (!retval)
955 return NULL;
956 retbuf = PyString_AsString(retval);
957 if (!retbuf)
958 goto finally;
959
960 for (i=j=0; i < arglen; i += 2) {
961 int top = to_int(Py_CHARMASK(argbuf[i]));
962 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
963 if (top == -1 || bot == -1) {
964 PyErr_SetString(PyExc_TypeError,
Barry Warsaw16168472000-08-15 06:59:58 +0000965 "Non-hexadecimal digit found");
Barry Warsawe977c212000-08-15 06:07:13 +0000966 goto finally;
967 }
968 retbuf[j++] = (top << 4) + bot;
969 }
970 return retval;
971
972 finally:
973 Py_DECREF(retval);
974 return NULL;
975}
976
/* Docstring shared by the "a2b_hex" and "unhexlify" method-table entries. */
static char doc_unhexlify[] =
    "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n"
    "\n"
    "hexstr must contain an even number of hex digits (upper or lower case).\n"
    "This function is also available as \"unhexlify()\"";
982
/* Value of each 7-bit character code as a hexadecimal digit, or -1 when
 * the character is not a hex digit.  Both 'A'-'F' and 'a'-'f' map to
 * 10..15.  Read-only, hence const. */
static const int table_hex[128] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up a hex digit's value.  The table has only 128 entries, so c
 * must be in the range 0..127; the macro does not check this. */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Line-length limit used by the quoted-printable encoder when deciding
 * where to insert soft line breaks. */
#define MAXLINESIZE 76

static char doc_a2b_qp[] = "Decode a string of qp-encoded data";
999
1000static PyObject*
1001binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1002{
1003 unsigned int in, out;
1004 char ch;
1005 unsigned char *data, *odata;
1006 unsigned int datalen = 0;
1007 PyObject *rv;
1008 static char *kwlist[] = {"data", "header", NULL};
1009 int header = 0;
1010
1011 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
1012 &datalen, &header))
1013 return NULL;
1014
1015 /* We allocate the output same size as input, this is overkill */
Greg Warda645b302001-10-04 14:54:53 +00001016 odata = (unsigned char *) calloc(1, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001017
1018 if (odata == NULL) {
1019 PyErr_NoMemory();
1020 return NULL;
1021 }
1022
1023 in = out = 0;
1024 while (in < datalen) {
1025 if (data[in] == '=') {
1026 in++;
1027 if (in >= datalen) break;
1028 /* Soft line breaks */
1029 if ((data[in] == '\n') || (data[in] == '\r') ||
1030 (data[in] == ' ') || (data[in] == '\t')) {
1031 if (data[in] != '\n') {
1032 while (in < datalen && data[in] != '\n') in++;
1033 }
1034 if (in < datalen) in++;
1035 }
1036 else if (data[in] == '=') {
1037 /* broken case from broken python qp */
1038 odata[out++] = '=';
1039 in++;
1040 }
1041 else if (((data[in] >= 'A' && data[in] <= 'F') ||
1042 (data[in] >= 'a' && data[in] <= 'f') ||
1043 (data[in] >= '0' && data[in] <= '9')) &&
1044 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1045 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1046 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1047 /* hexval */
1048 ch = hexval(data[in]) << 4;
1049 in++;
1050 ch |= hexval(data[in]);
1051 in++;
1052 odata[out++] = ch;
1053 }
1054 else {
1055 odata[out++] = '=';
1056 }
1057 }
1058 else if (header && data[in] == '_') {
1059 odata[out++] = ' ';
1060 in++;
1061 }
1062 else {
1063 odata[out] = data[in];
1064 in++;
1065 out++;
1066 }
1067 }
Greg Warda645b302001-10-04 14:54:53 +00001068 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001069 free (odata);
1070 return NULL;
1071 }
1072 free (odata);
1073 return rv;
1074}
1075
/* Write the two uppercase hexadecimal digits of ch into s[0] (high
 * nibble) and s[1] (low nibble).  No terminating NUL is written.
 * Always returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0xf];
    s[1] = digits[ch & 0xf];
    return 0;
}
1086
/* Docstring for the "b2a_qp" method-table entry. */
static char doc_b2a_qp[] =
    "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n"
    " Encode a string using quoted-printable encoding. \n"
    "\n"
    "On encoding, when istext is set, newlines are not encoded, and white \n"
    "space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n"
    "both encoded. When quotetabs is set, space and tabs are encoded.";
1094
1095/* XXX: This is ridiculously complicated to be backward compatible
1096 * (mostly) with the quopri module. It doesn't re-create the quopri
1097 * module bug where text ending in CRLF has the CR encoded */
1098static PyObject*
1099binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1100{
1101 unsigned int in, out;
1102 unsigned char *data, *odata;
1103 unsigned int datalen = 0, odatalen = 0;
1104 PyObject *rv;
1105 unsigned int linelen = 0;
1106 static char *kwlist[] = {"data", "quotetabs", "istext", "header", NULL};
1107 int istext = 1;
1108 int quotetabs = 0;
1109 int header = 0;
1110 unsigned char ch;
1111 int crlf = 0;
1112 unsigned char *p;
1113
1114 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
1115 &datalen, &quotetabs, &istext, &header))
1116 return NULL;
1117
1118 /* See if this string is using CRLF line ends */
1119 /* XXX: this function has the side effect of converting all of
1120 * the end of lines to be the same depending on this detection
1121 * here */
Greg Warda645b302001-10-04 14:54:53 +00001122 p = (unsigned char *) strchr((char *)data, '\n');
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001123 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1124 crlf = 1;
1125
1126 /* First, scan to see how many characters need to be encoded */
1127 in = 0;
1128 while (in < datalen) {
1129 if ((data[in] > 126) ||
1130 (data[in] == '=') ||
1131 (header && data[in] == '_') ||
1132 ((data[in] == '.') && (linelen == 1)) ||
1133 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1134 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1135 ((data[in] < 33) &&
1136 (data[in] != '\r') && (data[in] != '\n') &&
1137 (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1138 {
1139 if ((linelen + 3) >= MAXLINESIZE) {
1140 linelen = 0;
1141 if (crlf)
1142 odatalen += 3;
1143 else
1144 odatalen += 2;
1145 }
1146 linelen += 3;
1147 odatalen += 3;
1148 in++;
1149 }
1150 else {
1151 if (istext &&
1152 ((data[in] == '\n') ||
1153 ((in+1 < datalen) && (data[in] == '\r') &&
1154 (data[in+1] == '\n'))))
1155 {
1156 linelen = 0;
1157 /* Protect against whitespace on end of line */
1158 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1159 odatalen += 2;
1160 if (crlf)
1161 odatalen += 2;
1162 else
1163 odatalen += 1;
1164 if (data[in] == '\r')
1165 in += 2;
1166 else
1167 in++;
1168 }
1169 else {
1170 if ((in + 1 != datalen) &&
1171 (data[in+1] != '\n') &&
1172 (linelen + 1) >= MAXLINESIZE) {
1173 linelen = 0;
1174 if (crlf)
1175 odatalen += 3;
1176 else
1177 odatalen += 2;
1178 }
1179 linelen++;
1180 odatalen++;
1181 in++;
1182 }
1183 }
1184 }
1185
Greg Warda645b302001-10-04 14:54:53 +00001186 odata = (unsigned char *) calloc(1, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001187
1188 if (odata == NULL) {
1189 PyErr_NoMemory();
1190 return NULL;
1191 }
1192
1193 in = out = linelen = 0;
1194 while (in < datalen) {
1195 if ((data[in] > 126) ||
1196 (data[in] == '=') ||
1197 (header && data[in] == '_') ||
1198 ((data[in] == '.') && (linelen == 1)) ||
1199 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1200 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1201 ((data[in] < 33) &&
1202 (data[in] != '\r') && (data[in] != '\n') &&
1203 (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1204 {
1205 if ((linelen + 3 )>= MAXLINESIZE) {
1206 odata[out++] = '=';
1207 if (crlf) odata[out++] = '\r';
1208 odata[out++] = '\n';
1209 linelen = 0;
1210 }
1211 odata[out++] = '=';
1212 to_hex(data[in], &odata[out]);
1213 out += 2;
1214 in++;
1215 linelen += 3;
1216 }
1217 else {
1218 if (istext &&
1219 ((data[in] == '\n') ||
1220 ((in+1 < datalen) && (data[in] == '\r') &&
1221 (data[in+1] == '\n'))))
1222 {
1223 linelen = 0;
1224 /* Protect against whitespace on end of line */
1225 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1226 ch = odata[out-1];
1227 odata[out-1] = '=';
1228 to_hex(ch, &odata[out]);
1229 out += 2;
1230 }
1231
1232 if (crlf) odata[out++] = '\r';
1233 odata[out++] = '\n';
1234 if (data[in] == '\r')
1235 in += 2;
1236 else
1237 in++;
1238 }
1239 else {
1240 if ((in + 1 != datalen) &&
1241 (data[in+1] != '\n') &&
1242 (linelen + 1) >= MAXLINESIZE) {
1243 odata[out++] = '=';
1244 if (crlf) odata[out++] = '\r';
1245 odata[out++] = '\n';
1246 linelen = 0;
1247 }
1248 linelen++;
1249 if (header && data[in] == ' ') {
1250 odata[out++] = '_';
1251 in++;
1252 }
1253 else {
1254 odata[out++] = data[in++];
1255 }
1256 }
1257 }
1258 }
Greg Warda645b302001-10-04 14:54:53 +00001259 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001260 free (odata);
1261 return NULL;
1262 }
1263 free (odata);
1264 return rv;
1265}
Barry Warsawe977c212000-08-15 06:07:13 +00001266
Jack Jansen72781191995-08-07 14:34:15 +00001267/* List of functions defined in the module */
1268
1269static struct PyMethodDef binascii_module_methods[] = {
Barry Warsawe977c212000-08-15 06:07:13 +00001270 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1271 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1272 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1273 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1274 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1275 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1276 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1277 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1278 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1279 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1280 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1281 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1282 doc_rledecode_hqx},
1283 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1284 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001285 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1286 doc_a2b_qp},
1287 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1288 doc_b2a_qp},
Barry Warsawe977c212000-08-15 06:07:13 +00001289 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001290};
1291
1292
1293/* Initialization function for the module (*must* be called initbinascii) */
1294static char doc_binascii[] = "Conversion between binary data and ASCII";
1295
Guido van Rossum3886bb61998-12-04 18:50:17 +00001296DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001297initbinascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001298{
1299 PyObject *m, *d, *x;
1300
1301 /* Create the module and add the functions */
1302 m = Py_InitModule("binascii", binascii_module_methods);
1303
1304 d = PyModule_GetDict(m);
1305 x = PyString_FromString(doc_binascii);
1306 PyDict_SetItemString(d, "__doc__", x);
Guido van Rossum5c159bd1997-08-04 23:55:25 +00001307 Py_XDECREF(x);
Jack Jansen72781191995-08-07 14:34:15 +00001308
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001309 Error = PyErr_NewException("binascii.Error", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001310 PyDict_SetItemString(d, "Error", Error);
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001311 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001312 PyDict_SetItemString(d, "Incomplete", Incomplete);
Jack Jansen72781191995-08-07 14:34:15 +00001313}