blob: f4d5cf0822631b20634fda767675bd97a97742f4 [file] [log] [blame]
Jack Jansen72781191995-08-07 14:34:15 +00001/*
2** Routines to represent binary data in ASCII and vice-versa
3**
4** This module currently supports the following encodings:
5** uuencode:
6** each line encodes 45 bytes (except possibly the last)
7** First char encodes (binary) length, rest data
8** each char encodes 6 bits, as follows:
9** binary: 01234567 abcdefgh ijklmnop
10** ascii: 012345 67abcd efghij klmnop
11** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12** short binary data is zero-extended (so the bits are always in the
13** right place), this does *not* reflect in the length.
Jack Jansen84bbc2e1995-10-04 16:38:44 +000014** base64:
15** Line breaks are insignificant, but lines are at most 76 chars
16** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17** is done via a table.
18** Short binary data is filled (in ASCII) with '='.
Jack Jansen72781191995-08-07 14:34:15 +000019** hqx:
20** File starts with introductory text, real data starts and ends
21** with colons.
22** Data consists of three similar parts: info, datafork, resourcefork.
23** Each part is protected (at the end) with a 16-bit crc
24** The binary data is run-length encoded, and then ascii-fied:
25** binary: 01234567 abcdefgh ijklmnop
26** ascii: 012345 67abcd efghij klmnop
27** ASCII encoding is table-driven, see the code.
28** Short binary data results in the runt ascii-byte being output with
29** the bits in the right place.
30**
31** While I was reading dozens of programs that encode or decode the formats
32** here (documentation? hihi:-) I have formulated Jansen's Observation:
33**
34** Programs that encode binary data in ASCII are written in
35** such a style that they are as unreadable as possible. Devices used
36** include unnecessary global variables, burying important tables
37** in unrelated sourcefiles, putting functions in include files,
38** using seemingly-descriptive variable names for different purposes,
39** calls to empty subroutines and a host of others.
40**
41** I have attempted to break with this tradition, but I guess that that
42** does make the performance sub-optimal. Oh well, too bad...
43**
44** Jack Jansen, CWI, July 1995.
Tim Peters934c1a12002-07-02 22:24:50 +000045**
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000046** Added support for quoted-printable encoding, based on rfc 1521 et al
Tim Peters934c1a12002-07-02 22:24:50 +000047** quoted-printable encoding specifies that non printable characters (anything
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000048** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49** of the character. It also specifies some other behavior to enable 8bit data
Tim Peters934c1a12002-07-02 22:24:50 +000050** in a mail message with little difficulty (maximum line sizes, protecting
51** some cases of whitespace, etc).
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000052**
53** Brandon Long, September 2001.
Jack Jansen72781191995-08-07 14:34:15 +000054*/
55
Thomas Woutersf98db652006-03-01 21:37:32 +000056#include PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
59
60static PyObject *Error;
61static PyObject *Incomplete;
62
63/*
64** hqx lookup table, ascii->binary.
65*/
66
67#define RUNCHAR 0x90
68
69#define DONE 0x7F
70#define SKIP 0x7E
71#define FAIL 0x7D
72
73static unsigned char table_a2b_hqx[256] = {
74/* ^@ ^A ^B ^C ^D ^E ^F ^G */
75/* 0*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
76/* \b \t \n ^K ^L \r ^N ^O */
77/* 1*/ FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
78/* ^P ^Q ^R ^S ^T ^U ^V ^W */
79/* 2*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
80/* ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ */
81/* 3*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
82/* ! " # $ % & ' */
83/* 4*/ FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
84/* ( ) * + , - . / */
85/* 5*/ 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
86/* 0 1 2 3 4 5 6 7 */
87/* 6*/ 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
88/* 8 9 : ; < = > ? */
89/* 7*/ 0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
90/* @ A B C D E F G */
91/* 8*/ 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
92/* H I J K L M N O */
93/* 9*/ 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
94/* P Q R S T U V W */
95/*10*/ 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
96/* X Y Z [ \ ] ^ _ */
97/*11*/ 0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
98/* ` a b c d e f g */
99/*12*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
100/* h i j k l m n o */
101/*13*/ 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
102/* p q r s t u v w */
103/*14*/ 0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
104/* x y z { | } ~ ^? */
105/*15*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
106/*16*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
107 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
108 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
109 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
110 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
111 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
112 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
113 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
114 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
115 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
116 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
117 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
118 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
119 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
120 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
121 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
122};
123
/* hqx lookup table, binary->ascii: the character for each 6-bit value.
** Read-only, hence const.
*/
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000126
/* base64 lookup table, ascii->binary, indexed by a 7-bit character.
** -1 marks characters that are not part of the base64 alphabet.
** Read-only, hence const.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
137
/* Character used to fill up short base64 data */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)

/* base64 lookup table, binary->ascii: the character for each 6-bit
** value.  Read-only, hence const.
*/
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000145
146
147
/* Per-byte lookup table for the 16-bit hqx CRC, indexed by the high
** byte of the running crc xor'ed with the next data byte (see
** binascii_crc_hqx).  Read-only, hence const.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
182
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000183PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
Jack Jansen72781191995-08-07 14:34:15 +0000184
185static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000186binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000187{
188 unsigned char *ascii_data, *bin_data;
189 int leftbits = 0;
190 unsigned char this_ch;
191 unsigned int leftchar = 0;
192 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000193 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000194
Guido van Rossum43713e52000-02-29 13:59:29 +0000195 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000196 return NULL;
197
198 /* First byte: binary data length (in bytes) */
199 bin_len = (*ascii_data++ - ' ') & 077;
200 ascii_len--;
201
202 /* Allocate the buffer */
203 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
204 return NULL;
205 bin_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000206
Jack Jansen72781191995-08-07 14:34:15 +0000207 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
Neal Norwitzfe92eef2004-06-06 20:13:10 +0000208 /* XXX is it really best to add NULs if there's no more data */
209 this_ch = (ascii_len > 0) ? *ascii_data : 0;
Jack Jansen72781191995-08-07 14:34:15 +0000210 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
211 /*
212 ** Whitespace. Assume some spaces got eaten at
213 ** end-of-line. (We check this later)
214 */
215 this_ch = 0;
216 } else {
Jack Jansen5d957971995-11-14 10:35:19 +0000217 /* Check the character for legality
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000218 ** The 64 in stead of the expected 63 is because
219 ** there are a few uuencodes out there that use
Guido van Rossum92d89172001-01-09 02:11:57 +0000220 ** '`' as zero instead of space.
Jack Jansen5d957971995-11-14 10:35:19 +0000221 */
222 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Jack Jansen72781191995-08-07 14:34:15 +0000223 PyErr_SetString(Error, "Illegal char");
224 Py_DECREF(rv);
225 return NULL;
226 }
227 this_ch = (this_ch - ' ') & 077;
228 }
229 /*
230 ** Shift it in on the low end, and see if there's
231 ** a byte ready for output.
232 */
233 leftchar = (leftchar << 6) | (this_ch);
234 leftbits += 6;
235 if ( leftbits >= 8 ) {
236 leftbits -= 8;
237 *bin_data++ = (leftchar >> leftbits) & 0xff;
238 leftchar &= ((1 << leftbits) - 1);
239 bin_len--;
240 }
241 }
242 /*
243 ** Finally, check that if there's anything left on the line
244 ** that it's whitespace only.
245 */
246 while( ascii_len-- > 0 ) {
247 this_ch = *ascii_data++;
Guido van Rossum92d89172001-01-09 02:11:57 +0000248 /* Extra '`' may be written as padding in some cases */
249 if ( this_ch != ' ' && this_ch != ' '+64 &&
Guido van Rossum1243ae71997-07-11 18:36:28 +0000250 this_ch != '\n' && this_ch != '\r' ) {
Jack Jansen72781191995-08-07 14:34:15 +0000251 PyErr_SetString(Error, "Trailing garbage");
252 Py_DECREF(rv);
253 return NULL;
254 }
255 }
256 return rv;
257}
258
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000259PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000260
Jack Jansen72781191995-08-07 14:34:15 +0000261static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000262binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000263{
264 unsigned char *ascii_data, *bin_data;
265 int leftbits = 0;
266 unsigned char this_ch;
267 unsigned int leftchar = 0;
268 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000269 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000270
Guido van Rossum43713e52000-02-29 13:59:29 +0000271 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000272 return NULL;
273 if ( bin_len > 45 ) {
274 /* The 45 is a limit that appears in all uuencode's */
275 PyErr_SetString(Error, "At most 45 bytes at once");
276 return NULL;
277 }
278
279 /* We're lazy and allocate to much (fixed up later) */
Raymond Hettinger658717e2004-09-06 22:58:37 +0000280 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000281 return NULL;
282 ascii_data = (unsigned char *)PyString_AsString(rv);
283
284 /* Store the length */
285 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000286
Jack Jansen72781191995-08-07 14:34:15 +0000287 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
288 /* Shift the data (or padding) into our buffer */
289 if ( bin_len > 0 ) /* Data */
290 leftchar = (leftchar << 8) | *bin_data;
291 else /* Padding */
292 leftchar <<= 8;
293 leftbits += 8;
294
295 /* See if there are 6-bit groups ready */
296 while ( leftbits >= 6 ) {
297 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
298 leftbits -= 6;
299 *ascii_data++ = this_ch + ' ';
300 }
301 }
302 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000303
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000304 _PyString_Resize(&rv, (ascii_data -
305 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000306 return rv;
307}
308
Guido van Rossum2db4f471999-10-19 19:05:14 +0000309
310static int
Thomas Woutersf98db652006-03-01 21:37:32 +0000311binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000312{
Tim Peters934c1a12002-07-02 22:24:50 +0000313 /* Finds & returns the (num+1)th
Guido van Rossum2db4f471999-10-19 19:05:14 +0000314 ** valid character for base64, or -1 if none.
315 */
316
317 int ret = -1;
318 unsigned char c, b64val;
319
320 while ((slen > 0) && (ret == -1)) {
321 c = *s;
322 b64val = table_a2b_base64[c & 0x7f];
323 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
324 if (num == 0)
325 ret = *s;
326 num--;
327 }
328
329 s++;
330 slen--;
331 }
332 return ret;
333}
334
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000335PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000336
337static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000338binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000339{
340 unsigned char *ascii_data, *bin_data;
341 int leftbits = 0;
342 unsigned char this_ch;
343 unsigned int leftchar = 0;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000344 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000345 Py_ssize_t ascii_len, bin_len;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000346 int quad_pos = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000347
Guido van Rossum43713e52000-02-29 13:59:29 +0000348 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000349 return NULL;
350
351 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
352
353 /* Allocate the buffer */
354 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
355 return NULL;
356 bin_data = (unsigned char *)PyString_AsString(rv);
357 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000358
359 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
360 this_ch = *ascii_data;
361
362 if (this_ch > 0x7f ||
363 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
Jack Jansenba1de3b1996-01-22 10:47:15 +0000364 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000365
366 /* Check for pad sequences and ignore
367 ** the invalid ones.
368 */
369 if (this_ch == BASE64_PAD) {
370 if ( (quad_pos < 2) ||
371 ((quad_pos == 2) &&
372 (binascii_find_valid(ascii_data, ascii_len, 1)
373 != BASE64_PAD)) )
374 {
375 continue;
376 }
377 else {
378 /* A pad sequence means no more input.
379 ** We've already interpreted the data
380 ** from the quad at this point.
381 */
382 leftbits = 0;
383 break;
384 }
385 }
386
387 this_ch = table_a2b_base64[*ascii_data];
388 if ( this_ch == (unsigned char) -1 )
389 continue;
390
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000391 /*
392 ** Shift it in on the low end, and see if there's
393 ** a byte ready for output.
394 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000395 quad_pos = (quad_pos + 1) & 0x03;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000396 leftchar = (leftchar << 6) | (this_ch);
397 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000398
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000399 if ( leftbits >= 8 ) {
400 leftbits -= 8;
401 *bin_data++ = (leftchar >> leftbits) & 0xff;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000402 bin_len++;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000403 leftchar &= ((1 << leftbits) - 1);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000404 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000405 }
406
407 if (leftbits != 0) {
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000408 PyErr_SetString(Error, "Incorrect padding");
409 Py_DECREF(rv);
410 return NULL;
411 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000412
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000413 /* And set string size correctly. If the result string is empty
414 ** (because the input was all invalid) return the shared empty
415 ** string instead; _PyString_Resize() won't do this for us.
416 */
Barry Warsaw0a51b582002-08-15 22:14:24 +0000417 if (bin_len > 0)
418 _PyString_Resize(&rv, bin_len);
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000419 else {
420 Py_DECREF(rv);
421 rv = PyString_FromString("");
422 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000423 return rv;
424}
425
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000426PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000427
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000428static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000429binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000430{
431 unsigned char *ascii_data, *bin_data;
432 int leftbits = 0;
433 unsigned char this_ch;
434 unsigned int leftchar = 0;
435 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000436 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000437
Guido van Rossum43713e52000-02-29 13:59:29 +0000438 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000439 return NULL;
440 if ( bin_len > BASE64_MAXBIN ) {
441 PyErr_SetString(Error, "Too much data for base64 line");
442 return NULL;
443 }
Tim Peters934c1a12002-07-02 22:24:50 +0000444
Tim Peters1fbb5772001-12-19 04:41:35 +0000445 /* We're lazy and allocate too much (fixed up later).
446 "+3" leaves room for up to two pad characters and a trailing
447 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
448 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000449 return NULL;
450 ascii_data = (unsigned char *)PyString_AsString(rv);
451
452 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
453 /* Shift the data into our buffer */
454 leftchar = (leftchar << 8) | *bin_data;
455 leftbits += 8;
456
457 /* See if there are 6-bit groups ready */
458 while ( leftbits >= 6 ) {
459 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
460 leftbits -= 6;
461 *ascii_data++ = table_b2a_base64[this_ch];
462 }
463 }
464 if ( leftbits == 2 ) {
465 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
466 *ascii_data++ = BASE64_PAD;
467 *ascii_data++ = BASE64_PAD;
468 } else if ( leftbits == 4 ) {
469 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
470 *ascii_data++ = BASE64_PAD;
Tim Peters934c1a12002-07-02 22:24:50 +0000471 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000472 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000473
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000474 _PyString_Resize(&rv, (ascii_data -
475 (unsigned char *)PyString_AsString(rv)));
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000476 return rv;
477}
478
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000479PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
Jack Jansen72781191995-08-07 14:34:15 +0000480
481static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000482binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000483{
484 unsigned char *ascii_data, *bin_data;
485 int leftbits = 0;
486 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000487 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000488 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000489 Py_ssize_t len;
Jack Jansen72781191995-08-07 14:34:15 +0000490 int done = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000491
Guido van Rossum43713e52000-02-29 13:59:29 +0000492 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000493 return NULL;
494
Raymond Hettinger658717e2004-09-06 22:58:37 +0000495 /* Allocate a string that is too big (fixed later)
496 Add two to the initial length to prevent interning which
497 would preclude subsequent resizing. */
498 if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000499 return NULL;
500 bin_data = (unsigned char *)PyString_AsString(rv);
501
502 for( ; len > 0 ; len--, ascii_data++ ) {
503 /* Get the byte and look it up */
504 this_ch = table_a2b_hqx[*ascii_data];
505 if ( this_ch == SKIP )
506 continue;
507 if ( this_ch == FAIL ) {
508 PyErr_SetString(Error, "Illegal char");
509 Py_DECREF(rv);
510 return NULL;
511 }
512 if ( this_ch == DONE ) {
513 /* The terminating colon */
514 done = 1;
515 break;
516 }
517
518 /* Shift it into the buffer and see if any bytes are ready */
519 leftchar = (leftchar << 6) | (this_ch);
520 leftbits += 6;
521 if ( leftbits >= 8 ) {
522 leftbits -= 8;
523 *bin_data++ = (leftchar >> leftbits) & 0xff;
524 leftchar &= ((1 << leftbits) - 1);
525 }
526 }
Tim Peters934c1a12002-07-02 22:24:50 +0000527
Jack Jansen72781191995-08-07 14:34:15 +0000528 if ( leftbits && !done ) {
529 PyErr_SetString(Incomplete,
530 "String has incomplete number of bytes");
531 Py_DECREF(rv);
532 return NULL;
533 }
Guido van Rossum9c6ba5e1997-01-12 20:02:04 +0000534 _PyString_Resize(
535 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
536 if (rv) {
537 PyObject *rrv = Py_BuildValue("Oi", rv, done);
538 Py_DECREF(rv);
539 return rrv;
540 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000541
Jack Jansen72781191995-08-07 14:34:15 +0000542 return NULL;
543}
544
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000545PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
Jack Jansen72781191995-08-07 14:34:15 +0000546
547static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000548binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000549{
550 unsigned char *in_data, *out_data;
551 PyObject *rv;
552 unsigned char ch;
Thomas Woutersf98db652006-03-01 21:37:32 +0000553 Py_ssize_t in, inend, len;
Tim Peters934c1a12002-07-02 22:24:50 +0000554
Guido van Rossum43713e52000-02-29 13:59:29 +0000555 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000556 return NULL;
557
558 /* Worst case: output is twice as big as input (fixed later) */
Raymond Hettinger658717e2004-09-06 22:58:37 +0000559 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000560 return NULL;
561 out_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000562
Jack Jansen72781191995-08-07 14:34:15 +0000563 for( in=0; in<len; in++) {
564 ch = in_data[in];
565 if ( ch == RUNCHAR ) {
566 /* RUNCHAR. Escape it. */
567 *out_data++ = RUNCHAR;
568 *out_data++ = 0;
569 } else {
570 /* Check how many following are the same */
571 for(inend=in+1;
572 inend<len && in_data[inend] == ch &&
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000573 inend < in+255;
Jack Jansen72781191995-08-07 14:34:15 +0000574 inend++) ;
Jack Jansen0223aa11995-08-31 13:44:23 +0000575 if ( inend - in > 3 ) {
576 /* More than 3 in a row. Output RLE. */
Jack Jansen72781191995-08-07 14:34:15 +0000577 *out_data++ = ch;
578 *out_data++ = RUNCHAR;
579 *out_data++ = inend-in;
580 in = inend-1;
581 } else {
582 /* Less than 3. Output the byte itself */
583 *out_data++ = ch;
584 }
585 }
586 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000587 _PyString_Resize(&rv, (out_data -
588 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000589 return rv;
590}
591
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000592PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
Tim Peters934c1a12002-07-02 22:24:50 +0000593
Jack Jansen72781191995-08-07 14:34:15 +0000594static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000595binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000596{
597 unsigned char *ascii_data, *bin_data;
598 int leftbits = 0;
599 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000600 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000601 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000602 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000603
Guido van Rossum43713e52000-02-29 13:59:29 +0000604 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000605 return NULL;
606
607 /* Allocate a buffer that is at least large enough */
Raymond Hettinger658717e2004-09-06 22:58:37 +0000608 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000609 return NULL;
610 ascii_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000611
Jack Jansen72781191995-08-07 14:34:15 +0000612 for( ; len > 0 ; len--, bin_data++ ) {
613 /* Shift into our buffer, and output any 6bits ready */
614 leftchar = (leftchar << 8) | *bin_data;
615 leftbits += 8;
616 while ( leftbits >= 6 ) {
617 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
618 leftbits -= 6;
619 *ascii_data++ = table_b2a_hqx[this_ch];
620 }
621 }
622 /* Output a possible runt byte */
623 if ( leftbits ) {
624 leftchar <<= (6-leftbits);
625 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
626 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000627 _PyString_Resize(&rv, (ascii_data -
628 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000629 return rv;
630}
631
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000632PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
Tim Peters934c1a12002-07-02 22:24:50 +0000633
Jack Jansen72781191995-08-07 14:34:15 +0000634static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000635binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000636{
637 unsigned char *in_data, *out_data;
638 unsigned char in_byte, in_repeat;
639 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000640 Py_ssize_t in_len, out_len, out_len_left;
Jack Jansen72781191995-08-07 14:34:15 +0000641
Guido van Rossum43713e52000-02-29 13:59:29 +0000642 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000643 return NULL;
644
645 /* Empty string is a special case */
646 if ( in_len == 0 )
647 return Py_BuildValue("s", "");
648
649 /* Allocate a buffer of reasonable size. Resized when needed */
650 out_len = in_len*2;
651 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
652 return NULL;
653 out_len_left = out_len;
654 out_data = (unsigned char *)PyString_AsString(rv);
655
656 /*
657 ** We need two macros here to get/put bytes and handle
658 ** end-of-buffer for input and output strings.
659 */
660#define INBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000661 do { \
662 if ( --in_len < 0 ) { \
663 PyErr_SetString(Incomplete, ""); \
664 Py_DECREF(rv); \
665 return NULL; \
666 } \
667 b = *in_data++; \
668 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000669
Jack Jansen72781191995-08-07 14:34:15 +0000670#define OUTBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000671 do { \
672 if ( --out_len_left < 0 ) { \
673 _PyString_Resize(&rv, 2*out_len); \
674 if ( rv == NULL ) return NULL; \
675 out_data = (unsigned char *)PyString_AsString(rv) \
676 + out_len; \
677 out_len_left = out_len-1; \
678 out_len = out_len * 2; \
679 } \
680 *out_data++ = b; \
681 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000682
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000683 /*
684 ** Handle first byte separately (since we have to get angry
685 ** in case of an orphaned RLE code).
686 */
687 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000688
689 if (in_byte == RUNCHAR) {
690 INBYTE(in_repeat);
691 if (in_repeat != 0) {
692 /* Note Error, not Incomplete (which is at the end
693 ** of the string only). This is a programmer error.
694 */
695 PyErr_SetString(Error, "Orphaned RLE code at start");
696 Py_DECREF(rv);
697 return NULL;
698 }
699 OUTBYTE(RUNCHAR);
700 } else {
701 OUTBYTE(in_byte);
702 }
Tim Peters934c1a12002-07-02 22:24:50 +0000703
Jack Jansen72781191995-08-07 14:34:15 +0000704 while( in_len > 0 ) {
705 INBYTE(in_byte);
706
707 if (in_byte == RUNCHAR) {
708 INBYTE(in_repeat);
709 if ( in_repeat == 0 ) {
710 /* Just an escaped RUNCHAR value */
711 OUTBYTE(RUNCHAR);
712 } else {
713 /* Pick up value and output a sequence of it */
714 in_byte = out_data[-1];
715 while ( --in_repeat > 0 )
716 OUTBYTE(in_byte);
717 }
718 } else {
719 /* Normal byte */
720 OUTBYTE(in_byte);
721 }
722 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000723 _PyString_Resize(&rv, (out_data -
724 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000725 return rv;
726}
727
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000728PyDoc_STRVAR(doc_crc_hqx,
729"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
Jack Jansen72781191995-08-07 14:34:15 +0000730
731static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000732binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000733{
734 unsigned char *bin_data;
735 unsigned int crc;
Thomas Woutersf98db652006-03-01 21:37:32 +0000736 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000737
Guido van Rossum43713e52000-02-29 13:59:29 +0000738 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
Jack Jansen72781191995-08-07 14:34:15 +0000739 return NULL;
740
741 while(len--) {
742 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
743 }
744
745 return Py_BuildValue("i", crc);
746}
747
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000748PyDoc_STRVAR(doc_crc32,
749"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000750
751/* Crc - 32 BIT ANSI X3.66 CRC checksum files
752 Also known as: ISO 3307
753**********************************************************************|
754* *|
755* Demonstration program to compute the 32-bit CRC used as the frame *|
756* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
757* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
758* protocol). The 32-bit FCS was added via the Federal Register, *|
759* 1 June 1982, p.23798. I presume but don't know for certain that *|
760* this polynomial is or will be included in CCITT V.41, which *|
761* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
762* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
763* errors by a factor of 10^-5 over 16-bit FCS. *|
764* *|
765**********************************************************************|
766
767 Copyright (C) 1986 Gary S. Brown. You may use this program, or
768 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000769
Tim Peters934c1a12002-07-02 22:24:50 +0000770 First, the polynomial itself and its table of feedback terms. The
771 polynomial is
772 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
773 Note that we take it "backwards" and put the highest-order term in
774 the lowest-order bit. The X^32 term is "implied"; the LSB is the
775 X^31 term, etc. The X^0 term (usually shown as "+1") results in
776 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000777
Tim Peters934c1a12002-07-02 22:24:50 +0000778 Note that the usual hardware shift register implementation, which
779 is what we're using (we're merely optimizing it by doing eight-bit
780 chunks at a time) shifts bits into the lowest-order term. In our
781 implementation, that means shifting towards the right. Why do we
782 do it this way? Because the calculated CRC must be transmitted in
783 order from highest-order term to lowest-order term. UARTs transmit
784 characters in order from LSB to MSB. By storing the CRC this way,
785 we hand it to the UART in the order low-byte to high-byte; the UART
786 sends each low-bit to high-bit; and the result is transmission bit
787 by bit from highest- to lowest-order term without requiring any bit
788 shuffling on our part. Reception works similarly.
789
790 The feedback terms table consists of 256, 32-bit entries. Notes:
791
792 1. The table can be generated at runtime if desired; code to do so
793 is shown later. It might not be obvious, but the feedback
794 terms simply represent the results of eight shift/xor opera-
795 tions for all combinations of data and CRC register values.
796
797 2. The CRC accumulation logic is the same for all CRC polynomials,
798 be they sixteen or thirty-two bits wide. You simply choose the
799 appropriate table. Alternatively, because the table can be
800 generated at runtime, you can start by generating the table for
801 the polynomial in question and use exactly the same "updcrc",
802 if your application needn't simultaneously handle two CRC
803 polynomials. (Note, however, that XMODEM is strange.)
804
805 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
806 of course, 32-bit entries work OK if the high 16 bits are zero.
807
808 4. The values must be right-shifted by eight bits by the "updcrc"
809 logic; the shift must be unsigned (bring in zeroes). On some
810 hardware you could probably optimize the shift in assembler by
811 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000812********************************************************************/
813
/* CRC-32 feedback table: entry i is the result of eight shift/xor steps
 * on byte value i.  binascii_crc32() indexes it with the low byte of
 * (crc ^ data byte). */
static unsigned long crc_32_tab[256] = {
    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
    0x2d02ef8dUL
};
868
869static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000870binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000871{ /* By Jim Ahlstrom; All rights transferred to CNRI */
872 unsigned char *bin_data;
873 unsigned long crc = 0UL; /* initial value of CRC */
Thomas Woutersf98db652006-03-01 21:37:32 +0000874 Py_ssize_t len;
Tim Petersa98011c2002-07-02 20:20:08 +0000875 long result;
Tim Peters934c1a12002-07-02 22:24:50 +0000876
Guido van Rossum43713e52000-02-29 13:59:29 +0000877 if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000878 return NULL;
879
Tim Peters934c1a12002-07-02 22:24:50 +0000880 crc = ~ crc;
881#if SIZEOF_LONG > 4
882 /* only want the trailing 32 bits */
883 crc &= 0xFFFFFFFFUL;
884#endif
885 while (len--)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000886 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
887 /* Note: (crc >> 8) MUST zero fill on left */
Tim Petersa98011c2002-07-02 20:20:08 +0000888
889 result = (long)(crc ^ 0xFFFFFFFFUL);
Tim Peters934c1a12002-07-02 22:24:50 +0000890#if SIZEOF_LONG > 4
891 /* Extend the sign bit. This is one way to ensure the result is the
892 * same across platforms. The other way would be to return an
893 * unbounded unsigned long, but the evidence suggests that lots of
894 * code outside this treats the result as if it were a signed 4-byte
895 * integer.
Tim Petersa98011c2002-07-02 20:20:08 +0000896 */
897 result |= -(result & (1L << 31));
Tim Peters934c1a12002-07-02 22:24:50 +0000898#endif
Tim Petersa98011c2002-07-02 20:20:08 +0000899 return PyInt_FromLong(result);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000900}
901
Barry Warsawe977c212000-08-15 06:07:13 +0000902
903static PyObject *
904binascii_hexlify(PyObject *self, PyObject *args)
905{
906 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000907 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000908 PyObject *retval;
909 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000910 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000911
912 if (!PyArg_ParseTuple(args, "t#:b2a_hex", &argbuf, &arglen))
913 return NULL;
914
915 retval = PyString_FromStringAndSize(NULL, arglen*2);
916 if (!retval)
917 return NULL;
918 retbuf = PyString_AsString(retval);
919 if (!retbuf)
920 goto finally;
921
922 /* make hex version of string, taken from shamodule.c */
923 for (i=j=0; i < arglen; i++) {
924 char c;
925 c = (argbuf[i] >> 4) & 0xf;
926 c = (c>9) ? c+'a'-10 : c + '0';
927 retbuf[j++] = c;
928 c = argbuf[i] & 0xf;
929 c = (c>9) ? c+'a'-10 : c + '0';
930 retbuf[j++] = c;
931 }
932 return retval;
933
934 finally:
935 Py_DECREF(retval);
936 return NULL;
937}
938
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000939PyDoc_STRVAR(doc_hexlify,
Barry Warsawe977c212000-08-15 06:07:13 +0000940"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
941\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000942This function is also available as \"hexlify()\".");
Barry Warsawe977c212000-08-15 06:07:13 +0000943
944
/* Map one hexadecimal digit character (either case) to its value 0..15;
 * any other input yields -1. */
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    return -1;
}
958
959
960static PyObject *
961binascii_unhexlify(PyObject *self, PyObject *args)
962{
963 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000964 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000965 PyObject *retval;
966 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000967 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000968
969 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
970 return NULL;
971
Barry Warsaw16168472000-08-15 06:59:58 +0000972 /* XXX What should we do about strings with an odd length? Should
973 * we add an implicit leading zero, or a trailing zero? For now,
974 * raise an exception.
Barry Warsawe977c212000-08-15 06:07:13 +0000975 */
976 if (arglen % 2) {
Barry Warsaw16168472000-08-15 06:59:58 +0000977 PyErr_SetString(PyExc_TypeError, "Odd-length string");
Barry Warsawe977c212000-08-15 06:07:13 +0000978 return NULL;
979 }
980
981 retval = PyString_FromStringAndSize(NULL, (arglen/2));
982 if (!retval)
983 return NULL;
984 retbuf = PyString_AsString(retval);
985 if (!retbuf)
986 goto finally;
987
988 for (i=j=0; i < arglen; i += 2) {
989 int top = to_int(Py_CHARMASK(argbuf[i]));
990 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
991 if (top == -1 || bot == -1) {
992 PyErr_SetString(PyExc_TypeError,
Barry Warsaw16168472000-08-15 06:59:58 +0000993 "Non-hexadecimal digit found");
Barry Warsawe977c212000-08-15 06:07:13 +0000994 goto finally;
995 }
996 retbuf[j++] = (top << 4) + bot;
997 }
998 return retval;
999
1000 finally:
1001 Py_DECREF(retval);
1002 return NULL;
1003}
1004
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001005PyDoc_STRVAR(doc_unhexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001006"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1007\n\
1008hexstr must contain an even number of hex digits (upper or lower case).\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001009This function is also available as \"unhexlify()\"");
Barry Warsawe977c212000-08-15 06:07:13 +00001010
/* Value of each 7-bit character code as a hexadecimal digit, or -1 when
 * the character is not one.  One row per 16 codes. */
static int table_hex[128] = {
    /* 0x00 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x10 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x20 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x30 */  0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
    /* 0x40 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x50 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x60 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x70 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up a character in table_hex.  The table has 128 entries and the
 * macro does no range check, so c must be below 128. */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Line length limit used by the quoted-printable encoder. */
#define MAXLINESIZE 76
1025
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001026PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001027
Tim Peters934c1a12002-07-02 22:24:50 +00001028static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001029binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1030{
1031 unsigned int in, out;
1032 char ch;
1033 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001034 Py_ssize_t datalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001035 PyObject *rv;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001036 static char *kwlist[] = {"data", "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001037 int header = 0;
1038
Tim Peters934c1a12002-07-02 22:24:50 +00001039 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001040 &datalen, &header))
1041 return NULL;
1042
Barry Warsaw23164a52004-05-11 02:05:11 +00001043 /* We allocate the output same size as input, this is overkill.
1044 * The previous implementation used calloc() so we'll zero out the
1045 * memory here too, since PyMem_Malloc() does not guarantee that.
1046 */
1047 odata = (unsigned char *) PyMem_Malloc(datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001048 if (odata == NULL) {
1049 PyErr_NoMemory();
1050 return NULL;
1051 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001052 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001053
1054 in = out = 0;
1055 while (in < datalen) {
1056 if (data[in] == '=') {
1057 in++;
1058 if (in >= datalen) break;
1059 /* Soft line breaks */
Tim Peters934c1a12002-07-02 22:24:50 +00001060 if ((data[in] == '\n') || (data[in] == '\r') ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001061 (data[in] == ' ') || (data[in] == '\t')) {
1062 if (data[in] != '\n') {
1063 while (in < datalen && data[in] != '\n') in++;
1064 }
1065 if (in < datalen) in++;
1066 }
1067 else if (data[in] == '=') {
1068 /* broken case from broken python qp */
1069 odata[out++] = '=';
1070 in++;
1071 }
Tim Peters934c1a12002-07-02 22:24:50 +00001072 else if (((data[in] >= 'A' && data[in] <= 'F') ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001073 (data[in] >= 'a' && data[in] <= 'f') ||
1074 (data[in] >= '0' && data[in] <= '9')) &&
1075 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1076 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1077 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1078 /* hexval */
1079 ch = hexval(data[in]) << 4;
1080 in++;
1081 ch |= hexval(data[in]);
1082 in++;
1083 odata[out++] = ch;
1084 }
1085 else {
1086 odata[out++] = '=';
1087 }
1088 }
1089 else if (header && data[in] == '_') {
1090 odata[out++] = ' ';
1091 in++;
1092 }
1093 else {
1094 odata[out] = data[in];
1095 in++;
1096 out++;
1097 }
1098 }
Greg Warda645b302001-10-04 14:54:53 +00001099 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001100 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001101 return NULL;
1102 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001103 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001104 return rv;
1105}
1106
/* Write the two uppercase hexadecimal digits of ch to s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.  Always
 * returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0F];
    s[1] = digits[ch & 0x0F];
    return 0;
}
1117
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001118PyDoc_STRVAR(doc_b2a_qp,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001119"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1120 Encode a string using quoted-printable encoding. \n\
1121\n\
1122On encoding, when istext is set, newlines are not encoded, and white \n\
1123space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001124both encoded. When quotetabs is set, space and tabs are encoded.");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001125
1126/* XXX: This is ridiculously complicated to be backward compatible
1127 * (mostly) with the quopri module. It doesn't re-create the quopri
1128 * module bug where text ending in CRLF has the CR encoded */
Tim Peters934c1a12002-07-02 22:24:50 +00001129static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001130binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1131{
1132 unsigned int in, out;
1133 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001134 Py_ssize_t datalen = 0, odatalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001135 PyObject *rv;
1136 unsigned int linelen = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001137 static char *kwlist[] = {"data", "quotetabs", "istext",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001138 "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001139 int istext = 1;
1140 int quotetabs = 0;
1141 int header = 0;
1142 unsigned char ch;
1143 int crlf = 0;
1144 unsigned char *p;
1145
Tim Peters934c1a12002-07-02 22:24:50 +00001146 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001147 &datalen, &quotetabs, &istext, &header))
1148 return NULL;
1149
1150 /* See if this string is using CRLF line ends */
1151 /* XXX: this function has the side effect of converting all of
1152 * the end of lines to be the same depending on this detection
1153 * here */
Greg Warda645b302001-10-04 14:54:53 +00001154 p = (unsigned char *) strchr((char *)data, '\n');
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001155 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1156 crlf = 1;
1157
1158 /* First, scan to see how many characters need to be encoded */
1159 in = 0;
1160 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001161 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001162 (data[in] == '=') ||
1163 (header && data[in] == '_') ||
1164 ((data[in] == '.') && (linelen == 1)) ||
1165 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1166 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001167 ((data[in] < 33) &&
1168 (data[in] != '\r') && (data[in] != '\n') &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001169 (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1170 {
1171 if ((linelen + 3) >= MAXLINESIZE) {
1172 linelen = 0;
1173 if (crlf)
1174 odatalen += 3;
1175 else
1176 odatalen += 2;
1177 }
1178 linelen += 3;
1179 odatalen += 3;
1180 in++;
1181 }
1182 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001183 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001184 ((data[in] == '\n') ||
1185 ((in+1 < datalen) && (data[in] == '\r') &&
1186 (data[in+1] == '\n'))))
1187 {
1188 linelen = 0;
1189 /* Protect against whitespace on end of line */
1190 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1191 odatalen += 2;
1192 if (crlf)
1193 odatalen += 2;
1194 else
1195 odatalen += 1;
1196 if (data[in] == '\r')
1197 in += 2;
1198 else
1199 in++;
1200 }
1201 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001202 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001203 (data[in+1] != '\n') &&
1204 (linelen + 1) >= MAXLINESIZE) {
1205 linelen = 0;
1206 if (crlf)
1207 odatalen += 3;
1208 else
1209 odatalen += 2;
1210 }
1211 linelen++;
1212 odatalen++;
1213 in++;
1214 }
1215 }
1216 }
1217
Barry Warsaw23164a52004-05-11 02:05:11 +00001218 /* We allocate the output same size as input, this is overkill.
1219 * The previous implementation used calloc() so we'll zero out the
1220 * memory here too, since PyMem_Malloc() does not guarantee that.
1221 */
1222 odata = (unsigned char *) PyMem_Malloc(odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001223 if (odata == NULL) {
1224 PyErr_NoMemory();
1225 return NULL;
1226 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001227 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001228
1229 in = out = linelen = 0;
1230 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001231 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001232 (data[in] == '=') ||
1233 (header && data[in] == '_') ||
1234 ((data[in] == '.') && (linelen == 1)) ||
1235 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1236 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001237 ((data[in] < 33) &&
1238 (data[in] != '\r') && (data[in] != '\n') &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001239 (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1240 {
1241 if ((linelen + 3 )>= MAXLINESIZE) {
1242 odata[out++] = '=';
1243 if (crlf) odata[out++] = '\r';
1244 odata[out++] = '\n';
1245 linelen = 0;
1246 }
1247 odata[out++] = '=';
1248 to_hex(data[in], &odata[out]);
1249 out += 2;
1250 in++;
1251 linelen += 3;
1252 }
1253 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001254 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001255 ((data[in] == '\n') ||
1256 ((in+1 < datalen) && (data[in] == '\r') &&
1257 (data[in+1] == '\n'))))
1258 {
1259 linelen = 0;
1260 /* Protect against whitespace on end of line */
1261 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1262 ch = odata[out-1];
1263 odata[out-1] = '=';
1264 to_hex(ch, &odata[out]);
1265 out += 2;
1266 }
Tim Peters934c1a12002-07-02 22:24:50 +00001267
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001268 if (crlf) odata[out++] = '\r';
1269 odata[out++] = '\n';
1270 if (data[in] == '\r')
1271 in += 2;
1272 else
1273 in++;
1274 }
1275 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001276 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001277 (data[in+1] != '\n') &&
1278 (linelen + 1) >= MAXLINESIZE) {
1279 odata[out++] = '=';
1280 if (crlf) odata[out++] = '\r';
1281 odata[out++] = '\n';
1282 linelen = 0;
1283 }
1284 linelen++;
1285 if (header && data[in] == ' ') {
1286 odata[out++] = '_';
1287 in++;
1288 }
1289 else {
1290 odata[out++] = data[in++];
1291 }
1292 }
1293 }
1294 }
Greg Warda645b302001-10-04 14:54:53 +00001295 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001296 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001297 return NULL;
1298 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001299 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001300 return rv;
1301}
Barry Warsawe977c212000-08-15 06:07:13 +00001302
Jack Jansen72781191995-08-07 14:34:15 +00001303/* List of functions defined in the module */
1304
1305static struct PyMethodDef binascii_module_methods[] = {
Barry Warsawe977c212000-08-15 06:07:13 +00001306 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1307 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1308 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1309 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1310 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1311 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1312 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1313 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1314 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1315 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1316 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1317 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1318 doc_rledecode_hqx},
1319 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1320 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
Tim Peters934c1a12002-07-02 22:24:50 +00001321 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001322 doc_a2b_qp},
Tim Peters934c1a12002-07-02 22:24:50 +00001323 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001324 doc_b2a_qp},
Barry Warsawe977c212000-08-15 06:07:13 +00001325 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001326};
1327
1328
1329/* Initialization function for the module (*must* be called initbinascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001330PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001331
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001332PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001333initbinascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001334{
1335 PyObject *m, *d, *x;
1336
1337 /* Create the module and add the functions */
1338 m = Py_InitModule("binascii", binascii_module_methods);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001339 if (m == NULL)
1340 return;
Jack Jansen72781191995-08-07 14:34:15 +00001341
1342 d = PyModule_GetDict(m);
1343 x = PyString_FromString(doc_binascii);
1344 PyDict_SetItemString(d, "__doc__", x);
Guido van Rossum5c159bd1997-08-04 23:55:25 +00001345 Py_XDECREF(x);
Jack Jansen72781191995-08-07 14:34:15 +00001346
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001347 Error = PyErr_NewException("binascii.Error", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001348 PyDict_SetItemString(d, "Error", Error);
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001349 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001350 PyDict_SetItemString(d, "Incomplete", Incomplete);
Jack Jansen72781191995-08-07 14:34:15 +00001351}