blob: c1fc675cf1d616cefab1fdee5c782bbee980cdf7 [file] [log] [blame]
Jack Jansen72781191995-08-07 14:34:15 +00001/*
2** Routines to represent binary data in ASCII and vice-versa
3**
4** This module currently supports the following encodings:
5** uuencode:
6** each line encodes 45 bytes (except possibly the last)
7** First char encodes (binary) length, rest data
8** each char encodes 6 bits, as follows:
9** binary: 01234567 abcdefgh ijklmnop
10** ascii: 012345 67abcd efghij klmnop
11** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12** short binary data is zero-extended (so the bits are always in the
13** right place), this does *not* reflect in the length.
Jack Jansen84bbc2e1995-10-04 16:38:44 +000014** base64:
15** Line breaks are insignificant, but lines are at most 76 chars
16** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17** is done via a table.
18** Short binary data is filled (in ASCII) with '='.
Jack Jansen72781191995-08-07 14:34:15 +000019** hqx:
20** File starts with introductory text, real data starts and ends
21** with colons.
22** Data consists of three similar parts: info, datafork, resourcefork.
23** Each part is protected (at the end) with a 16-bit crc
24** The binary data is run-length encoded, and then ascii-fied:
25** binary: 01234567 abcdefgh ijklmnop
26** ascii: 012345 67abcd efghij klmnop
27** ASCII encoding is table-driven, see the code.
28** Short binary data results in the runt ascii-byte being output with
29** the bits in the right place.
30**
31** While I was reading dozens of programs that encode or decode the formats
32** here (documentation? hihi:-) I have formulated Jansen's Observation:
33**
34** Programs that encode binary data in ASCII are written in
35** such a style that they are as unreadable as possible. Devices used
36** include unnecessary global variables, burying important tables
37** in unrelated sourcefiles, putting functions in include files,
38** using seemingly-descriptive variable names for different purposes,
39** calls to empty subroutines and a host of others.
40**
41** I have attempted to break with this tradition, but I guess that that
42** does make the performance sub-optimal. Oh well, too bad...
43**
44** Jack Jansen, CWI, July 1995.
Tim Peters934c1a12002-07-02 22:24:50 +000045**
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000046** Added support for quoted-printable encoding, based on rfc 1521 et al
Tim Peters934c1a12002-07-02 22:24:50 +000047** quoted-printable encoding specifies that non printable characters (anything
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000048** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49** of the character. It also specifies some other behavior to enable 8bit data
Tim Peters934c1a12002-07-02 22:24:50 +000050** in a mail message with little difficulty (maximum line sizes, protecting
51** some cases of whitespace, etc).
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000052**
53** Brandon Long, September 2001.
Jack Jansen72781191995-08-07 14:34:15 +000054*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
Gregory P. Smith440ca772008-03-24 00:08:01 +000059#ifdef USE_ZLIB_CRC32
60#include "zlib.h"
61#endif
Jack Jansen72781191995-08-07 14:34:15 +000062
63static PyObject *Error;
64static PyObject *Incomplete;
65
/*
** hqx lookup table, ascii->binary.
**
** Indexed by the input byte.  An entry is either the 6-bit value
** (0x00..0x3F) that the character stands for, or one of the marker
** values below.  The table is only ever read, hence const.
*/

#define RUNCHAR 0x90    /* marker byte used by the hqx run-length coding */

#define DONE 0x7F       /* table entry for ':' */
#define SKIP 0x7E       /* table entry for '\n' and '\r' */
#define FAIL 0x7D       /* table entry for every other non-alphabet byte */

static const unsigned char table_a2b_hqx[256] = {
    /* 0x00: control characters; only \n (0x0A) and \r (0x0D) are SKIP */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
    /* 0x10: control characters */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x20: space ! " # $ % & ' ( ) * + , - . / */
    FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
    /* 0x30: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
    0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
    0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x40: @ A B C D E F G H I J K L M N O */
    0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
    0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
    /* 0x50: P Q R S T U V W X Y Z [ \ ] ^ _ */
    0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
    0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
    /* 0x60: ` a b c d e f g h i j k l m n o */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
    0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
    /* 0x70: p q r s t u v w x y z { | } ~ DEL */
    0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x80 - 0xFF: bytes with the high bit set are never valid */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
126
/*
** hqx lookup table, binary->ascii: entry i is the character that
** encodes the 6-bit value i.  Read-only, hence const.
*/
static const unsigned char table_b2a_hqx[] =
    "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000129
/*
** base64 lookup table, ascii->binary, indexed by a 7-bit character.
** An entry is the 6-bit value of the character, or -1 when the
** character is not part of the base64 alphabet.  The pad character
** '=' deliberately maps to 0.  Read-only, hence const.
*/
static const char table_a2b_base64[] = {
    /* 0x00 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0x10 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0x20 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63,
    /* 0x30 */ 52,53,54,55,56,57,58,59,60,61,-1,-1,-1, 0,-1,-1, /* Note PAD->0 */
    /* 0x40 */ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
    /* 0x50 */ 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,
    /* 0x60 */ -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
    /* 0x70 */ 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1
};
140
141#define BASE64_PAD '='
Guido van Rossum355bc0c2001-10-30 03:00:52 +0000142
143/* Max binary chunk size; limited only by available memory */
Tim Peters1fbb5772001-12-19 04:41:35 +0000144#define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000145
/*
** base64 lookup table, binary->ascii: entry i is the character that
** encodes the 6-bit value i.  Read-only, hence const.
*/
static const unsigned char table_b2a_base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000148
149
150
/*
** Byte-at-a-time lookup table for the 16-bit hqx CRC.
** Entry i is the result of feeding the single byte i, most significant
** bit first, through a 16-bit CRC with polynomial 0x1021 starting
** from 0.  Read-only, hence const.
*/
static const unsigned short crctab_hqx[256] = {
    /* 0x00 */ 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    /* 0x08 */ 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    /* 0x10 */ 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    /* 0x18 */ 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    /* 0x20 */ 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    /* 0x28 */ 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    /* 0x30 */ 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    /* 0x38 */ 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    /* 0x40 */ 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    /* 0x48 */ 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    /* 0x50 */ 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    /* 0x58 */ 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    /* 0x60 */ 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    /* 0x68 */ 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    /* 0x70 */ 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    /* 0x78 */ 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    /* 0x80 */ 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    /* 0x88 */ 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    /* 0x90 */ 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    /* 0x98 */ 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    /* 0xa0 */ 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    /* 0xa8 */ 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    /* 0xb0 */ 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    /* 0xb8 */ 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    /* 0xc0 */ 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    /* 0xc8 */ 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    /* 0xd0 */ 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    /* 0xd8 */ 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    /* 0xe0 */ 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    /* 0xe8 */ 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    /* 0xf0 */ 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    /* 0xf8 */ 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
185
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000186PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
Jack Jansen72781191995-08-07 14:34:15 +0000187
188static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000189binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000190{
191 unsigned char *ascii_data, *bin_data;
192 int leftbits = 0;
193 unsigned char this_ch;
194 unsigned int leftchar = 0;
195 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000196 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000197
Guido van Rossum43713e52000-02-29 13:59:29 +0000198 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000199 return NULL;
200
201 /* First byte: binary data length (in bytes) */
202 bin_len = (*ascii_data++ - ' ') & 077;
203 ascii_len--;
204
205 /* Allocate the buffer */
206 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
207 return NULL;
208 bin_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000209
Jack Jansen72781191995-08-07 14:34:15 +0000210 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
Neal Norwitzfe92eef2004-06-06 20:13:10 +0000211 /* XXX is it really best to add NULs if there's no more data */
212 this_ch = (ascii_len > 0) ? *ascii_data : 0;
Jack Jansen72781191995-08-07 14:34:15 +0000213 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
214 /*
215 ** Whitespace. Assume some spaces got eaten at
216 ** end-of-line. (We check this later)
217 */
218 this_ch = 0;
219 } else {
Jack Jansen5d957971995-11-14 10:35:19 +0000220 /* Check the character for legality
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000221 ** The 64 in stead of the expected 63 is because
222 ** there are a few uuencodes out there that use
Guido van Rossum92d89172001-01-09 02:11:57 +0000223 ** '`' as zero instead of space.
Jack Jansen5d957971995-11-14 10:35:19 +0000224 */
225 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Jack Jansen72781191995-08-07 14:34:15 +0000226 PyErr_SetString(Error, "Illegal char");
227 Py_DECREF(rv);
228 return NULL;
229 }
230 this_ch = (this_ch - ' ') & 077;
231 }
232 /*
233 ** Shift it in on the low end, and see if there's
234 ** a byte ready for output.
235 */
236 leftchar = (leftchar << 6) | (this_ch);
237 leftbits += 6;
238 if ( leftbits >= 8 ) {
239 leftbits -= 8;
240 *bin_data++ = (leftchar >> leftbits) & 0xff;
241 leftchar &= ((1 << leftbits) - 1);
242 bin_len--;
243 }
244 }
245 /*
246 ** Finally, check that if there's anything left on the line
247 ** that it's whitespace only.
248 */
249 while( ascii_len-- > 0 ) {
250 this_ch = *ascii_data++;
Guido van Rossum92d89172001-01-09 02:11:57 +0000251 /* Extra '`' may be written as padding in some cases */
252 if ( this_ch != ' ' && this_ch != ' '+64 &&
Guido van Rossum1243ae71997-07-11 18:36:28 +0000253 this_ch != '\n' && this_ch != '\r' ) {
Jack Jansen72781191995-08-07 14:34:15 +0000254 PyErr_SetString(Error, "Trailing garbage");
255 Py_DECREF(rv);
256 return NULL;
257 }
258 }
259 return rv;
260}
261
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000262PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000263
Jack Jansen72781191995-08-07 14:34:15 +0000264static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000265binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000266{
267 unsigned char *ascii_data, *bin_data;
268 int leftbits = 0;
269 unsigned char this_ch;
270 unsigned int leftchar = 0;
271 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000272 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000273
Guido van Rossum43713e52000-02-29 13:59:29 +0000274 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000275 return NULL;
276 if ( bin_len > 45 ) {
277 /* The 45 is a limit that appears in all uuencode's */
278 PyErr_SetString(Error, "At most 45 bytes at once");
279 return NULL;
280 }
281
282 /* We're lazy and allocate to much (fixed up later) */
Raymond Hettinger658717e2004-09-06 22:58:37 +0000283 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000284 return NULL;
285 ascii_data = (unsigned char *)PyString_AsString(rv);
286
287 /* Store the length */
288 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000289
Jack Jansen72781191995-08-07 14:34:15 +0000290 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
291 /* Shift the data (or padding) into our buffer */
292 if ( bin_len > 0 ) /* Data */
293 leftchar = (leftchar << 8) | *bin_data;
294 else /* Padding */
295 leftchar <<= 8;
296 leftbits += 8;
297
298 /* See if there are 6-bit groups ready */
299 while ( leftbits >= 6 ) {
300 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
301 leftbits -= 6;
302 *ascii_data++ = this_ch + ' ';
303 }
304 }
305 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000306
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000307 _PyString_Resize(&rv, (ascii_data -
308 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000309 return rv;
310}
311
Guido van Rossum2db4f471999-10-19 19:05:14 +0000312
313static int
Thomas Woutersf98db652006-03-01 21:37:32 +0000314binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000315{
Tim Peters934c1a12002-07-02 22:24:50 +0000316 /* Finds & returns the (num+1)th
Guido van Rossum2db4f471999-10-19 19:05:14 +0000317 ** valid character for base64, or -1 if none.
318 */
319
320 int ret = -1;
321 unsigned char c, b64val;
322
323 while ((slen > 0) && (ret == -1)) {
324 c = *s;
325 b64val = table_a2b_base64[c & 0x7f];
326 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
327 if (num == 0)
328 ret = *s;
329 num--;
330 }
331
332 s++;
333 slen--;
334 }
335 return ret;
336}
337
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000338PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000339
340static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000341binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000342{
343 unsigned char *ascii_data, *bin_data;
344 int leftbits = 0;
345 unsigned char this_ch;
346 unsigned int leftchar = 0;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000347 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000348 Py_ssize_t ascii_len, bin_len;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000349 int quad_pos = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000350
Guido van Rossum43713e52000-02-29 13:59:29 +0000351 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000352 return NULL;
353
354 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
355
356 /* Allocate the buffer */
357 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
358 return NULL;
359 bin_data = (unsigned char *)PyString_AsString(rv);
360 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000361
362 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
363 this_ch = *ascii_data;
364
365 if (this_ch > 0x7f ||
366 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
Jack Jansenba1de3b1996-01-22 10:47:15 +0000367 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000368
369 /* Check for pad sequences and ignore
370 ** the invalid ones.
371 */
372 if (this_ch == BASE64_PAD) {
373 if ( (quad_pos < 2) ||
374 ((quad_pos == 2) &&
375 (binascii_find_valid(ascii_data, ascii_len, 1)
376 != BASE64_PAD)) )
377 {
378 continue;
379 }
380 else {
381 /* A pad sequence means no more input.
382 ** We've already interpreted the data
383 ** from the quad at this point.
384 */
385 leftbits = 0;
386 break;
387 }
388 }
389
390 this_ch = table_a2b_base64[*ascii_data];
391 if ( this_ch == (unsigned char) -1 )
392 continue;
393
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000394 /*
395 ** Shift it in on the low end, and see if there's
396 ** a byte ready for output.
397 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000398 quad_pos = (quad_pos + 1) & 0x03;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000399 leftchar = (leftchar << 6) | (this_ch);
400 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000401
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000402 if ( leftbits >= 8 ) {
403 leftbits -= 8;
404 *bin_data++ = (leftchar >> leftbits) & 0xff;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000405 bin_len++;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000406 leftchar &= ((1 << leftbits) - 1);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000407 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000408 }
409
410 if (leftbits != 0) {
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000411 PyErr_SetString(Error, "Incorrect padding");
412 Py_DECREF(rv);
413 return NULL;
414 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000415
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000416 /* And set string size correctly. If the result string is empty
417 ** (because the input was all invalid) return the shared empty
418 ** string instead; _PyString_Resize() won't do this for us.
419 */
Barry Warsaw0a51b582002-08-15 22:14:24 +0000420 if (bin_len > 0)
421 _PyString_Resize(&rv, bin_len);
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000422 else {
423 Py_DECREF(rv);
424 rv = PyString_FromString("");
425 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000426 return rv;
427}
428
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000429PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000430
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000431static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000432binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000433{
434 unsigned char *ascii_data, *bin_data;
435 int leftbits = 0;
436 unsigned char this_ch;
437 unsigned int leftchar = 0;
438 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000439 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000440
Guido van Rossum43713e52000-02-29 13:59:29 +0000441 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000442 return NULL;
443 if ( bin_len > BASE64_MAXBIN ) {
444 PyErr_SetString(Error, "Too much data for base64 line");
445 return NULL;
446 }
Tim Peters934c1a12002-07-02 22:24:50 +0000447
Tim Peters1fbb5772001-12-19 04:41:35 +0000448 /* We're lazy and allocate too much (fixed up later).
449 "+3" leaves room for up to two pad characters and a trailing
450 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
451 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000452 return NULL;
453 ascii_data = (unsigned char *)PyString_AsString(rv);
454
455 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
456 /* Shift the data into our buffer */
457 leftchar = (leftchar << 8) | *bin_data;
458 leftbits += 8;
459
460 /* See if there are 6-bit groups ready */
461 while ( leftbits >= 6 ) {
462 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
463 leftbits -= 6;
464 *ascii_data++ = table_b2a_base64[this_ch];
465 }
466 }
467 if ( leftbits == 2 ) {
468 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
469 *ascii_data++ = BASE64_PAD;
470 *ascii_data++ = BASE64_PAD;
471 } else if ( leftbits == 4 ) {
472 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
473 *ascii_data++ = BASE64_PAD;
Tim Peters934c1a12002-07-02 22:24:50 +0000474 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000475 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000476
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000477 _PyString_Resize(&rv, (ascii_data -
478 (unsigned char *)PyString_AsString(rv)));
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000479 return rv;
480}
481
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000482PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
Jack Jansen72781191995-08-07 14:34:15 +0000483
484static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000485binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000486{
487 unsigned char *ascii_data, *bin_data;
488 int leftbits = 0;
489 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000490 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000491 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000492 Py_ssize_t len;
Jack Jansen72781191995-08-07 14:34:15 +0000493 int done = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000494
Guido van Rossum43713e52000-02-29 13:59:29 +0000495 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000496 return NULL;
497
Raymond Hettinger658717e2004-09-06 22:58:37 +0000498 /* Allocate a string that is too big (fixed later)
499 Add two to the initial length to prevent interning which
500 would preclude subsequent resizing. */
501 if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000502 return NULL;
503 bin_data = (unsigned char *)PyString_AsString(rv);
504
505 for( ; len > 0 ; len--, ascii_data++ ) {
506 /* Get the byte and look it up */
507 this_ch = table_a2b_hqx[*ascii_data];
508 if ( this_ch == SKIP )
509 continue;
510 if ( this_ch == FAIL ) {
511 PyErr_SetString(Error, "Illegal char");
512 Py_DECREF(rv);
513 return NULL;
514 }
515 if ( this_ch == DONE ) {
516 /* The terminating colon */
517 done = 1;
518 break;
519 }
520
521 /* Shift it into the buffer and see if any bytes are ready */
522 leftchar = (leftchar << 6) | (this_ch);
523 leftbits += 6;
524 if ( leftbits >= 8 ) {
525 leftbits -= 8;
526 *bin_data++ = (leftchar >> leftbits) & 0xff;
527 leftchar &= ((1 << leftbits) - 1);
528 }
529 }
Tim Peters934c1a12002-07-02 22:24:50 +0000530
Jack Jansen72781191995-08-07 14:34:15 +0000531 if ( leftbits && !done ) {
532 PyErr_SetString(Incomplete,
533 "String has incomplete number of bytes");
534 Py_DECREF(rv);
535 return NULL;
536 }
Guido van Rossum9c6ba5e1997-01-12 20:02:04 +0000537 _PyString_Resize(
538 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
539 if (rv) {
540 PyObject *rrv = Py_BuildValue("Oi", rv, done);
541 Py_DECREF(rv);
542 return rrv;
543 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000544
Jack Jansen72781191995-08-07 14:34:15 +0000545 return NULL;
546}
547
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000548PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
Jack Jansen72781191995-08-07 14:34:15 +0000549
550static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000551binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000552{
553 unsigned char *in_data, *out_data;
554 PyObject *rv;
555 unsigned char ch;
Thomas Woutersf98db652006-03-01 21:37:32 +0000556 Py_ssize_t in, inend, len;
Tim Peters934c1a12002-07-02 22:24:50 +0000557
Guido van Rossum43713e52000-02-29 13:59:29 +0000558 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000559 return NULL;
560
561 /* Worst case: output is twice as big as input (fixed later) */
Raymond Hettinger658717e2004-09-06 22:58:37 +0000562 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000563 return NULL;
564 out_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000565
Jack Jansen72781191995-08-07 14:34:15 +0000566 for( in=0; in<len; in++) {
567 ch = in_data[in];
568 if ( ch == RUNCHAR ) {
569 /* RUNCHAR. Escape it. */
570 *out_data++ = RUNCHAR;
571 *out_data++ = 0;
572 } else {
573 /* Check how many following are the same */
574 for(inend=in+1;
575 inend<len && in_data[inend] == ch &&
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000576 inend < in+255;
Jack Jansen72781191995-08-07 14:34:15 +0000577 inend++) ;
Jack Jansen0223aa11995-08-31 13:44:23 +0000578 if ( inend - in > 3 ) {
579 /* More than 3 in a row. Output RLE. */
Jack Jansen72781191995-08-07 14:34:15 +0000580 *out_data++ = ch;
581 *out_data++ = RUNCHAR;
582 *out_data++ = inend-in;
583 in = inend-1;
584 } else {
585 /* Less than 3. Output the byte itself */
586 *out_data++ = ch;
587 }
588 }
589 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000590 _PyString_Resize(&rv, (out_data -
591 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000592 return rv;
593}
594
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000595PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
Tim Peters934c1a12002-07-02 22:24:50 +0000596
Jack Jansen72781191995-08-07 14:34:15 +0000597static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000598binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000599{
600 unsigned char *ascii_data, *bin_data;
601 int leftbits = 0;
602 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000603 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000604 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000605 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000606
Guido van Rossum43713e52000-02-29 13:59:29 +0000607 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000608 return NULL;
609
610 /* Allocate a buffer that is at least large enough */
Raymond Hettinger658717e2004-09-06 22:58:37 +0000611 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000612 return NULL;
613 ascii_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000614
Jack Jansen72781191995-08-07 14:34:15 +0000615 for( ; len > 0 ; len--, bin_data++ ) {
616 /* Shift into our buffer, and output any 6bits ready */
617 leftchar = (leftchar << 8) | *bin_data;
618 leftbits += 8;
619 while ( leftbits >= 6 ) {
620 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
621 leftbits -= 6;
622 *ascii_data++ = table_b2a_hqx[this_ch];
623 }
624 }
625 /* Output a possible runt byte */
626 if ( leftbits ) {
627 leftchar <<= (6-leftbits);
628 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
629 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000630 _PyString_Resize(&rv, (ascii_data -
631 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000632 return rv;
633}
634
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000635PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
Tim Peters934c1a12002-07-02 22:24:50 +0000636
Jack Jansen72781191995-08-07 14:34:15 +0000637static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000638binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000639{
640 unsigned char *in_data, *out_data;
641 unsigned char in_byte, in_repeat;
642 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000643 Py_ssize_t in_len, out_len, out_len_left;
Jack Jansen72781191995-08-07 14:34:15 +0000644
Guido van Rossum43713e52000-02-29 13:59:29 +0000645 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000646 return NULL;
647
648 /* Empty string is a special case */
649 if ( in_len == 0 )
Georg Brandl2cfaa342006-05-29 19:39:45 +0000650 return PyString_FromString("");
Jack Jansen72781191995-08-07 14:34:15 +0000651
652 /* Allocate a buffer of reasonable size. Resized when needed */
653 out_len = in_len*2;
654 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
655 return NULL;
656 out_len_left = out_len;
657 out_data = (unsigned char *)PyString_AsString(rv);
658
659 /*
660 ** We need two macros here to get/put bytes and handle
661 ** end-of-buffer for input and output strings.
662 */
663#define INBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000664 do { \
665 if ( --in_len < 0 ) { \
666 PyErr_SetString(Incomplete, ""); \
667 Py_DECREF(rv); \
668 return NULL; \
669 } \
670 b = *in_data++; \
671 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000672
Jack Jansen72781191995-08-07 14:34:15 +0000673#define OUTBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000674 do { \
675 if ( --out_len_left < 0 ) { \
676 _PyString_Resize(&rv, 2*out_len); \
677 if ( rv == NULL ) return NULL; \
678 out_data = (unsigned char *)PyString_AsString(rv) \
679 + out_len; \
680 out_len_left = out_len-1; \
681 out_len = out_len * 2; \
682 } \
683 *out_data++ = b; \
684 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000685
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000686 /*
687 ** Handle first byte separately (since we have to get angry
688 ** in case of an orphaned RLE code).
689 */
690 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000691
692 if (in_byte == RUNCHAR) {
693 INBYTE(in_repeat);
694 if (in_repeat != 0) {
695 /* Note Error, not Incomplete (which is at the end
696 ** of the string only). This is a programmer error.
697 */
698 PyErr_SetString(Error, "Orphaned RLE code at start");
699 Py_DECREF(rv);
700 return NULL;
701 }
702 OUTBYTE(RUNCHAR);
703 } else {
704 OUTBYTE(in_byte);
705 }
Tim Peters934c1a12002-07-02 22:24:50 +0000706
Jack Jansen72781191995-08-07 14:34:15 +0000707 while( in_len > 0 ) {
708 INBYTE(in_byte);
709
710 if (in_byte == RUNCHAR) {
711 INBYTE(in_repeat);
712 if ( in_repeat == 0 ) {
713 /* Just an escaped RUNCHAR value */
714 OUTBYTE(RUNCHAR);
715 } else {
716 /* Pick up value and output a sequence of it */
717 in_byte = out_data[-1];
718 while ( --in_repeat > 0 )
719 OUTBYTE(in_byte);
720 }
721 } else {
722 /* Normal byte */
723 OUTBYTE(in_byte);
724 }
725 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000726 _PyString_Resize(&rv, (out_data -
727 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000728 return rv;
729}
730
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000731PyDoc_STRVAR(doc_crc_hqx,
732"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
Jack Jansen72781191995-08-07 14:34:15 +0000733
734static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000735binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000736{
737 unsigned char *bin_data;
738 unsigned int crc;
Thomas Woutersf98db652006-03-01 21:37:32 +0000739 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000740
Guido van Rossum43713e52000-02-29 13:59:29 +0000741 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
Jack Jansen72781191995-08-07 14:34:15 +0000742 return NULL;
743
744 while(len--) {
745 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
746 }
747
748 return Py_BuildValue("i", crc);
749}
750
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000751PyDoc_STRVAR(doc_crc32,
752"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000753
Gregory P. Smith440ca772008-03-24 00:08:01 +0000754#ifdef USE_ZLIB_CRC32
755/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
756static PyObject *
757binascii_crc32(PyObject *self, PyObject *args)
758{
Gregory P. Smith1fa588e2008-03-25 07:31:28 +0000759 unsigned int crc32val = 0; /* crc32(0L, Z_NULL, 0) */
Gregory P. Smith440ca772008-03-24 00:08:01 +0000760 Byte *buf;
761 Py_ssize_t len;
762 int signed_val;
763
Gregory P. Smith1fa588e2008-03-25 07:31:28 +0000764 if (!PyArg_ParseTuple(args, "s#|I:crc32", &buf, &len, &crc32val))
Gregory P. Smith440ca772008-03-24 00:08:01 +0000765 return NULL;
766 /* In Python 2.x we return a signed integer regardless of native platform
767 * long size (the 32bit unsigned long is treated as 32-bit signed and sign
768 * extended into a 64-bit long inside the integer object). 3.0 does the
769 * right thing and returns unsigned. http://bugs.python.org/issue1202 */
770 signed_val = crc32(crc32val, buf, len);
771 return PyInt_FromLong(signed_val);
772}
773#else /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000774/* Crc - 32 BIT ANSI X3.66 CRC checksum files
775 Also known as: ISO 3307
776**********************************************************************|
777* *|
778* Demonstration program to compute the 32-bit CRC used as the frame *|
779* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
780* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
781* protocol). The 32-bit FCS was added via the Federal Register, *|
782* 1 June 1982, p.23798. I presume but don't know for certain that *|
783* this polynomial is or will be included in CCITT V.41, which *|
784* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
785* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
786* errors by a factor of 10^-5 over 16-bit FCS. *|
787* *|
788**********************************************************************|
789
790 Copyright (C) 1986 Gary S. Brown. You may use this program, or
791 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000792
Tim Peters934c1a12002-07-02 22:24:50 +0000793 First, the polynomial itself and its table of feedback terms. The
794 polynomial is
795 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
796 Note that we take it "backwards" and put the highest-order term in
797 the lowest-order bit. The X^32 term is "implied"; the LSB is the
798 X^31 term, etc. The X^0 term (usually shown as "+1") results in
799 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000800
Tim Peters934c1a12002-07-02 22:24:50 +0000801 Note that the usual hardware shift register implementation, which
802 is what we're using (we're merely optimizing it by doing eight-bit
803 chunks at a time) shifts bits into the lowest-order term. In our
804 implementation, that means shifting towards the right. Why do we
805 do it this way? Because the calculated CRC must be transmitted in
806 order from highest-order term to lowest-order term. UARTs transmit
807 characters in order from LSB to MSB. By storing the CRC this way,
808 we hand it to the UART in the order low-byte to high-byte; the UART
809 sends each low-bit to high-bit; and the result is transmission bit
810 by bit from highest- to lowest-order term without requiring any bit
811 shuffling on our part. Reception works similarly.
812
813 The feedback terms table consists of 256, 32-bit entries. Notes:
814
815 1. The table can be generated at runtime if desired; code to do so
816 is shown later. It might not be obvious, but the feedback
817 terms simply represent the results of eight shift/xor opera-
818 tions for all combinations of data and CRC register values.
819
820 2. The CRC accumulation logic is the same for all CRC polynomials,
821 be they sixteen or thirty-two bits wide. You simply choose the
822 appropriate table. Alternatively, because the table can be
823 generated at runtime, you can start by generating the table for
824 the polynomial in question and use exactly the same "updcrc",
825 if your application needn't simultaneously handle two CRC
826 polynomials. (Note, however, that XMODEM is strange.)
827
828 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
829 of course, 32-bit entries work OK if the high 16 bits are zero.
830
831 4. The values must be right-shifted by eight bits by the "updcrc"
832 logic; the shift must be unsigned (bring in zeroes). On some
833 hardware you could probably optimize the shift in assembler by
834 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000835********************************************************************/
836
/*
** Feedback-term table for the reflected CRC-32 polynomial 0xedb88320:
** entry i is the result of pushing byte value i through eight
** shift/xor steps.  The table is only ever read, hence const.
*/
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
891
892static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000893binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000894{ /* By Jim Ahlstrom; All rights transferred to CNRI */
895 unsigned char *bin_data;
Gregory P. Smithaef3e522008-03-25 07:46:07 +0000896 unsigned int crc = 0U; /* initial value of CRC */
Thomas Woutersf98db652006-03-01 21:37:32 +0000897 Py_ssize_t len;
Gregory P. Smithaef3e522008-03-25 07:46:07 +0000898 int result;
Tim Peters934c1a12002-07-02 22:24:50 +0000899
Gregory P. Smithaef3e522008-03-25 07:46:07 +0000900 if ( !PyArg_ParseTuple(args, "s#|I:crc32", &bin_data, &len, &crc) )
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000901 return NULL;
902
Tim Peters934c1a12002-07-02 22:24:50 +0000903 crc = ~ crc;
Tim Peters934c1a12002-07-02 22:24:50 +0000904 while (len--)
Gregory P. Smithaef3e522008-03-25 07:46:07 +0000905 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffU] ^ (crc >> 8);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000906 /* Note: (crc >> 8) MUST zero fill on left */
Tim Petersa98011c2002-07-02 20:20:08 +0000907
Gregory P. Smithaef3e522008-03-25 07:46:07 +0000908 result = (int)(crc ^ 0xFFFFFFFFU);
Tim Petersa98011c2002-07-02 20:20:08 +0000909 return PyInt_FromLong(result);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000910}
Gregory P. Smith440ca772008-03-24 00:08:01 +0000911#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000912
Barry Warsawe977c212000-08-15 06:07:13 +0000913
914static PyObject *
915binascii_hexlify(PyObject *self, PyObject *args)
916{
917 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000918 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000919 PyObject *retval;
920 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000921 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000922
Brett Cannon6ee7d012006-06-08 16:23:04 +0000923 if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
Barry Warsawe977c212000-08-15 06:07:13 +0000924 return NULL;
925
926 retval = PyString_FromStringAndSize(NULL, arglen*2);
927 if (!retval)
928 return NULL;
929 retbuf = PyString_AsString(retval);
930 if (!retbuf)
931 goto finally;
932
933 /* make hex version of string, taken from shamodule.c */
934 for (i=j=0; i < arglen; i++) {
935 char c;
936 c = (argbuf[i] >> 4) & 0xf;
937 c = (c>9) ? c+'a'-10 : c + '0';
938 retbuf[j++] = c;
939 c = argbuf[i] & 0xf;
940 c = (c>9) ? c+'a'-10 : c + '0';
941 retbuf[j++] = c;
942 }
943 return retval;
944
945 finally:
946 Py_DECREF(retval);
947 return NULL;
948}
949
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000950PyDoc_STRVAR(doc_hexlify,
Barry Warsawe977c212000-08-15 06:07:13 +0000951"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
952\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000953This function is also available as \"hexlify()\".");
Barry Warsawe977c212000-08-15 06:07:13 +0000954
955
/*
** Convert one ASCII hexadecimal digit to its value.
**
** Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything
** else.  Plain range checks are used instead of the <ctype.h>
** classifiers so the answer never depends on the C library or locale.
*/
static int
to_int(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
969
970
971static PyObject *
972binascii_unhexlify(PyObject *self, PyObject *args)
973{
974 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000975 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000976 PyObject *retval;
977 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000978 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000979
980 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
981 return NULL;
982
Barry Warsaw16168472000-08-15 06:59:58 +0000983 /* XXX What should we do about strings with an odd length? Should
984 * we add an implicit leading zero, or a trailing zero? For now,
985 * raise an exception.
Barry Warsawe977c212000-08-15 06:07:13 +0000986 */
987 if (arglen % 2) {
Barry Warsaw16168472000-08-15 06:59:58 +0000988 PyErr_SetString(PyExc_TypeError, "Odd-length string");
Barry Warsawe977c212000-08-15 06:07:13 +0000989 return NULL;
990 }
991
992 retval = PyString_FromStringAndSize(NULL, (arglen/2));
993 if (!retval)
994 return NULL;
995 retbuf = PyString_AsString(retval);
996 if (!retbuf)
997 goto finally;
998
999 for (i=j=0; i < arglen; i += 2) {
1000 int top = to_int(Py_CHARMASK(argbuf[i]));
1001 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1002 if (top == -1 || bot == -1) {
1003 PyErr_SetString(PyExc_TypeError,
Barry Warsaw16168472000-08-15 06:59:58 +00001004 "Non-hexadecimal digit found");
Barry Warsawe977c212000-08-15 06:07:13 +00001005 goto finally;
1006 }
1007 retbuf[j++] = (top << 4) + bot;
1008 }
1009 return retval;
1010
1011 finally:
1012 Py_DECREF(retval);
1013 return NULL;
1014}
1015
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001016PyDoc_STRVAR(doc_unhexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001017"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1018\n\
1019hexstr must contain an even number of hex digits (upper or lower case).\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001020This function is also available as \"unhexlify()\"");
Barry Warsawe977c212000-08-15 06:07:13 +00001021
/*
** Value of each 7-bit ASCII character as a hexadecimal digit, or -1 when
** the character is not one.  Both 'a'-'f' and 'A'-'F' map to 10..15.
** The table is only ever read, hence const.
*/
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up a hex digit's value.  The table has 128 entries, so c must be
 * below 128; the macro itself performs no range check. */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Line-length limit applied by the quoted-printable encoder. */
#define MAXLINESIZE 76
1036
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001037PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001038
Tim Peters934c1a12002-07-02 22:24:50 +00001039static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001040binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1041{
Thomas Wouters7087f782006-03-01 23:10:05 +00001042 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001043 char ch;
1044 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001045 Py_ssize_t datalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001046 PyObject *rv;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001047 static char *kwlist[] = {"data", "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001048 int header = 0;
1049
Tim Peters934c1a12002-07-02 22:24:50 +00001050 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001051 &datalen, &header))
1052 return NULL;
1053
Barry Warsaw23164a52004-05-11 02:05:11 +00001054 /* We allocate the output same size as input, this is overkill.
1055 * The previous implementation used calloc() so we'll zero out the
1056 * memory here too, since PyMem_Malloc() does not guarantee that.
1057 */
1058 odata = (unsigned char *) PyMem_Malloc(datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001059 if (odata == NULL) {
1060 PyErr_NoMemory();
1061 return NULL;
1062 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001063 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001064
1065 in = out = 0;
1066 while (in < datalen) {
1067 if (data[in] == '=') {
1068 in++;
1069 if (in >= datalen) break;
1070 /* Soft line breaks */
Georg Brandl25aabf42006-11-16 17:08:45 +00001071 if ((data[in] == '\n') || (data[in] == '\r')) {
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001072 if (data[in] != '\n') {
1073 while (in < datalen && data[in] != '\n') in++;
1074 }
1075 if (in < datalen) in++;
1076 }
1077 else if (data[in] == '=') {
1078 /* broken case from broken python qp */
1079 odata[out++] = '=';
1080 in++;
1081 }
Tim Peters934c1a12002-07-02 22:24:50 +00001082 else if (((data[in] >= 'A' && data[in] <= 'F') ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001083 (data[in] >= 'a' && data[in] <= 'f') ||
1084 (data[in] >= '0' && data[in] <= '9')) &&
1085 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1086 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1087 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1088 /* hexval */
1089 ch = hexval(data[in]) << 4;
1090 in++;
1091 ch |= hexval(data[in]);
1092 in++;
1093 odata[out++] = ch;
1094 }
1095 else {
1096 odata[out++] = '=';
1097 }
1098 }
1099 else if (header && data[in] == '_') {
1100 odata[out++] = ' ';
1101 in++;
1102 }
1103 else {
1104 odata[out] = data[in];
1105 in++;
1106 out++;
1107 }
1108 }
Greg Warda645b302001-10-04 14:54:53 +00001109 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001110 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001111 return NULL;
1112 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001113 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001114 return rv;
1115}
1116
/*
** Write ch as two uppercase hexadecimal digits into s[0] (high nibble)
** and s[1] (low nibble).  No terminator is written.  Always returns 0.
*/
static int
to_hex (unsigned char ch, unsigned char *s)
{
	static const char digits[] = "0123456789ABCDEF";

	s[0] = digits[(ch >> 4) & 0x0f];
	s[1] = digits[ch & 0x0f];
	return 0;
}
1127
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001128PyDoc_STRVAR(doc_b2a_qp,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001129"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1130 Encode a string using quoted-printable encoding. \n\
1131\n\
1132On encoding, when istext is set, newlines are not encoded, and white \n\
1133space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001134both encoded. When quotetabs is set, space and tabs are encoded.");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001135
1136/* XXX: This is ridiculously complicated to be backward compatible
1137 * (mostly) with the quopri module. It doesn't re-create the quopri
1138 * module bug where text ending in CRLF has the CR encoded */
Tim Peters934c1a12002-07-02 22:24:50 +00001139static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001140binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1141{
Thomas Wouters7087f782006-03-01 23:10:05 +00001142 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001143 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001144 Py_ssize_t datalen = 0, odatalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001145 PyObject *rv;
1146 unsigned int linelen = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001147 static char *kwlist[] = {"data", "quotetabs", "istext",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001148 "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001149 int istext = 1;
1150 int quotetabs = 0;
1151 int header = 0;
1152 unsigned char ch;
1153 int crlf = 0;
1154 unsigned char *p;
1155
Tim Peters934c1a12002-07-02 22:24:50 +00001156 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001157 &datalen, &quotetabs, &istext, &header))
1158 return NULL;
1159
1160 /* See if this string is using CRLF line ends */
1161 /* XXX: this function has the side effect of converting all of
1162 * the end of lines to be the same depending on this detection
1163 * here */
Walter Dörwald3ebc45d2007-05-09 18:10:47 +00001164 p = (unsigned char *) memchr(data, '\n', datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001165 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1166 crlf = 1;
1167
1168 /* First, scan to see how many characters need to be encoded */
1169 in = 0;
1170 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001171 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001172 (data[in] == '=') ||
1173 (header && data[in] == '_') ||
Georg Brandl4aef7272007-03-13 22:49:43 +00001174 ((data[in] == '.') && (linelen == 0) &&
1175 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001176 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1177 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001178 ((data[in] < 33) &&
1179 (data[in] != '\r') && (data[in] != '\n') &&
Georg Brandl4aef7272007-03-13 22:49:43 +00001180 (quotetabs ||
1181 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001182 {
1183 if ((linelen + 3) >= MAXLINESIZE) {
1184 linelen = 0;
1185 if (crlf)
1186 odatalen += 3;
1187 else
1188 odatalen += 2;
1189 }
1190 linelen += 3;
1191 odatalen += 3;
1192 in++;
1193 }
1194 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001195 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001196 ((data[in] == '\n') ||
1197 ((in+1 < datalen) && (data[in] == '\r') &&
1198 (data[in+1] == '\n'))))
1199 {
1200 linelen = 0;
1201 /* Protect against whitespace on end of line */
1202 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1203 odatalen += 2;
1204 if (crlf)
1205 odatalen += 2;
1206 else
1207 odatalen += 1;
1208 if (data[in] == '\r')
1209 in += 2;
1210 else
1211 in++;
1212 }
1213 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001214 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001215 (data[in+1] != '\n') &&
1216 (linelen + 1) >= MAXLINESIZE) {
1217 linelen = 0;
1218 if (crlf)
1219 odatalen += 3;
1220 else
1221 odatalen += 2;
1222 }
1223 linelen++;
1224 odatalen++;
1225 in++;
1226 }
1227 }
1228 }
1229
Barry Warsaw23164a52004-05-11 02:05:11 +00001230 /* We allocate the output same size as input, this is overkill.
1231 * The previous implementation used calloc() so we'll zero out the
1232 * memory here too, since PyMem_Malloc() does not guarantee that.
1233 */
1234 odata = (unsigned char *) PyMem_Malloc(odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001235 if (odata == NULL) {
1236 PyErr_NoMemory();
1237 return NULL;
1238 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001239 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001240
1241 in = out = linelen = 0;
1242 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001243 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001244 (data[in] == '=') ||
1245 (header && data[in] == '_') ||
Georg Brandl4aef7272007-03-13 22:49:43 +00001246 ((data[in] == '.') && (linelen == 0) &&
1247 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001248 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1249 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001250 ((data[in] < 33) &&
1251 (data[in] != '\r') && (data[in] != '\n') &&
Georg Brandl4aef7272007-03-13 22:49:43 +00001252 (quotetabs ||
1253 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001254 {
1255 if ((linelen + 3 )>= MAXLINESIZE) {
1256 odata[out++] = '=';
1257 if (crlf) odata[out++] = '\r';
1258 odata[out++] = '\n';
1259 linelen = 0;
1260 }
1261 odata[out++] = '=';
1262 to_hex(data[in], &odata[out]);
1263 out += 2;
1264 in++;
1265 linelen += 3;
1266 }
1267 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001268 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001269 ((data[in] == '\n') ||
1270 ((in+1 < datalen) && (data[in] == '\r') &&
1271 (data[in+1] == '\n'))))
1272 {
1273 linelen = 0;
1274 /* Protect against whitespace on end of line */
1275 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1276 ch = odata[out-1];
1277 odata[out-1] = '=';
1278 to_hex(ch, &odata[out]);
1279 out += 2;
1280 }
Tim Peters934c1a12002-07-02 22:24:50 +00001281
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001282 if (crlf) odata[out++] = '\r';
1283 odata[out++] = '\n';
1284 if (data[in] == '\r')
1285 in += 2;
1286 else
1287 in++;
1288 }
1289 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001290 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001291 (data[in+1] != '\n') &&
1292 (linelen + 1) >= MAXLINESIZE) {
1293 odata[out++] = '=';
1294 if (crlf) odata[out++] = '\r';
1295 odata[out++] = '\n';
1296 linelen = 0;
1297 }
1298 linelen++;
1299 if (header && data[in] == ' ') {
1300 odata[out++] = '_';
1301 in++;
1302 }
1303 else {
1304 odata[out++] = data[in++];
1305 }
1306 }
1307 }
1308 }
Greg Warda645b302001-10-04 14:54:53 +00001309 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001310 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001311 return NULL;
1312 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001313 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001314 return rv;
1315}
Barry Warsawe977c212000-08-15 06:07:13 +00001316
Jack Jansen72781191995-08-07 14:34:15 +00001317/* List of functions defined in the module */
1318
1319static struct PyMethodDef binascii_module_methods[] = {
Barry Warsawe977c212000-08-15 06:07:13 +00001320 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1321 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1322 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1323 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1324 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1325 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1326 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1327 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1328 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1329 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1330 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1331 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1332 doc_rledecode_hqx},
1333 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1334 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
Tim Peters934c1a12002-07-02 22:24:50 +00001335 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001336 doc_a2b_qp},
Tim Peters934c1a12002-07-02 22:24:50 +00001337 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001338 doc_b2a_qp},
Barry Warsawe977c212000-08-15 06:07:13 +00001339 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001340};
1341
1342
1343/* Initialization function for the module (*must* be called initbinascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001344PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001345
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001346PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001347initbinascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001348{
1349 PyObject *m, *d, *x;
1350
1351 /* Create the module and add the functions */
1352 m = Py_InitModule("binascii", binascii_module_methods);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001353 if (m == NULL)
1354 return;
Jack Jansen72781191995-08-07 14:34:15 +00001355
1356 d = PyModule_GetDict(m);
1357 x = PyString_FromString(doc_binascii);
1358 PyDict_SetItemString(d, "__doc__", x);
Guido van Rossum5c159bd1997-08-04 23:55:25 +00001359 Py_XDECREF(x);
Jack Jansen72781191995-08-07 14:34:15 +00001360
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001361 Error = PyErr_NewException("binascii.Error", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001362 PyDict_SetItemString(d, "Error", Error);
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001363 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001364 PyDict_SetItemString(d, "Incomplete", Incomplete);
Jack Jansen72781191995-08-07 14:34:15 +00001365}