blob: fa221466681369f3409f680ed30a813f37d443a3 [file] [log] [blame]
Jack Jansen72781191995-08-07 14:34:15 +00001/*
2** Routines to represent binary data in ASCII and vice-versa
3**
4** This module currently supports the following encodings:
5** uuencode:
6** each line encodes 45 bytes (except possibly the last)
7** First char encodes (binary) length, rest data
8** each char encodes 6 bits, as follows:
9** binary: 01234567 abcdefgh ijklmnop
10** ascii: 012345 67abcd efghij klmnop
11** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12** short binary data is zero-extended (so the bits are always in the
13** right place), this does *not* reflect in the length.
** base64:
**      Line breaks are insignificant, but lines are at most 76 chars.
**      Each char encodes 6 bits, in the same order as uuencode/hqx.
**      Encoding is done via a table.
**      Short binary data is filled (in ASCII) with '='.
Jack Jansen72781191995-08-07 14:34:15 +000019** hqx:
20** File starts with introductory text, real data starts and ends
21** with colons.
22** Data consists of three similar parts: info, datafork, resourcefork.
23** Each part is protected (at the end) with a 16-bit crc
24** The binary data is run-length encoded, and then ascii-fied:
25** binary: 01234567 abcdefgh ijklmnop
26** ascii: 012345 67abcd efghij klmnop
27** ASCII encoding is table-driven, see the code.
28** Short binary data results in the runt ascii-byte being output with
29** the bits in the right place.
30**
31** While I was reading dozens of programs that encode or decode the formats
32** here (documentation? hihi:-) I have formulated Jansen's Observation:
33**
34** Programs that encode binary data in ASCII are written in
35** such a style that they are as unreadable as possible. Devices used
36** include unnecessary global variables, burying important tables
37** in unrelated sourcefiles, putting functions in include files,
38** using seemingly-descriptive variable names for different purposes,
39** calls to empty subroutines and a host of others.
40**
41** I have attempted to break with this tradition, but I guess that that
42** does make the performance sub-optimal. Oh well, too bad...
43**
44** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on RFC 1521 et al.
** Quoted-printable encoding specifies that non-printable characters (anything
** below 32 or above 126) be encoded as =XX, where XX is the hexadecimal value
** of the character. It also specifies some other behavior to enable 8-bit data
** in a mail message with little difficulty (maximum line sizes, protecting
** some cases of whitespace, etc.).
**
** Brandon Long, September 2001.
Jack Jansen72781191995-08-07 14:34:15 +000054*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
59
60static PyObject *Error;
61static PyObject *Incomplete;
62
/*
** hqx lookup table, ascii->binary.
**
** Indexed by the input byte.  An entry below 0x40 is the 6-bit value of
** a legal hqx character; the marker values defined here classify every
** other byte.
*/

#define RUNCHAR 0x90    /* marker byte used by the hqx run-length coding */

#define DONE 0x7F       /* ':'  -- terminating colon */
#define SKIP 0x7E       /* '\n' and '\r' -- silently ignored */
#define FAIL 0x7D       /* every other byte -- illegal in hqx data */

static unsigned char table_a2b_hqx[256] = {
    /* 0x00: control characters; only newline and carriage return skip */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
    /* 0x10: control characters */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x20: space and punctuation */
    FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
    /* 0x30: digits, then the colon that ends the data */
    0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
    0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x40: '@' and upper case A-O */
    0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
    0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
    /* 0x50: upper case P-Z and '[' */
    0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
    0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
    /* 0x60: backquote and lower case a-o */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
    0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
    /* 0x70: lower case p-z and the rest of 7-bit ASCII */
    0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x80 - 0xFF: bytes with the high bit set are never legal */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
123
/* hqx alphabet, binary->ascii: entry i is the character for 6-bit value i. */
static unsigned char table_b2a_hqx[] =
    "!\"#$%&'()*+,-012"
    "345689@ABCDEFGHI"
    "JKLMNPQRSTUVXYZ["
    "`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000126
/*
** base64 lookup table, ascii->binary, indexed by a 7-bit character.
** -1 marks a character that is not part of the base64 alphabet.
** Note that the pad character '=' maps to 0 rather than -1.
*/
static char table_a2b_base64[] = {
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x08 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x18 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x28 */ -1, -1, -1, 62, -1, -1, -1, 63,
    /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59,
    /* 0x38 */ 60, 61, -1, -1, -1,  0, -1, -1,
    /* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,
    /* 0x48 */  7,  8,  9, 10, 11, 12, 13, 14,
    /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22,
    /* 0x58 */ 23, 24, 25, -1, -1, -1, -1, -1,
    /* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32,
    /* 0x68 */ 33, 34, 35, 36, 37, 38, 39, 40,
    /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48,
    /* 0x78 */ 49, 50, 51, -1, -1, -1, -1, -1
};
137
/* Character used to fill out a short final base64 group. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN (PY_SSIZE_T_MAX/2 - sizeof(PyStringObject) - 3)

/* base64 alphabet, binary->ascii: entry i is the character for 6-bit value i. */
static unsigned char table_b2a_base64[] =
    "ABCDEFGHIJKLMNOP"
    "QRSTUVWXYZabcdef"
    "ghijklmnopqrstuv"
    "wxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000145
146
147
/*
** Lookup table for the 16-bit hqx CRC, indexed by one byte.
** Entry i is the CRC register after shifting byte i (placed in the high
** byte of a zero register) through polynomial 0x1021, MSB first.
*/
static unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, /* 0x00 */
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, /* 0x08 */
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6, /* 0x10 */
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de, /* 0x18 */
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485, /* 0x20 */
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d, /* 0x28 */
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4, /* 0x30 */
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc, /* 0x38 */
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823, /* 0x40 */
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b, /* 0x48 */
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12, /* 0x50 */
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a, /* 0x58 */
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41, /* 0x60 */
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49, /* 0x68 */
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70, /* 0x70 */
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78, /* 0x78 */
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f, /* 0x80 */
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067, /* 0x88 */
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e, /* 0x90 */
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256, /* 0x98 */
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d, /* 0xa0 */
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, /* 0xa8 */
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c, /* 0xb0 */
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634, /* 0xb8 */
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab, /* 0xc0 */
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3, /* 0xc8 */
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a, /* 0xd0 */
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92, /* 0xd8 */
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9, /* 0xe0 */
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1, /* 0xe8 */
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8, /* 0xf0 */
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0, /* 0xf8 */
};
182
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000183PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
Jack Jansen72781191995-08-07 14:34:15 +0000184
185static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000186binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000187{
188 unsigned char *ascii_data, *bin_data;
189 int leftbits = 0;
190 unsigned char this_ch;
191 unsigned int leftchar = 0;
192 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000193 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000194
Guido van Rossum43713e52000-02-29 13:59:29 +0000195 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000196 return NULL;
197
Martin v. Löwis73c01d42008-02-14 11:26:18 +0000198 assert(ascii_len >= 0);
199
Jack Jansen72781191995-08-07 14:34:15 +0000200 /* First byte: binary data length (in bytes) */
201 bin_len = (*ascii_data++ - ' ') & 077;
202 ascii_len--;
203
204 /* Allocate the buffer */
205 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
206 return NULL;
207 bin_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000208
Jack Jansen72781191995-08-07 14:34:15 +0000209 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
Neal Norwitzfe92eef2004-06-06 20:13:10 +0000210 /* XXX is it really best to add NULs if there's no more data */
211 this_ch = (ascii_len > 0) ? *ascii_data : 0;
Jack Jansen72781191995-08-07 14:34:15 +0000212 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
213 /*
214 ** Whitespace. Assume some spaces got eaten at
215 ** end-of-line. (We check this later)
216 */
217 this_ch = 0;
218 } else {
Jack Jansen5d957971995-11-14 10:35:19 +0000219 /* Check the character for legality
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000220 ** The 64 in stead of the expected 63 is because
221 ** there are a few uuencodes out there that use
Guido van Rossum92d89172001-01-09 02:11:57 +0000222 ** '`' as zero instead of space.
Jack Jansen5d957971995-11-14 10:35:19 +0000223 */
224 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Jack Jansen72781191995-08-07 14:34:15 +0000225 PyErr_SetString(Error, "Illegal char");
226 Py_DECREF(rv);
227 return NULL;
228 }
229 this_ch = (this_ch - ' ') & 077;
230 }
231 /*
232 ** Shift it in on the low end, and see if there's
233 ** a byte ready for output.
234 */
235 leftchar = (leftchar << 6) | (this_ch);
236 leftbits += 6;
237 if ( leftbits >= 8 ) {
238 leftbits -= 8;
239 *bin_data++ = (leftchar >> leftbits) & 0xff;
240 leftchar &= ((1 << leftbits) - 1);
241 bin_len--;
242 }
243 }
244 /*
245 ** Finally, check that if there's anything left on the line
246 ** that it's whitespace only.
247 */
248 while( ascii_len-- > 0 ) {
249 this_ch = *ascii_data++;
Guido van Rossum92d89172001-01-09 02:11:57 +0000250 /* Extra '`' may be written as padding in some cases */
251 if ( this_ch != ' ' && this_ch != ' '+64 &&
Guido van Rossum1243ae71997-07-11 18:36:28 +0000252 this_ch != '\n' && this_ch != '\r' ) {
Jack Jansen72781191995-08-07 14:34:15 +0000253 PyErr_SetString(Error, "Trailing garbage");
254 Py_DECREF(rv);
255 return NULL;
256 }
257 }
258 return rv;
259}
260
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000261PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000262
Jack Jansen72781191995-08-07 14:34:15 +0000263static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000264binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000265{
266 unsigned char *ascii_data, *bin_data;
267 int leftbits = 0;
268 unsigned char this_ch;
269 unsigned int leftchar = 0;
270 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000271 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000272
Guido van Rossum43713e52000-02-29 13:59:29 +0000273 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000274 return NULL;
275 if ( bin_len > 45 ) {
276 /* The 45 is a limit that appears in all uuencode's */
277 PyErr_SetString(Error, "At most 45 bytes at once");
278 return NULL;
279 }
280
281 /* We're lazy and allocate to much (fixed up later) */
Raymond Hettinger658717e2004-09-06 22:58:37 +0000282 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000283 return NULL;
284 ascii_data = (unsigned char *)PyString_AsString(rv);
285
286 /* Store the length */
287 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000288
Jack Jansen72781191995-08-07 14:34:15 +0000289 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
290 /* Shift the data (or padding) into our buffer */
291 if ( bin_len > 0 ) /* Data */
292 leftchar = (leftchar << 8) | *bin_data;
293 else /* Padding */
294 leftchar <<= 8;
295 leftbits += 8;
296
297 /* See if there are 6-bit groups ready */
298 while ( leftbits >= 6 ) {
299 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
300 leftbits -= 6;
301 *ascii_data++ = this_ch + ' ';
302 }
303 }
304 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000305
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000306 _PyString_Resize(&rv, (ascii_data -
307 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000308 return rv;
309}
310
Guido van Rossum2db4f471999-10-19 19:05:14 +0000311
312static int
Thomas Woutersf98db652006-03-01 21:37:32 +0000313binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000314{
Tim Peters934c1a12002-07-02 22:24:50 +0000315 /* Finds & returns the (num+1)th
Guido van Rossum2db4f471999-10-19 19:05:14 +0000316 ** valid character for base64, or -1 if none.
317 */
318
319 int ret = -1;
320 unsigned char c, b64val;
321
322 while ((slen > 0) && (ret == -1)) {
323 c = *s;
324 b64val = table_a2b_base64[c & 0x7f];
325 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
326 if (num == 0)
327 ret = *s;
328 num--;
329 }
330
331 s++;
332 slen--;
333 }
334 return ret;
335}
336
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000337PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000338
339static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000340binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000341{
342 unsigned char *ascii_data, *bin_data;
343 int leftbits = 0;
344 unsigned char this_ch;
345 unsigned int leftchar = 0;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000346 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000347 Py_ssize_t ascii_len, bin_len;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000348 int quad_pos = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000349
Guido van Rossum43713e52000-02-29 13:59:29 +0000350 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000351 return NULL;
352
Martin v. Löwis73c01d42008-02-14 11:26:18 +0000353 assert(ascii_len >= 0);
354
355 if (ascii_len > PY_SSIZE_T_MAX - 3)
356 return PyErr_NoMemory();
357
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000358 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
359
360 /* Allocate the buffer */
361 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
362 return NULL;
363 bin_data = (unsigned char *)PyString_AsString(rv);
364 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000365
366 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
367 this_ch = *ascii_data;
368
369 if (this_ch > 0x7f ||
370 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
Jack Jansenba1de3b1996-01-22 10:47:15 +0000371 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000372
373 /* Check for pad sequences and ignore
374 ** the invalid ones.
375 */
376 if (this_ch == BASE64_PAD) {
377 if ( (quad_pos < 2) ||
378 ((quad_pos == 2) &&
379 (binascii_find_valid(ascii_data, ascii_len, 1)
380 != BASE64_PAD)) )
381 {
382 continue;
383 }
384 else {
385 /* A pad sequence means no more input.
386 ** We've already interpreted the data
387 ** from the quad at this point.
388 */
389 leftbits = 0;
390 break;
391 }
392 }
393
394 this_ch = table_a2b_base64[*ascii_data];
395 if ( this_ch == (unsigned char) -1 )
396 continue;
397
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000398 /*
399 ** Shift it in on the low end, and see if there's
400 ** a byte ready for output.
401 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000402 quad_pos = (quad_pos + 1) & 0x03;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000403 leftchar = (leftchar << 6) | (this_ch);
404 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000405
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000406 if ( leftbits >= 8 ) {
407 leftbits -= 8;
408 *bin_data++ = (leftchar >> leftbits) & 0xff;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000409 bin_len++;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000410 leftchar &= ((1 << leftbits) - 1);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000411 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000412 }
413
414 if (leftbits != 0) {
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000415 PyErr_SetString(Error, "Incorrect padding");
416 Py_DECREF(rv);
417 return NULL;
418 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000419
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000420 /* And set string size correctly. If the result string is empty
421 ** (because the input was all invalid) return the shared empty
422 ** string instead; _PyString_Resize() won't do this for us.
423 */
Barry Warsaw0a51b582002-08-15 22:14:24 +0000424 if (bin_len > 0)
425 _PyString_Resize(&rv, bin_len);
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000426 else {
427 Py_DECREF(rv);
428 rv = PyString_FromString("");
429 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000430 return rv;
431}
432
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000433PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000434
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000435static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000436binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000437{
438 unsigned char *ascii_data, *bin_data;
439 int leftbits = 0;
440 unsigned char this_ch;
441 unsigned int leftchar = 0;
442 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000443 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000444
Guido van Rossum43713e52000-02-29 13:59:29 +0000445 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000446 return NULL;
Martin v. Löwis73c01d42008-02-14 11:26:18 +0000447
448 assert(bin_len >= 0);
449
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000450 if ( bin_len > BASE64_MAXBIN ) {
451 PyErr_SetString(Error, "Too much data for base64 line");
452 return NULL;
453 }
Tim Peters934c1a12002-07-02 22:24:50 +0000454
Tim Peters1fbb5772001-12-19 04:41:35 +0000455 /* We're lazy and allocate too much (fixed up later).
456 "+3" leaves room for up to two pad characters and a trailing
457 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
458 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000459 return NULL;
460 ascii_data = (unsigned char *)PyString_AsString(rv);
461
462 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
463 /* Shift the data into our buffer */
464 leftchar = (leftchar << 8) | *bin_data;
465 leftbits += 8;
466
467 /* See if there are 6-bit groups ready */
468 while ( leftbits >= 6 ) {
469 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
470 leftbits -= 6;
471 *ascii_data++ = table_b2a_base64[this_ch];
472 }
473 }
474 if ( leftbits == 2 ) {
475 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
476 *ascii_data++ = BASE64_PAD;
477 *ascii_data++ = BASE64_PAD;
478 } else if ( leftbits == 4 ) {
479 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
480 *ascii_data++ = BASE64_PAD;
Tim Peters934c1a12002-07-02 22:24:50 +0000481 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000482 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000483
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000484 _PyString_Resize(&rv, (ascii_data -
485 (unsigned char *)PyString_AsString(rv)));
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000486 return rv;
487}
488
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000489PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
Jack Jansen72781191995-08-07 14:34:15 +0000490
491static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000492binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000493{
494 unsigned char *ascii_data, *bin_data;
495 int leftbits = 0;
496 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000497 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000498 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000499 Py_ssize_t len;
Jack Jansen72781191995-08-07 14:34:15 +0000500 int done = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000501
Guido van Rossum43713e52000-02-29 13:59:29 +0000502 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000503 return NULL;
504
Martin v. Löwis73c01d42008-02-14 11:26:18 +0000505 assert(len >= 0);
506
507 if (len > PY_SSIZE_T_MAX - 2)
508 return PyErr_NoMemory();
509
Raymond Hettinger658717e2004-09-06 22:58:37 +0000510 /* Allocate a string that is too big (fixed later)
511 Add two to the initial length to prevent interning which
512 would preclude subsequent resizing. */
513 if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000514 return NULL;
515 bin_data = (unsigned char *)PyString_AsString(rv);
516
517 for( ; len > 0 ; len--, ascii_data++ ) {
518 /* Get the byte and look it up */
519 this_ch = table_a2b_hqx[*ascii_data];
520 if ( this_ch == SKIP )
521 continue;
522 if ( this_ch == FAIL ) {
523 PyErr_SetString(Error, "Illegal char");
524 Py_DECREF(rv);
525 return NULL;
526 }
527 if ( this_ch == DONE ) {
528 /* The terminating colon */
529 done = 1;
530 break;
531 }
532
533 /* Shift it into the buffer and see if any bytes are ready */
534 leftchar = (leftchar << 6) | (this_ch);
535 leftbits += 6;
536 if ( leftbits >= 8 ) {
537 leftbits -= 8;
538 *bin_data++ = (leftchar >> leftbits) & 0xff;
539 leftchar &= ((1 << leftbits) - 1);
540 }
541 }
Tim Peters934c1a12002-07-02 22:24:50 +0000542
Jack Jansen72781191995-08-07 14:34:15 +0000543 if ( leftbits && !done ) {
544 PyErr_SetString(Incomplete,
545 "String has incomplete number of bytes");
546 Py_DECREF(rv);
547 return NULL;
548 }
Guido van Rossum9c6ba5e1997-01-12 20:02:04 +0000549 _PyString_Resize(
550 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
551 if (rv) {
552 PyObject *rrv = Py_BuildValue("Oi", rv, done);
553 Py_DECREF(rv);
554 return rrv;
555 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000556
Jack Jansen72781191995-08-07 14:34:15 +0000557 return NULL;
558}
559
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000560PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
Jack Jansen72781191995-08-07 14:34:15 +0000561
562static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000563binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000564{
565 unsigned char *in_data, *out_data;
566 PyObject *rv;
567 unsigned char ch;
Thomas Woutersf98db652006-03-01 21:37:32 +0000568 Py_ssize_t in, inend, len;
Tim Peters934c1a12002-07-02 22:24:50 +0000569
Guido van Rossum43713e52000-02-29 13:59:29 +0000570 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000571 return NULL;
572
Martin v. Löwis73c01d42008-02-14 11:26:18 +0000573 assert(len >= 0);
574
575 if (len > PY_SSIZE_T_MAX / 2 - 2)
576 return PyErr_NoMemory();
577
Jack Jansen72781191995-08-07 14:34:15 +0000578 /* Worst case: output is twice as big as input (fixed later) */
Raymond Hettinger658717e2004-09-06 22:58:37 +0000579 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000580 return NULL;
581 out_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000582
Jack Jansen72781191995-08-07 14:34:15 +0000583 for( in=0; in<len; in++) {
584 ch = in_data[in];
585 if ( ch == RUNCHAR ) {
586 /* RUNCHAR. Escape it. */
587 *out_data++ = RUNCHAR;
588 *out_data++ = 0;
589 } else {
590 /* Check how many following are the same */
591 for(inend=in+1;
592 inend<len && in_data[inend] == ch &&
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000593 inend < in+255;
Jack Jansen72781191995-08-07 14:34:15 +0000594 inend++) ;
Jack Jansen0223aa11995-08-31 13:44:23 +0000595 if ( inend - in > 3 ) {
596 /* More than 3 in a row. Output RLE. */
Jack Jansen72781191995-08-07 14:34:15 +0000597 *out_data++ = ch;
598 *out_data++ = RUNCHAR;
599 *out_data++ = inend-in;
600 in = inend-1;
601 } else {
602 /* Less than 3. Output the byte itself */
603 *out_data++ = ch;
604 }
605 }
606 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000607 _PyString_Resize(&rv, (out_data -
608 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000609 return rv;
610}
611
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000612PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
Tim Peters934c1a12002-07-02 22:24:50 +0000613
Jack Jansen72781191995-08-07 14:34:15 +0000614static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000615binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000616{
617 unsigned char *ascii_data, *bin_data;
618 int leftbits = 0;
619 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000620 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000621 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000622 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000623
Guido van Rossum43713e52000-02-29 13:59:29 +0000624 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000625 return NULL;
626
Martin v. Löwis73c01d42008-02-14 11:26:18 +0000627 assert(len >= 0);
628
629 if (len > PY_SSIZE_T_MAX / 2 - 2)
630 return PyErr_NoMemory();
631
Jack Jansen72781191995-08-07 14:34:15 +0000632 /* Allocate a buffer that is at least large enough */
Raymond Hettinger658717e2004-09-06 22:58:37 +0000633 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000634 return NULL;
635 ascii_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000636
Jack Jansen72781191995-08-07 14:34:15 +0000637 for( ; len > 0 ; len--, bin_data++ ) {
638 /* Shift into our buffer, and output any 6bits ready */
639 leftchar = (leftchar << 8) | *bin_data;
640 leftbits += 8;
641 while ( leftbits >= 6 ) {
642 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
643 leftbits -= 6;
644 *ascii_data++ = table_b2a_hqx[this_ch];
645 }
646 }
647 /* Output a possible runt byte */
648 if ( leftbits ) {
649 leftchar <<= (6-leftbits);
650 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
651 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000652 _PyString_Resize(&rv, (ascii_data -
653 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000654 return rv;
655}
656
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000657PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
Tim Peters934c1a12002-07-02 22:24:50 +0000658
Jack Jansen72781191995-08-07 14:34:15 +0000659static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000660binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000661{
662 unsigned char *in_data, *out_data;
663 unsigned char in_byte, in_repeat;
664 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000665 Py_ssize_t in_len, out_len, out_len_left;
Jack Jansen72781191995-08-07 14:34:15 +0000666
Guido van Rossum43713e52000-02-29 13:59:29 +0000667 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000668 return NULL;
669
Martin v. Löwis73c01d42008-02-14 11:26:18 +0000670 assert(in_len >= 0);
671
Jack Jansen72781191995-08-07 14:34:15 +0000672 /* Empty string is a special case */
673 if ( in_len == 0 )
Georg Brandl2cfaa342006-05-29 19:39:45 +0000674 return PyString_FromString("");
Martin v. Löwis73c01d42008-02-14 11:26:18 +0000675 else if (in_len > PY_SSIZE_T_MAX / 2)
676 return PyErr_NoMemory();
Jack Jansen72781191995-08-07 14:34:15 +0000677
678 /* Allocate a buffer of reasonable size. Resized when needed */
679 out_len = in_len*2;
680 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
681 return NULL;
682 out_len_left = out_len;
683 out_data = (unsigned char *)PyString_AsString(rv);
684
685 /*
686 ** We need two macros here to get/put bytes and handle
687 ** end-of-buffer for input and output strings.
688 */
689#define INBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000690 do { \
691 if ( --in_len < 0 ) { \
692 PyErr_SetString(Incomplete, ""); \
693 Py_DECREF(rv); \
694 return NULL; \
695 } \
696 b = *in_data++; \
697 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000698
Jack Jansen72781191995-08-07 14:34:15 +0000699#define OUTBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000700 do { \
701 if ( --out_len_left < 0 ) { \
Martin v. Löwis73c01d42008-02-14 11:26:18 +0000702 if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000703 _PyString_Resize(&rv, 2*out_len); \
704 if ( rv == NULL ) return NULL; \
705 out_data = (unsigned char *)PyString_AsString(rv) \
706 + out_len; \
707 out_len_left = out_len-1; \
708 out_len = out_len * 2; \
709 } \
710 *out_data++ = b; \
711 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000712
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000713 /*
714 ** Handle first byte separately (since we have to get angry
715 ** in case of an orphaned RLE code).
716 */
717 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000718
719 if (in_byte == RUNCHAR) {
720 INBYTE(in_repeat);
721 if (in_repeat != 0) {
722 /* Note Error, not Incomplete (which is at the end
723 ** of the string only). This is a programmer error.
724 */
725 PyErr_SetString(Error, "Orphaned RLE code at start");
726 Py_DECREF(rv);
727 return NULL;
728 }
729 OUTBYTE(RUNCHAR);
730 } else {
731 OUTBYTE(in_byte);
732 }
Tim Peters934c1a12002-07-02 22:24:50 +0000733
Jack Jansen72781191995-08-07 14:34:15 +0000734 while( in_len > 0 ) {
735 INBYTE(in_byte);
736
737 if (in_byte == RUNCHAR) {
738 INBYTE(in_repeat);
739 if ( in_repeat == 0 ) {
740 /* Just an escaped RUNCHAR value */
741 OUTBYTE(RUNCHAR);
742 } else {
743 /* Pick up value and output a sequence of it */
744 in_byte = out_data[-1];
745 while ( --in_repeat > 0 )
746 OUTBYTE(in_byte);
747 }
748 } else {
749 /* Normal byte */
750 OUTBYTE(in_byte);
751 }
752 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000753 _PyString_Resize(&rv, (out_data -
754 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000755 return rv;
756}
757
/* Docstring attached to the "crc_hqx" entry of the module method table. */
PyDoc_STRVAR(doc_crc_hqx,
"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
Jack Jansen72781191995-08-07 14:34:15 +0000760
761static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000762binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000763{
764 unsigned char *bin_data;
765 unsigned int crc;
Thomas Woutersf98db652006-03-01 21:37:32 +0000766 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000767
Guido van Rossum43713e52000-02-29 13:59:29 +0000768 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
Jack Jansen72781191995-08-07 14:34:15 +0000769 return NULL;
770
Martin v. Löwis73c01d42008-02-14 11:26:18 +0000771 while(len-- > 0) {
Jack Jansen72781191995-08-07 14:34:15 +0000772 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
773 }
774
775 return Py_BuildValue("i", crc);
776}
777
/* Docstring attached to the "crc32" entry of the module method table. */
PyDoc_STRVAR(doc_crc32,
"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000780
781/* Crc - 32 BIT ANSI X3.66 CRC checksum files
782 Also known as: ISO 3307
783**********************************************************************|
784* *|
785* Demonstration program to compute the 32-bit CRC used as the frame *|
786* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
787* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
788* protocol). The 32-bit FCS was added via the Federal Register, *|
789* 1 June 1982, p.23798. I presume but don't know for certain that *|
790* this polynomial is or will be included in CCITT V.41, which *|
791* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
792* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
793* errors by a factor of 10^-5 over 16-bit FCS. *|
794* *|
795**********************************************************************|
796
797 Copyright (C) 1986 Gary S. Brown. You may use this program, or
798 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000799
Tim Peters934c1a12002-07-02 22:24:50 +0000800 First, the polynomial itself and its table of feedback terms. The
801 polynomial is
802 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
803 Note that we take it "backwards" and put the highest-order term in
804 the lowest-order bit. The X^32 term is "implied"; the LSB is the
805 X^31 term, etc. The X^0 term (usually shown as "+1") results in
806 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000807
  Note that the usual hardware shift register implementation, which
  is what we're using (we're merely optimizing it by doing eight-bit
  chunks at a time) shifts bits into the lowest-order term. In our
  implementation, that means shifting towards the right. Why do we
  do it this way? Because the calculated CRC must be transmitted in
  order from highest-order term to lowest-order term. UARTs transmit
  characters in order from LSB to MSB. By storing the CRC this way,
  we hand it to the UART in the order low-byte to high-byte; the UART
  sends each low-bit to high-bit; and the result is transmission bit
  by bit from highest- to lowest-order term without requiring any bit
  shuffling on our part. Reception works similarly.
819
820 The feedback terms table consists of 256, 32-bit entries. Notes:
821
822 1. The table can be generated at runtime if desired; code to do so
823 is shown later. It might not be obvious, but the feedback
824 terms simply represent the results of eight shift/xor opera-
825 tions for all combinations of data and CRC register values.
826
827 2. The CRC accumulation logic is the same for all CRC polynomials,
828 be they sixteen or thirty-two bits wide. You simply choose the
829 appropriate table. Alternatively, because the table can be
830 generated at runtime, you can start by generating the table for
831 the polynomial in question and use exactly the same "updcrc",
832 if your application needn't simultaneously handle two CRC
833 polynomials. (Note, however, that XMODEM is strange.)
834
835 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
836 of course, 32-bit entries work OK if the high 16 bits are zero.
837
838 4. The values must be right-shifted by eight bits by the "updcrc"
839 logic; the shift must be unsigned (bring in zeroes). On some
840 hardware you could probably optimize the shift in assembler by
841 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000842********************************************************************/
843
/*
** Feedback-term table for the reflected CRC-32 polynomial 0xedb88320:
** entry i is the result of eight shift/xor steps applied to the byte
** value i.  The table is only ever read, so it is declared const.
*/
static const unsigned long crc_32_tab[256] = {
0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
0x2d02ef8dUL
};
898
899static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000900binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000901{ /* By Jim Ahlstrom; All rights transferred to CNRI */
902 unsigned char *bin_data;
903 unsigned long crc = 0UL; /* initial value of CRC */
Thomas Woutersf98db652006-03-01 21:37:32 +0000904 Py_ssize_t len;
Tim Petersa98011c2002-07-02 20:20:08 +0000905 long result;
Tim Peters934c1a12002-07-02 22:24:50 +0000906
Guido van Rossum43713e52000-02-29 13:59:29 +0000907 if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000908 return NULL;
909
Tim Peters934c1a12002-07-02 22:24:50 +0000910 crc = ~ crc;
911#if SIZEOF_LONG > 4
912 /* only want the trailing 32 bits */
913 crc &= 0xFFFFFFFFUL;
914#endif
Martin v. Löwis73c01d42008-02-14 11:26:18 +0000915 while (len-- > 0)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000916 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
917 /* Note: (crc >> 8) MUST zero fill on left */
Tim Petersa98011c2002-07-02 20:20:08 +0000918
919 result = (long)(crc ^ 0xFFFFFFFFUL);
Tim Peters934c1a12002-07-02 22:24:50 +0000920#if SIZEOF_LONG > 4
921 /* Extend the sign bit. This is one way to ensure the result is the
922 * same across platforms. The other way would be to return an
923 * unbounded unsigned long, but the evidence suggests that lots of
924 * code outside this treats the result as if it were a signed 4-byte
925 * integer.
Tim Petersa98011c2002-07-02 20:20:08 +0000926 */
927 result |= -(result & (1L << 31));
Tim Peters934c1a12002-07-02 22:24:50 +0000928#endif
Tim Petersa98011c2002-07-02 20:20:08 +0000929 return PyInt_FromLong(result);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000930}
931
Barry Warsawe977c212000-08-15 06:07:13 +0000932
933static PyObject *
934binascii_hexlify(PyObject *self, PyObject *args)
935{
936 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000937 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000938 PyObject *retval;
939 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000940 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000941
Brett Cannon6ee7d012006-06-08 16:23:04 +0000942 if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
Barry Warsawe977c212000-08-15 06:07:13 +0000943 return NULL;
944
Martin v. Löwis73c01d42008-02-14 11:26:18 +0000945 assert(arglen >= 0);
946 if (arglen > PY_SSIZE_T_MAX / 2)
947 return PyErr_NoMemory();
948
Barry Warsawe977c212000-08-15 06:07:13 +0000949 retval = PyString_FromStringAndSize(NULL, arglen*2);
950 if (!retval)
951 return NULL;
952 retbuf = PyString_AsString(retval);
953 if (!retbuf)
954 goto finally;
955
956 /* make hex version of string, taken from shamodule.c */
957 for (i=j=0; i < arglen; i++) {
958 char c;
959 c = (argbuf[i] >> 4) & 0xf;
960 c = (c>9) ? c+'a'-10 : c + '0';
961 retbuf[j++] = c;
962 c = argbuf[i] & 0xf;
963 c = (c>9) ? c+'a'-10 : c + '0';
964 retbuf[j++] = c;
965 }
966 return retval;
967
968 finally:
969 Py_DECREF(retval);
970 return NULL;
971}
972
/* Docstring shared by the "b2a_hex" and "hexlify" method table entries. */
PyDoc_STRVAR(doc_hexlify,
"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
\n\
This function is also available as \"hexlify()\".");
Barry Warsawe977c212000-08-15 06:07:13 +0000977
978
/*
** Convert one hexadecimal digit character to its numeric value.
**
** Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for any other
** value of c.  Plain range comparisons are used instead of the <ctype.h>
** classifiers so the result does not depend on the current locale and
** the function is well defined for every int argument.
*/
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
992
993
994static PyObject *
995binascii_unhexlify(PyObject *self, PyObject *args)
996{
997 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000998 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000999 PyObject *retval;
1000 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +00001001 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +00001002
1003 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
1004 return NULL;
1005
Martin v. Löwis73c01d42008-02-14 11:26:18 +00001006 assert(arglen >= 0);
1007
Barry Warsaw16168472000-08-15 06:59:58 +00001008 /* XXX What should we do about strings with an odd length? Should
1009 * we add an implicit leading zero, or a trailing zero? For now,
1010 * raise an exception.
Barry Warsawe977c212000-08-15 06:07:13 +00001011 */
1012 if (arglen % 2) {
Barry Warsaw16168472000-08-15 06:59:58 +00001013 PyErr_SetString(PyExc_TypeError, "Odd-length string");
Barry Warsawe977c212000-08-15 06:07:13 +00001014 return NULL;
1015 }
1016
1017 retval = PyString_FromStringAndSize(NULL, (arglen/2));
1018 if (!retval)
1019 return NULL;
1020 retbuf = PyString_AsString(retval);
1021 if (!retbuf)
1022 goto finally;
1023
1024 for (i=j=0; i < arglen; i += 2) {
1025 int top = to_int(Py_CHARMASK(argbuf[i]));
1026 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1027 if (top == -1 || bot == -1) {
1028 PyErr_SetString(PyExc_TypeError,
Barry Warsaw16168472000-08-15 06:59:58 +00001029 "Non-hexadecimal digit found");
Barry Warsawe977c212000-08-15 06:07:13 +00001030 goto finally;
1031 }
1032 retbuf[j++] = (top << 4) + bot;
1033 }
1034 return retval;
1035
1036 finally:
1037 Py_DECREF(retval);
1038 return NULL;
1039}
1040
/* Docstring shared by the "a2b_hex" and "unhexlify" method table entries. */
PyDoc_STRVAR(doc_unhexlify,
"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
\n\
hexstr must contain an even number of hex digits (upper or lower case).\n\
This function is also available as \"unhexlify()\"");
Barry Warsawe977c212000-08-15 06:07:13 +00001046
/*
** Value of each 7-bit ASCII character as a hexadecimal digit: 0..15 for
** '0'-'9', 'A'-'F' and 'a'-'f', and -1 for everything else.  The table
** has only 128 entries, so it must not be indexed with values >= 128.
** It is never modified, hence const.
*/
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};
1057
/* Look up the hex-digit value of c in table_hex.  No range check is done
 * here and the table has 128 entries, so c must already be known to be
 * below 128. */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Line-length limit that b2a_qp tests against when deciding whether to
 * insert a soft line break. */
#define MAXLINESIZE 76

/* Docstring attached to the "a2b_qp" entry of the module method table. */
PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001063
Tim Peters934c1a12002-07-02 22:24:50 +00001064static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001065binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1066{
Thomas Wouters7087f782006-03-01 23:10:05 +00001067 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001068 char ch;
1069 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001070 Py_ssize_t datalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001071 PyObject *rv;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001072 static char *kwlist[] = {"data", "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001073 int header = 0;
1074
Tim Peters934c1a12002-07-02 22:24:50 +00001075 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001076 &datalen, &header))
1077 return NULL;
1078
Barry Warsaw23164a52004-05-11 02:05:11 +00001079 /* We allocate the output same size as input, this is overkill.
1080 * The previous implementation used calloc() so we'll zero out the
1081 * memory here too, since PyMem_Malloc() does not guarantee that.
1082 */
1083 odata = (unsigned char *) PyMem_Malloc(datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001084 if (odata == NULL) {
1085 PyErr_NoMemory();
1086 return NULL;
1087 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001088 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001089
1090 in = out = 0;
1091 while (in < datalen) {
1092 if (data[in] == '=') {
1093 in++;
1094 if (in >= datalen) break;
1095 /* Soft line breaks */
Georg Brandldd3bffb2006-11-16 17:08:48 +00001096 if ((data[in] == '\n') || (data[in] == '\r')) {
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001097 if (data[in] != '\n') {
1098 while (in < datalen && data[in] != '\n') in++;
1099 }
1100 if (in < datalen) in++;
1101 }
1102 else if (data[in] == '=') {
1103 /* broken case from broken python qp */
1104 odata[out++] = '=';
1105 in++;
1106 }
Tim Peters934c1a12002-07-02 22:24:50 +00001107 else if (((data[in] >= 'A' && data[in] <= 'F') ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001108 (data[in] >= 'a' && data[in] <= 'f') ||
1109 (data[in] >= '0' && data[in] <= '9')) &&
1110 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1111 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1112 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1113 /* hexval */
1114 ch = hexval(data[in]) << 4;
1115 in++;
1116 ch |= hexval(data[in]);
1117 in++;
1118 odata[out++] = ch;
1119 }
1120 else {
1121 odata[out++] = '=';
1122 }
1123 }
1124 else if (header && data[in] == '_') {
1125 odata[out++] = ' ';
1126 in++;
1127 }
1128 else {
1129 odata[out] = data[in];
1130 in++;
1131 out++;
1132 }
1133 }
Greg Warda645b302001-10-04 14:54:53 +00001134 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001135 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001136 return NULL;
1137 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001138 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001139 return rv;
1140}
1141
/*
** Write the two uppercase hexadecimal digits of ch to s[0] (high nibble)
** and s[1] (low nibble).  No terminator is written.  Always returns 0.
*/
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0xf];
    s[1] = digits[ch & 0xf];
    return 0;
}
1152
/* Docstring attached to the "b2a_qp" entry of the module method table. */
PyDoc_STRVAR(doc_b2a_qp,
"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
 Encode a string using quoted-printable encoding. \n\
\n\
On encoding, when istext is set, newlines are not encoded, and white \n\
space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
both encoded. When quotetabs is set, space and tabs are encoded.");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001160
1161/* XXX: This is ridiculously complicated to be backward compatible
1162 * (mostly) with the quopri module. It doesn't re-create the quopri
1163 * module bug where text ending in CRLF has the CR encoded */
Tim Peters934c1a12002-07-02 22:24:50 +00001164static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001165binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1166{
Thomas Wouters7087f782006-03-01 23:10:05 +00001167 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001168 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001169 Py_ssize_t datalen = 0, odatalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001170 PyObject *rv;
1171 unsigned int linelen = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001172 static char *kwlist[] = {"data", "quotetabs", "istext",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001173 "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001174 int istext = 1;
1175 int quotetabs = 0;
1176 int header = 0;
1177 unsigned char ch;
1178 int crlf = 0;
1179 unsigned char *p;
1180
Tim Peters934c1a12002-07-02 22:24:50 +00001181 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001182 &datalen, &quotetabs, &istext, &header))
1183 return NULL;
1184
1185 /* See if this string is using CRLF line ends */
1186 /* XXX: this function has the side effect of converting all of
1187 * the end of lines to be the same depending on this detection
1188 * here */
Walter Dörwald0ac60612007-05-09 18:13:53 +00001189 p = (unsigned char *) memchr(data, '\n', datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001190 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1191 crlf = 1;
1192
1193 /* First, scan to see how many characters need to be encoded */
1194 in = 0;
1195 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001196 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001197 (data[in] == '=') ||
1198 (header && data[in] == '_') ||
Kristján Valur Jónssonb9d39912007-04-22 10:18:46 +00001199 ((data[in] == '.') && (linelen == 0) &&
1200 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001201 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1202 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001203 ((data[in] < 33) &&
1204 (data[in] != '\r') && (data[in] != '\n') &&
Kristján Valur Jónssonb9d39912007-04-22 10:18:46 +00001205 (quotetabs ||
1206 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001207 {
1208 if ((linelen + 3) >= MAXLINESIZE) {
1209 linelen = 0;
1210 if (crlf)
1211 odatalen += 3;
1212 else
1213 odatalen += 2;
1214 }
1215 linelen += 3;
1216 odatalen += 3;
1217 in++;
1218 }
1219 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001220 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001221 ((data[in] == '\n') ||
1222 ((in+1 < datalen) && (data[in] == '\r') &&
1223 (data[in+1] == '\n'))))
1224 {
1225 linelen = 0;
1226 /* Protect against whitespace on end of line */
1227 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1228 odatalen += 2;
1229 if (crlf)
1230 odatalen += 2;
1231 else
1232 odatalen += 1;
1233 if (data[in] == '\r')
1234 in += 2;
1235 else
1236 in++;
1237 }
1238 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001239 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001240 (data[in+1] != '\n') &&
1241 (linelen + 1) >= MAXLINESIZE) {
1242 linelen = 0;
1243 if (crlf)
1244 odatalen += 3;
1245 else
1246 odatalen += 2;
1247 }
1248 linelen++;
1249 odatalen++;
1250 in++;
1251 }
1252 }
1253 }
1254
Barry Warsaw23164a52004-05-11 02:05:11 +00001255 /* We allocate the output same size as input, this is overkill.
1256 * The previous implementation used calloc() so we'll zero out the
1257 * memory here too, since PyMem_Malloc() does not guarantee that.
1258 */
1259 odata = (unsigned char *) PyMem_Malloc(odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001260 if (odata == NULL) {
1261 PyErr_NoMemory();
1262 return NULL;
1263 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001264 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001265
1266 in = out = linelen = 0;
1267 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001268 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001269 (data[in] == '=') ||
1270 (header && data[in] == '_') ||
Kristján Valur Jónssonb9d39912007-04-22 10:18:46 +00001271 ((data[in] == '.') && (linelen == 0) &&
1272 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001273 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1274 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001275 ((data[in] < 33) &&
1276 (data[in] != '\r') && (data[in] != '\n') &&
Kristján Valur Jónssonb9d39912007-04-22 10:18:46 +00001277 (quotetabs ||
1278 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001279 {
1280 if ((linelen + 3 )>= MAXLINESIZE) {
1281 odata[out++] = '=';
1282 if (crlf) odata[out++] = '\r';
1283 odata[out++] = '\n';
1284 linelen = 0;
1285 }
1286 odata[out++] = '=';
1287 to_hex(data[in], &odata[out]);
1288 out += 2;
1289 in++;
1290 linelen += 3;
1291 }
1292 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001293 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001294 ((data[in] == '\n') ||
1295 ((in+1 < datalen) && (data[in] == '\r') &&
1296 (data[in+1] == '\n'))))
1297 {
1298 linelen = 0;
1299 /* Protect against whitespace on end of line */
1300 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1301 ch = odata[out-1];
1302 odata[out-1] = '=';
1303 to_hex(ch, &odata[out]);
1304 out += 2;
1305 }
Tim Peters934c1a12002-07-02 22:24:50 +00001306
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001307 if (crlf) odata[out++] = '\r';
1308 odata[out++] = '\n';
1309 if (data[in] == '\r')
1310 in += 2;
1311 else
1312 in++;
1313 }
1314 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001315 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001316 (data[in+1] != '\n') &&
1317 (linelen + 1) >= MAXLINESIZE) {
1318 odata[out++] = '=';
1319 if (crlf) odata[out++] = '\r';
1320 odata[out++] = '\n';
1321 linelen = 0;
1322 }
1323 linelen++;
1324 if (header && data[in] == ' ') {
1325 odata[out++] = '_';
1326 in++;
1327 }
1328 else {
1329 odata[out++] = data[in++];
1330 }
1331 }
1332 }
1333 }
Greg Warda645b302001-10-04 14:54:53 +00001334 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001335 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001336 return NULL;
1337 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001338 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001339 return rv;
1340}
Barry Warsawe977c212000-08-15 06:07:13 +00001341
Jack Jansen72781191995-08-07 14:34:15 +00001342/* List of functions defined in the module */
1343
/* Method table handed to Py_InitModule() by initbinascii().  Each entry
 * gives the Python-visible name, the C function, the calling-convention
 * flags and the docstring.  "hexlify"/"unhexlify" are second names for the
 * same C functions as "b2a_hex"/"a2b_hex".  The two qp functions take
 * keyword arguments, so they are cast to PyCFunction and flagged
 * METH_KEYWORDS. */
static struct PyMethodDef binascii_module_methods[] = {
    {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
    {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
    {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
    {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
    {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
    {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
    {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
    {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
    {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
    {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
    {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
    {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
     doc_rledecode_hqx},
    {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
    {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
    {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
     doc_a2b_qp},
    {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
     doc_b2a_qp},
    {NULL, NULL} /* sentinel */
};
1366
1367
1368/* Initialization function for the module (*must* be called initbinascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001369PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001370
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001371PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001372initbinascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001373{
1374 PyObject *m, *d, *x;
1375
1376 /* Create the module and add the functions */
1377 m = Py_InitModule("binascii", binascii_module_methods);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001378 if (m == NULL)
1379 return;
Jack Jansen72781191995-08-07 14:34:15 +00001380
1381 d = PyModule_GetDict(m);
1382 x = PyString_FromString(doc_binascii);
1383 PyDict_SetItemString(d, "__doc__", x);
Guido van Rossum5c159bd1997-08-04 23:55:25 +00001384 Py_XDECREF(x);
Jack Jansen72781191995-08-07 14:34:15 +00001385
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001386 Error = PyErr_NewException("binascii.Error", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001387 PyDict_SetItemString(d, "Error", Error);
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001388 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001389 PyDict_SetItemString(d, "Incomplete", Incomplete);
Jack Jansen72781191995-08-07 14:34:15 +00001390}