blob: fc7f111c7534b2579971358dca8f214995a8340a [file] [log] [blame]
Jack Jansen72781191995-08-07 14:34:15 +00001/*
2** Routines to represent binary data in ASCII and vice-versa
3**
4** This module currently supports the following encodings:
5** uuencode:
6** each line encodes 45 bytes (except possibly the last)
7** First char encodes (binary) length, rest data
8** each char encodes 6 bits, as follows:
9** binary: 01234567 abcdefgh ijklmnop
10** ascii: 012345 67abcd efghij klmnop
11** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12** short binary data is zero-extended (so the bits are always in the
13** right place), this does *not* reflect in the length.
Jack Jansen84bbc2e1995-10-04 16:38:44 +000014** base64:
15** Line breaks are insignificant, but lines are at most 76 chars
16** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17** is done via a table.
18** Short binary data is filled (in ASCII) with '='.
Jack Jansen72781191995-08-07 14:34:15 +000019** hqx:
20** File starts with introductory text, real data starts and ends
21** with colons.
22** Data consists of three similar parts: info, datafork, resourcefork.
23** Each part is protected (at the end) with a 16-bit crc
24** The binary data is run-length encoded, and then ascii-fied:
25** binary: 01234567 abcdefgh ijklmnop
26** ascii: 012345 67abcd efghij klmnop
27** ASCII encoding is table-driven, see the code.
28** Short binary data results in the runt ascii-byte being output with
29** the bits in the right place.
30**
31** While I was reading dozens of programs that encode or decode the formats
32** here (documentation? hihi:-) I have formulated Jansen's Observation:
33**
34** Programs that encode binary data in ASCII are written in
35** such a style that they are as unreadable as possible. Devices used
36** include unnecessary global variables, burying important tables
37** in unrelated sourcefiles, putting functions in include files,
38** using seemingly-descriptive variable names for different purposes,
39** calls to empty subroutines and a host of others.
40**
41** I have attempted to break with this tradition, but I guess that that
42** does make the performance sub-optimal. Oh well, too bad...
43**
44** Jack Jansen, CWI, July 1995.
Tim Peters934c1a12002-07-02 22:24:50 +000045**
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000046** Added support for quoted-printable encoding, based on rfc 1521 et al
Tim Peters934c1a12002-07-02 22:24:50 +000047** quoted-printable encoding specifies that non printable characters (anything
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000048** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49** of the character. It also specifies some other behavior to enable 8bit data
Tim Peters934c1a12002-07-02 22:24:50 +000050** in a mail message with little difficulty (maximum line sizes, protecting
51** some cases of whitespace, etc).
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000052**
53** Brandon Long, September 2001.
Jack Jansen72781191995-08-07 14:34:15 +000054*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
59
/* Exception object handed to PyErr_SetString() by the codecs in this file
** when the input is malformed (e.g. "Illegal char", "Trailing garbage",
** "Incorrect padding").
*/
static PyObject *Error;
/* Exception object handed to PyErr_SetString() by the hqx decoders when the
** input stops in the middle of a unit (leftover bits, truncated RLE pair).
*/
static PyObject *Incomplete;
62
63/*
64** hqx lookup table, ascii->binary.
65*/
66
/* Byte that introduces a run-length pair in the hqx RLE layer. */
#define RUNCHAR 0x90

/* Marker values stored in table_a2b_hqx for characters that do not
** carry six data bits.  They are all larger than 0x3F, the largest
** real six-bit value, so they cannot be confused with data.
*/
#define DONE 0x7F   /* ':' - terminates the data */
#define SKIP 0x7E   /* '\n' and '\r' - silently ignored */
#define FAIL 0x7D   /* any other character - illegal */

/* ascii -> six-bit value, indexed by the input byte.  The table is only
** ever read, so it is declared const.
*/
static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
123
/* six-bit value -> hqx character; the inverse of table_a2b_hqx.
** Read-only, hence const.
*/
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000126
/* ascii -> six-bit value for base64, indexed by a 7-bit character code.
** Characters outside the base64 alphabet map to -1; the pad character
** '=' maps to 0.  Read-only, hence const.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
137
/* Character used to fill an incomplete final base64 quad. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory.
** binascii_b2a_base64() allocates bin_len*2 + 3 bytes, so keeping
** bin_len at or below this value keeps that size within PY_SSIZE_T_MAX.
*/
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000142
/* six-bit value -> base64 character.  Read-only, hence const. */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000145
146
147
/* Lookup table for the 16-bit hqx CRC, indexed by
** (high byte of the running crc) XOR (next data byte); see
** binascii_crc_hqx().  Read-only, hence const.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
182
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000183PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
Jack Jansen72781191995-08-07 14:34:15 +0000184
185static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000186binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000187{
188 unsigned char *ascii_data, *bin_data;
189 int leftbits = 0;
190 unsigned char this_ch;
191 unsigned int leftchar = 0;
192 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000193 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000194
Guido van Rossum43713e52000-02-29 13:59:29 +0000195 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000196 return NULL;
197
198 /* First byte: binary data length (in bytes) */
199 bin_len = (*ascii_data++ - ' ') & 077;
200 ascii_len--;
201
202 /* Allocate the buffer */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000203 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000204 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000205 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000206
Jack Jansen72781191995-08-07 14:34:15 +0000207 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
Neal Norwitzfe92eef2004-06-06 20:13:10 +0000208 /* XXX is it really best to add NULs if there's no more data */
209 this_ch = (ascii_len > 0) ? *ascii_data : 0;
Jack Jansen72781191995-08-07 14:34:15 +0000210 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
211 /*
212 ** Whitespace. Assume some spaces got eaten at
213 ** end-of-line. (We check this later)
214 */
215 this_ch = 0;
216 } else {
Jack Jansen5d957971995-11-14 10:35:19 +0000217 /* Check the character for legality
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000218 ** The 64 in stead of the expected 63 is because
219 ** there are a few uuencodes out there that use
Guido van Rossum92d89172001-01-09 02:11:57 +0000220 ** '`' as zero instead of space.
Jack Jansen5d957971995-11-14 10:35:19 +0000221 */
222 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Jack Jansen72781191995-08-07 14:34:15 +0000223 PyErr_SetString(Error, "Illegal char");
224 Py_DECREF(rv);
225 return NULL;
226 }
227 this_ch = (this_ch - ' ') & 077;
228 }
229 /*
230 ** Shift it in on the low end, and see if there's
231 ** a byte ready for output.
232 */
233 leftchar = (leftchar << 6) | (this_ch);
234 leftbits += 6;
235 if ( leftbits >= 8 ) {
236 leftbits -= 8;
237 *bin_data++ = (leftchar >> leftbits) & 0xff;
238 leftchar &= ((1 << leftbits) - 1);
239 bin_len--;
240 }
241 }
242 /*
243 ** Finally, check that if there's anything left on the line
244 ** that it's whitespace only.
245 */
246 while( ascii_len-- > 0 ) {
247 this_ch = *ascii_data++;
Guido van Rossum92d89172001-01-09 02:11:57 +0000248 /* Extra '`' may be written as padding in some cases */
249 if ( this_ch != ' ' && this_ch != ' '+64 &&
Guido van Rossum1243ae71997-07-11 18:36:28 +0000250 this_ch != '\n' && this_ch != '\r' ) {
Jack Jansen72781191995-08-07 14:34:15 +0000251 PyErr_SetString(Error, "Trailing garbage");
252 Py_DECREF(rv);
253 return NULL;
254 }
255 }
256 return rv;
257}
258
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000259PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000260
Jack Jansen72781191995-08-07 14:34:15 +0000261static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000262binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000263{
264 unsigned char *ascii_data, *bin_data;
265 int leftbits = 0;
266 unsigned char this_ch;
267 unsigned int leftchar = 0;
268 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000269 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000270
Guido van Rossum43713e52000-02-29 13:59:29 +0000271 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000272 return NULL;
273 if ( bin_len > 45 ) {
274 /* The 45 is a limit that appears in all uuencode's */
275 PyErr_SetString(Error, "At most 45 bytes at once");
276 return NULL;
277 }
278
279 /* We're lazy and allocate to much (fixed up later) */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000280 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000281 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000282 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000283
284 /* Store the length */
285 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000286
Jack Jansen72781191995-08-07 14:34:15 +0000287 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
288 /* Shift the data (or padding) into our buffer */
289 if ( bin_len > 0 ) /* Data */
290 leftchar = (leftchar << 8) | *bin_data;
291 else /* Padding */
292 leftchar <<= 8;
293 leftbits += 8;
294
295 /* See if there are 6-bit groups ready */
296 while ( leftbits >= 6 ) {
297 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
298 leftbits -= 6;
299 *ascii_data++ = this_ch + ' ';
300 }
301 }
302 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000303
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000304 if (PyBytes_Resize(rv,
305 (ascii_data -
306 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
307 Py_DECREF(rv);
308 rv = NULL;
309 }
Jack Jansen72781191995-08-07 14:34:15 +0000310 return rv;
311}
312
Guido van Rossum2db4f471999-10-19 19:05:14 +0000313
314static int
Thomas Woutersf98db652006-03-01 21:37:32 +0000315binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000316{
Tim Peters934c1a12002-07-02 22:24:50 +0000317 /* Finds & returns the (num+1)th
Guido van Rossum2db4f471999-10-19 19:05:14 +0000318 ** valid character for base64, or -1 if none.
319 */
320
321 int ret = -1;
322 unsigned char c, b64val;
323
324 while ((slen > 0) && (ret == -1)) {
325 c = *s;
326 b64val = table_a2b_base64[c & 0x7f];
327 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
328 if (num == 0)
329 ret = *s;
330 num--;
331 }
332
333 s++;
334 slen--;
335 }
336 return ret;
337}
338
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000339PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000340
341static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000342binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000343{
344 unsigned char *ascii_data, *bin_data;
345 int leftbits = 0;
346 unsigned char this_ch;
347 unsigned int leftchar = 0;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000348 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000349 Py_ssize_t ascii_len, bin_len;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000350 int quad_pos = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000351
Guido van Rossum43713e52000-02-29 13:59:29 +0000352 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000353 return NULL;
354
355 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
356
357 /* Allocate the buffer */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000358 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000359 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000360 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000361 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000362
363 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
364 this_ch = *ascii_data;
365
366 if (this_ch > 0x7f ||
367 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
Jack Jansenba1de3b1996-01-22 10:47:15 +0000368 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000369
370 /* Check for pad sequences and ignore
371 ** the invalid ones.
372 */
373 if (this_ch == BASE64_PAD) {
374 if ( (quad_pos < 2) ||
375 ((quad_pos == 2) &&
376 (binascii_find_valid(ascii_data, ascii_len, 1)
377 != BASE64_PAD)) )
378 {
379 continue;
380 }
381 else {
382 /* A pad sequence means no more input.
383 ** We've already interpreted the data
384 ** from the quad at this point.
385 */
386 leftbits = 0;
387 break;
388 }
389 }
390
391 this_ch = table_a2b_base64[*ascii_data];
392 if ( this_ch == (unsigned char) -1 )
393 continue;
394
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000395 /*
396 ** Shift it in on the low end, and see if there's
397 ** a byte ready for output.
398 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000399 quad_pos = (quad_pos + 1) & 0x03;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000400 leftchar = (leftchar << 6) | (this_ch);
401 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000402
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000403 if ( leftbits >= 8 ) {
404 leftbits -= 8;
405 *bin_data++ = (leftchar >> leftbits) & 0xff;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000406 bin_len++;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000407 leftchar &= ((1 << leftbits) - 1);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000408 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000409 }
410
411 if (leftbits != 0) {
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000412 PyErr_SetString(Error, "Incorrect padding");
413 Py_DECREF(rv);
414 return NULL;
415 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000416
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000417 /* And set string size correctly. If the result string is empty
418 ** (because the input was all invalid) return the shared empty
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000419 ** string instead; PyBytes_Resize() won't do this for us.
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000420 */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000421 if (bin_len > 0) {
422 if (PyBytes_Resize(rv, bin_len) < 0) {
423 Py_DECREF(rv);
424 rv = NULL;
425 }
426 }
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000427 else {
428 Py_DECREF(rv);
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000429 rv = PyBytes_FromStringAndSize("", 0);
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000430 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000431 return rv;
432}
433
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000434PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000435
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000436static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000437binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000438{
439 unsigned char *ascii_data, *bin_data;
440 int leftbits = 0;
441 unsigned char this_ch;
442 unsigned int leftchar = 0;
443 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000444 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000445
Guido van Rossum43713e52000-02-29 13:59:29 +0000446 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000447 return NULL;
448 if ( bin_len > BASE64_MAXBIN ) {
449 PyErr_SetString(Error, "Too much data for base64 line");
450 return NULL;
451 }
Tim Peters934c1a12002-07-02 22:24:50 +0000452
Tim Peters1fbb5772001-12-19 04:41:35 +0000453 /* We're lazy and allocate too much (fixed up later).
454 "+3" leaves room for up to two pad characters and a trailing
455 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000456 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000457 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000458 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000459
460 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
461 /* Shift the data into our buffer */
462 leftchar = (leftchar << 8) | *bin_data;
463 leftbits += 8;
464
465 /* See if there are 6-bit groups ready */
466 while ( leftbits >= 6 ) {
467 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
468 leftbits -= 6;
469 *ascii_data++ = table_b2a_base64[this_ch];
470 }
471 }
472 if ( leftbits == 2 ) {
473 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
474 *ascii_data++ = BASE64_PAD;
475 *ascii_data++ = BASE64_PAD;
476 } else if ( leftbits == 4 ) {
477 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
478 *ascii_data++ = BASE64_PAD;
Tim Peters934c1a12002-07-02 22:24:50 +0000479 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000480 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000481
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000482 if (PyBytes_Resize(rv,
483 (ascii_data -
484 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
485 Py_DECREF(rv);
486 rv = NULL;
487 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000488 return rv;
489}
490
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000491PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
Jack Jansen72781191995-08-07 14:34:15 +0000492
493static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000494binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000495{
496 unsigned char *ascii_data, *bin_data;
497 int leftbits = 0;
498 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000499 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000500 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000501 Py_ssize_t len;
Jack Jansen72781191995-08-07 14:34:15 +0000502 int done = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000503
Guido van Rossum43713e52000-02-29 13:59:29 +0000504 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000505 return NULL;
506
Raymond Hettinger658717e2004-09-06 22:58:37 +0000507 /* Allocate a string that is too big (fixed later)
508 Add two to the initial length to prevent interning which
509 would preclude subsequent resizing. */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000510 if ( (rv=PyBytes_FromStringAndSize(NULL, len+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000511 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000512 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000513
514 for( ; len > 0 ; len--, ascii_data++ ) {
515 /* Get the byte and look it up */
516 this_ch = table_a2b_hqx[*ascii_data];
517 if ( this_ch == SKIP )
518 continue;
519 if ( this_ch == FAIL ) {
520 PyErr_SetString(Error, "Illegal char");
521 Py_DECREF(rv);
522 return NULL;
523 }
524 if ( this_ch == DONE ) {
525 /* The terminating colon */
526 done = 1;
527 break;
528 }
529
530 /* Shift it into the buffer and see if any bytes are ready */
531 leftchar = (leftchar << 6) | (this_ch);
532 leftbits += 6;
533 if ( leftbits >= 8 ) {
534 leftbits -= 8;
535 *bin_data++ = (leftchar >> leftbits) & 0xff;
536 leftchar &= ((1 << leftbits) - 1);
537 }
538 }
Tim Peters934c1a12002-07-02 22:24:50 +0000539
Jack Jansen72781191995-08-07 14:34:15 +0000540 if ( leftbits && !done ) {
541 PyErr_SetString(Incomplete,
542 "String has incomplete number of bytes");
543 Py_DECREF(rv);
544 return NULL;
545 }
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000546 if (PyBytes_Resize(rv,
547 (bin_data -
548 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
549 Py_DECREF(rv);
550 rv = NULL;
551 }
Guido van Rossum9c6ba5e1997-01-12 20:02:04 +0000552 if (rv) {
553 PyObject *rrv = Py_BuildValue("Oi", rv, done);
554 Py_DECREF(rv);
555 return rrv;
556 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000557
Jack Jansen72781191995-08-07 14:34:15 +0000558 return NULL;
559}
560
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000561PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
Jack Jansen72781191995-08-07 14:34:15 +0000562
563static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000564binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000565{
566 unsigned char *in_data, *out_data;
567 PyObject *rv;
568 unsigned char ch;
Thomas Woutersf98db652006-03-01 21:37:32 +0000569 Py_ssize_t in, inend, len;
Tim Peters934c1a12002-07-02 22:24:50 +0000570
Guido van Rossum43713e52000-02-29 13:59:29 +0000571 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000572 return NULL;
573
574 /* Worst case: output is twice as big as input (fixed later) */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000575 if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000576 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000577 out_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000578
Jack Jansen72781191995-08-07 14:34:15 +0000579 for( in=0; in<len; in++) {
580 ch = in_data[in];
581 if ( ch == RUNCHAR ) {
582 /* RUNCHAR. Escape it. */
583 *out_data++ = RUNCHAR;
584 *out_data++ = 0;
585 } else {
586 /* Check how many following are the same */
587 for(inend=in+1;
588 inend<len && in_data[inend] == ch &&
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000589 inend < in+255;
Jack Jansen72781191995-08-07 14:34:15 +0000590 inend++) ;
Jack Jansen0223aa11995-08-31 13:44:23 +0000591 if ( inend - in > 3 ) {
592 /* More than 3 in a row. Output RLE. */
Jack Jansen72781191995-08-07 14:34:15 +0000593 *out_data++ = ch;
594 *out_data++ = RUNCHAR;
595 *out_data++ = inend-in;
596 in = inend-1;
597 } else {
598 /* Less than 3. Output the byte itself */
599 *out_data++ = ch;
600 }
601 }
602 }
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000603 if (PyBytes_Resize(rv,
604 (out_data -
605 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
606 Py_DECREF(rv);
607 rv = NULL;
608 }
Jack Jansen72781191995-08-07 14:34:15 +0000609 return rv;
610}
611
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000612PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
Tim Peters934c1a12002-07-02 22:24:50 +0000613
Jack Jansen72781191995-08-07 14:34:15 +0000614static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000615binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000616{
617 unsigned char *ascii_data, *bin_data;
618 int leftbits = 0;
619 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000620 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000621 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000622 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000623
Guido van Rossum43713e52000-02-29 13:59:29 +0000624 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000625 return NULL;
626
627 /* Allocate a buffer that is at least large enough */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000628 if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000629 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000630 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000631
Jack Jansen72781191995-08-07 14:34:15 +0000632 for( ; len > 0 ; len--, bin_data++ ) {
633 /* Shift into our buffer, and output any 6bits ready */
634 leftchar = (leftchar << 8) | *bin_data;
635 leftbits += 8;
636 while ( leftbits >= 6 ) {
637 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
638 leftbits -= 6;
639 *ascii_data++ = table_b2a_hqx[this_ch];
640 }
641 }
642 /* Output a possible runt byte */
643 if ( leftbits ) {
644 leftchar <<= (6-leftbits);
645 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
646 }
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000647 if (PyBytes_Resize(rv,
648 (ascii_data -
649 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
650 Py_DECREF(rv);
651 rv = NULL;
652 }
Jack Jansen72781191995-08-07 14:34:15 +0000653 return rv;
654}
655
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000656PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
Tim Peters934c1a12002-07-02 22:24:50 +0000657
Jack Jansen72781191995-08-07 14:34:15 +0000658static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000659binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000660{
661 unsigned char *in_data, *out_data;
662 unsigned char in_byte, in_repeat;
663 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000664 Py_ssize_t in_len, out_len, out_len_left;
Jack Jansen72781191995-08-07 14:34:15 +0000665
Guido van Rossum43713e52000-02-29 13:59:29 +0000666 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000667 return NULL;
668
669 /* Empty string is a special case */
670 if ( in_len == 0 )
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000671 return PyBytes_FromStringAndSize("", 0);
Jack Jansen72781191995-08-07 14:34:15 +0000672
673 /* Allocate a buffer of reasonable size. Resized when needed */
674 out_len = in_len*2;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000675 if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000676 return NULL;
677 out_len_left = out_len;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000678 out_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000679
680 /*
681 ** We need two macros here to get/put bytes and handle
682 ** end-of-buffer for input and output strings.
683 */
684#define INBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000685 do { \
686 if ( --in_len < 0 ) { \
687 PyErr_SetString(Incomplete, ""); \
688 Py_DECREF(rv); \
689 return NULL; \
690 } \
691 b = *in_data++; \
692 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000693
Jack Jansen72781191995-08-07 14:34:15 +0000694#define OUTBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000695 do { \
696 if ( --out_len_left < 0 ) { \
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000697 if (PyBytes_Resize(rv, 2*out_len) < 0) \
698 { Py_DECREF(rv); return NULL; } \
699 out_data = (unsigned char *)PyBytes_AS_STRING(rv) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000700 + out_len; \
701 out_len_left = out_len-1; \
702 out_len = out_len * 2; \
703 } \
704 *out_data++ = b; \
705 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000706
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000707 /*
708 ** Handle first byte separately (since we have to get angry
709 ** in case of an orphaned RLE code).
710 */
711 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000712
713 if (in_byte == RUNCHAR) {
714 INBYTE(in_repeat);
715 if (in_repeat != 0) {
716 /* Note Error, not Incomplete (which is at the end
717 ** of the string only). This is a programmer error.
718 */
719 PyErr_SetString(Error, "Orphaned RLE code at start");
720 Py_DECREF(rv);
721 return NULL;
722 }
723 OUTBYTE(RUNCHAR);
724 } else {
725 OUTBYTE(in_byte);
726 }
Tim Peters934c1a12002-07-02 22:24:50 +0000727
Jack Jansen72781191995-08-07 14:34:15 +0000728 while( in_len > 0 ) {
729 INBYTE(in_byte);
730
731 if (in_byte == RUNCHAR) {
732 INBYTE(in_repeat);
733 if ( in_repeat == 0 ) {
734 /* Just an escaped RUNCHAR value */
735 OUTBYTE(RUNCHAR);
736 } else {
737 /* Pick up value and output a sequence of it */
738 in_byte = out_data[-1];
739 while ( --in_repeat > 0 )
740 OUTBYTE(in_byte);
741 }
742 } else {
743 /* Normal byte */
744 OUTBYTE(in_byte);
745 }
746 }
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000747 if (PyBytes_Resize(rv,
748 (out_data -
749 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
750 Py_DECREF(rv);
751 rv = NULL;
752 }
Jack Jansen72781191995-08-07 14:34:15 +0000753 return rv;
754}
755
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000756PyDoc_STRVAR(doc_crc_hqx,
757"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
Jack Jansen72781191995-08-07 14:34:15 +0000758
759static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000760binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000761{
762 unsigned char *bin_data;
763 unsigned int crc;
Thomas Woutersf98db652006-03-01 21:37:32 +0000764 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000765
Guido van Rossum43713e52000-02-29 13:59:29 +0000766 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
Jack Jansen72781191995-08-07 14:34:15 +0000767 return NULL;
768
769 while(len--) {
770 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
771 }
772
773 return Py_BuildValue("i", crc);
774}
775
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000776PyDoc_STRVAR(doc_crc32,
777"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000778
779/* Crc - 32 BIT ANSI X3.66 CRC checksum files
780 Also known as: ISO 3307
781**********************************************************************|
782* *|
783* Demonstration program to compute the 32-bit CRC used as the frame *|
784* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
785* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
786* protocol). The 32-bit FCS was added via the Federal Register, *|
787* 1 June 1982, p.23798. I presume but don't know for certain that *|
788* this polynomial is or will be included in CCITT V.41, which *|
789* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
790* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
791* errors by a factor of 10^-5 over 16-bit FCS. *|
792* *|
793**********************************************************************|
794
795 Copyright (C) 1986 Gary S. Brown. You may use this program, or
796 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000797
Tim Peters934c1a12002-07-02 22:24:50 +0000798 First, the polynomial itself and its table of feedback terms. The
799 polynomial is
800 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
801 Note that we take it "backwards" and put the highest-order term in
802 the lowest-order bit. The X^32 term is "implied"; the LSB is the
803 X^31 term, etc. The X^0 term (usually shown as "+1") results in
804 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000805
Tim Peters934c1a12002-07-02 22:24:50 +0000806 Note that the usual hardware shift register implementation, which
807 is what we're using (we're merely optimizing it by doing eight-bit
808 chunks at a time) shifts bits into the lowest-order term. In our
809 implementation, that means shifting towards the right. Why do we
810 do it this way? Because the calculated CRC must be transmitted in
811 order from highest-order term to lowest-order term. UARTs transmit
812 characters in order from LSB to MSB. By storing the CRC this way,
813 we hand it to the UART in the order low-byte to high-byte; the UART
    sends each low-bit to high-bit; and the result is transmission bit
815 by bit from highest- to lowest-order term without requiring any bit
816 shuffling on our part. Reception works similarly.
817
818 The feedback terms table consists of 256, 32-bit entries. Notes:
819
820 1. The table can be generated at runtime if desired; code to do so
821 is shown later. It might not be obvious, but the feedback
822 terms simply represent the results of eight shift/xor opera-
823 tions for all combinations of data and CRC register values.
824
825 2. The CRC accumulation logic is the same for all CRC polynomials,
826 be they sixteen or thirty-two bits wide. You simply choose the
827 appropriate table. Alternatively, because the table can be
828 generated at runtime, you can start by generating the table for
829 the polynomial in question and use exactly the same "updcrc",
830 if your application needn't simultaneously handle two CRC
831 polynomials. (Note, however, that XMODEM is strange.)
832
833 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
834 of course, 32-bit entries work OK if the high 16 bits are zero.
835
836 4. The values must be right-shifted by eight bits by the "updcrc"
837 logic; the shift must be unsigned (bring in zeroes). On some
838 hardware you could probably optimize the shift in assembler by
839 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000840********************************************************************/
841
/*
** CRC-32 feedback-term table (256 entries, one per possible byte value)
** for the reflected polynomial 0xedb88320 described in the comment
** above.  The table is only ever read, so it is declared const.
*/
static const unsigned long crc_32_tab[256] = {
    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
    0x2d02ef8dUL
};
896
897static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000898binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000899{ /* By Jim Ahlstrom; All rights transferred to CNRI */
900 unsigned char *bin_data;
901 unsigned long crc = 0UL; /* initial value of CRC */
Thomas Woutersf98db652006-03-01 21:37:32 +0000902 Py_ssize_t len;
Tim Petersa98011c2002-07-02 20:20:08 +0000903 long result;
Tim Peters934c1a12002-07-02 22:24:50 +0000904
Guido van Rossum43713e52000-02-29 13:59:29 +0000905 if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000906 return NULL;
907
Tim Peters934c1a12002-07-02 22:24:50 +0000908 crc = ~ crc;
909#if SIZEOF_LONG > 4
910 /* only want the trailing 32 bits */
911 crc &= 0xFFFFFFFFUL;
912#endif
913 while (len--)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000914 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
915 /* Note: (crc >> 8) MUST zero fill on left */
Tim Petersa98011c2002-07-02 20:20:08 +0000916
917 result = (long)(crc ^ 0xFFFFFFFFUL);
Tim Peters934c1a12002-07-02 22:24:50 +0000918#if SIZEOF_LONG > 4
919 /* Extend the sign bit. This is one way to ensure the result is the
920 * same across platforms. The other way would be to return an
921 * unbounded unsigned long, but the evidence suggests that lots of
922 * code outside this treats the result as if it were a signed 4-byte
923 * integer.
Tim Petersa98011c2002-07-02 20:20:08 +0000924 */
925 result |= -(result & (1L << 31));
Tim Peters934c1a12002-07-02 22:24:50 +0000926#endif
Tim Petersa98011c2002-07-02 20:20:08 +0000927 return PyInt_FromLong(result);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000928}
929
Barry Warsawe977c212000-08-15 06:07:13 +0000930
931static PyObject *
932binascii_hexlify(PyObject *self, PyObject *args)
933{
934 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000935 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000936 PyObject *retval;
937 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000938 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000939
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000940 if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
Barry Warsawe977c212000-08-15 06:07:13 +0000941 return NULL;
942
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000943 retval = PyBytes_FromStringAndSize(NULL, arglen*2);
Barry Warsawe977c212000-08-15 06:07:13 +0000944 if (!retval)
945 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000946 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +0000947
948 /* make hex version of string, taken from shamodule.c */
949 for (i=j=0; i < arglen; i++) {
950 char c;
951 c = (argbuf[i] >> 4) & 0xf;
952 c = (c>9) ? c+'a'-10 : c + '0';
953 retbuf[j++] = c;
954 c = argbuf[i] & 0xf;
955 c = (c>9) ? c+'a'-10 : c + '0';
956 retbuf[j++] = c;
957 }
958 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +0000959}
960
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000961PyDoc_STRVAR(doc_hexlify,
Barry Warsawe977c212000-08-15 06:07:13 +0000962"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
963\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000964This function is also available as \"hexlify()\".");
Barry Warsawe977c212000-08-15 06:07:13 +0000965
966
/*
** Convert one hexadecimal digit character to its numeric value.
**
** Accepts '0'-'9', 'a'-'f' and 'A'-'F' and returns 0..15; returns -1
** for every other value of c.  Plain range comparisons are used rather
** than <ctype.h> so the result does not depend on the current locale
** and any int (including negative values) is a valid argument.
*/
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
980
981
982static PyObject *
983binascii_unhexlify(PyObject *self, PyObject *args)
984{
985 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000986 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000987 PyObject *retval;
988 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000989 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000990
991 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
992 return NULL;
993
Barry Warsaw16168472000-08-15 06:59:58 +0000994 /* XXX What should we do about strings with an odd length? Should
995 * we add an implicit leading zero, or a trailing zero? For now,
996 * raise an exception.
Barry Warsawe977c212000-08-15 06:07:13 +0000997 */
998 if (arglen % 2) {
Guido van Rossum4581ae52007-05-22 21:56:47 +0000999 PyErr_SetString(Error, "Odd-length string");
Barry Warsawe977c212000-08-15 06:07:13 +00001000 return NULL;
1001 }
1002
Guido van Rossum0e225aa2007-05-22 20:24:57 +00001003 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
Barry Warsawe977c212000-08-15 06:07:13 +00001004 if (!retval)
1005 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +00001006 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001007
1008 for (i=j=0; i < arglen; i += 2) {
1009 int top = to_int(Py_CHARMASK(argbuf[i]));
1010 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1011 if (top == -1 || bot == -1) {
Guido van Rossum4581ae52007-05-22 21:56:47 +00001012 PyErr_SetString(Error,
Barry Warsaw16168472000-08-15 06:59:58 +00001013 "Non-hexadecimal digit found");
Barry Warsawe977c212000-08-15 06:07:13 +00001014 goto finally;
1015 }
1016 retbuf[j++] = (top << 4) + bot;
1017 }
1018 return retval;
1019
1020 finally:
1021 Py_DECREF(retval);
1022 return NULL;
1023}
1024
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001025PyDoc_STRVAR(doc_unhexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001026"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1027\n\
1028hexstr must contain an even number of hex digits (upper or lower case).\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001029This function is also available as \"unhexlify()\"");
Barry Warsawe977c212000-08-15 06:07:13 +00001030
/*
** Value of each 7-bit ASCII character as a hexadecimal digit, or -1
** when the character is not one.  Indexed by character code 0..127;
** both 'A'-'F' and 'a'-'f' map to 10..15.  Read-only, hence const.
*/
static const int table_hex[128] = {
    /* 0x00 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x10 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x20 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* '0'  */  0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
    /* '@'  */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 'P'  */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* '`'  */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 'p'  */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};
1041
/* Look up the hex-digit value of c in table_hex.  The table has 128
 * entries, so c must already be known to be below 128. */
#define hexval(c) (table_hex[(unsigned int)(c)])

/* Longest line the quoted-printable encoder produces. */
#define MAXLINESIZE 76
1045
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001046PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001047
Tim Peters934c1a12002-07-02 22:24:50 +00001048static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001049binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1050{
Thomas Wouters7087f782006-03-01 23:10:05 +00001051 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001052 char ch;
1053 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001054 Py_ssize_t datalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001055 PyObject *rv;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001056 static char *kwlist[] = {"data", "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001057 int header = 0;
1058
Tim Peters934c1a12002-07-02 22:24:50 +00001059 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001060 &datalen, &header))
1061 return NULL;
1062
Barry Warsaw23164a52004-05-11 02:05:11 +00001063 /* We allocate the output same size as input, this is overkill.
1064 * The previous implementation used calloc() so we'll zero out the
1065 * memory here too, since PyMem_Malloc() does not guarantee that.
1066 */
1067 odata = (unsigned char *) PyMem_Malloc(datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001068 if (odata == NULL) {
1069 PyErr_NoMemory();
1070 return NULL;
1071 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001072 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001073
1074 in = out = 0;
1075 while (in < datalen) {
1076 if (data[in] == '=') {
1077 in++;
1078 if (in >= datalen) break;
1079 /* Soft line breaks */
Thomas Wouters89f507f2006-12-13 04:49:30 +00001080 if ((data[in] == '\n') || (data[in] == '\r')) {
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001081 if (data[in] != '\n') {
1082 while (in < datalen && data[in] != '\n') in++;
1083 }
1084 if (in < datalen) in++;
1085 }
1086 else if (data[in] == '=') {
1087 /* broken case from broken python qp */
1088 odata[out++] = '=';
1089 in++;
1090 }
Tim Peters934c1a12002-07-02 22:24:50 +00001091 else if (((data[in] >= 'A' && data[in] <= 'F') ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001092 (data[in] >= 'a' && data[in] <= 'f') ||
1093 (data[in] >= '0' && data[in] <= '9')) &&
1094 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1095 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1096 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1097 /* hexval */
1098 ch = hexval(data[in]) << 4;
1099 in++;
1100 ch |= hexval(data[in]);
1101 in++;
1102 odata[out++] = ch;
1103 }
1104 else {
1105 odata[out++] = '=';
1106 }
1107 }
1108 else if (header && data[in] == '_') {
1109 odata[out++] = ' ';
1110 in++;
1111 }
1112 else {
1113 odata[out] = data[in];
1114 in++;
1115 out++;
1116 }
1117 }
Guido van Rossum0e225aa2007-05-22 20:24:57 +00001118 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001119 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001120 return NULL;
1121 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001122 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001123 return rv;
1124}
1125
/*
** Write the two uppercase hexadecimal digits of ch to s[0] (high
** nibble) and s[1] (low nibble).  No terminator is written.
** Always returns 0.
*/
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0F];
    s[1] = digits[ch & 0x0F];
    return 0;
}
1136
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001137PyDoc_STRVAR(doc_b2a_qp,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001138"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1139 Encode a string using quoted-printable encoding. \n\
1140\n\
1141On encoding, when istext is set, newlines are not encoded, and white \n\
1142space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001143both encoded. When quotetabs is set, space and tabs are encoded.");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001144
1145/* XXX: This is ridiculously complicated to be backward compatible
1146 * (mostly) with the quopri module. It doesn't re-create the quopri
1147 * module bug where text ending in CRLF has the CR encoded */
Tim Peters934c1a12002-07-02 22:24:50 +00001148static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001149binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1150{
Thomas Wouters7087f782006-03-01 23:10:05 +00001151 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001152 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001153 Py_ssize_t datalen = 0, odatalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001154 PyObject *rv;
1155 unsigned int linelen = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001156 static char *kwlist[] = {"data", "quotetabs", "istext",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001157 "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001158 int istext = 1;
1159 int quotetabs = 0;
1160 int header = 0;
1161 unsigned char ch;
1162 int crlf = 0;
1163 unsigned char *p;
1164
Tim Peters934c1a12002-07-02 22:24:50 +00001165 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001166 &datalen, &quotetabs, &istext, &header))
1167 return NULL;
1168
1169 /* See if this string is using CRLF line ends */
1170 /* XXX: this function has the side effect of converting all of
1171 * the end of lines to be the same depending on this detection
1172 * here */
Walter Dörwald0925e412007-05-09 18:23:50 +00001173 p = (unsigned char *) memchr(data, '\n', datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001174 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1175 crlf = 1;
1176
1177 /* First, scan to see how many characters need to be encoded */
1178 in = 0;
1179 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001180 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001181 (data[in] == '=') ||
1182 (header && data[in] == '_') ||
Guido van Rossumd8faa362007-04-27 19:54:29 +00001183 ((data[in] == '.') && (linelen == 0) &&
1184 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001185 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1186 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001187 ((data[in] < 33) &&
1188 (data[in] != '\r') && (data[in] != '\n') &&
Guido van Rossumd8faa362007-04-27 19:54:29 +00001189 (quotetabs ||
1190 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001191 {
1192 if ((linelen + 3) >= MAXLINESIZE) {
1193 linelen = 0;
1194 if (crlf)
1195 odatalen += 3;
1196 else
1197 odatalen += 2;
1198 }
1199 linelen += 3;
1200 odatalen += 3;
1201 in++;
1202 }
1203 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001204 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001205 ((data[in] == '\n') ||
1206 ((in+1 < datalen) && (data[in] == '\r') &&
1207 (data[in+1] == '\n'))))
1208 {
1209 linelen = 0;
1210 /* Protect against whitespace on end of line */
1211 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1212 odatalen += 2;
1213 if (crlf)
1214 odatalen += 2;
1215 else
1216 odatalen += 1;
1217 if (data[in] == '\r')
1218 in += 2;
1219 else
1220 in++;
1221 }
1222 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001223 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001224 (data[in+1] != '\n') &&
1225 (linelen + 1) >= MAXLINESIZE) {
1226 linelen = 0;
1227 if (crlf)
1228 odatalen += 3;
1229 else
1230 odatalen += 2;
1231 }
1232 linelen++;
1233 odatalen++;
1234 in++;
1235 }
1236 }
1237 }
1238
Barry Warsaw23164a52004-05-11 02:05:11 +00001239 /* We allocate the output same size as input, this is overkill.
1240 * The previous implementation used calloc() so we'll zero out the
1241 * memory here too, since PyMem_Malloc() does not guarantee that.
1242 */
1243 odata = (unsigned char *) PyMem_Malloc(odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001244 if (odata == NULL) {
1245 PyErr_NoMemory();
1246 return NULL;
1247 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001248 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001249
1250 in = out = linelen = 0;
1251 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001252 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001253 (data[in] == '=') ||
1254 (header && data[in] == '_') ||
Guido van Rossumd8faa362007-04-27 19:54:29 +00001255 ((data[in] == '.') && (linelen == 0) &&
1256 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001257 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1258 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001259 ((data[in] < 33) &&
1260 (data[in] != '\r') && (data[in] != '\n') &&
Guido van Rossumd8faa362007-04-27 19:54:29 +00001261 (quotetabs ||
1262 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001263 {
1264 if ((linelen + 3 )>= MAXLINESIZE) {
1265 odata[out++] = '=';
1266 if (crlf) odata[out++] = '\r';
1267 odata[out++] = '\n';
1268 linelen = 0;
1269 }
1270 odata[out++] = '=';
1271 to_hex(data[in], &odata[out]);
1272 out += 2;
1273 in++;
1274 linelen += 3;
1275 }
1276 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001277 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001278 ((data[in] == '\n') ||
1279 ((in+1 < datalen) && (data[in] == '\r') &&
1280 (data[in+1] == '\n'))))
1281 {
1282 linelen = 0;
1283 /* Protect against whitespace on end of line */
1284 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1285 ch = odata[out-1];
1286 odata[out-1] = '=';
1287 to_hex(ch, &odata[out]);
1288 out += 2;
1289 }
Tim Peters934c1a12002-07-02 22:24:50 +00001290
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001291 if (crlf) odata[out++] = '\r';
1292 odata[out++] = '\n';
1293 if (data[in] == '\r')
1294 in += 2;
1295 else
1296 in++;
1297 }
1298 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001299 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001300 (data[in+1] != '\n') &&
1301 (linelen + 1) >= MAXLINESIZE) {
1302 odata[out++] = '=';
1303 if (crlf) odata[out++] = '\r';
1304 odata[out++] = '\n';
1305 linelen = 0;
1306 }
1307 linelen++;
1308 if (header && data[in] == ' ') {
1309 odata[out++] = '_';
1310 in++;
1311 }
1312 else {
1313 odata[out++] = data[in++];
1314 }
1315 }
1316 }
1317 }
Guido van Rossum0e225aa2007-05-22 20:24:57 +00001318 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001319 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001320 return NULL;
1321 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001322 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001323 return rv;
1324}
Barry Warsawe977c212000-08-15 06:07:13 +00001325
Jack Jansen72781191995-08-07 14:34:15 +00001326/* List of functions defined in the module */
1327
1328static struct PyMethodDef binascii_module_methods[] = {
Barry Warsawe977c212000-08-15 06:07:13 +00001329 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1330 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1331 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1332 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1333 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1334 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1335 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1336 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1337 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1338 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1339 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1340 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1341 doc_rledecode_hqx},
1342 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1343 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
Tim Peters934c1a12002-07-02 22:24:50 +00001344 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001345 doc_a2b_qp},
Tim Peters934c1a12002-07-02 22:24:50 +00001346 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001347 doc_b2a_qp},
Barry Warsawe977c212000-08-15 06:07:13 +00001348 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001349};
1350
1351
1352/* Initialization function for the module (*must* be called initbinascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001353PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001354
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001355PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001356initbinascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001357{
1358 PyObject *m, *d, *x;
1359
1360 /* Create the module and add the functions */
1361 m = Py_InitModule("binascii", binascii_module_methods);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001362 if (m == NULL)
1363 return;
Jack Jansen72781191995-08-07 14:34:15 +00001364
1365 d = PyModule_GetDict(m);
1366 x = PyString_FromString(doc_binascii);
1367 PyDict_SetItemString(d, "__doc__", x);
Guido van Rossum5c159bd1997-08-04 23:55:25 +00001368 Py_XDECREF(x);
Jack Jansen72781191995-08-07 14:34:15 +00001369
Guido van Rossum4581ae52007-05-22 21:56:47 +00001370 Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001371 PyDict_SetItemString(d, "Error", Error);
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001372 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001373 PyDict_SetItemString(d, "Incomplete", Incomplete);
Jack Jansen72781191995-08-07 14:34:15 +00001374}