blob: 04a945e1042bda975480a7d406d7729d99467b14 [file] [log] [blame]
/*
** Routines to represent binary data in ASCII and vice-versa
**
** This module currently supports the following encodings:
** uuencode:
**      each line encodes 45 bytes (except possibly the last)
**      First char encodes (binary) length, rest data
**      each char encodes 6 bits, as follows:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**      short binary data is zero-extended (so the bits are always in the
**      right place); this is *not* reflected in the length.
** base64:
**      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in similar order as uuencode/hqx. Encoding
**      is done via a table.
**      Short binary data is filled (in ASCII) with '='.
** hqx:
**      File starts with introductory text, real data starts and ends
**      with colons.
**      Data consists of three similar parts: info, datafork, resourcefork.
**      Each part is protected (at the end) with a 16-bit crc
**      The binary data is run-length encoded, and then ascii-fied:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding is table-driven, see the code.
**      Short binary data results in the runt ascii-byte being output with
**      the bits in the right place.
**
** While I was reading dozens of programs that encode or decode the formats
** here (documentation? hihi:-) I have formulated Jansen's Observation:
**
**      Programs that encode binary data in ASCII are written in
**      such a style that they are as unreadable as possible. Devices used
**      include unnecessary global variables, burying important tables
**      in unrelated source files, putting functions in include files,
**      using seemingly-descriptive variable names for different purposes,
**      calls to empty subroutines and a host of others.
**
** I have attempted to break with this tradition, but I guess that that
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on RFC 1521 et al.
** Quoted-printable encoding specifies that non-printable characters (anything
** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
** of the character.  It also specifies some other behavior to enable 8-bit
** data in a mail message with little difficulty (maximum line sizes,
** protecting some cases of whitespace, etc).
**
** Brandon Long, September 2001.
*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
59
60static PyObject *Error;
61static PyObject *Incomplete;
62
/*
** hqx decoding table (ascii -> binary).
**
** Indexed by the input byte.  A data character yields its 6-bit value
** (0x00..0x3F); every other byte yields one of the marker values DONE,
** SKIP or FAIL defined below.
*/

/* Byte that introduces a run in the hqx run-length encoding. */
#define RUNCHAR 0x90

/* Marker values stored in table_a2b_hqx for non-data bytes. */
#define DONE 0x7F       /* ':'  -- terminates the hqx data */
#define SKIP 0x7E       /* '\n' and '\r' -- silently ignored */
#define FAIL 0x7D       /* byte that is not allowed in hqx data */

static unsigned char table_a2b_hqx[256] = {
    /* 0x00: control characters ^@ .. ^G */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x08: backspace, tab, newline, ^K, ^L, carriage return, ^N, ^O */
    FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
    /* 0x10: control characters ^P .. ^W */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x18: control characters ^X .. ^_ */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x20: space ! " # $ % & ' */
    FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    /* 0x28: ( ) * + , - . and slash */
    0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
    /* 0x30: 0 1 2 3 4 5 6 7 */
    0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
    /* 0x38: 8 9 : ; < = > ? */
    0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x40: @ A B C D E F G */
    0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
    /* 0x48: H I J K L M N O */
    0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
    /* 0x50: P Q R S T U V W */
    0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
    /* 0x58: X Y Z [ backslash ] ^ _ */
    0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
    /* 0x60: ` a b c d e f g */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
    /* 0x68: h i j k l m n o */
    0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
    /* 0x70: p q r s t u v w */
    0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x78: x y z { | } ~ DEL */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    /* 0x80 .. 0xFF: bytes with the high bit set are never valid */
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
123
/*
** hqx encoding table (binary -> ascii): entry i is the character that
** represents the 6-bit value i.  The literal is split into pieces purely
** for readability; adjacent string literals are concatenated.
*/
static unsigned char table_b2a_hqx[] =
    "!\"#$%&'()*+,-"            /* 0x00 .. 0x0C */
    "012345689"                 /* 0x0D .. 0x15 */
    "@ABCDEFGHIJKLMN"           /* 0x16 .. 0x24 */
    "PQRSTUV"                   /* 0x25 .. 0x2B */
    "XYZ["                      /* 0x2C .. 0x2F */
    "`abcdef"                   /* 0x30 .. 0x36 */
    "hijklm"                    /* 0x37 .. 0x3C */
    "pqr";                      /* 0x3D .. 0x3F */
Jack Jansen72781191995-08-07 14:34:15 +0000126
/*
** base64 decoding table (ascii -> binary) for the 7-bit byte values.
** Entry c holds the 6-bit value of character c, or -1 when c is not part
** of the base64 alphabet.  The pad character '=' deliberately maps to 0.
*/
static char table_a2b_base64[] = {
    /* 0x00 .. 0x0F */
    -1, -1, -1, -1,  -1, -1, -1, -1,  -1, -1, -1, -1,  -1, -1, -1, -1,
    /* 0x10 .. 0x1F */
    -1, -1, -1, -1,  -1, -1, -1, -1,  -1, -1, -1, -1,  -1, -1, -1, -1,
    /* 0x20 .. 0x2F: '+' is 62, '/' is 63 */
    -1, -1, -1, -1,  -1, -1, -1, -1,  -1, -1, -1, 62,  -1, -1, -1, 63,
    /* 0x30 .. 0x3F: digits are 52..61; '=' (pad) is 0 */
    52, 53, 54, 55,  56, 57, 58, 59,  60, 61, -1, -1,  -1,  0, -1, -1,
    /* 0x40 .. 0x4F: 'A'..'O' are 0..14 */
    -1,  0,  1,  2,   3,  4,  5,  6,   7,  8,  9, 10,  11, 12, 13, 14,
    /* 0x50 .. 0x5F: 'P'..'Z' are 15..25 */
    15, 16, 17, 18,  19, 20, 21, 22,  23, 24, 25, -1,  -1, -1, -1, -1,
    /* 0x60 .. 0x6F: 'a'..'o' are 26..40 */
    -1, 26, 27, 28,  29, 30, 31, 32,  33, 34, 35, 36,  37, 38, 39, 40,
    /* 0x70 .. 0x7F: 'p'..'z' are 41..51 */
    41, 42, 43, 44,  45, 46, 47, 48,  49, 50, 51, -1,  -1, -1, -1, -1
};
137
/* Character used to pad the last, incomplete group of base64 output. */
#define BASE64_PAD '='

/*
** Largest binary chunk b2a_base64 accepts.  That function allocates
** 2*n + 3 bytes for n input bytes, so this bound keeps the allocation size
** within Py_ssize_t; beyond that only available memory is the limit.
*/
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/*
** base64 encoding table (binary -> ascii): entry i is the character that
** represents the 6-bit value i.
*/
static unsigned char table_b2a_base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"        /*  0 .. 25 */
    "abcdefghijklmnopqrstuvwxyz"        /* 26 .. 51 */
    "0123456789+/";                     /* 52 .. 63 */
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000145
146
147
/*
** Lookup table for the 16-bit CRC used by hqx (see binascii_crc_hqx).
** Entry i is the CRC, with generator polynomial 0x1021, of the single
** byte value i.
*/
static unsigned short crctab_hqx[256] = {
    /* 0x00 */
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    /* 0x10 */
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    /* 0x20 */
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    /* 0x30 */
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    /* 0x40 */
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    /* 0x50 */
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    /* 0x60 */
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    /* 0x70 */
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    /* 0x80 */
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    /* 0x90 */
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    /* 0xA0 */
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    /* 0xB0 */
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    /* 0xC0 */
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    /* 0xD0 */
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    /* 0xE0 */
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    /* 0xF0 */
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
182
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000183PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
Jack Jansen72781191995-08-07 14:34:15 +0000184
185static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000186binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000187{
188 unsigned char *ascii_data, *bin_data;
189 int leftbits = 0;
190 unsigned char this_ch;
191 unsigned int leftchar = 0;
192 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000193 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000194
Guido van Rossum43713e52000-02-29 13:59:29 +0000195 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000196 return NULL;
197
198 /* First byte: binary data length (in bytes) */
199 bin_len = (*ascii_data++ - ' ') & 077;
200 ascii_len--;
201
202 /* Allocate the buffer */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000203 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000204 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000205 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000206
Jack Jansen72781191995-08-07 14:34:15 +0000207 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
Neal Norwitzfe92eef2004-06-06 20:13:10 +0000208 /* XXX is it really best to add NULs if there's no more data */
209 this_ch = (ascii_len > 0) ? *ascii_data : 0;
Jack Jansen72781191995-08-07 14:34:15 +0000210 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
211 /*
212 ** Whitespace. Assume some spaces got eaten at
213 ** end-of-line. (We check this later)
214 */
215 this_ch = 0;
216 } else {
Jack Jansen5d957971995-11-14 10:35:19 +0000217 /* Check the character for legality
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000218 ** The 64 in stead of the expected 63 is because
219 ** there are a few uuencodes out there that use
Guido van Rossum92d89172001-01-09 02:11:57 +0000220 ** '`' as zero instead of space.
Jack Jansen5d957971995-11-14 10:35:19 +0000221 */
222 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Jack Jansen72781191995-08-07 14:34:15 +0000223 PyErr_SetString(Error, "Illegal char");
224 Py_DECREF(rv);
225 return NULL;
226 }
227 this_ch = (this_ch - ' ') & 077;
228 }
229 /*
230 ** Shift it in on the low end, and see if there's
231 ** a byte ready for output.
232 */
233 leftchar = (leftchar << 6) | (this_ch);
234 leftbits += 6;
235 if ( leftbits >= 8 ) {
236 leftbits -= 8;
237 *bin_data++ = (leftchar >> leftbits) & 0xff;
238 leftchar &= ((1 << leftbits) - 1);
239 bin_len--;
240 }
241 }
242 /*
243 ** Finally, check that if there's anything left on the line
244 ** that it's whitespace only.
245 */
246 while( ascii_len-- > 0 ) {
247 this_ch = *ascii_data++;
Guido van Rossum92d89172001-01-09 02:11:57 +0000248 /* Extra '`' may be written as padding in some cases */
249 if ( this_ch != ' ' && this_ch != ' '+64 &&
Guido van Rossum1243ae71997-07-11 18:36:28 +0000250 this_ch != '\n' && this_ch != '\r' ) {
Jack Jansen72781191995-08-07 14:34:15 +0000251 PyErr_SetString(Error, "Trailing garbage");
252 Py_DECREF(rv);
253 return NULL;
254 }
255 }
256 return rv;
257}
258
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000259PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000260
Jack Jansen72781191995-08-07 14:34:15 +0000261static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000262binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000263{
264 unsigned char *ascii_data, *bin_data;
265 int leftbits = 0;
266 unsigned char this_ch;
267 unsigned int leftchar = 0;
268 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000269 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000270
Guido van Rossum43713e52000-02-29 13:59:29 +0000271 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000272 return NULL;
273 if ( bin_len > 45 ) {
274 /* The 45 is a limit that appears in all uuencode's */
275 PyErr_SetString(Error, "At most 45 bytes at once");
276 return NULL;
277 }
278
279 /* We're lazy and allocate to much (fixed up later) */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000280 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000281 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000282 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000283
284 /* Store the length */
285 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000286
Jack Jansen72781191995-08-07 14:34:15 +0000287 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
288 /* Shift the data (or padding) into our buffer */
289 if ( bin_len > 0 ) /* Data */
290 leftchar = (leftchar << 8) | *bin_data;
291 else /* Padding */
292 leftchar <<= 8;
293 leftbits += 8;
294
295 /* See if there are 6-bit groups ready */
296 while ( leftbits >= 6 ) {
297 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
298 leftbits -= 6;
299 *ascii_data++ = this_ch + ' ';
300 }
301 }
302 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000303
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000304 if (PyBytes_Resize(rv,
305 (ascii_data -
306 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
307 Py_DECREF(rv);
308 rv = NULL;
309 }
Jack Jansen72781191995-08-07 14:34:15 +0000310 return rv;
311}
312
Guido van Rossum2db4f471999-10-19 19:05:14 +0000313
314static int
Thomas Woutersf98db652006-03-01 21:37:32 +0000315binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000316{
Tim Peters934c1a12002-07-02 22:24:50 +0000317 /* Finds & returns the (num+1)th
Guido van Rossum2db4f471999-10-19 19:05:14 +0000318 ** valid character for base64, or -1 if none.
319 */
320
321 int ret = -1;
322 unsigned char c, b64val;
323
324 while ((slen > 0) && (ret == -1)) {
325 c = *s;
326 b64val = table_a2b_base64[c & 0x7f];
327 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
328 if (num == 0)
329 ret = *s;
330 num--;
331 }
332
333 s++;
334 slen--;
335 }
336 return ret;
337}
338
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000339PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000340
341static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000342binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000343{
344 unsigned char *ascii_data, *bin_data;
345 int leftbits = 0;
346 unsigned char this_ch;
347 unsigned int leftchar = 0;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000348 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000349 Py_ssize_t ascii_len, bin_len;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000350 int quad_pos = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000351
Guido van Rossum43713e52000-02-29 13:59:29 +0000352 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000353 return NULL;
354
355 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
356
357 /* Allocate the buffer */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000358 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000359 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000360 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000361 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000362
363 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
364 this_ch = *ascii_data;
365
366 if (this_ch > 0x7f ||
367 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
Jack Jansenba1de3b1996-01-22 10:47:15 +0000368 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000369
370 /* Check for pad sequences and ignore
371 ** the invalid ones.
372 */
373 if (this_ch == BASE64_PAD) {
374 if ( (quad_pos < 2) ||
375 ((quad_pos == 2) &&
376 (binascii_find_valid(ascii_data, ascii_len, 1)
377 != BASE64_PAD)) )
378 {
379 continue;
380 }
381 else {
382 /* A pad sequence means no more input.
383 ** We've already interpreted the data
384 ** from the quad at this point.
385 */
386 leftbits = 0;
387 break;
388 }
389 }
390
391 this_ch = table_a2b_base64[*ascii_data];
392 if ( this_ch == (unsigned char) -1 )
393 continue;
394
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000395 /*
396 ** Shift it in on the low end, and see if there's
397 ** a byte ready for output.
398 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000399 quad_pos = (quad_pos + 1) & 0x03;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000400 leftchar = (leftchar << 6) | (this_ch);
401 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000402
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000403 if ( leftbits >= 8 ) {
404 leftbits -= 8;
405 *bin_data++ = (leftchar >> leftbits) & 0xff;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000406 bin_len++;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000407 leftchar &= ((1 << leftbits) - 1);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000408 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000409 }
410
411 if (leftbits != 0) {
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000412 PyErr_SetString(Error, "Incorrect padding");
413 Py_DECREF(rv);
414 return NULL;
415 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000416
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000417 /* And set string size correctly. If the result string is empty
418 ** (because the input was all invalid) return the shared empty
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000419 ** string instead; PyBytes_Resize() won't do this for us.
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000420 */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000421 if (bin_len > 0) {
422 if (PyBytes_Resize(rv, bin_len) < 0) {
423 Py_DECREF(rv);
424 rv = NULL;
425 }
426 }
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000427 else {
428 Py_DECREF(rv);
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000429 rv = PyBytes_FromStringAndSize("", 0);
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000430 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000431 return rv;
432}
433
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000434PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000435
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000436static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000437binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000438{
439 unsigned char *ascii_data, *bin_data;
440 int leftbits = 0;
441 unsigned char this_ch;
442 unsigned int leftchar = 0;
443 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000444 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000445
Guido van Rossum43713e52000-02-29 13:59:29 +0000446 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000447 return NULL;
448 if ( bin_len > BASE64_MAXBIN ) {
449 PyErr_SetString(Error, "Too much data for base64 line");
450 return NULL;
451 }
Tim Peters934c1a12002-07-02 22:24:50 +0000452
Tim Peters1fbb5772001-12-19 04:41:35 +0000453 /* We're lazy and allocate too much (fixed up later).
454 "+3" leaves room for up to two pad characters and a trailing
455 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000456 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000457 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000458 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000459
460 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
461 /* Shift the data into our buffer */
462 leftchar = (leftchar << 8) | *bin_data;
463 leftbits += 8;
464
465 /* See if there are 6-bit groups ready */
466 while ( leftbits >= 6 ) {
467 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
468 leftbits -= 6;
469 *ascii_data++ = table_b2a_base64[this_ch];
470 }
471 }
472 if ( leftbits == 2 ) {
473 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
474 *ascii_data++ = BASE64_PAD;
475 *ascii_data++ = BASE64_PAD;
476 } else if ( leftbits == 4 ) {
477 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
478 *ascii_data++ = BASE64_PAD;
Tim Peters934c1a12002-07-02 22:24:50 +0000479 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000480 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000481
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000482 if (PyBytes_Resize(rv,
483 (ascii_data -
484 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
485 Py_DECREF(rv);
486 rv = NULL;
487 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000488 return rv;
489}
490
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000491PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
Jack Jansen72781191995-08-07 14:34:15 +0000492
493static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000494binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000495{
496 unsigned char *ascii_data, *bin_data;
497 int leftbits = 0;
498 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000499 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000500 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000501 Py_ssize_t len;
Jack Jansen72781191995-08-07 14:34:15 +0000502 int done = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000503
Guido van Rossum43713e52000-02-29 13:59:29 +0000504 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000505 return NULL;
506
Raymond Hettinger658717e2004-09-06 22:58:37 +0000507 /* Allocate a string that is too big (fixed later)
508 Add two to the initial length to prevent interning which
509 would preclude subsequent resizing. */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000510 if ( (rv=PyBytes_FromStringAndSize(NULL, len+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000511 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000512 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000513
514 for( ; len > 0 ; len--, ascii_data++ ) {
515 /* Get the byte and look it up */
516 this_ch = table_a2b_hqx[*ascii_data];
517 if ( this_ch == SKIP )
518 continue;
519 if ( this_ch == FAIL ) {
520 PyErr_SetString(Error, "Illegal char");
521 Py_DECREF(rv);
522 return NULL;
523 }
524 if ( this_ch == DONE ) {
525 /* The terminating colon */
526 done = 1;
527 break;
528 }
529
530 /* Shift it into the buffer and see if any bytes are ready */
531 leftchar = (leftchar << 6) | (this_ch);
532 leftbits += 6;
533 if ( leftbits >= 8 ) {
534 leftbits -= 8;
535 *bin_data++ = (leftchar >> leftbits) & 0xff;
536 leftchar &= ((1 << leftbits) - 1);
537 }
538 }
Tim Peters934c1a12002-07-02 22:24:50 +0000539
Jack Jansen72781191995-08-07 14:34:15 +0000540 if ( leftbits && !done ) {
541 PyErr_SetString(Incomplete,
542 "String has incomplete number of bytes");
543 Py_DECREF(rv);
544 return NULL;
545 }
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000546 if (PyBytes_Resize(rv,
547 (bin_data -
548 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
549 Py_DECREF(rv);
550 rv = NULL;
551 }
Guido van Rossum9c6ba5e1997-01-12 20:02:04 +0000552 if (rv) {
553 PyObject *rrv = Py_BuildValue("Oi", rv, done);
554 Py_DECREF(rv);
555 return rrv;
556 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000557
Jack Jansen72781191995-08-07 14:34:15 +0000558 return NULL;
559}
560
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000561PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
Jack Jansen72781191995-08-07 14:34:15 +0000562
563static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000564binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000565{
566 unsigned char *in_data, *out_data;
567 PyObject *rv;
568 unsigned char ch;
Thomas Woutersf98db652006-03-01 21:37:32 +0000569 Py_ssize_t in, inend, len;
Tim Peters934c1a12002-07-02 22:24:50 +0000570
Guido van Rossum43713e52000-02-29 13:59:29 +0000571 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000572 return NULL;
573
574 /* Worst case: output is twice as big as input (fixed later) */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000575 if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000576 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000577 out_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000578
Jack Jansen72781191995-08-07 14:34:15 +0000579 for( in=0; in<len; in++) {
580 ch = in_data[in];
581 if ( ch == RUNCHAR ) {
582 /* RUNCHAR. Escape it. */
583 *out_data++ = RUNCHAR;
584 *out_data++ = 0;
585 } else {
586 /* Check how many following are the same */
587 for(inend=in+1;
588 inend<len && in_data[inend] == ch &&
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000589 inend < in+255;
Jack Jansen72781191995-08-07 14:34:15 +0000590 inend++) ;
Jack Jansen0223aa11995-08-31 13:44:23 +0000591 if ( inend - in > 3 ) {
592 /* More than 3 in a row. Output RLE. */
Jack Jansen72781191995-08-07 14:34:15 +0000593 *out_data++ = ch;
594 *out_data++ = RUNCHAR;
595 *out_data++ = inend-in;
596 in = inend-1;
597 } else {
598 /* Less than 3. Output the byte itself */
599 *out_data++ = ch;
600 }
601 }
602 }
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000603 if (PyBytes_Resize(rv,
604 (out_data -
605 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
606 Py_DECREF(rv);
607 rv = NULL;
608 }
Jack Jansen72781191995-08-07 14:34:15 +0000609 return rv;
610}
611
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000612PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
Tim Peters934c1a12002-07-02 22:24:50 +0000613
Jack Jansen72781191995-08-07 14:34:15 +0000614static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000615binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000616{
617 unsigned char *ascii_data, *bin_data;
618 int leftbits = 0;
619 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000620 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000621 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000622 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000623
Guido van Rossum43713e52000-02-29 13:59:29 +0000624 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000625 return NULL;
626
627 /* Allocate a buffer that is at least large enough */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000628 if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000629 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000630 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000631
Jack Jansen72781191995-08-07 14:34:15 +0000632 for( ; len > 0 ; len--, bin_data++ ) {
633 /* Shift into our buffer, and output any 6bits ready */
634 leftchar = (leftchar << 8) | *bin_data;
635 leftbits += 8;
636 while ( leftbits >= 6 ) {
637 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
638 leftbits -= 6;
639 *ascii_data++ = table_b2a_hqx[this_ch];
640 }
641 }
642 /* Output a possible runt byte */
643 if ( leftbits ) {
644 leftchar <<= (6-leftbits);
645 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
646 }
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000647 if (PyBytes_Resize(rv,
648 (ascii_data -
649 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
650 Py_DECREF(rv);
651 rv = NULL;
652 }
Jack Jansen72781191995-08-07 14:34:15 +0000653 return rv;
654}
655
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000656PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
Tim Peters934c1a12002-07-02 22:24:50 +0000657
Jack Jansen72781191995-08-07 14:34:15 +0000658static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000659binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000660{
661 unsigned char *in_data, *out_data;
662 unsigned char in_byte, in_repeat;
663 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000664 Py_ssize_t in_len, out_len, out_len_left;
Jack Jansen72781191995-08-07 14:34:15 +0000665
Guido van Rossum43713e52000-02-29 13:59:29 +0000666 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000667 return NULL;
668
669 /* Empty string is a special case */
670 if ( in_len == 0 )
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000671 return PyBytes_FromStringAndSize("", 0);
Jack Jansen72781191995-08-07 14:34:15 +0000672
673 /* Allocate a buffer of reasonable size. Resized when needed */
674 out_len = in_len*2;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000675 if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000676 return NULL;
677 out_len_left = out_len;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000678 out_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000679
680 /*
681 ** We need two macros here to get/put bytes and handle
682 ** end-of-buffer for input and output strings.
683 */
684#define INBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000685 do { \
686 if ( --in_len < 0 ) { \
687 PyErr_SetString(Incomplete, ""); \
688 Py_DECREF(rv); \
689 return NULL; \
690 } \
691 b = *in_data++; \
692 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000693
Jack Jansen72781191995-08-07 14:34:15 +0000694#define OUTBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000695 do { \
696 if ( --out_len_left < 0 ) { \
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000697 if (PyBytes_Resize(rv, 2*out_len) < 0) \
698 { Py_DECREF(rv); return NULL; } \
699 out_data = (unsigned char *)PyBytes_AS_STRING(rv) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000700 + out_len; \
701 out_len_left = out_len-1; \
702 out_len = out_len * 2; \
703 } \
704 *out_data++ = b; \
705 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000706
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000707 /*
708 ** Handle first byte separately (since we have to get angry
709 ** in case of an orphaned RLE code).
710 */
711 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000712
713 if (in_byte == RUNCHAR) {
714 INBYTE(in_repeat);
715 if (in_repeat != 0) {
716 /* Note Error, not Incomplete (which is at the end
717 ** of the string only). This is a programmer error.
718 */
719 PyErr_SetString(Error, "Orphaned RLE code at start");
720 Py_DECREF(rv);
721 return NULL;
722 }
723 OUTBYTE(RUNCHAR);
724 } else {
725 OUTBYTE(in_byte);
726 }
Tim Peters934c1a12002-07-02 22:24:50 +0000727
Jack Jansen72781191995-08-07 14:34:15 +0000728 while( in_len > 0 ) {
729 INBYTE(in_byte);
730
731 if (in_byte == RUNCHAR) {
732 INBYTE(in_repeat);
733 if ( in_repeat == 0 ) {
734 /* Just an escaped RUNCHAR value */
735 OUTBYTE(RUNCHAR);
736 } else {
737 /* Pick up value and output a sequence of it */
738 in_byte = out_data[-1];
739 while ( --in_repeat > 0 )
740 OUTBYTE(in_byte);
741 }
742 } else {
743 /* Normal byte */
744 OUTBYTE(in_byte);
745 }
746 }
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000747 if (PyBytes_Resize(rv,
748 (out_data -
749 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
750 Py_DECREF(rv);
751 rv = NULL;
752 }
Jack Jansen72781191995-08-07 14:34:15 +0000753 return rv;
754}
755
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000756PyDoc_STRVAR(doc_crc_hqx,
757"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
Jack Jansen72781191995-08-07 14:34:15 +0000758
759static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000760binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000761{
762 unsigned char *bin_data;
763 unsigned int crc;
Thomas Woutersf98db652006-03-01 21:37:32 +0000764 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000765
Guido van Rossum43713e52000-02-29 13:59:29 +0000766 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
Jack Jansen72781191995-08-07 14:34:15 +0000767 return NULL;
768
769 while(len--) {
770 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
771 }
772
773 return Py_BuildValue("i", crc);
774}
775
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000776PyDoc_STRVAR(doc_crc32,
777"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000778
779/* Crc - 32 BIT ANSI X3.66 CRC checksum files
780 Also known as: ISO 3307
781**********************************************************************|
782* *|
783* Demonstration program to compute the 32-bit CRC used as the frame *|
784* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
785* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
786* protocol). The 32-bit FCS was added via the Federal Register, *|
787* 1 June 1982, p.23798. I presume but don't know for certain that *|
788* this polynomial is or will be included in CCITT V.41, which *|
789* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
790* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
791* errors by a factor of 10^-5 over 16-bit FCS. *|
792* *|
793**********************************************************************|
794
795 Copyright (C) 1986 Gary S. Brown. You may use this program, or
796 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000797
Tim Peters934c1a12002-07-02 22:24:50 +0000798 First, the polynomial itself and its table of feedback terms. The
799 polynomial is
800 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
801 Note that we take it "backwards" and put the highest-order term in
802 the lowest-order bit. The X^32 term is "implied"; the LSB is the
803 X^31 term, etc. The X^0 term (usually shown as "+1") results in
804 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000805
Tim Peters934c1a12002-07-02 22:24:50 +0000806 Note that the usual hardware shift register implementation, which
807 is what we're using (we're merely optimizing it by doing eight-bit
808 chunks at a time) shifts bits into the lowest-order term. In our
809 implementation, that means shifting towards the right. Why do we
810 do it this way? Because the calculated CRC must be transmitted in
811 order from highest-order term to lowest-order term. UARTs transmit
812 characters in order from LSB to MSB. By storing the CRC this way,
813 we hand it to the UART in the order low-byte to high-byte; the UART
814 sends each low-bit to high-bit; and the result is transmission bit
815 by bit from highest- to lowest-order term without requiring any bit
816 shuffling on our part. Reception works similarly.
817
818 The feedback terms table consists of 256, 32-bit entries. Notes:
819
820 1. The table can be generated at runtime if desired; code to do so
821 is shown later. It might not be obvious, but the feedback
822 terms simply represent the results of eight shift/xor opera-
823 tions for all combinations of data and CRC register values.
824
825 2. The CRC accumulation logic is the same for all CRC polynomials,
826 be they sixteen or thirty-two bits wide. You simply choose the
827 appropriate table. Alternatively, because the table can be
828 generated at runtime, you can start by generating the table for
829 the polynomial in question and use exactly the same "updcrc",
830 if your application needn't simultaneously handle two CRC
831 polynomials. (Note, however, that XMODEM is strange.)
832
833 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
834 of course, 32-bit entries work OK if the high 16 bits are zero.
835
836 4. The values must be right-shifted by eight bits by the "updcrc"
837 logic; the shift must be unsigned (bring in zeroes). On some
838 hardware you could probably optimize the shift in assembler by
839 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000840********************************************************************/
841
/*
** Feedback-term table for the reflected CRC-32 polynomial 0xedb88320:
** entry i is the result of pushing the byte value i through eight
** shift/xor steps.  The table is only ever read, so it is declared const.
*/
static const unsigned long crc_32_tab[256] = {
0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
0x2d02ef8dUL
};
896
897static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000898binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000899{ /* By Jim Ahlstrom; All rights transferred to CNRI */
900 unsigned char *bin_data;
901 unsigned long crc = 0UL; /* initial value of CRC */
Thomas Woutersf98db652006-03-01 21:37:32 +0000902 Py_ssize_t len;
Tim Petersa98011c2002-07-02 20:20:08 +0000903 long result;
Tim Peters934c1a12002-07-02 22:24:50 +0000904
Guido van Rossum43713e52000-02-29 13:59:29 +0000905 if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000906 return NULL;
907
Tim Peters934c1a12002-07-02 22:24:50 +0000908 crc = ~ crc;
909#if SIZEOF_LONG > 4
910 /* only want the trailing 32 bits */
911 crc &= 0xFFFFFFFFUL;
912#endif
913 while (len--)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000914 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
915 /* Note: (crc >> 8) MUST zero fill on left */
Tim Petersa98011c2002-07-02 20:20:08 +0000916
917 result = (long)(crc ^ 0xFFFFFFFFUL);
Tim Peters934c1a12002-07-02 22:24:50 +0000918#if SIZEOF_LONG > 4
919 /* Extend the sign bit. This is one way to ensure the result is the
920 * same across platforms. The other way would be to return an
921 * unbounded unsigned long, but the evidence suggests that lots of
922 * code outside this treats the result as if it were a signed 4-byte
923 * integer.
Tim Petersa98011c2002-07-02 20:20:08 +0000924 */
925 result |= -(result & (1L << 31));
Tim Peters934c1a12002-07-02 22:24:50 +0000926#endif
Tim Petersa98011c2002-07-02 20:20:08 +0000927 return PyInt_FromLong(result);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000928}
929
Barry Warsawe977c212000-08-15 06:07:13 +0000930
931static PyObject *
932binascii_hexlify(PyObject *self, PyObject *args)
933{
934 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000935 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000936 PyObject *retval;
937 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000938 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000939
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000940 if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
Barry Warsawe977c212000-08-15 06:07:13 +0000941 return NULL;
942
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000943 retval = PyBytes_FromStringAndSize(NULL, arglen*2);
Barry Warsawe977c212000-08-15 06:07:13 +0000944 if (!retval)
945 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000946 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +0000947
948 /* make hex version of string, taken from shamodule.c */
949 for (i=j=0; i < arglen; i++) {
950 char c;
951 c = (argbuf[i] >> 4) & 0xf;
952 c = (c>9) ? c+'a'-10 : c + '0';
953 retbuf[j++] = c;
954 c = argbuf[i] & 0xf;
955 c = (c>9) ? c+'a'-10 : c + '0';
956 retbuf[j++] = c;
957 }
958 return retval;
959
960 finally:
961 Py_DECREF(retval);
962 return NULL;
963}
964
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000965PyDoc_STRVAR(doc_hexlify,
Barry Warsawe977c212000-08-15 06:07:13 +0000966"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
967\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000968This function is also available as \"hexlify()\".");
Barry Warsawe977c212000-08-15 06:07:13 +0000969
970
/*
** Convert one ASCII hexadecimal digit to its numeric value.
**
** c: character code of the digit ('0'-'9', 'a'-'f' or 'A'-'F').
** Returns 0..15 for a valid digit and -1 for anything else.
**
** Explicit range comparisons are used instead of the <ctype.h>
** classification functions so that the result does not depend on the
** current locale and is well defined for every int argument, including
** negative ones.
*/
static int
to_int(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
984
985
986static PyObject *
987binascii_unhexlify(PyObject *self, PyObject *args)
988{
989 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000990 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000991 PyObject *retval;
992 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000993 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000994
995 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
996 return NULL;
997
Barry Warsaw16168472000-08-15 06:59:58 +0000998 /* XXX What should we do about strings with an odd length? Should
999 * we add an implicit leading zero, or a trailing zero? For now,
1000 * raise an exception.
Barry Warsawe977c212000-08-15 06:07:13 +00001001 */
1002 if (arglen % 2) {
Guido van Rossum4581ae52007-05-22 21:56:47 +00001003 PyErr_SetString(Error, "Odd-length string");
Barry Warsawe977c212000-08-15 06:07:13 +00001004 return NULL;
1005 }
1006
Guido van Rossum0e225aa2007-05-22 20:24:57 +00001007 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
Barry Warsawe977c212000-08-15 06:07:13 +00001008 if (!retval)
1009 return NULL;
Guido van Rossum0e225aa2007-05-22 20:24:57 +00001010 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001011
1012 for (i=j=0; i < arglen; i += 2) {
1013 int top = to_int(Py_CHARMASK(argbuf[i]));
1014 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1015 if (top == -1 || bot == -1) {
Guido van Rossum4581ae52007-05-22 21:56:47 +00001016 PyErr_SetString(Error,
Barry Warsaw16168472000-08-15 06:59:58 +00001017 "Non-hexadecimal digit found");
Barry Warsawe977c212000-08-15 06:07:13 +00001018 goto finally;
1019 }
1020 retbuf[j++] = (top << 4) + bot;
1021 }
1022 return retval;
1023
1024 finally:
1025 Py_DECREF(retval);
1026 return NULL;
1027}
1028
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001029PyDoc_STRVAR(doc_unhexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001030"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1031\n\
1032hexstr must contain an even number of hex digits (upper or lower case).\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001033This function is also available as \"unhexlify()\"");
Barry Warsawe977c212000-08-15 06:07:13 +00001034
/*
** Value of each 7-bit ASCII character as a hexadecimal digit, or -1 if
** the character is not one.  Both 'A'-'F' and 'a'-'f' map to 10-15.
** The table is only read, so it is const.
*/
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up the hex value of c.  The table has only 128 entries, so the
** caller must make sure c is below 128 (e.g. by range-checking it as a
** hex digit first). */
#define hexval(c) (table_hex[(unsigned int)(c)])
1047
1048#define MAXLINESIZE 76
1049
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001050PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001051
Tim Peters934c1a12002-07-02 22:24:50 +00001052static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001053binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1054{
Thomas Wouters7087f782006-03-01 23:10:05 +00001055 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001056 char ch;
1057 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001058 Py_ssize_t datalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001059 PyObject *rv;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001060 static char *kwlist[] = {"data", "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001061 int header = 0;
1062
Tim Peters934c1a12002-07-02 22:24:50 +00001063 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001064 &datalen, &header))
1065 return NULL;
1066
Barry Warsaw23164a52004-05-11 02:05:11 +00001067 /* We allocate the output same size as input, this is overkill.
1068 * The previous implementation used calloc() so we'll zero out the
1069 * memory here too, since PyMem_Malloc() does not guarantee that.
1070 */
1071 odata = (unsigned char *) PyMem_Malloc(datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001072 if (odata == NULL) {
1073 PyErr_NoMemory();
1074 return NULL;
1075 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001076 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001077
1078 in = out = 0;
1079 while (in < datalen) {
1080 if (data[in] == '=') {
1081 in++;
1082 if (in >= datalen) break;
1083 /* Soft line breaks */
Thomas Wouters89f507f2006-12-13 04:49:30 +00001084 if ((data[in] == '\n') || (data[in] == '\r')) {
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001085 if (data[in] != '\n') {
1086 while (in < datalen && data[in] != '\n') in++;
1087 }
1088 if (in < datalen) in++;
1089 }
1090 else if (data[in] == '=') {
1091 /* broken case from broken python qp */
1092 odata[out++] = '=';
1093 in++;
1094 }
Tim Peters934c1a12002-07-02 22:24:50 +00001095 else if (((data[in] >= 'A' && data[in] <= 'F') ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001096 (data[in] >= 'a' && data[in] <= 'f') ||
1097 (data[in] >= '0' && data[in] <= '9')) &&
1098 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1099 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1100 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1101 /* hexval */
1102 ch = hexval(data[in]) << 4;
1103 in++;
1104 ch |= hexval(data[in]);
1105 in++;
1106 odata[out++] = ch;
1107 }
1108 else {
1109 odata[out++] = '=';
1110 }
1111 }
1112 else if (header && data[in] == '_') {
1113 odata[out++] = ' ';
1114 in++;
1115 }
1116 else {
1117 odata[out] = data[in];
1118 in++;
1119 out++;
1120 }
1121 }
Guido van Rossum0e225aa2007-05-22 20:24:57 +00001122 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001123 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001124 return NULL;
1125 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001126 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001127 return rv;
1128}
1129
/*
** Write the two uppercase hexadecimal digits of ch to s[0] (high nibble)
** and s[1] (low nibble).  No terminator is written.  Always returns 0.
*/
static int
to_hex (unsigned char ch, unsigned char *s)
{
	static const char digits[] = "0123456789ABCDEF";

	s[0] = digits[(ch >> 4) & 0x0f];
	s[1] = digits[ch & 0x0f];
	return 0;
}
1140
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001141PyDoc_STRVAR(doc_b2a_qp,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001142"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1143 Encode a string using quoted-printable encoding. \n\
1144\n\
1145On encoding, when istext is set, newlines are not encoded, and white \n\
1146space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001147both encoded. When quotetabs is set, space and tabs are encoded.");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001148
1149/* XXX: This is ridiculously complicated to be backward compatible
1150 * (mostly) with the quopri module. It doesn't re-create the quopri
1151 * module bug where text ending in CRLF has the CR encoded */
Tim Peters934c1a12002-07-02 22:24:50 +00001152static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001153binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1154{
Thomas Wouters7087f782006-03-01 23:10:05 +00001155 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001156 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001157 Py_ssize_t datalen = 0, odatalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001158 PyObject *rv;
1159 unsigned int linelen = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001160 static char *kwlist[] = {"data", "quotetabs", "istext",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001161 "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001162 int istext = 1;
1163 int quotetabs = 0;
1164 int header = 0;
1165 unsigned char ch;
1166 int crlf = 0;
1167 unsigned char *p;
1168
Tim Peters934c1a12002-07-02 22:24:50 +00001169 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001170 &datalen, &quotetabs, &istext, &header))
1171 return NULL;
1172
1173 /* See if this string is using CRLF line ends */
1174 /* XXX: this function has the side effect of converting all of
1175 * the end of lines to be the same depending on this detection
1176 * here */
Walter Dörwald0925e412007-05-09 18:23:50 +00001177 p = (unsigned char *) memchr(data, '\n', datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001178 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1179 crlf = 1;
1180
1181 /* First, scan to see how many characters need to be encoded */
1182 in = 0;
1183 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001184 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001185 (data[in] == '=') ||
1186 (header && data[in] == '_') ||
Guido van Rossumd8faa362007-04-27 19:54:29 +00001187 ((data[in] == '.') && (linelen == 0) &&
1188 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001189 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1190 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001191 ((data[in] < 33) &&
1192 (data[in] != '\r') && (data[in] != '\n') &&
Guido van Rossumd8faa362007-04-27 19:54:29 +00001193 (quotetabs ||
1194 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001195 {
1196 if ((linelen + 3) >= MAXLINESIZE) {
1197 linelen = 0;
1198 if (crlf)
1199 odatalen += 3;
1200 else
1201 odatalen += 2;
1202 }
1203 linelen += 3;
1204 odatalen += 3;
1205 in++;
1206 }
1207 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001208 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001209 ((data[in] == '\n') ||
1210 ((in+1 < datalen) && (data[in] == '\r') &&
1211 (data[in+1] == '\n'))))
1212 {
1213 linelen = 0;
1214 /* Protect against whitespace on end of line */
1215 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1216 odatalen += 2;
1217 if (crlf)
1218 odatalen += 2;
1219 else
1220 odatalen += 1;
1221 if (data[in] == '\r')
1222 in += 2;
1223 else
1224 in++;
1225 }
1226 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001227 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001228 (data[in+1] != '\n') &&
1229 (linelen + 1) >= MAXLINESIZE) {
1230 linelen = 0;
1231 if (crlf)
1232 odatalen += 3;
1233 else
1234 odatalen += 2;
1235 }
1236 linelen++;
1237 odatalen++;
1238 in++;
1239 }
1240 }
1241 }
1242
Barry Warsaw23164a52004-05-11 02:05:11 +00001243 /* We allocate the output same size as input, this is overkill.
1244 * The previous implementation used calloc() so we'll zero out the
1245 * memory here too, since PyMem_Malloc() does not guarantee that.
1246 */
1247 odata = (unsigned char *) PyMem_Malloc(odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001248 if (odata == NULL) {
1249 PyErr_NoMemory();
1250 return NULL;
1251 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001252 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001253
1254 in = out = linelen = 0;
1255 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001256 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001257 (data[in] == '=') ||
1258 (header && data[in] == '_') ||
Guido van Rossumd8faa362007-04-27 19:54:29 +00001259 ((data[in] == '.') && (linelen == 0) &&
1260 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001261 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1262 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001263 ((data[in] < 33) &&
1264 (data[in] != '\r') && (data[in] != '\n') &&
Guido van Rossumd8faa362007-04-27 19:54:29 +00001265 (quotetabs ||
1266 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001267 {
1268 if ((linelen + 3 )>= MAXLINESIZE) {
1269 odata[out++] = '=';
1270 if (crlf) odata[out++] = '\r';
1271 odata[out++] = '\n';
1272 linelen = 0;
1273 }
1274 odata[out++] = '=';
1275 to_hex(data[in], &odata[out]);
1276 out += 2;
1277 in++;
1278 linelen += 3;
1279 }
1280 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001281 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001282 ((data[in] == '\n') ||
1283 ((in+1 < datalen) && (data[in] == '\r') &&
1284 (data[in+1] == '\n'))))
1285 {
1286 linelen = 0;
1287 /* Protect against whitespace on end of line */
1288 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1289 ch = odata[out-1];
1290 odata[out-1] = '=';
1291 to_hex(ch, &odata[out]);
1292 out += 2;
1293 }
Tim Peters934c1a12002-07-02 22:24:50 +00001294
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001295 if (crlf) odata[out++] = '\r';
1296 odata[out++] = '\n';
1297 if (data[in] == '\r')
1298 in += 2;
1299 else
1300 in++;
1301 }
1302 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001303 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001304 (data[in+1] != '\n') &&
1305 (linelen + 1) >= MAXLINESIZE) {
1306 odata[out++] = '=';
1307 if (crlf) odata[out++] = '\r';
1308 odata[out++] = '\n';
1309 linelen = 0;
1310 }
1311 linelen++;
1312 if (header && data[in] == ' ') {
1313 odata[out++] = '_';
1314 in++;
1315 }
1316 else {
1317 odata[out++] = data[in++];
1318 }
1319 }
1320 }
1321 }
Guido van Rossum0e225aa2007-05-22 20:24:57 +00001322 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001323 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001324 return NULL;
1325 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001326 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001327 return rv;
1328}
Barry Warsawe977c212000-08-15 06:07:13 +00001329
Jack Jansen72781191995-08-07 14:34:15 +00001330/* List of functions defined in the module */
1331
1332static struct PyMethodDef binascii_module_methods[] = {
Barry Warsawe977c212000-08-15 06:07:13 +00001333 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1334 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1335 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1336 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1337 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1338 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1339 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1340 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1341 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1342 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1343 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1344 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1345 doc_rledecode_hqx},
1346 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1347 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
Tim Peters934c1a12002-07-02 22:24:50 +00001348 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001349 doc_a2b_qp},
Tim Peters934c1a12002-07-02 22:24:50 +00001350 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001351 doc_b2a_qp},
Barry Warsawe977c212000-08-15 06:07:13 +00001352 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001353};
1354
1355
1356/* Initialization function for the module (*must* be called initbinascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001357PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001358
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001359PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001360initbinascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001361{
1362 PyObject *m, *d, *x;
1363
1364 /* Create the module and add the functions */
1365 m = Py_InitModule("binascii", binascii_module_methods);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001366 if (m == NULL)
1367 return;
Jack Jansen72781191995-08-07 14:34:15 +00001368
1369 d = PyModule_GetDict(m);
1370 x = PyString_FromString(doc_binascii);
1371 PyDict_SetItemString(d, "__doc__", x);
Guido van Rossum5c159bd1997-08-04 23:55:25 +00001372 Py_XDECREF(x);
Jack Jansen72781191995-08-07 14:34:15 +00001373
Guido van Rossum4581ae52007-05-22 21:56:47 +00001374 Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001375 PyDict_SetItemString(d, "Error", Error);
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001376 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001377 PyDict_SetItemString(d, "Incomplete", Incomplete);
Jack Jansen72781191995-08-07 14:34:15 +00001378}