blob: b65bdab786c442b3fe292bb4fbbb6693cbe57beb [file] [log] [blame]
/*
** Routines to represent binary data in ASCII and vice-versa
**
** This module currently supports the following encodings:
** uuencode:
**      each line encodes 45 bytes (except possibly the last)
**      First char encodes (binary) length, rest data
**      each char encodes 6 bits, as follows:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**      short binary data is zero-extended (so the bits are always in the
**      right place), this is *not* reflected in the length.
** base64:
**      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in the same order as uuencode/hqx. Encoding
**      is done via a table.
**      Short binary data is filled (in ASCII) with '='.
** hqx:
**      File starts with introductory text, real data starts and ends
**      with colons.
**      Data consists of three similar parts: info, datafork, resourcefork.
**      Each part is protected (at the end) with a 16-bit crc
**      The binary data is run-length encoded, and then ascii-fied:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding is table-driven, see the code.
**      Short binary data results in the runt ascii-byte being output with
**      the bits in the right place.
**
** While I was reading dozens of programs that encode or decode the formats
** here (documentation? hihi:-) I have formulated Jansen's Observation:
**
**      Programs that encode binary data in ASCII are written in
**      such a style that they are as unreadable as possible. Devices used
**      include unnecessary global variables, burying important tables
**      in unrelated sourcefiles, putting functions in include files,
**      using seemingly-descriptive variable names for different purposes,
**      calls to empty subroutines and a host of others.
**
** I have attempted to break with this tradition, but I guess that that
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on RFC 1521 et al.
** Quoted-printable encoding specifies that non-printable characters (anything
** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
** of the character.  It also specifies some other behavior to enable 8-bit
** data in a mail message with little difficulty (maximum line sizes,
** protecting some cases of whitespace, etc).
**
** Brandon Long, September 2001.
*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
Christian Heimes1dc54002008-03-24 02:19:29 +000059#ifdef USE_ZLIB_CRC32
60#include "zlib.h"
61#endif
Jack Jansen72781191995-08-07 14:34:15 +000062
63static PyObject *Error;
64static PyObject *Incomplete;
65
/*
** hqx lookup table, ascii->binary.
**
** Indexed by the input byte.  Each entry is either the 6-bit value the
** character stands for (0x00..0x3F) or one of the markers below.
** The table is only ever read, so it is declared const.
*/

#define RUNCHAR 0x90    /* run-length marker byte used by the hqx RLE coder */

#define DONE 0x7F       /* ':' - terminates the data */
#define SKIP 0x7E       /* '\n' and '\r' - ignored */
#define FAIL 0x7D       /* any other character - not valid in hqx data */

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       BS    TAB   LF    ^K    ^L    CR    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ESC   FS    GS    RS    US   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       SP    !     "     #     $     %     &     '    */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*       (     )     *     +     ,     -     .     /    */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*       0     1     2     3     4     5     6     7    */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*       8     9     :     ;     <     =     >     ?    */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       @     A     B     C     D     E     F     G    */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*       H     I     J     K     L     M     N     O    */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*       P     Q     R     S     T     U     V     W    */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*       X     Y     Z     [  bkslash  ]     ^     _    */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*       `     a     b     c     d     e     f     g    */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*       h     i     j     k     l     m     n     o    */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*       p     q     r     s     t     u     v     w    */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       x     y     z     {     |     }     ~     DEL  */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/* Bytes 0x80..0xFF are never valid. */
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
126
/* hqx encoding alphabet: the character for 6-bit value i is entry i.
** Read-only, hence const. */
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000129
/*
** base64 lookup table, ascii->binary, for the 7-bit characters 0..127.
** An entry of -1 marks a character that is not part of the base64
** alphabet; callers compare against (unsigned char)-1.
** Read-only, hence const.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
140
141#define BASE64_PAD '='
Guido van Rossum355bc0c2001-10-30 03:00:52 +0000142
143/* Max binary chunk size; limited only by available memory */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000144#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000145
/* base64 encoding alphabet: the character for 6-bit value i is entry i.
** Read-only, hence const. */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000148
149
150
/*
** 16-bit CRC lookup table used for the hqx checksums: entry i is the
** CRC (polynomial 0x1021, MSB first) of the single byte i.
** Read-only, hence const.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
185
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000186PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
Jack Jansen72781191995-08-07 14:34:15 +0000187
188static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000189binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000190{
191 unsigned char *ascii_data, *bin_data;
192 int leftbits = 0;
193 unsigned char this_ch;
194 unsigned int leftchar = 0;
195 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000196 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000197
Guido van Rossum43713e52000-02-29 13:59:29 +0000198 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000199 return NULL;
200
201 /* First byte: binary data length (in bytes) */
202 bin_len = (*ascii_data++ - ' ') & 077;
203 ascii_len--;
204
205 /* Allocate the buffer */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000206 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000207 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000208 bin_data = (unsigned char *)PyString_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000209
Jack Jansen72781191995-08-07 14:34:15 +0000210 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
Neal Norwitzfe92eef2004-06-06 20:13:10 +0000211 /* XXX is it really best to add NULs if there's no more data */
212 this_ch = (ascii_len > 0) ? *ascii_data : 0;
Jack Jansen72781191995-08-07 14:34:15 +0000213 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
214 /*
215 ** Whitespace. Assume some spaces got eaten at
216 ** end-of-line. (We check this later)
217 */
218 this_ch = 0;
219 } else {
Jack Jansen5d957971995-11-14 10:35:19 +0000220 /* Check the character for legality
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000221 ** The 64 in stead of the expected 63 is because
222 ** there are a few uuencodes out there that use
Guido van Rossum92d89172001-01-09 02:11:57 +0000223 ** '`' as zero instead of space.
Jack Jansen5d957971995-11-14 10:35:19 +0000224 */
225 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Jack Jansen72781191995-08-07 14:34:15 +0000226 PyErr_SetString(Error, "Illegal char");
227 Py_DECREF(rv);
228 return NULL;
229 }
230 this_ch = (this_ch - ' ') & 077;
231 }
232 /*
233 ** Shift it in on the low end, and see if there's
234 ** a byte ready for output.
235 */
236 leftchar = (leftchar << 6) | (this_ch);
237 leftbits += 6;
238 if ( leftbits >= 8 ) {
239 leftbits -= 8;
240 *bin_data++ = (leftchar >> leftbits) & 0xff;
241 leftchar &= ((1 << leftbits) - 1);
242 bin_len--;
243 }
244 }
245 /*
246 ** Finally, check that if there's anything left on the line
247 ** that it's whitespace only.
248 */
249 while( ascii_len-- > 0 ) {
250 this_ch = *ascii_data++;
Guido van Rossum92d89172001-01-09 02:11:57 +0000251 /* Extra '`' may be written as padding in some cases */
252 if ( this_ch != ' ' && this_ch != ' '+64 &&
Guido van Rossum1243ae71997-07-11 18:36:28 +0000253 this_ch != '\n' && this_ch != '\r' ) {
Jack Jansen72781191995-08-07 14:34:15 +0000254 PyErr_SetString(Error, "Trailing garbage");
255 Py_DECREF(rv);
256 return NULL;
257 }
258 }
259 return rv;
260}
261
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000262PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000263
Jack Jansen72781191995-08-07 14:34:15 +0000264static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000265binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000266{
267 unsigned char *ascii_data, *bin_data;
268 int leftbits = 0;
269 unsigned char this_ch;
270 unsigned int leftchar = 0;
271 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000272 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000273
Guido van Rossum43713e52000-02-29 13:59:29 +0000274 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000275 return NULL;
276 if ( bin_len > 45 ) {
277 /* The 45 is a limit that appears in all uuencode's */
278 PyErr_SetString(Error, "At most 45 bytes at once");
279 return NULL;
280 }
281
282 /* We're lazy and allocate to much (fixed up later) */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000283 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000284 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000285 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000286
287 /* Store the length */
288 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000289
Jack Jansen72781191995-08-07 14:34:15 +0000290 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
291 /* Shift the data (or padding) into our buffer */
292 if ( bin_len > 0 ) /* Data */
293 leftchar = (leftchar << 8) | *bin_data;
294 else /* Padding */
295 leftchar <<= 8;
296 leftbits += 8;
297
298 /* See if there are 6-bit groups ready */
299 while ( leftbits >= 6 ) {
300 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
301 leftbits -= 6;
302 *ascii_data++ = this_ch + ' ';
303 }
304 }
305 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000306
Guido van Rossum98297ee2007-11-06 21:34:58 +0000307 if (_PyString_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000308 (ascii_data -
Guido van Rossum98297ee2007-11-06 21:34:58 +0000309 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000310 Py_DECREF(rv);
311 rv = NULL;
312 }
Jack Jansen72781191995-08-07 14:34:15 +0000313 return rv;
314}
315
Guido van Rossum2db4f471999-10-19 19:05:14 +0000316
317static int
Thomas Woutersf98db652006-03-01 21:37:32 +0000318binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000319{
Tim Peters934c1a12002-07-02 22:24:50 +0000320 /* Finds & returns the (num+1)th
Guido van Rossum2db4f471999-10-19 19:05:14 +0000321 ** valid character for base64, or -1 if none.
322 */
323
324 int ret = -1;
325 unsigned char c, b64val;
326
327 while ((slen > 0) && (ret == -1)) {
328 c = *s;
329 b64val = table_a2b_base64[c & 0x7f];
330 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
331 if (num == 0)
332 ret = *s;
333 num--;
334 }
335
336 s++;
337 slen--;
338 }
339 return ret;
340}
341
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000342PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000343
344static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000345binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000346{
347 unsigned char *ascii_data, *bin_data;
348 int leftbits = 0;
349 unsigned char this_ch;
350 unsigned int leftchar = 0;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000351 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000352 Py_ssize_t ascii_len, bin_len;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000353 int quad_pos = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000354
Guido van Rossum43713e52000-02-29 13:59:29 +0000355 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000356 return NULL;
357
358 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
359
360 /* Allocate the buffer */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000361 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000362 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000363 bin_data = (unsigned char *)PyString_AS_STRING(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000364 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000365
366 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
367 this_ch = *ascii_data;
368
369 if (this_ch > 0x7f ||
370 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
Jack Jansenba1de3b1996-01-22 10:47:15 +0000371 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000372
373 /* Check for pad sequences and ignore
374 ** the invalid ones.
375 */
376 if (this_ch == BASE64_PAD) {
377 if ( (quad_pos < 2) ||
378 ((quad_pos == 2) &&
379 (binascii_find_valid(ascii_data, ascii_len, 1)
380 != BASE64_PAD)) )
381 {
382 continue;
383 }
384 else {
385 /* A pad sequence means no more input.
386 ** We've already interpreted the data
387 ** from the quad at this point.
388 */
389 leftbits = 0;
390 break;
391 }
392 }
393
394 this_ch = table_a2b_base64[*ascii_data];
395 if ( this_ch == (unsigned char) -1 )
396 continue;
397
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000398 /*
399 ** Shift it in on the low end, and see if there's
400 ** a byte ready for output.
401 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000402 quad_pos = (quad_pos + 1) & 0x03;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000403 leftchar = (leftchar << 6) | (this_ch);
404 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000405
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000406 if ( leftbits >= 8 ) {
407 leftbits -= 8;
408 *bin_data++ = (leftchar >> leftbits) & 0xff;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000409 bin_len++;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000410 leftchar &= ((1 << leftbits) - 1);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000411 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000412 }
413
414 if (leftbits != 0) {
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000415 PyErr_SetString(Error, "Incorrect padding");
416 Py_DECREF(rv);
417 return NULL;
418 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000419
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000420 /* And set string size correctly. If the result string is empty
421 ** (because the input was all invalid) return the shared empty
Guido van Rossum98297ee2007-11-06 21:34:58 +0000422 ** string instead; _PyString_Resize() won't do this for us.
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000423 */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000424 if (bin_len > 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000425 if (_PyString_Resize(&rv, bin_len) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000426 Py_DECREF(rv);
427 rv = NULL;
428 }
429 }
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000430 else {
431 Py_DECREF(rv);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000432 rv = PyString_FromStringAndSize("", 0);
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000433 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000434 return rv;
435}
436
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000437PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000438
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000439static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000440binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000441{
442 unsigned char *ascii_data, *bin_data;
443 int leftbits = 0;
444 unsigned char this_ch;
445 unsigned int leftchar = 0;
446 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000447 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000448
Guido van Rossum43713e52000-02-29 13:59:29 +0000449 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000450 return NULL;
451 if ( bin_len > BASE64_MAXBIN ) {
452 PyErr_SetString(Error, "Too much data for base64 line");
453 return NULL;
454 }
Tim Peters934c1a12002-07-02 22:24:50 +0000455
Tim Peters1fbb5772001-12-19 04:41:35 +0000456 /* We're lazy and allocate too much (fixed up later).
457 "+3" leaves room for up to two pad characters and a trailing
458 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000459 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000460 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000461 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000462
463 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
464 /* Shift the data into our buffer */
465 leftchar = (leftchar << 8) | *bin_data;
466 leftbits += 8;
467
468 /* See if there are 6-bit groups ready */
469 while ( leftbits >= 6 ) {
470 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
471 leftbits -= 6;
472 *ascii_data++ = table_b2a_base64[this_ch];
473 }
474 }
475 if ( leftbits == 2 ) {
476 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
477 *ascii_data++ = BASE64_PAD;
478 *ascii_data++ = BASE64_PAD;
479 } else if ( leftbits == 4 ) {
480 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
481 *ascii_data++ = BASE64_PAD;
Tim Peters934c1a12002-07-02 22:24:50 +0000482 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000483 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000484
Guido van Rossum98297ee2007-11-06 21:34:58 +0000485 if (_PyString_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000486 (ascii_data -
Guido van Rossum98297ee2007-11-06 21:34:58 +0000487 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000488 Py_DECREF(rv);
489 rv = NULL;
490 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000491 return rv;
492}
493
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000494PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
Jack Jansen72781191995-08-07 14:34:15 +0000495
496static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000497binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000498{
499 unsigned char *ascii_data, *bin_data;
500 int leftbits = 0;
501 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000502 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000503 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000504 Py_ssize_t len;
Jack Jansen72781191995-08-07 14:34:15 +0000505 int done = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000506
Guido van Rossum43713e52000-02-29 13:59:29 +0000507 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000508 return NULL;
509
Raymond Hettinger658717e2004-09-06 22:58:37 +0000510 /* Allocate a string that is too big (fixed later)
511 Add two to the initial length to prevent interning which
512 would preclude subsequent resizing. */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000513 if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000514 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000515 bin_data = (unsigned char *)PyString_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000516
517 for( ; len > 0 ; len--, ascii_data++ ) {
518 /* Get the byte and look it up */
519 this_ch = table_a2b_hqx[*ascii_data];
520 if ( this_ch == SKIP )
521 continue;
522 if ( this_ch == FAIL ) {
523 PyErr_SetString(Error, "Illegal char");
524 Py_DECREF(rv);
525 return NULL;
526 }
527 if ( this_ch == DONE ) {
528 /* The terminating colon */
529 done = 1;
530 break;
531 }
532
533 /* Shift it into the buffer and see if any bytes are ready */
534 leftchar = (leftchar << 6) | (this_ch);
535 leftbits += 6;
536 if ( leftbits >= 8 ) {
537 leftbits -= 8;
538 *bin_data++ = (leftchar >> leftbits) & 0xff;
539 leftchar &= ((1 << leftbits) - 1);
540 }
541 }
Tim Peters934c1a12002-07-02 22:24:50 +0000542
Jack Jansen72781191995-08-07 14:34:15 +0000543 if ( leftbits && !done ) {
544 PyErr_SetString(Incomplete,
545 "String has incomplete number of bytes");
546 Py_DECREF(rv);
547 return NULL;
548 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000549 if (_PyString_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000550 (bin_data -
Guido van Rossum98297ee2007-11-06 21:34:58 +0000551 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000552 Py_DECREF(rv);
553 rv = NULL;
554 }
Guido van Rossum9c6ba5e1997-01-12 20:02:04 +0000555 if (rv) {
556 PyObject *rrv = Py_BuildValue("Oi", rv, done);
557 Py_DECREF(rv);
558 return rrv;
559 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000560
Jack Jansen72781191995-08-07 14:34:15 +0000561 return NULL;
562}
563
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000564PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
Jack Jansen72781191995-08-07 14:34:15 +0000565
566static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000567binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000568{
569 unsigned char *in_data, *out_data;
570 PyObject *rv;
571 unsigned char ch;
Thomas Woutersf98db652006-03-01 21:37:32 +0000572 Py_ssize_t in, inend, len;
Tim Peters934c1a12002-07-02 22:24:50 +0000573
Guido van Rossum43713e52000-02-29 13:59:29 +0000574 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000575 return NULL;
576
577 /* Worst case: output is twice as big as input (fixed later) */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000578 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000579 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000580 out_data = (unsigned char *)PyString_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000581
Jack Jansen72781191995-08-07 14:34:15 +0000582 for( in=0; in<len; in++) {
583 ch = in_data[in];
584 if ( ch == RUNCHAR ) {
585 /* RUNCHAR. Escape it. */
586 *out_data++ = RUNCHAR;
587 *out_data++ = 0;
588 } else {
589 /* Check how many following are the same */
590 for(inend=in+1;
591 inend<len && in_data[inend] == ch &&
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000592 inend < in+255;
Jack Jansen72781191995-08-07 14:34:15 +0000593 inend++) ;
Jack Jansen0223aa11995-08-31 13:44:23 +0000594 if ( inend - in > 3 ) {
595 /* More than 3 in a row. Output RLE. */
Jack Jansen72781191995-08-07 14:34:15 +0000596 *out_data++ = ch;
597 *out_data++ = RUNCHAR;
598 *out_data++ = inend-in;
599 in = inend-1;
600 } else {
601 /* Less than 3. Output the byte itself */
602 *out_data++ = ch;
603 }
604 }
605 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000606 if (_PyString_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000607 (out_data -
Guido van Rossum98297ee2007-11-06 21:34:58 +0000608 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000609 Py_DECREF(rv);
610 rv = NULL;
611 }
Jack Jansen72781191995-08-07 14:34:15 +0000612 return rv;
613}
614
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000615PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
Tim Peters934c1a12002-07-02 22:24:50 +0000616
Jack Jansen72781191995-08-07 14:34:15 +0000617static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000618binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000619{
620 unsigned char *ascii_data, *bin_data;
621 int leftbits = 0;
622 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000623 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000624 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000625 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000626
Guido van Rossum43713e52000-02-29 13:59:29 +0000627 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000628 return NULL;
629
630 /* Allocate a buffer that is at least large enough */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000631 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000632 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000633 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000634
Jack Jansen72781191995-08-07 14:34:15 +0000635 for( ; len > 0 ; len--, bin_data++ ) {
636 /* Shift into our buffer, and output any 6bits ready */
637 leftchar = (leftchar << 8) | *bin_data;
638 leftbits += 8;
639 while ( leftbits >= 6 ) {
640 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
641 leftbits -= 6;
642 *ascii_data++ = table_b2a_hqx[this_ch];
643 }
644 }
645 /* Output a possible runt byte */
646 if ( leftbits ) {
647 leftchar <<= (6-leftbits);
648 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
649 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000650 if (_PyString_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000651 (ascii_data -
Guido van Rossum98297ee2007-11-06 21:34:58 +0000652 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000653 Py_DECREF(rv);
654 rv = NULL;
655 }
Jack Jansen72781191995-08-07 14:34:15 +0000656 return rv;
657}
658
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000659PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
Tim Peters934c1a12002-07-02 22:24:50 +0000660
Jack Jansen72781191995-08-07 14:34:15 +0000661static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000662binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000663{
664 unsigned char *in_data, *out_data;
665 unsigned char in_byte, in_repeat;
666 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000667 Py_ssize_t in_len, out_len, out_len_left;
Jack Jansen72781191995-08-07 14:34:15 +0000668
Guido van Rossum43713e52000-02-29 13:59:29 +0000669 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000670 return NULL;
671
672 /* Empty string is a special case */
673 if ( in_len == 0 )
Guido van Rossum98297ee2007-11-06 21:34:58 +0000674 return PyString_FromStringAndSize("", 0);
Jack Jansen72781191995-08-07 14:34:15 +0000675
676 /* Allocate a buffer of reasonable size. Resized when needed */
677 out_len = in_len*2;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000678 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000679 return NULL;
680 out_len_left = out_len;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000681 out_data = (unsigned char *)PyString_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000682
683 /*
684 ** We need two macros here to get/put bytes and handle
685 ** end-of-buffer for input and output strings.
686 */
687#define INBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000688 do { \
689 if ( --in_len < 0 ) { \
690 PyErr_SetString(Incomplete, ""); \
691 Py_DECREF(rv); \
692 return NULL; \
693 } \
694 b = *in_data++; \
695 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000696
Jack Jansen72781191995-08-07 14:34:15 +0000697#define OUTBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000698 do { \
699 if ( --out_len_left < 0 ) { \
Guido van Rossum98297ee2007-11-06 21:34:58 +0000700 if (_PyString_Resize(&rv, 2*out_len) < 0) \
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000701 { Py_DECREF(rv); return NULL; } \
Guido van Rossum98297ee2007-11-06 21:34:58 +0000702 out_data = (unsigned char *)PyString_AS_STRING(rv) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000703 + out_len; \
704 out_len_left = out_len-1; \
705 out_len = out_len * 2; \
706 } \
707 *out_data++ = b; \
708 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000709
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000710 /*
711 ** Handle first byte separately (since we have to get angry
712 ** in case of an orphaned RLE code).
713 */
714 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000715
716 if (in_byte == RUNCHAR) {
717 INBYTE(in_repeat);
718 if (in_repeat != 0) {
719 /* Note Error, not Incomplete (which is at the end
720 ** of the string only). This is a programmer error.
721 */
722 PyErr_SetString(Error, "Orphaned RLE code at start");
723 Py_DECREF(rv);
724 return NULL;
725 }
726 OUTBYTE(RUNCHAR);
727 } else {
728 OUTBYTE(in_byte);
729 }
Tim Peters934c1a12002-07-02 22:24:50 +0000730
Jack Jansen72781191995-08-07 14:34:15 +0000731 while( in_len > 0 ) {
732 INBYTE(in_byte);
733
734 if (in_byte == RUNCHAR) {
735 INBYTE(in_repeat);
736 if ( in_repeat == 0 ) {
737 /* Just an escaped RUNCHAR value */
738 OUTBYTE(RUNCHAR);
739 } else {
740 /* Pick up value and output a sequence of it */
741 in_byte = out_data[-1];
742 while ( --in_repeat > 0 )
743 OUTBYTE(in_byte);
744 }
745 } else {
746 /* Normal byte */
747 OUTBYTE(in_byte);
748 }
749 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000750 if (_PyString_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000751 (out_data -
Guido van Rossum98297ee2007-11-06 21:34:58 +0000752 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000753 Py_DECREF(rv);
754 rv = NULL;
755 }
Jack Jansen72781191995-08-07 14:34:15 +0000756 return rv;
757}
758
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000759PyDoc_STRVAR(doc_crc_hqx,
760"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
Jack Jansen72781191995-08-07 14:34:15 +0000761
762static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000763binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000764{
765 unsigned char *bin_data;
766 unsigned int crc;
Thomas Woutersf98db652006-03-01 21:37:32 +0000767 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000768
Guido van Rossum43713e52000-02-29 13:59:29 +0000769 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
Jack Jansen72781191995-08-07 14:34:15 +0000770 return NULL;
771
772 while(len--) {
773 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
774 }
775
776 return Py_BuildValue("i", crc);
777}
778
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000779PyDoc_STRVAR(doc_crc32,
780"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000781
Christian Heimes1dc54002008-03-24 02:19:29 +0000782#ifdef USE_ZLIB_CRC32
783/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
784static PyObject *
785binascii_crc32(PyObject *self, PyObject *args)
786{
Christian Heimescc47b052008-03-25 14:56:36 +0000787 unsigned int crc32val = 0; /* crc32(0L, Z_NULL, 0) */
Christian Heimes1dc54002008-03-24 02:19:29 +0000788 Byte *buf;
Neal Norwitz4027bf82008-03-24 04:59:05 +0000789 Py_ssize_t len;
Christian Heimescc47b052008-03-25 14:56:36 +0000790 int signed_val;
791
Christian Heimes1dc54002008-03-24 02:19:29 +0000792 if (!PyArg_ParseTuple(args, "s#|I:crc32", &buf, &len, &crc32val))
793 return NULL;
Christian Heimescc47b052008-03-25 14:56:36 +0000794 signed_val = crc32(crc32val, buf, len);
795 return PyLong_FromUnsignedLong(signed_val & 0xffffffffU);
Christian Heimes1dc54002008-03-24 02:19:29 +0000796}
797#else /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000798/* Crc - 32 BIT ANSI X3.66 CRC checksum files
799 Also known as: ISO 3307
800**********************************************************************|
801* *|
802* Demonstration program to compute the 32-bit CRC used as the frame *|
803* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
804* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
805* protocol). The 32-bit FCS was added via the Federal Register, *|
806* 1 June 1982, p.23798. I presume but don't know for certain that *|
807* this polynomial is or will be included in CCITT V.41, which *|
808* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
809* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
810* errors by a factor of 10^-5 over 16-bit FCS. *|
811* *|
812**********************************************************************|
813
814 Copyright (C) 1986 Gary S. Brown. You may use this program, or
815 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000816
Tim Peters934c1a12002-07-02 22:24:50 +0000817 First, the polynomial itself and its table of feedback terms. The
818 polynomial is
819 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
820 Note that we take it "backwards" and put the highest-order term in
821 the lowest-order bit. The X^32 term is "implied"; the LSB is the
822 X^31 term, etc. The X^0 term (usually shown as "+1") results in
823 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000824
Tim Peters934c1a12002-07-02 22:24:50 +0000825 Note that the usual hardware shift register implementation, which
826 is what we're using (we're merely optimizing it by doing eight-bit
827 chunks at a time) shifts bits into the lowest-order term. In our
828 implementation, that means shifting towards the right. Why do we
829 do it this way? Because the calculated CRC must be transmitted in
830 order from highest-order term to lowest-order term. UARTs transmit
831 characters in order from LSB to MSB. By storing the CRC this way,
832 we hand it to the UART in the order low-byte to high-byte; the UART
833 sends each low-bit to high-bit; and the result is transmission bit
834 by bit from highest- to lowest-order term without requiring any bit
835 shuffling on our part. Reception works similarly.
836
837 The feedback terms table consists of 256, 32-bit entries. Notes:
838
839 1. The table can be generated at runtime if desired; code to do so
840 is shown later. It might not be obvious, but the feedback
841 terms simply represent the results of eight shift/xor opera-
842 tions for all combinations of data and CRC register values.
843
844 2. The CRC accumulation logic is the same for all CRC polynomials,
845 be they sixteen or thirty-two bits wide. You simply choose the
846 appropriate table. Alternatively, because the table can be
847 generated at runtime, you can start by generating the table for
848 the polynomial in question and use exactly the same "updcrc",
849 if your application needn't simultaneously handle two CRC
850 polynomials. (Note, however, that XMODEM is strange.)
851
852 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
853 of course, 32-bit entries work OK if the high 16 bits are zero.
854
855 4. The values must be right-shifted by eight bits by the "updcrc"
856 logic; the shift must be unsigned (bring in zeroes). On some
857 hardware you could probably optimize the shift in assembler by
858 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000859********************************************************************/
860
/* CRC-32 feedback terms, one entry per possible byte value (256 entries).
 * Entry i is the result of eight shift/xor steps applied to i with the
 * reflected polynomial 0xedb88320 (which appears as entry 128).
 * The table is only ever read, so it is declared const. */
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
915
916static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000917binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000918{ /* By Jim Ahlstrom; All rights transferred to CNRI */
919 unsigned char *bin_data;
Gregory P. Smith27275032008-03-20 06:20:09 +0000920 unsigned int crc = 0; /* initial value of CRC */
Thomas Woutersf98db652006-03-01 21:37:32 +0000921 Py_ssize_t len;
Gregory P. Smith27275032008-03-20 06:20:09 +0000922 unsigned int result;
Tim Peters934c1a12002-07-02 22:24:50 +0000923
Gregory P. Smith27275032008-03-20 06:20:09 +0000924 if ( !PyArg_ParseTuple(args, "s#|I:crc32", &bin_data, &len, &crc) )
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000925 return NULL;
926
Tim Peters934c1a12002-07-02 22:24:50 +0000927 crc = ~ crc;
Gregory P. Smith27275032008-03-20 06:20:09 +0000928 while (len--) {
929 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000930 /* Note: (crc >> 8) MUST zero fill on left */
Gregory P. Smith27275032008-03-20 06:20:09 +0000931 }
Tim Petersa98011c2002-07-02 20:20:08 +0000932
Gregory P. Smith27275032008-03-20 06:20:09 +0000933 result = (crc ^ 0xFFFFFFFF);
934 return PyLong_FromUnsignedLong(result & 0xffffffff);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000935}
Christian Heimes1dc54002008-03-24 02:19:29 +0000936#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000937
Barry Warsawe977c212000-08-15 06:07:13 +0000938
939static PyObject *
940binascii_hexlify(PyObject *self, PyObject *args)
941{
942 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000943 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000944 PyObject *retval;
945 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000946 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000947
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000948 if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
Barry Warsawe977c212000-08-15 06:07:13 +0000949 return NULL;
950
Guido van Rossum98297ee2007-11-06 21:34:58 +0000951 retval = PyString_FromStringAndSize(NULL, arglen*2);
Barry Warsawe977c212000-08-15 06:07:13 +0000952 if (!retval)
953 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000954 retbuf = PyString_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +0000955
956 /* make hex version of string, taken from shamodule.c */
957 for (i=j=0; i < arglen; i++) {
958 char c;
959 c = (argbuf[i] >> 4) & 0xf;
960 c = (c>9) ? c+'a'-10 : c + '0';
961 retbuf[j++] = c;
962 c = argbuf[i] & 0xf;
963 c = (c>9) ? c+'a'-10 : c + '0';
964 retbuf[j++] = c;
965 }
966 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +0000967}
968
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000969PyDoc_STRVAR(doc_hexlify,
Barry Warsawe977c212000-08-15 06:07:13 +0000970"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
971\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000972This function is also available as \"hexlify()\".");
Barry Warsawe977c212000-08-15 06:07:13 +0000973
974
/* Map one hexadecimal digit character (either case) to its value 0..15.
 * Returns -1 for any character that is not a hexadecimal digit. */
static int
to_int(int c)
{
    int lowered;

    if (isdigit(c))
        return c - '0';

    lowered = isupper(c) ? tolower(c) : c;
    if (lowered < 'a' || lowered > 'f')
        return -1;
    return lowered - 'a' + 10;
}
988
989
990static PyObject *
991binascii_unhexlify(PyObject *self, PyObject *args)
992{
993 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000994 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000995 PyObject *retval;
996 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000997 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000998
999 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
1000 return NULL;
1001
Barry Warsaw16168472000-08-15 06:59:58 +00001002 /* XXX What should we do about strings with an odd length? Should
1003 * we add an implicit leading zero, or a trailing zero? For now,
1004 * raise an exception.
Barry Warsawe977c212000-08-15 06:07:13 +00001005 */
1006 if (arglen % 2) {
Guido van Rossum4581ae52007-05-22 21:56:47 +00001007 PyErr_SetString(Error, "Odd-length string");
Barry Warsawe977c212000-08-15 06:07:13 +00001008 return NULL;
1009 }
1010
Guido van Rossum98297ee2007-11-06 21:34:58 +00001011 retval = PyString_FromStringAndSize(NULL, (arglen/2));
Barry Warsawe977c212000-08-15 06:07:13 +00001012 if (!retval)
1013 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001014 retbuf = PyString_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001015
1016 for (i=j=0; i < arglen; i += 2) {
1017 int top = to_int(Py_CHARMASK(argbuf[i]));
1018 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1019 if (top == -1 || bot == -1) {
Guido van Rossum4581ae52007-05-22 21:56:47 +00001020 PyErr_SetString(Error,
Barry Warsaw16168472000-08-15 06:59:58 +00001021 "Non-hexadecimal digit found");
Barry Warsawe977c212000-08-15 06:07:13 +00001022 goto finally;
1023 }
1024 retbuf[j++] = (top << 4) + bot;
1025 }
1026 return retval;
1027
1028 finally:
1029 Py_DECREF(retval);
1030 return NULL;
1031}
1032
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001033PyDoc_STRVAR(doc_unhexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001034"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1035\n\
1036hexstr must contain an even number of hex digits (upper or lower case).\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001037This function is also available as \"unhexlify()\"");
Barry Warsawe977c212000-08-15 06:07:13 +00001038
/* Value of each 7-bit character code as a hexadecimal digit, or -1 when
 * the character is not one.  Only 128 entries: callers must make sure
 * the index is below 128 before using hexval(). */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up the hexadecimal value of character c (no bounds check). */
#define hexval(c) (table_hex[(unsigned int)(c)])

/* Maximum length of an encoded quoted-printable line. */
#define MAXLINESIZE 76
1053
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001054PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001055
Tim Peters934c1a12002-07-02 22:24:50 +00001056static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001057binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1058{
Thomas Wouters7087f782006-03-01 23:10:05 +00001059 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001060 char ch;
1061 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001062 Py_ssize_t datalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001063 PyObject *rv;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001064 static char *kwlist[] = {"data", "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001065 int header = 0;
1066
Tim Peters934c1a12002-07-02 22:24:50 +00001067 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001068 &datalen, &header))
1069 return NULL;
1070
Barry Warsaw23164a52004-05-11 02:05:11 +00001071 /* We allocate the output same size as input, this is overkill.
1072 * The previous implementation used calloc() so we'll zero out the
1073 * memory here too, since PyMem_Malloc() does not guarantee that.
1074 */
1075 odata = (unsigned char *) PyMem_Malloc(datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001076 if (odata == NULL) {
1077 PyErr_NoMemory();
1078 return NULL;
1079 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001080 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001081
1082 in = out = 0;
1083 while (in < datalen) {
1084 if (data[in] == '=') {
1085 in++;
1086 if (in >= datalen) break;
1087 /* Soft line breaks */
Thomas Wouters89f507f2006-12-13 04:49:30 +00001088 if ((data[in] == '\n') || (data[in] == '\r')) {
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001089 if (data[in] != '\n') {
1090 while (in < datalen && data[in] != '\n') in++;
1091 }
1092 if (in < datalen) in++;
1093 }
1094 else if (data[in] == '=') {
1095 /* broken case from broken python qp */
1096 odata[out++] = '=';
1097 in++;
1098 }
Tim Peters934c1a12002-07-02 22:24:50 +00001099 else if (((data[in] >= 'A' && data[in] <= 'F') ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001100 (data[in] >= 'a' && data[in] <= 'f') ||
1101 (data[in] >= '0' && data[in] <= '9')) &&
1102 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1103 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1104 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1105 /* hexval */
1106 ch = hexval(data[in]) << 4;
1107 in++;
1108 ch |= hexval(data[in]);
1109 in++;
1110 odata[out++] = ch;
1111 }
1112 else {
1113 odata[out++] = '=';
1114 }
1115 }
1116 else if (header && data[in] == '_') {
1117 odata[out++] = ' ';
1118 in++;
1119 }
1120 else {
1121 odata[out] = data[in];
1122 in++;
1123 out++;
1124 }
1125 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001126 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001127 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001128 return NULL;
1129 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001130 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001131 return rv;
1132}
1133
/* Write the two uppercase hexadecimal digits of ch to s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0xf];
    s[1] = digits[ch & 0xf];
    return 0;
}
1144
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001145PyDoc_STRVAR(doc_b2a_qp,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001146"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1147 Encode a string using quoted-printable encoding. \n\
1148\n\
1149On encoding, when istext is set, newlines are not encoded, and white \n\
1150space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001151both encoded. When quotetabs is set, space and tabs are encoded.");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001152
1153/* XXX: This is ridiculously complicated to be backward compatible
1154 * (mostly) with the quopri module. It doesn't re-create the quopri
1155 * module bug where text ending in CRLF has the CR encoded */
Tim Peters934c1a12002-07-02 22:24:50 +00001156static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001157binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1158{
Thomas Wouters7087f782006-03-01 23:10:05 +00001159 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001160 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001161 Py_ssize_t datalen = 0, odatalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001162 PyObject *rv;
1163 unsigned int linelen = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001164 static char *kwlist[] = {"data", "quotetabs", "istext",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001165 "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001166 int istext = 1;
1167 int quotetabs = 0;
1168 int header = 0;
1169 unsigned char ch;
1170 int crlf = 0;
1171 unsigned char *p;
1172
Tim Peters934c1a12002-07-02 22:24:50 +00001173 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001174 &datalen, &quotetabs, &istext, &header))
1175 return NULL;
1176
1177 /* See if this string is using CRLF line ends */
1178 /* XXX: this function has the side effect of converting all of
1179 * the end of lines to be the same depending on this detection
1180 * here */
Walter Dörwald0925e412007-05-09 18:23:50 +00001181 p = (unsigned char *) memchr(data, '\n', datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001182 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1183 crlf = 1;
1184
1185 /* First, scan to see how many characters need to be encoded */
1186 in = 0;
1187 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001188 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001189 (data[in] == '=') ||
1190 (header && data[in] == '_') ||
Guido van Rossumd8faa362007-04-27 19:54:29 +00001191 ((data[in] == '.') && (linelen == 0) &&
1192 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001193 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1194 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001195 ((data[in] < 33) &&
1196 (data[in] != '\r') && (data[in] != '\n') &&
Guido van Rossumd8faa362007-04-27 19:54:29 +00001197 (quotetabs ||
1198 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001199 {
1200 if ((linelen + 3) >= MAXLINESIZE) {
1201 linelen = 0;
1202 if (crlf)
1203 odatalen += 3;
1204 else
1205 odatalen += 2;
1206 }
1207 linelen += 3;
1208 odatalen += 3;
1209 in++;
1210 }
1211 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001212 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001213 ((data[in] == '\n') ||
1214 ((in+1 < datalen) && (data[in] == '\r') &&
1215 (data[in+1] == '\n'))))
1216 {
1217 linelen = 0;
1218 /* Protect against whitespace on end of line */
1219 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1220 odatalen += 2;
1221 if (crlf)
1222 odatalen += 2;
1223 else
1224 odatalen += 1;
1225 if (data[in] == '\r')
1226 in += 2;
1227 else
1228 in++;
1229 }
1230 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001231 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001232 (data[in+1] != '\n') &&
1233 (linelen + 1) >= MAXLINESIZE) {
1234 linelen = 0;
1235 if (crlf)
1236 odatalen += 3;
1237 else
1238 odatalen += 2;
1239 }
1240 linelen++;
1241 odatalen++;
1242 in++;
1243 }
1244 }
1245 }
1246
Barry Warsaw23164a52004-05-11 02:05:11 +00001247 /* We allocate the output same size as input, this is overkill.
1248 * The previous implementation used calloc() so we'll zero out the
1249 * memory here too, since PyMem_Malloc() does not guarantee that.
1250 */
1251 odata = (unsigned char *) PyMem_Malloc(odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001252 if (odata == NULL) {
1253 PyErr_NoMemory();
1254 return NULL;
1255 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001256 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001257
1258 in = out = linelen = 0;
1259 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001260 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001261 (data[in] == '=') ||
1262 (header && data[in] == '_') ||
Guido van Rossumd8faa362007-04-27 19:54:29 +00001263 ((data[in] == '.') && (linelen == 0) &&
1264 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001265 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1266 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001267 ((data[in] < 33) &&
1268 (data[in] != '\r') && (data[in] != '\n') &&
Guido van Rossumd8faa362007-04-27 19:54:29 +00001269 (quotetabs ||
1270 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001271 {
1272 if ((linelen + 3 )>= MAXLINESIZE) {
1273 odata[out++] = '=';
1274 if (crlf) odata[out++] = '\r';
1275 odata[out++] = '\n';
1276 linelen = 0;
1277 }
1278 odata[out++] = '=';
1279 to_hex(data[in], &odata[out]);
1280 out += 2;
1281 in++;
1282 linelen += 3;
1283 }
1284 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001285 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001286 ((data[in] == '\n') ||
1287 ((in+1 < datalen) && (data[in] == '\r') &&
1288 (data[in+1] == '\n'))))
1289 {
1290 linelen = 0;
1291 /* Protect against whitespace on end of line */
1292 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1293 ch = odata[out-1];
1294 odata[out-1] = '=';
1295 to_hex(ch, &odata[out]);
1296 out += 2;
1297 }
Tim Peters934c1a12002-07-02 22:24:50 +00001298
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001299 if (crlf) odata[out++] = '\r';
1300 odata[out++] = '\n';
1301 if (data[in] == '\r')
1302 in += 2;
1303 else
1304 in++;
1305 }
1306 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001307 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001308 (data[in+1] != '\n') &&
1309 (linelen + 1) >= MAXLINESIZE) {
1310 odata[out++] = '=';
1311 if (crlf) odata[out++] = '\r';
1312 odata[out++] = '\n';
1313 linelen = 0;
1314 }
1315 linelen++;
1316 if (header && data[in] == ' ') {
1317 odata[out++] = '_';
1318 in++;
1319 }
1320 else {
1321 odata[out++] = data[in++];
1322 }
1323 }
1324 }
1325 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001326 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001327 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001328 return NULL;
1329 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001330 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001331 return rv;
1332}
Barry Warsawe977c212000-08-15 06:07:13 +00001333
Jack Jansen72781191995-08-07 14:34:15 +00001334/* List of functions defined in the module */
1335
1336static struct PyMethodDef binascii_module_methods[] = {
Barry Warsawe977c212000-08-15 06:07:13 +00001337 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1338 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1339 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1340 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1341 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1342 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1343 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1344 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1345 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1346 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1347 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1348 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1349 doc_rledecode_hqx},
1350 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1351 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
Tim Peters934c1a12002-07-02 22:24:50 +00001352 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001353 doc_a2b_qp},
Tim Peters934c1a12002-07-02 22:24:50 +00001354 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001355 doc_b2a_qp},
Barry Warsawe977c212000-08-15 06:07:13 +00001356 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001357};
1358
1359
1360/* Initialization function for the module (*must* be called initbinascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001361PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001362
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001363PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001364initbinascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001365{
Guido van Rossumfe096d22007-08-23 18:31:24 +00001366 PyObject *m, *d;
Jack Jansen72781191995-08-07 14:34:15 +00001367
1368 /* Create the module and add the functions */
Neal Norwitz56dc9552007-08-23 17:55:33 +00001369 m = Py_InitModule3("binascii", binascii_module_methods, doc_binascii);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001370 if (m == NULL)
1371 return;
Jack Jansen72781191995-08-07 14:34:15 +00001372
1373 d = PyModule_GetDict(m);
Jack Jansen72781191995-08-07 14:34:15 +00001374
Guido van Rossum4581ae52007-05-22 21:56:47 +00001375 Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001376 PyDict_SetItemString(d, "Error", Error);
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001377 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001378 PyDict_SetItemString(d, "Incomplete", Incomplete);
Jack Jansen72781191995-08-07 14:34:15 +00001379}