blob: bcbafcffa9dd3e53ab9f7339328368fc17608f72 [file] [log] [blame]
Jack Jansen72781191995-08-07 14:34:15 +00001/*
2** Routines to represent binary data in ASCII and vice-versa
3**
4** This module currently supports the following encodings:
5** uuencode:
6** each line encodes 45 bytes (except possibly the last)
7** First char encodes (binary) length, rest data
8** each char encodes 6 bits, as follows:
9** binary: 01234567 abcdefgh ijklmnop
10** ascii: 012345 67abcd efghij klmnop
11** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12** short binary data is zero-extended (so the bits are always in the
13** right place), this does *not* reflect in the length.
Jack Jansen84bbc2e1995-10-04 16:38:44 +000014** base64:
15** Line breaks are insignificant, but lines are at most 76 chars
16** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17** is done via a table.
18** Short binary data is filled (in ASCII) with '='.
Jack Jansen72781191995-08-07 14:34:15 +000019** hqx:
20** File starts with introductory text, real data starts and ends
21** with colons.
22** Data consists of three similar parts: info, datafork, resourcefork.
23** Each part is protected (at the end) with a 16-bit crc
24** The binary data is run-length encoded, and then ascii-fied:
25** binary: 01234567 abcdefgh ijklmnop
26** ascii: 012345 67abcd efghij klmnop
27** ASCII encoding is table-driven, see the code.
28** Short binary data results in the runt ascii-byte being output with
29** the bits in the right place.
30**
31** While I was reading dozens of programs that encode or decode the formats
32** here (documentation? hihi:-) I have formulated Jansen's Observation:
33**
34** Programs that encode binary data in ASCII are written in
35** such a style that they are as unreadable as possible. Devices used
36** include unnecessary global variables, burying important tables
37** in unrelated sourcefiles, putting functions in include files,
38** using seemingly-descriptive variable names for different purposes,
39** calls to empty subroutines and a host of others.
40**
41** I have attempted to break with this tradition, but I guess that that
42** does make the performance sub-optimal. Oh well, too bad...
43**
44** Jack Jansen, CWI, July 1995.
Tim Peters934c1a12002-07-02 22:24:50 +000045**
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000046** Added support for quoted-printable encoding, based on rfc 1521 et al
Tim Peters934c1a12002-07-02 22:24:50 +000047** quoted-printable encoding specifies that non printable characters (anything
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000048** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49** of the character. It also specifies some other behavior to enable 8bit data
Tim Peters934c1a12002-07-02 22:24:50 +000050** in a mail message with little difficulty (maximum line sizes, protecting
51** some cases of whitespace, etc).
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000052**
53** Brandon Long, September 2001.
Jack Jansen72781191995-08-07 14:34:15 +000054*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
Gregory P. Smith440ca772008-03-24 00:08:01 +000059#ifdef USE_ZLIB_CRC32
60#include "zlib.h"
61#endif
Jack Jansen72781191995-08-07 14:34:15 +000062
63static PyObject *Error;
64static PyObject *Incomplete;
65
66/*
67** hqx lookup table, ascii->binary.
68*/
69
/* Byte that introduces a run-length sequence in binhex RLE data. */
#define RUNCHAR 0x90

/*
** Sentinel values stored in table_a2b_hqx.  Real 6-bit values are
** 0x00..0x3F, so these can never collide with decoded data.
*/
#define DONE 0x7F    /* ':' -- terminates the data */
#define SKIP 0x7E    /* '\n' and '\r' -- silently ignored */
#define FAIL 0x7D    /* any other byte that is not part of the alphabet */

/*
** Maps every possible input byte to its 6-bit hqx value or to one of
** the sentinels above.  The table is only ever read, hence const.
*/
static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
126
/*
** hqx lookup table, binary->ascii: entry i is the character that
** encodes the 6-bit value i (64 characters plus the literal's NUL).
** The table is only ever read, hence const.
*/
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000129
/*
** base64 lookup table, ascii->binary, indexed by a 7-bit character
** code.  -1 marks characters outside the base64 alphabet; the pad
** character '=' deliberately maps to 0.  Users of the table compare
** against (unsigned char)-1 after storing the entry in an unsigned
** char.  The table is only ever read, hence const.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
140
/* Character used to pad the final base64 quad. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN (PY_SSIZE_T_MAX/2 - sizeof(PyStringObject) - 3)

/*
** base64 lookup table, binary->ascii: entry i is the character that
** encodes the 6-bit value i.  The table is only ever read, hence const.
*/
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000148
149
150
/*
** Lookup table for the 16-bit hqx CRC, indexed by a byte value.
** Entry i equals the result of pushing i << 8 through eight MSB-first
** shift steps with generator polynomial 0x1021.  The table is only
** ever read, hence const.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
185
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000186PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
Jack Jansen72781191995-08-07 14:34:15 +0000187
188static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000189binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000190{
191 unsigned char *ascii_data, *bin_data;
192 int leftbits = 0;
193 unsigned char this_ch;
194 unsigned int leftchar = 0;
195 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000196 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000197
Guido van Rossum43713e52000-02-29 13:59:29 +0000198 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000199 return NULL;
200
Gregory P. Smith9d534572008-06-11 07:41:16 +0000201 assert(ascii_len >= 0);
202
Jack Jansen72781191995-08-07 14:34:15 +0000203 /* First byte: binary data length (in bytes) */
204 bin_len = (*ascii_data++ - ' ') & 077;
205 ascii_len--;
206
207 /* Allocate the buffer */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000208 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000209 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000210 bin_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000211
Jack Jansen72781191995-08-07 14:34:15 +0000212 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
Neal Norwitzfe92eef2004-06-06 20:13:10 +0000213 /* XXX is it really best to add NULs if there's no more data */
214 this_ch = (ascii_len > 0) ? *ascii_data : 0;
Jack Jansen72781191995-08-07 14:34:15 +0000215 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
216 /*
217 ** Whitespace. Assume some spaces got eaten at
218 ** end-of-line. (We check this later)
219 */
220 this_ch = 0;
221 } else {
Jack Jansen5d957971995-11-14 10:35:19 +0000222 /* Check the character for legality
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000223 ** The 64 in stead of the expected 63 is because
224 ** there are a few uuencodes out there that use
Guido van Rossum92d89172001-01-09 02:11:57 +0000225 ** '`' as zero instead of space.
Jack Jansen5d957971995-11-14 10:35:19 +0000226 */
227 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Jack Jansen72781191995-08-07 14:34:15 +0000228 PyErr_SetString(Error, "Illegal char");
229 Py_DECREF(rv);
230 return NULL;
231 }
232 this_ch = (this_ch - ' ') & 077;
233 }
234 /*
235 ** Shift it in on the low end, and see if there's
236 ** a byte ready for output.
237 */
238 leftchar = (leftchar << 6) | (this_ch);
239 leftbits += 6;
240 if ( leftbits >= 8 ) {
241 leftbits -= 8;
242 *bin_data++ = (leftchar >> leftbits) & 0xff;
243 leftchar &= ((1 << leftbits) - 1);
244 bin_len--;
245 }
246 }
247 /*
248 ** Finally, check that if there's anything left on the line
249 ** that it's whitespace only.
250 */
251 while( ascii_len-- > 0 ) {
252 this_ch = *ascii_data++;
Guido van Rossum92d89172001-01-09 02:11:57 +0000253 /* Extra '`' may be written as padding in some cases */
254 if ( this_ch != ' ' && this_ch != ' '+64 &&
Guido van Rossum1243ae71997-07-11 18:36:28 +0000255 this_ch != '\n' && this_ch != '\r' ) {
Jack Jansen72781191995-08-07 14:34:15 +0000256 PyErr_SetString(Error, "Trailing garbage");
257 Py_DECREF(rv);
258 return NULL;
259 }
260 }
261 return rv;
262}
263
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000264PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000265
Jack Jansen72781191995-08-07 14:34:15 +0000266static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000267binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000268{
269 unsigned char *ascii_data, *bin_data;
270 int leftbits = 0;
271 unsigned char this_ch;
272 unsigned int leftchar = 0;
273 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000274 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000275
Guido van Rossum43713e52000-02-29 13:59:29 +0000276 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000277 return NULL;
278 if ( bin_len > 45 ) {
279 /* The 45 is a limit that appears in all uuencode's */
280 PyErr_SetString(Error, "At most 45 bytes at once");
281 return NULL;
282 }
283
284 /* We're lazy and allocate to much (fixed up later) */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000285 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000286 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000287 ascii_data = (unsigned char *)PyString_AsString(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000288
289 /* Store the length */
290 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000291
Jack Jansen72781191995-08-07 14:34:15 +0000292 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
293 /* Shift the data (or padding) into our buffer */
294 if ( bin_len > 0 ) /* Data */
295 leftchar = (leftchar << 8) | *bin_data;
296 else /* Padding */
297 leftchar <<= 8;
298 leftbits += 8;
299
300 /* See if there are 6-bit groups ready */
301 while ( leftbits >= 6 ) {
302 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
303 leftbits -= 6;
304 *ascii_data++ = this_ch + ' ';
305 }
306 }
307 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000308
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000309 _PyString_Resize(&rv, (ascii_data -
310 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000311 return rv;
312}
313
Guido van Rossum2db4f471999-10-19 19:05:14 +0000314
315static int
Thomas Woutersf98db652006-03-01 21:37:32 +0000316binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000317{
Tim Peters934c1a12002-07-02 22:24:50 +0000318 /* Finds & returns the (num+1)th
Guido van Rossum2db4f471999-10-19 19:05:14 +0000319 ** valid character for base64, or -1 if none.
320 */
321
322 int ret = -1;
323 unsigned char c, b64val;
324
325 while ((slen > 0) && (ret == -1)) {
326 c = *s;
327 b64val = table_a2b_base64[c & 0x7f];
328 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
329 if (num == 0)
330 ret = *s;
331 num--;
332 }
333
334 s++;
335 slen--;
336 }
337 return ret;
338}
339
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000340PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000341
342static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000343binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000344{
345 unsigned char *ascii_data, *bin_data;
346 int leftbits = 0;
347 unsigned char this_ch;
348 unsigned int leftchar = 0;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000349 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000350 Py_ssize_t ascii_len, bin_len;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000351 int quad_pos = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000352
Guido van Rossum43713e52000-02-29 13:59:29 +0000353 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000354 return NULL;
355
Gregory P. Smith9d534572008-06-11 07:41:16 +0000356 assert(ascii_len >= 0);
357
358 if (ascii_len > PY_SSIZE_T_MAX - 3)
359 return PyErr_NoMemory();
360
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000361 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
362
363 /* Allocate the buffer */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000364 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000365 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000366 bin_data = (unsigned char *)PyString_AsString(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000367 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000368
369 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
370 this_ch = *ascii_data;
371
372 if (this_ch > 0x7f ||
373 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
Jack Jansenba1de3b1996-01-22 10:47:15 +0000374 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000375
376 /* Check for pad sequences and ignore
377 ** the invalid ones.
378 */
379 if (this_ch == BASE64_PAD) {
380 if ( (quad_pos < 2) ||
381 ((quad_pos == 2) &&
382 (binascii_find_valid(ascii_data, ascii_len, 1)
383 != BASE64_PAD)) )
384 {
385 continue;
386 }
387 else {
388 /* A pad sequence means no more input.
389 ** We've already interpreted the data
390 ** from the quad at this point.
391 */
392 leftbits = 0;
393 break;
394 }
395 }
396
397 this_ch = table_a2b_base64[*ascii_data];
398 if ( this_ch == (unsigned char) -1 )
399 continue;
400
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000401 /*
402 ** Shift it in on the low end, and see if there's
403 ** a byte ready for output.
404 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000405 quad_pos = (quad_pos + 1) & 0x03;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000406 leftchar = (leftchar << 6) | (this_ch);
407 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000408
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000409 if ( leftbits >= 8 ) {
410 leftbits -= 8;
411 *bin_data++ = (leftchar >> leftbits) & 0xff;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000412 bin_len++;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000413 leftchar &= ((1 << leftbits) - 1);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000414 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000415 }
416
417 if (leftbits != 0) {
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000418 PyErr_SetString(Error, "Incorrect padding");
419 Py_DECREF(rv);
420 return NULL;
421 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000422
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000423 /* And set string size correctly. If the result string is empty
424 ** (because the input was all invalid) return the shared empty
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000425 ** string instead; _PyString_Resize() won't do this for us.
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000426 */
Barry Warsaw0a51b582002-08-15 22:14:24 +0000427 if (bin_len > 0)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000428 _PyString_Resize(&rv, bin_len);
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000429 else {
430 Py_DECREF(rv);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000431 rv = PyString_FromString("");
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000432 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000433 return rv;
434}
435
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000436PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000437
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000438static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000439binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000440{
441 unsigned char *ascii_data, *bin_data;
442 int leftbits = 0;
443 unsigned char this_ch;
444 unsigned int leftchar = 0;
445 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000446 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000447
Guido van Rossum43713e52000-02-29 13:59:29 +0000448 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000449 return NULL;
Gregory P. Smith9d534572008-06-11 07:41:16 +0000450
451 assert(bin_len >= 0);
452
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000453 if ( bin_len > BASE64_MAXBIN ) {
454 PyErr_SetString(Error, "Too much data for base64 line");
455 return NULL;
456 }
Tim Peters934c1a12002-07-02 22:24:50 +0000457
Tim Peters1fbb5772001-12-19 04:41:35 +0000458 /* We're lazy and allocate too much (fixed up later).
459 "+3" leaves room for up to two pad characters and a trailing
460 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000461 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000462 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000463 ascii_data = (unsigned char *)PyString_AsString(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000464
465 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
466 /* Shift the data into our buffer */
467 leftchar = (leftchar << 8) | *bin_data;
468 leftbits += 8;
469
470 /* See if there are 6-bit groups ready */
471 while ( leftbits >= 6 ) {
472 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
473 leftbits -= 6;
474 *ascii_data++ = table_b2a_base64[this_ch];
475 }
476 }
477 if ( leftbits == 2 ) {
478 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
479 *ascii_data++ = BASE64_PAD;
480 *ascii_data++ = BASE64_PAD;
481 } else if ( leftbits == 4 ) {
482 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
483 *ascii_data++ = BASE64_PAD;
Tim Peters934c1a12002-07-02 22:24:50 +0000484 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000485 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000486
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000487 _PyString_Resize(&rv, (ascii_data -
488 (unsigned char *)PyString_AsString(rv)));
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000489 return rv;
490}
491
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000492PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
Jack Jansen72781191995-08-07 14:34:15 +0000493
494static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000495binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000496{
497 unsigned char *ascii_data, *bin_data;
498 int leftbits = 0;
499 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000500 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000501 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000502 Py_ssize_t len;
Jack Jansen72781191995-08-07 14:34:15 +0000503 int done = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000504
Guido van Rossum43713e52000-02-29 13:59:29 +0000505 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000506 return NULL;
507
Gregory P. Smith9d534572008-06-11 07:41:16 +0000508 assert(len >= 0);
509
510 if (len > PY_SSIZE_T_MAX - 2)
511 return PyErr_NoMemory();
512
Raymond Hettinger658717e2004-09-06 22:58:37 +0000513 /* Allocate a string that is too big (fixed later)
514 Add two to the initial length to prevent interning which
515 would preclude subsequent resizing. */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000516 if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000517 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000518 bin_data = (unsigned char *)PyString_AsString(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000519
520 for( ; len > 0 ; len--, ascii_data++ ) {
521 /* Get the byte and look it up */
522 this_ch = table_a2b_hqx[*ascii_data];
523 if ( this_ch == SKIP )
524 continue;
525 if ( this_ch == FAIL ) {
526 PyErr_SetString(Error, "Illegal char");
527 Py_DECREF(rv);
528 return NULL;
529 }
530 if ( this_ch == DONE ) {
531 /* The terminating colon */
532 done = 1;
533 break;
534 }
535
536 /* Shift it into the buffer and see if any bytes are ready */
537 leftchar = (leftchar << 6) | (this_ch);
538 leftbits += 6;
539 if ( leftbits >= 8 ) {
540 leftbits -= 8;
541 *bin_data++ = (leftchar >> leftbits) & 0xff;
542 leftchar &= ((1 << leftbits) - 1);
543 }
544 }
Tim Peters934c1a12002-07-02 22:24:50 +0000545
Jack Jansen72781191995-08-07 14:34:15 +0000546 if ( leftbits && !done ) {
547 PyErr_SetString(Incomplete,
548 "String has incomplete number of bytes");
549 Py_DECREF(rv);
550 return NULL;
551 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000552 _PyString_Resize(
553 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
Guido van Rossum9c6ba5e1997-01-12 20:02:04 +0000554 if (rv) {
555 PyObject *rrv = Py_BuildValue("Oi", rv, done);
556 Py_DECREF(rv);
557 return rrv;
558 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000559
Jack Jansen72781191995-08-07 14:34:15 +0000560 return NULL;
561}
562
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000563PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
Jack Jansen72781191995-08-07 14:34:15 +0000564
565static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000566binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000567{
568 unsigned char *in_data, *out_data;
569 PyObject *rv;
570 unsigned char ch;
Thomas Woutersf98db652006-03-01 21:37:32 +0000571 Py_ssize_t in, inend, len;
Tim Peters934c1a12002-07-02 22:24:50 +0000572
Guido van Rossum43713e52000-02-29 13:59:29 +0000573 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000574 return NULL;
575
Gregory P. Smith9d534572008-06-11 07:41:16 +0000576 assert(len >= 0);
577
578 if (len > PY_SSIZE_T_MAX / 2 - 2)
579 return PyErr_NoMemory();
580
Jack Jansen72781191995-08-07 14:34:15 +0000581 /* Worst case: output is twice as big as input (fixed later) */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000582 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000583 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000584 out_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000585
Jack Jansen72781191995-08-07 14:34:15 +0000586 for( in=0; in<len; in++) {
587 ch = in_data[in];
588 if ( ch == RUNCHAR ) {
589 /* RUNCHAR. Escape it. */
590 *out_data++ = RUNCHAR;
591 *out_data++ = 0;
592 } else {
593 /* Check how many following are the same */
594 for(inend=in+1;
595 inend<len && in_data[inend] == ch &&
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000596 inend < in+255;
Jack Jansen72781191995-08-07 14:34:15 +0000597 inend++) ;
Jack Jansen0223aa11995-08-31 13:44:23 +0000598 if ( inend - in > 3 ) {
599 /* More than 3 in a row. Output RLE. */
Jack Jansen72781191995-08-07 14:34:15 +0000600 *out_data++ = ch;
601 *out_data++ = RUNCHAR;
602 *out_data++ = inend-in;
603 in = inend-1;
604 } else {
605 /* Less than 3. Output the byte itself */
606 *out_data++ = ch;
607 }
608 }
609 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000610 _PyString_Resize(&rv, (out_data -
611 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000612 return rv;
613}
614
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000615PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
Tim Peters934c1a12002-07-02 22:24:50 +0000616
Jack Jansen72781191995-08-07 14:34:15 +0000617static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000618binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000619{
620 unsigned char *ascii_data, *bin_data;
621 int leftbits = 0;
622 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000623 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000624 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000625 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000626
Guido van Rossum43713e52000-02-29 13:59:29 +0000627 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000628 return NULL;
629
Gregory P. Smith9d534572008-06-11 07:41:16 +0000630 assert(len >= 0);
631
632 if (len > PY_SSIZE_T_MAX / 2 - 2)
633 return PyErr_NoMemory();
634
Jack Jansen72781191995-08-07 14:34:15 +0000635 /* Allocate a buffer that is at least large enough */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000636 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000637 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000638 ascii_data = (unsigned char *)PyString_AsString(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000639
Jack Jansen72781191995-08-07 14:34:15 +0000640 for( ; len > 0 ; len--, bin_data++ ) {
641 /* Shift into our buffer, and output any 6bits ready */
642 leftchar = (leftchar << 8) | *bin_data;
643 leftbits += 8;
644 while ( leftbits >= 6 ) {
645 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
646 leftbits -= 6;
647 *ascii_data++ = table_b2a_hqx[this_ch];
648 }
649 }
650 /* Output a possible runt byte */
651 if ( leftbits ) {
652 leftchar <<= (6-leftbits);
653 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
654 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000655 _PyString_Resize(&rv, (ascii_data -
656 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000657 return rv;
658}
659
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000660PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
Tim Peters934c1a12002-07-02 22:24:50 +0000661
Jack Jansen72781191995-08-07 14:34:15 +0000662static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000663binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000664{
665 unsigned char *in_data, *out_data;
666 unsigned char in_byte, in_repeat;
667 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000668 Py_ssize_t in_len, out_len, out_len_left;
Jack Jansen72781191995-08-07 14:34:15 +0000669
Guido van Rossum43713e52000-02-29 13:59:29 +0000670 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000671 return NULL;
672
Gregory P. Smith9d534572008-06-11 07:41:16 +0000673 assert(in_len >= 0);
674
Jack Jansen72781191995-08-07 14:34:15 +0000675 /* Empty string is a special case */
676 if ( in_len == 0 )
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000677 return PyString_FromString("");
Gregory P. Smith9d534572008-06-11 07:41:16 +0000678 else if (in_len > PY_SSIZE_T_MAX / 2)
679 return PyErr_NoMemory();
Jack Jansen72781191995-08-07 14:34:15 +0000680
681 /* Allocate a buffer of reasonable size. Resized when needed */
682 out_len = in_len*2;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000683 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000684 return NULL;
685 out_len_left = out_len;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000686 out_data = (unsigned char *)PyString_AsString(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000687
688 /*
689 ** We need two macros here to get/put bytes and handle
690 ** end-of-buffer for input and output strings.
691 */
692#define INBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000693 do { \
694 if ( --in_len < 0 ) { \
695 PyErr_SetString(Incomplete, ""); \
696 Py_DECREF(rv); \
697 return NULL; \
698 } \
699 b = *in_data++; \
700 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000701
Jack Jansen72781191995-08-07 14:34:15 +0000702#define OUTBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000703 do { \
704 if ( --out_len_left < 0 ) { \
Gregory P. Smith9d534572008-06-11 07:41:16 +0000705 if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000706 _PyString_Resize(&rv, 2*out_len); \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000707 if ( rv == NULL ) return NULL; \
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000708 out_data = (unsigned char *)PyString_AsString(rv) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000709 + out_len; \
710 out_len_left = out_len-1; \
711 out_len = out_len * 2; \
712 } \
713 *out_data++ = b; \
714 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000715
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000716 /*
717 ** Handle first byte separately (since we have to get angry
718 ** in case of an orphaned RLE code).
719 */
720 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000721
722 if (in_byte == RUNCHAR) {
723 INBYTE(in_repeat);
724 if (in_repeat != 0) {
725 /* Note Error, not Incomplete (which is at the end
726 ** of the string only). This is a programmer error.
727 */
728 PyErr_SetString(Error, "Orphaned RLE code at start");
729 Py_DECREF(rv);
730 return NULL;
731 }
732 OUTBYTE(RUNCHAR);
733 } else {
734 OUTBYTE(in_byte);
735 }
Tim Peters934c1a12002-07-02 22:24:50 +0000736
Jack Jansen72781191995-08-07 14:34:15 +0000737 while( in_len > 0 ) {
738 INBYTE(in_byte);
739
740 if (in_byte == RUNCHAR) {
741 INBYTE(in_repeat);
742 if ( in_repeat == 0 ) {
743 /* Just an escaped RUNCHAR value */
744 OUTBYTE(RUNCHAR);
745 } else {
746 /* Pick up value and output a sequence of it */
747 in_byte = out_data[-1];
748 while ( --in_repeat > 0 )
749 OUTBYTE(in_byte);
750 }
751 } else {
752 /* Normal byte */
753 OUTBYTE(in_byte);
754 }
755 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000756 _PyString_Resize(&rv, (out_data -
757 (unsigned char *)PyString_AsString(rv)));
Jack Jansen72781191995-08-07 14:34:15 +0000758 return rv;
759}
760
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000761PyDoc_STRVAR(doc_crc_hqx,
762"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
Jack Jansen72781191995-08-07 14:34:15 +0000763
764static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000765binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000766{
767 unsigned char *bin_data;
768 unsigned int crc;
Thomas Woutersf98db652006-03-01 21:37:32 +0000769 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000770
Guido van Rossum43713e52000-02-29 13:59:29 +0000771 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
Jack Jansen72781191995-08-07 14:34:15 +0000772 return NULL;
773
Gregory P. Smith9d534572008-06-11 07:41:16 +0000774 while(len-- > 0) {
Jack Jansen72781191995-08-07 14:34:15 +0000775 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
776 }
777
778 return Py_BuildValue("i", crc);
779}
780
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000781PyDoc_STRVAR(doc_crc32,
782"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000783
Gregory P. Smith440ca772008-03-24 00:08:01 +0000784#ifdef USE_ZLIB_CRC32
785/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
786static PyObject *
787binascii_crc32(PyObject *self, PyObject *args)
788{
Gregory P. Smith1fa588e2008-03-25 07:31:28 +0000789 unsigned int crc32val = 0; /* crc32(0L, Z_NULL, 0) */
Gregory P. Smith440ca772008-03-24 00:08:01 +0000790 Byte *buf;
791 Py_ssize_t len;
792 int signed_val;
793
Gregory P. Smith1fa588e2008-03-25 07:31:28 +0000794 if (!PyArg_ParseTuple(args, "s#|I:crc32", &buf, &len, &crc32val))
Gregory P. Smith440ca772008-03-24 00:08:01 +0000795 return NULL;
796 /* In Python 2.x we return a signed integer regardless of native platform
797 * long size (the 32bit unsigned long is treated as 32-bit signed and sign
798 * extended into a 64-bit long inside the integer object). 3.0 does the
799 * right thing and returns unsigned. http://bugs.python.org/issue1202 */
800 signed_val = crc32(crc32val, buf, len);
801 return PyInt_FromLong(signed_val);
802}
803#else /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000804/* Crc - 32 BIT ANSI X3.66 CRC checksum files
805 Also known as: ISO 3307
806**********************************************************************|
807* *|
808* Demonstration program to compute the 32-bit CRC used as the frame *|
809* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
810* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
811* protocol). The 32-bit FCS was added via the Federal Register, *|
812* 1 June 1982, p.23798. I presume but don't know for certain that *|
813* this polynomial is or will be included in CCITT V.41, which *|
814* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
815* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
816* errors by a factor of 10^-5 over 16-bit FCS. *|
817* *|
818**********************************************************************|
819
820 Copyright (C) 1986 Gary S. Brown. You may use this program, or
821 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000822
Tim Peters934c1a12002-07-02 22:24:50 +0000823 First, the polynomial itself and its table of feedback terms. The
824 polynomial is
825 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
826 Note that we take it "backwards" and put the highest-order term in
827 the lowest-order bit. The X^32 term is "implied"; the LSB is the
828 X^31 term, etc. The X^0 term (usually shown as "+1") results in
829 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000830
Tim Peters934c1a12002-07-02 22:24:50 +0000831 Note that the usual hardware shift register implementation, which
832 is what we're using (we're merely optimizing it by doing eight-bit
833 chunks at a time) shifts bits into the lowest-order term. In our
834 implementation, that means shifting towards the right. Why do we
835 do it this way? Because the calculated CRC must be transmitted in
836 order from highest-order term to lowest-order term. UARTs transmit
837 characters in order from LSB to MSB. By storing the CRC this way,
838 we hand it to the UART in the order low-byte to high-byte; the UART
839 sends each low-bit to high-bit; and the result is transmission bit
840 by bit from highest- to lowest-order term without requiring any bit
841 shuffling on our part. Reception works similarly.
842
843 The feedback terms table consists of 256, 32-bit entries. Notes:
844
845 1. The table can be generated at runtime if desired; code to do so
846 is shown later. It might not be obvious, but the feedback
847 terms simply represent the results of eight shift/xor opera-
848 tions for all combinations of data and CRC register values.
849
850 2. The CRC accumulation logic is the same for all CRC polynomials,
851 be they sixteen or thirty-two bits wide. You simply choose the
852 appropriate table. Alternatively, because the table can be
853 generated at runtime, you can start by generating the table for
854 the polynomial in question and use exactly the same "updcrc",
855 if your application needn't simultaneously handle two CRC
856 polynomials. (Note, however, that XMODEM is strange.)
857
858 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
859 of course, 32-bit entries work OK if the high 16 bits are zero.
860
861 4. The values must be right-shifted by eight bits by the "updcrc"
862 logic; the shift must be unsigned (bring in zeroes). On some
863 hardware you could probably optimize the shift in assembler by
864 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000865********************************************************************/
866
/* Feedback-term table for the CRC-32 routine: entry i is the result of
   running byte value i through eight shift/xor steps of the reflected
   polynomial 0xedb88320. The table is only ever read, hence const. */
static const unsigned int crc_32_tab[256] = {
    0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
    0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
    0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
    0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
    0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
    0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
    0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
    0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
    0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
    0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
    0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
    0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
    0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
    0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
    0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
    0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
    0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
    0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
    0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
    0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
    0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
    0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
    0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
    0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
    0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
    0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
    0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
    0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
    0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
    0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
    0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
    0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
    0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
    0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
    0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
    0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
    0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
    0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
    0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
    0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
    0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
    0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
    0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
    0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
    0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
    0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
    0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
    0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
    0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
    0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
    0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
    0x2d02ef8dU
};
921
922static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000923binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000924{ /* By Jim Ahlstrom; All rights transferred to CNRI */
925 unsigned char *bin_data;
Gregory P. Smithaef3e522008-03-25 07:46:07 +0000926 unsigned int crc = 0U; /* initial value of CRC */
Thomas Woutersf98db652006-03-01 21:37:32 +0000927 Py_ssize_t len;
Gregory P. Smithaef3e522008-03-25 07:46:07 +0000928 int result;
Tim Peters934c1a12002-07-02 22:24:50 +0000929
Gregory P. Smithaef3e522008-03-25 07:46:07 +0000930 if ( !PyArg_ParseTuple(args, "s#|I:crc32", &bin_data, &len, &crc) )
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000931 return NULL;
932
Tim Peters934c1a12002-07-02 22:24:50 +0000933 crc = ~ crc;
Gregory P. Smith9d534572008-06-11 07:41:16 +0000934 while (len-- > 0)
Gregory P. Smithaef3e522008-03-25 07:46:07 +0000935 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffU] ^ (crc >> 8);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000936 /* Note: (crc >> 8) MUST zero fill on left */
Tim Petersa98011c2002-07-02 20:20:08 +0000937
Gregory P. Smithaef3e522008-03-25 07:46:07 +0000938 result = (int)(crc ^ 0xFFFFFFFFU);
Tim Petersa98011c2002-07-02 20:20:08 +0000939 return PyInt_FromLong(result);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000940}
Gregory P. Smith440ca772008-03-24 00:08:01 +0000941#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000942
Barry Warsawe977c212000-08-15 06:07:13 +0000943
944static PyObject *
945binascii_hexlify(PyObject *self, PyObject *args)
946{
947 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000948 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000949 PyObject *retval;
950 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000951 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000952
Brett Cannon6ee7d012006-06-08 16:23:04 +0000953 if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
Barry Warsawe977c212000-08-15 06:07:13 +0000954 return NULL;
955
Gregory P. Smith9d534572008-06-11 07:41:16 +0000956 assert(arglen >= 0);
957 if (arglen > PY_SSIZE_T_MAX / 2)
958 return PyErr_NoMemory();
959
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000960 retval = PyString_FromStringAndSize(NULL, arglen*2);
Barry Warsawe977c212000-08-15 06:07:13 +0000961 if (!retval)
962 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000963 retbuf = PyString_AsString(retval);
Barry Warsawe977c212000-08-15 06:07:13 +0000964 if (!retbuf)
965 goto finally;
966
967 /* make hex version of string, taken from shamodule.c */
968 for (i=j=0; i < arglen; i++) {
969 char c;
970 c = (argbuf[i] >> 4) & 0xf;
971 c = (c>9) ? c+'a'-10 : c + '0';
972 retbuf[j++] = c;
973 c = argbuf[i] & 0xf;
974 c = (c>9) ? c+'a'-10 : c + '0';
975 retbuf[j++] = c;
976 }
977 return retval;
978
979 finally:
980 Py_DECREF(retval);
981 return NULL;
982}
983
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000984PyDoc_STRVAR(doc_hexlify,
Barry Warsawe977c212000-08-15 06:07:13 +0000985"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
986\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000987This function is also available as \"hexlify()\".");
Barry Warsawe977c212000-08-15 06:07:13 +0000988
989
/* Map one hexadecimal digit character (either case) to its value
   0..15; return -1 for any other character. */
static int
to_int(int c)
{
    if (isdigit(c))
        return c - '0';

    if (isupper(c))
        c = tolower(c);

    return (c >= 'a' && c <= 'f') ? c - 'a' + 10 : -1;
}
1003
1004
1005static PyObject *
1006binascii_unhexlify(PyObject *self, PyObject *args)
1007{
1008 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +00001009 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +00001010 PyObject *retval;
1011 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +00001012 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +00001013
1014 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
1015 return NULL;
1016
Gregory P. Smith9d534572008-06-11 07:41:16 +00001017 assert(arglen >= 0);
1018
Barry Warsaw16168472000-08-15 06:59:58 +00001019 /* XXX What should we do about strings with an odd length? Should
1020 * we add an implicit leading zero, or a trailing zero? For now,
1021 * raise an exception.
Barry Warsawe977c212000-08-15 06:07:13 +00001022 */
1023 if (arglen % 2) {
Barry Warsaw16168472000-08-15 06:59:58 +00001024 PyErr_SetString(PyExc_TypeError, "Odd-length string");
Barry Warsawe977c212000-08-15 06:07:13 +00001025 return NULL;
1026 }
1027
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001028 retval = PyString_FromStringAndSize(NULL, (arglen/2));
Barry Warsawe977c212000-08-15 06:07:13 +00001029 if (!retval)
1030 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001031 retbuf = PyString_AsString(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001032 if (!retbuf)
1033 goto finally;
1034
1035 for (i=j=0; i < arglen; i += 2) {
1036 int top = to_int(Py_CHARMASK(argbuf[i]));
1037 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1038 if (top == -1 || bot == -1) {
1039 PyErr_SetString(PyExc_TypeError,
Barry Warsaw16168472000-08-15 06:59:58 +00001040 "Non-hexadecimal digit found");
Barry Warsawe977c212000-08-15 06:07:13 +00001041 goto finally;
1042 }
1043 retbuf[j++] = (top << 4) + bot;
1044 }
1045 return retval;
1046
1047 finally:
1048 Py_DECREF(retval);
1049 return NULL;
1050}
1051
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001052PyDoc_STRVAR(doc_unhexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001053"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1054\n\
1055hexstr must contain an even number of hex digits (upper or lower case).\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001056This function is also available as \"unhexlify()\"");
Barry Warsawe977c212000-08-15 06:07:13 +00001057
/* Value of each 7-bit character code read as a hexadecimal digit:
   '0'-'9' give 0-9, 'A'-'F' and 'a'-'f' give 10-15, everything else -1.
   The table is only ever read, hence const. */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up a character in table_hex. There is no range check: the
   caller must make sure c is below 128 before using this macro. */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Line-length limit b2a_qp uses when deciding where to insert soft
   line breaks. */
#define MAXLINESIZE 76
1072
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001073PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001074
Tim Peters934c1a12002-07-02 22:24:50 +00001075static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001076binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1077{
Thomas Wouters7087f782006-03-01 23:10:05 +00001078 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001079 char ch;
1080 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001081 Py_ssize_t datalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001082 PyObject *rv;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001083 static char *kwlist[] = {"data", "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001084 int header = 0;
1085
Tim Peters934c1a12002-07-02 22:24:50 +00001086 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001087 &datalen, &header))
1088 return NULL;
1089
Barry Warsaw23164a52004-05-11 02:05:11 +00001090 /* We allocate the output same size as input, this is overkill.
1091 * The previous implementation used calloc() so we'll zero out the
1092 * memory here too, since PyMem_Malloc() does not guarantee that.
1093 */
1094 odata = (unsigned char *) PyMem_Malloc(datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001095 if (odata == NULL) {
1096 PyErr_NoMemory();
1097 return NULL;
1098 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001099 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001100
1101 in = out = 0;
1102 while (in < datalen) {
1103 if (data[in] == '=') {
1104 in++;
1105 if (in >= datalen) break;
1106 /* Soft line breaks */
Georg Brandl25aabf42006-11-16 17:08:45 +00001107 if ((data[in] == '\n') || (data[in] == '\r')) {
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001108 if (data[in] != '\n') {
1109 while (in < datalen && data[in] != '\n') in++;
1110 }
1111 if (in < datalen) in++;
1112 }
1113 else if (data[in] == '=') {
1114 /* broken case from broken python qp */
1115 odata[out++] = '=';
1116 in++;
1117 }
Tim Peters934c1a12002-07-02 22:24:50 +00001118 else if (((data[in] >= 'A' && data[in] <= 'F') ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001119 (data[in] >= 'a' && data[in] <= 'f') ||
1120 (data[in] >= '0' && data[in] <= '9')) &&
1121 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1122 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1123 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1124 /* hexval */
1125 ch = hexval(data[in]) << 4;
1126 in++;
1127 ch |= hexval(data[in]);
1128 in++;
1129 odata[out++] = ch;
1130 }
1131 else {
1132 odata[out++] = '=';
1133 }
1134 }
1135 else if (header && data[in] == '_') {
1136 odata[out++] = ' ';
1137 in++;
1138 }
1139 else {
1140 odata[out] = data[in];
1141 in++;
1142 out++;
1143 }
1144 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001145 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001146 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001147 return NULL;
1148 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001149 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001150 return rv;
1151}
1152
/* Write the two uppercase hexadecimal digits of ch to s[0] (high
   nibble) and s[1] (low nibble). No terminator is written. Always
   returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0f];
    s[1] = digits[ch & 0x0f];
    return 0;
}
1163
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001164PyDoc_STRVAR(doc_b2a_qp,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001165"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1166 Encode a string using quoted-printable encoding. \n\
1167\n\
1168On encoding, when istext is set, newlines are not encoded, and white \n\
1169space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001170both encoded. When quotetabs is set, space and tabs are encoded.");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001171
1172/* XXX: This is ridiculously complicated to be backward compatible
1173 * (mostly) with the quopri module. It doesn't re-create the quopri
1174 * module bug where text ending in CRLF has the CR encoded */
Tim Peters934c1a12002-07-02 22:24:50 +00001175static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001176binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1177{
Thomas Wouters7087f782006-03-01 23:10:05 +00001178 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001179 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001180 Py_ssize_t datalen = 0, odatalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001181 PyObject *rv;
1182 unsigned int linelen = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001183 static char *kwlist[] = {"data", "quotetabs", "istext",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001184 "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001185 int istext = 1;
1186 int quotetabs = 0;
1187 int header = 0;
1188 unsigned char ch;
1189 int crlf = 0;
1190 unsigned char *p;
1191
Tim Peters934c1a12002-07-02 22:24:50 +00001192 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001193 &datalen, &quotetabs, &istext, &header))
1194 return NULL;
1195
1196 /* See if this string is using CRLF line ends */
1197 /* XXX: this function has the side effect of converting all of
1198 * the end of lines to be the same depending on this detection
1199 * here */
Walter Dörwald3ebc45d2007-05-09 18:10:47 +00001200 p = (unsigned char *) memchr(data, '\n', datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001201 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1202 crlf = 1;
1203
1204 /* First, scan to see how many characters need to be encoded */
1205 in = 0;
1206 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001207 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001208 (data[in] == '=') ||
1209 (header && data[in] == '_') ||
Georg Brandl4aef7272007-03-13 22:49:43 +00001210 ((data[in] == '.') && (linelen == 0) &&
1211 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001212 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1213 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001214 ((data[in] < 33) &&
1215 (data[in] != '\r') && (data[in] != '\n') &&
Georg Brandl4aef7272007-03-13 22:49:43 +00001216 (quotetabs ||
1217 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001218 {
1219 if ((linelen + 3) >= MAXLINESIZE) {
1220 linelen = 0;
1221 if (crlf)
1222 odatalen += 3;
1223 else
1224 odatalen += 2;
1225 }
1226 linelen += 3;
1227 odatalen += 3;
1228 in++;
1229 }
1230 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001231 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001232 ((data[in] == '\n') ||
1233 ((in+1 < datalen) && (data[in] == '\r') &&
1234 (data[in+1] == '\n'))))
1235 {
1236 linelen = 0;
1237 /* Protect against whitespace on end of line */
1238 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1239 odatalen += 2;
1240 if (crlf)
1241 odatalen += 2;
1242 else
1243 odatalen += 1;
1244 if (data[in] == '\r')
1245 in += 2;
1246 else
1247 in++;
1248 }
1249 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001250 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001251 (data[in+1] != '\n') &&
1252 (linelen + 1) >= MAXLINESIZE) {
1253 linelen = 0;
1254 if (crlf)
1255 odatalen += 3;
1256 else
1257 odatalen += 2;
1258 }
1259 linelen++;
1260 odatalen++;
1261 in++;
1262 }
1263 }
1264 }
1265
Barry Warsaw23164a52004-05-11 02:05:11 +00001266 /* We allocate the output same size as input, this is overkill.
1267 * The previous implementation used calloc() so we'll zero out the
1268 * memory here too, since PyMem_Malloc() does not guarantee that.
1269 */
1270 odata = (unsigned char *) PyMem_Malloc(odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001271 if (odata == NULL) {
1272 PyErr_NoMemory();
1273 return NULL;
1274 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001275 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001276
1277 in = out = linelen = 0;
1278 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001279 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001280 (data[in] == '=') ||
1281 (header && data[in] == '_') ||
Georg Brandl4aef7272007-03-13 22:49:43 +00001282 ((data[in] == '.') && (linelen == 0) &&
1283 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001284 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1285 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001286 ((data[in] < 33) &&
1287 (data[in] != '\r') && (data[in] != '\n') &&
Georg Brandl4aef7272007-03-13 22:49:43 +00001288 (quotetabs ||
1289 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001290 {
1291 if ((linelen + 3 )>= MAXLINESIZE) {
1292 odata[out++] = '=';
1293 if (crlf) odata[out++] = '\r';
1294 odata[out++] = '\n';
1295 linelen = 0;
1296 }
1297 odata[out++] = '=';
1298 to_hex(data[in], &odata[out]);
1299 out += 2;
1300 in++;
1301 linelen += 3;
1302 }
1303 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001304 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001305 ((data[in] == '\n') ||
1306 ((in+1 < datalen) && (data[in] == '\r') &&
1307 (data[in+1] == '\n'))))
1308 {
1309 linelen = 0;
1310 /* Protect against whitespace on end of line */
1311 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1312 ch = odata[out-1];
1313 odata[out-1] = '=';
1314 to_hex(ch, &odata[out]);
1315 out += 2;
1316 }
Tim Peters934c1a12002-07-02 22:24:50 +00001317
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001318 if (crlf) odata[out++] = '\r';
1319 odata[out++] = '\n';
1320 if (data[in] == '\r')
1321 in += 2;
1322 else
1323 in++;
1324 }
1325 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001326 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001327 (data[in+1] != '\n') &&
1328 (linelen + 1) >= MAXLINESIZE) {
1329 odata[out++] = '=';
1330 if (crlf) odata[out++] = '\r';
1331 odata[out++] = '\n';
1332 linelen = 0;
1333 }
1334 linelen++;
1335 if (header && data[in] == ' ') {
1336 odata[out++] = '_';
1337 in++;
1338 }
1339 else {
1340 odata[out++] = data[in++];
1341 }
1342 }
1343 }
1344 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001345 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001346 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001347 return NULL;
1348 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001349 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001350 return rv;
1351}
Barry Warsawe977c212000-08-15 06:07:13 +00001352
Jack Jansen72781191995-08-07 14:34:15 +00001353/* List of functions defined in the module */
1354
1355static struct PyMethodDef binascii_module_methods[] = {
Barry Warsawe977c212000-08-15 06:07:13 +00001356 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1357 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1358 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1359 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1360 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1361 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1362 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1363 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1364 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1365 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1366 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1367 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1368 doc_rledecode_hqx},
1369 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1370 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
Tim Peters934c1a12002-07-02 22:24:50 +00001371 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001372 doc_a2b_qp},
Tim Peters934c1a12002-07-02 22:24:50 +00001373 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001374 doc_b2a_qp},
Barry Warsawe977c212000-08-15 06:07:13 +00001375 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001376};
1377
1378
1379/* Initialization function for the module (*must* be called initbinascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001380PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001381
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001382PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001383initbinascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001384{
1385 PyObject *m, *d, *x;
1386
1387 /* Create the module and add the functions */
1388 m = Py_InitModule("binascii", binascii_module_methods);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001389 if (m == NULL)
1390 return;
Jack Jansen72781191995-08-07 14:34:15 +00001391
1392 d = PyModule_GetDict(m);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001393 x = PyString_FromString(doc_binascii);
Jack Jansen72781191995-08-07 14:34:15 +00001394 PyDict_SetItemString(d, "__doc__", x);
Guido van Rossum5c159bd1997-08-04 23:55:25 +00001395 Py_XDECREF(x);
Jack Jansen72781191995-08-07 14:34:15 +00001396
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001397 Error = PyErr_NewException("binascii.Error", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001398 PyDict_SetItemString(d, "Error", Error);
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001399 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001400 PyDict_SetItemString(d, "Incomplete", Incomplete);
Jack Jansen72781191995-08-07 14:34:15 +00001401}