blob: ea34bcd9ded6b6ff50f288daa02d76e074306263 [file] [log] [blame]
Jack Jansen72781191995-08-07 14:34:15 +00001/*
2** Routines to represent binary data in ASCII and vice-versa
3**
4** This module currently supports the following encodings:
5** uuencode:
6** each line encodes 45 bytes (except possibly the last)
7** First char encodes (binary) length, rest data
8** each char encodes 6 bits, as follows:
9** binary: 01234567 abcdefgh ijklmnop
10** ascii: 012345 67abcd efghij klmnop
**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**      Short binary data is zero-extended (so the bits are always in the
**      right place); this is *not* reflected in the length.
Jack Jansen84bbc2e1995-10-04 16:38:44 +000014** base64:
15** Line breaks are insignificant, but lines are at most 76 chars
16** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17** is done via a table.
18** Short binary data is filled (in ASCII) with '='.
Jack Jansen72781191995-08-07 14:34:15 +000019** hqx:
20** File starts with introductory text, real data starts and ends
21** with colons.
22** Data consists of three similar parts: info, datafork, resourcefork.
23** Each part is protected (at the end) with a 16-bit crc
24** The binary data is run-length encoded, and then ascii-fied:
25** binary: 01234567 abcdefgh ijklmnop
26** ascii: 012345 67abcd efghij klmnop
27** ASCII encoding is table-driven, see the code.
28** Short binary data results in the runt ascii-byte being output with
29** the bits in the right place.
30**
31** While I was reading dozens of programs that encode or decode the formats
32** here (documentation? hihi:-) I have formulated Jansen's Observation:
33**
34** Programs that encode binary data in ASCII are written in
35** such a style that they are as unreadable as possible. Devices used
36** include unnecessary global variables, burying important tables
37** in unrelated sourcefiles, putting functions in include files,
38** using seemingly-descriptive variable names for different purposes,
39** calls to empty subroutines and a host of others.
40**
41** I have attempted to break with this tradition, but I guess that that
42** does make the performance sub-optimal. Oh well, too bad...
43**
44** Jack Jansen, CWI, July 1995.
Tim Peters934c1a12002-07-02 22:24:50 +000045**
** Added support for quoted-printable encoding, based on RFC 1521 et al.
** Quoted-printable encoding specifies that non-printable characters (anything
** below 32 and above 126) be encoded as =XX, where XX is the hexadecimal value
** of the character. It also specifies some other behavior to enable 8-bit data
** in a mail message with little difficulty (maximum line sizes, protecting
** some cases of whitespace, etc.).
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000052**
53** Brandon Long, September 2001.
Jack Jansen72781191995-08-07 14:34:15 +000054*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
Christian Heimes1dc54002008-03-24 02:19:29 +000059#ifdef USE_ZLIB_CRC32
60#include "zlib.h"
61#endif
Jack Jansen72781191995-08-07 14:34:15 +000062
63static PyObject *Error;
64static PyObject *Incomplete;
65
/*
** hqx lookup table, ascii->binary.
**
** Indexed by the input byte (0..255).  Each entry is either the 6-bit
** value (0x00..0x3F) the character stands for, or one of the marker
** codes below.  The table is only ever read, so it is const.
*/

#define RUNCHAR 0x90

#define DONE 0x7F   /* ':'  - terminates the data */
#define SKIP 0x7E   /* '\n' and '\r' - silently ignored */
#define FAIL 0x7D   /* any other byte - not part of the hqx alphabet */

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       sp    !     "     #     $     %     &     '    */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*       (     )     *     +     ,     -     .     /    */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*       0     1     2     3     4     5     6     7    */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*       8     9     :     ;     <     =     >     ?    */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       @     A     B     C     D     E     F     G    */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*       H     I     J     K     L     M     N     O    */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*       P     Q     R     S     T     U     V     W    */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*       X     Y     Z     [     \     ]     ^     _    */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*       `     a     b     c     d     e     f     g    */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*       h     i     j     k     l     m     n     o    */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*       p     q     r     s     t     u     v     w    */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       x     y     z     {     |     }     ~     ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
126
/* hqx lookup table, binary->ascii: entry i is the character that encodes
** the 6-bit value i.  Read-only, hence const. */
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000129
/* base64 lookup table, ascii->binary.
**
** Only 128 entries: callers must index it with a value in 0..0x7f.
** -1 marks a character outside the base64 alphabet; the pad character
** '=' deliberately maps to 0.  Read-only, hence const.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
140
141#define BASE64_PAD '='
Guido van Rossum355bc0c2001-10-30 03:00:52 +0000142
143/* Max binary chunk size; limited only by available memory */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000144#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000145
/* base64 lookup table, binary->ascii: entry i is the character that encodes
** the 6-bit value i.  Read-only, hence const. */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000148
149
150
/* 256-entry lookup table for the 16-bit hqx crc, indexed by a byte value.
** Read-only, hence const. */
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
185
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000186PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
Jack Jansen72781191995-08-07 14:34:15 +0000187
188static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000189binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000190{
191 unsigned char *ascii_data, *bin_data;
192 int leftbits = 0;
193 unsigned char this_ch;
194 unsigned int leftchar = 0;
195 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000196 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000197
Guido van Rossum43713e52000-02-29 13:59:29 +0000198 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000199 return NULL;
200
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000201 assert(ascii_len >= 0);
202
Jack Jansen72781191995-08-07 14:34:15 +0000203 /* First byte: binary data length (in bytes) */
204 bin_len = (*ascii_data++ - ' ') & 077;
205 ascii_len--;
206
207 /* Allocate the buffer */
Christian Heimes72b710a2008-05-26 13:28:38 +0000208 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000209 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000210 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000211
Jack Jansen72781191995-08-07 14:34:15 +0000212 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
Neal Norwitzfe92eef2004-06-06 20:13:10 +0000213 /* XXX is it really best to add NULs if there's no more data */
214 this_ch = (ascii_len > 0) ? *ascii_data : 0;
Jack Jansen72781191995-08-07 14:34:15 +0000215 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
216 /*
217 ** Whitespace. Assume some spaces got eaten at
218 ** end-of-line. (We check this later)
219 */
220 this_ch = 0;
221 } else {
Jack Jansen5d957971995-11-14 10:35:19 +0000222 /* Check the character for legality
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000223 ** The 64 in stead of the expected 63 is because
224 ** there are a few uuencodes out there that use
Guido van Rossum92d89172001-01-09 02:11:57 +0000225 ** '`' as zero instead of space.
Jack Jansen5d957971995-11-14 10:35:19 +0000226 */
227 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
Jack Jansen72781191995-08-07 14:34:15 +0000228 PyErr_SetString(Error, "Illegal char");
229 Py_DECREF(rv);
230 return NULL;
231 }
232 this_ch = (this_ch - ' ') & 077;
233 }
234 /*
235 ** Shift it in on the low end, and see if there's
236 ** a byte ready for output.
237 */
238 leftchar = (leftchar << 6) | (this_ch);
239 leftbits += 6;
240 if ( leftbits >= 8 ) {
241 leftbits -= 8;
242 *bin_data++ = (leftchar >> leftbits) & 0xff;
243 leftchar &= ((1 << leftbits) - 1);
244 bin_len--;
245 }
246 }
247 /*
248 ** Finally, check that if there's anything left on the line
249 ** that it's whitespace only.
250 */
251 while( ascii_len-- > 0 ) {
252 this_ch = *ascii_data++;
Guido van Rossum92d89172001-01-09 02:11:57 +0000253 /* Extra '`' may be written as padding in some cases */
254 if ( this_ch != ' ' && this_ch != ' '+64 &&
Guido van Rossum1243ae71997-07-11 18:36:28 +0000255 this_ch != '\n' && this_ch != '\r' ) {
Jack Jansen72781191995-08-07 14:34:15 +0000256 PyErr_SetString(Error, "Trailing garbage");
257 Py_DECREF(rv);
258 return NULL;
259 }
260 }
261 return rv;
262}
263
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000264PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000265
Jack Jansen72781191995-08-07 14:34:15 +0000266static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000267binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000268{
269 unsigned char *ascii_data, *bin_data;
270 int leftbits = 0;
271 unsigned char this_ch;
272 unsigned int leftchar = 0;
273 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000274 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000275
Guido van Rossum43713e52000-02-29 13:59:29 +0000276 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000277 return NULL;
278 if ( bin_len > 45 ) {
279 /* The 45 is a limit that appears in all uuencode's */
280 PyErr_SetString(Error, "At most 45 bytes at once");
281 return NULL;
282 }
283
284 /* We're lazy and allocate to much (fixed up later) */
Christian Heimes72b710a2008-05-26 13:28:38 +0000285 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000286 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000287 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000288
289 /* Store the length */
290 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000291
Jack Jansen72781191995-08-07 14:34:15 +0000292 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
293 /* Shift the data (or padding) into our buffer */
294 if ( bin_len > 0 ) /* Data */
295 leftchar = (leftchar << 8) | *bin_data;
296 else /* Padding */
297 leftchar <<= 8;
298 leftbits += 8;
299
300 /* See if there are 6-bit groups ready */
301 while ( leftbits >= 6 ) {
302 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
303 leftbits -= 6;
304 *ascii_data++ = this_ch + ' ';
305 }
306 }
307 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000308
Christian Heimes72b710a2008-05-26 13:28:38 +0000309 if (_PyBytes_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000310 (ascii_data -
Christian Heimes72b710a2008-05-26 13:28:38 +0000311 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000312 Py_DECREF(rv);
313 rv = NULL;
314 }
Jack Jansen72781191995-08-07 14:34:15 +0000315 return rv;
316}
317
Guido van Rossum2db4f471999-10-19 19:05:14 +0000318
319static int
Thomas Woutersf98db652006-03-01 21:37:32 +0000320binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000321{
Tim Peters934c1a12002-07-02 22:24:50 +0000322 /* Finds & returns the (num+1)th
Guido van Rossum2db4f471999-10-19 19:05:14 +0000323 ** valid character for base64, or -1 if none.
324 */
325
326 int ret = -1;
327 unsigned char c, b64val;
328
329 while ((slen > 0) && (ret == -1)) {
330 c = *s;
331 b64val = table_a2b_base64[c & 0x7f];
332 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
333 if (num == 0)
334 ret = *s;
335 num--;
336 }
337
338 s++;
339 slen--;
340 }
341 return ret;
342}
343
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000344PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000345
346static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000347binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000348{
349 unsigned char *ascii_data, *bin_data;
350 int leftbits = 0;
351 unsigned char this_ch;
352 unsigned int leftchar = 0;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000353 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000354 Py_ssize_t ascii_len, bin_len;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000355 int quad_pos = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000356
Guido van Rossum43713e52000-02-29 13:59:29 +0000357 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000358 return NULL;
359
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000360 assert(ascii_len >= 0);
361
362 if (ascii_len > PY_SSIZE_T_MAX - 3)
363 return PyErr_NoMemory();
364
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000365 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
366
367 /* Allocate the buffer */
Christian Heimes72b710a2008-05-26 13:28:38 +0000368 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000369 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000370 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000371 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000372
373 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
374 this_ch = *ascii_data;
375
376 if (this_ch > 0x7f ||
377 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
Jack Jansenba1de3b1996-01-22 10:47:15 +0000378 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000379
380 /* Check for pad sequences and ignore
381 ** the invalid ones.
382 */
383 if (this_ch == BASE64_PAD) {
384 if ( (quad_pos < 2) ||
385 ((quad_pos == 2) &&
386 (binascii_find_valid(ascii_data, ascii_len, 1)
387 != BASE64_PAD)) )
388 {
389 continue;
390 }
391 else {
392 /* A pad sequence means no more input.
393 ** We've already interpreted the data
394 ** from the quad at this point.
395 */
396 leftbits = 0;
397 break;
398 }
399 }
400
401 this_ch = table_a2b_base64[*ascii_data];
402 if ( this_ch == (unsigned char) -1 )
403 continue;
404
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000405 /*
406 ** Shift it in on the low end, and see if there's
407 ** a byte ready for output.
408 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000409 quad_pos = (quad_pos + 1) & 0x03;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000410 leftchar = (leftchar << 6) | (this_ch);
411 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000412
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000413 if ( leftbits >= 8 ) {
414 leftbits -= 8;
415 *bin_data++ = (leftchar >> leftbits) & 0xff;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000416 bin_len++;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000417 leftchar &= ((1 << leftbits) - 1);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000418 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000419 }
420
421 if (leftbits != 0) {
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000422 PyErr_SetString(Error, "Incorrect padding");
423 Py_DECREF(rv);
424 return NULL;
425 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000426
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000427 /* And set string size correctly. If the result string is empty
428 ** (because the input was all invalid) return the shared empty
Christian Heimes72b710a2008-05-26 13:28:38 +0000429 ** string instead; _PyBytes_Resize() won't do this for us.
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000430 */
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000431 if (bin_len > 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +0000432 if (_PyBytes_Resize(&rv, bin_len) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000433 Py_DECREF(rv);
434 rv = NULL;
435 }
436 }
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000437 else {
438 Py_DECREF(rv);
Christian Heimes72b710a2008-05-26 13:28:38 +0000439 rv = PyBytes_FromStringAndSize("", 0);
Thomas Wouters9e1c1922003-03-17 11:24:29 +0000440 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000441 return rv;
442}
443
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000444PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000445
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000446static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000447binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000448{
449 unsigned char *ascii_data, *bin_data;
450 int leftbits = 0;
451 unsigned char this_ch;
452 unsigned int leftchar = 0;
453 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000454 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000455
Guido van Rossum43713e52000-02-29 13:59:29 +0000456 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000457 return NULL;
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000458
459 assert(bin_len >= 0);
460
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000461 if ( bin_len > BASE64_MAXBIN ) {
462 PyErr_SetString(Error, "Too much data for base64 line");
463 return NULL;
464 }
Tim Peters934c1a12002-07-02 22:24:50 +0000465
Tim Peters1fbb5772001-12-19 04:41:35 +0000466 /* We're lazy and allocate too much (fixed up later).
467 "+3" leaves room for up to two pad characters and a trailing
468 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
Christian Heimes72b710a2008-05-26 13:28:38 +0000469 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000470 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000471 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000472
473 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
474 /* Shift the data into our buffer */
475 leftchar = (leftchar << 8) | *bin_data;
476 leftbits += 8;
477
478 /* See if there are 6-bit groups ready */
479 while ( leftbits >= 6 ) {
480 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
481 leftbits -= 6;
482 *ascii_data++ = table_b2a_base64[this_ch];
483 }
484 }
485 if ( leftbits == 2 ) {
486 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
487 *ascii_data++ = BASE64_PAD;
488 *ascii_data++ = BASE64_PAD;
489 } else if ( leftbits == 4 ) {
490 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
491 *ascii_data++ = BASE64_PAD;
Tim Peters934c1a12002-07-02 22:24:50 +0000492 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000493 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000494
Christian Heimes72b710a2008-05-26 13:28:38 +0000495 if (_PyBytes_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000496 (ascii_data -
Christian Heimes72b710a2008-05-26 13:28:38 +0000497 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000498 Py_DECREF(rv);
499 rv = NULL;
500 }
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000501 return rv;
502}
503
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000504PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
Jack Jansen72781191995-08-07 14:34:15 +0000505
506static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000507binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000508{
509 unsigned char *ascii_data, *bin_data;
510 int leftbits = 0;
511 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000512 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000513 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000514 Py_ssize_t len;
Jack Jansen72781191995-08-07 14:34:15 +0000515 int done = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000516
Guido van Rossum43713e52000-02-29 13:59:29 +0000517 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000518 return NULL;
519
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000520 assert(len >= 0);
521
522 if (len > PY_SSIZE_T_MAX - 2)
523 return PyErr_NoMemory();
524
Raymond Hettinger658717e2004-09-06 22:58:37 +0000525 /* Allocate a string that is too big (fixed later)
526 Add two to the initial length to prevent interning which
527 would preclude subsequent resizing. */
Christian Heimes72b710a2008-05-26 13:28:38 +0000528 if ( (rv=PyBytes_FromStringAndSize(NULL, len+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000529 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000530 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000531
532 for( ; len > 0 ; len--, ascii_data++ ) {
533 /* Get the byte and look it up */
534 this_ch = table_a2b_hqx[*ascii_data];
535 if ( this_ch == SKIP )
536 continue;
537 if ( this_ch == FAIL ) {
538 PyErr_SetString(Error, "Illegal char");
539 Py_DECREF(rv);
540 return NULL;
541 }
542 if ( this_ch == DONE ) {
543 /* The terminating colon */
544 done = 1;
545 break;
546 }
547
548 /* Shift it into the buffer and see if any bytes are ready */
549 leftchar = (leftchar << 6) | (this_ch);
550 leftbits += 6;
551 if ( leftbits >= 8 ) {
552 leftbits -= 8;
553 *bin_data++ = (leftchar >> leftbits) & 0xff;
554 leftchar &= ((1 << leftbits) - 1);
555 }
556 }
Tim Peters934c1a12002-07-02 22:24:50 +0000557
Jack Jansen72781191995-08-07 14:34:15 +0000558 if ( leftbits && !done ) {
559 PyErr_SetString(Incomplete,
560 "String has incomplete number of bytes");
561 Py_DECREF(rv);
562 return NULL;
563 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000564 if (_PyBytes_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000565 (bin_data -
Christian Heimes72b710a2008-05-26 13:28:38 +0000566 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000567 Py_DECREF(rv);
568 rv = NULL;
569 }
Guido van Rossum9c6ba5e1997-01-12 20:02:04 +0000570 if (rv) {
571 PyObject *rrv = Py_BuildValue("Oi", rv, done);
572 Py_DECREF(rv);
573 return rrv;
574 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000575
Jack Jansen72781191995-08-07 14:34:15 +0000576 return NULL;
577}
578
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000579PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
Jack Jansen72781191995-08-07 14:34:15 +0000580
581static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000582binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000583{
584 unsigned char *in_data, *out_data;
585 PyObject *rv;
586 unsigned char ch;
Thomas Woutersf98db652006-03-01 21:37:32 +0000587 Py_ssize_t in, inend, len;
Tim Peters934c1a12002-07-02 22:24:50 +0000588
Guido van Rossum43713e52000-02-29 13:59:29 +0000589 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000590 return NULL;
591
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000592 assert(len >= 0);
593
594 if (len > PY_SSIZE_T_MAX / 2 - 2)
595 return PyErr_NoMemory();
596
Jack Jansen72781191995-08-07 14:34:15 +0000597 /* Worst case: output is twice as big as input (fixed later) */
Christian Heimes72b710a2008-05-26 13:28:38 +0000598 if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000599 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000600 out_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000601
Jack Jansen72781191995-08-07 14:34:15 +0000602 for( in=0; in<len; in++) {
603 ch = in_data[in];
604 if ( ch == RUNCHAR ) {
605 /* RUNCHAR. Escape it. */
606 *out_data++ = RUNCHAR;
607 *out_data++ = 0;
608 } else {
609 /* Check how many following are the same */
610 for(inend=in+1;
611 inend<len && in_data[inend] == ch &&
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000612 inend < in+255;
Jack Jansen72781191995-08-07 14:34:15 +0000613 inend++) ;
Jack Jansen0223aa11995-08-31 13:44:23 +0000614 if ( inend - in > 3 ) {
615 /* More than 3 in a row. Output RLE. */
Jack Jansen72781191995-08-07 14:34:15 +0000616 *out_data++ = ch;
617 *out_data++ = RUNCHAR;
618 *out_data++ = inend-in;
619 in = inend-1;
620 } else {
621 /* Less than 3. Output the byte itself */
622 *out_data++ = ch;
623 }
624 }
625 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000626 if (_PyBytes_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000627 (out_data -
Christian Heimes72b710a2008-05-26 13:28:38 +0000628 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000629 Py_DECREF(rv);
630 rv = NULL;
631 }
Jack Jansen72781191995-08-07 14:34:15 +0000632 return rv;
633}
634
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000635PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
Tim Peters934c1a12002-07-02 22:24:50 +0000636
Jack Jansen72781191995-08-07 14:34:15 +0000637static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000638binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000639{
640 unsigned char *ascii_data, *bin_data;
641 int leftbits = 0;
642 unsigned char this_ch;
Jack Janseneaeb1c81995-08-14 12:17:57 +0000643 unsigned int leftchar = 0;
Jack Jansen72781191995-08-07 14:34:15 +0000644 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000645 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000646
Guido van Rossum43713e52000-02-29 13:59:29 +0000647 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
Jack Jansen72781191995-08-07 14:34:15 +0000648 return NULL;
649
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000650 assert(len >= 0);
651
652 if (len > PY_SSIZE_T_MAX / 2 - 2)
653 return PyErr_NoMemory();
654
Jack Jansen72781191995-08-07 14:34:15 +0000655 /* Allocate a buffer that is at least large enough */
Christian Heimes72b710a2008-05-26 13:28:38 +0000656 if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000657 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000658 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000659
Jack Jansen72781191995-08-07 14:34:15 +0000660 for( ; len > 0 ; len--, bin_data++ ) {
661 /* Shift into our buffer, and output any 6bits ready */
662 leftchar = (leftchar << 8) | *bin_data;
663 leftbits += 8;
664 while ( leftbits >= 6 ) {
665 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
666 leftbits -= 6;
667 *ascii_data++ = table_b2a_hqx[this_ch];
668 }
669 }
670 /* Output a possible runt byte */
671 if ( leftbits ) {
672 leftchar <<= (6-leftbits);
673 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
674 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000675 if (_PyBytes_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000676 (ascii_data -
Christian Heimes72b710a2008-05-26 13:28:38 +0000677 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000678 Py_DECREF(rv);
679 rv = NULL;
680 }
Jack Jansen72781191995-08-07 14:34:15 +0000681 return rv;
682}
683
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000684PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
Tim Peters934c1a12002-07-02 22:24:50 +0000685
Jack Jansen72781191995-08-07 14:34:15 +0000686static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000687binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000688{
689 unsigned char *in_data, *out_data;
690 unsigned char in_byte, in_repeat;
691 PyObject *rv;
Thomas Woutersf98db652006-03-01 21:37:32 +0000692 Py_ssize_t in_len, out_len, out_len_left;
Jack Jansen72781191995-08-07 14:34:15 +0000693
Guido van Rossum43713e52000-02-29 13:59:29 +0000694 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
Jack Jansen72781191995-08-07 14:34:15 +0000695 return NULL;
696
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000697 assert(in_len >= 0);
698
Jack Jansen72781191995-08-07 14:34:15 +0000699 /* Empty string is a special case */
700 if ( in_len == 0 )
Christian Heimes72b710a2008-05-26 13:28:38 +0000701 return PyBytes_FromStringAndSize("", 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000702 else if (in_len > PY_SSIZE_T_MAX / 2)
703 return PyErr_NoMemory();
Jack Jansen72781191995-08-07 14:34:15 +0000704
705 /* Allocate a buffer of reasonable size. Resized when needed */
706 out_len = in_len*2;
Christian Heimes72b710a2008-05-26 13:28:38 +0000707 if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL )
Jack Jansen72781191995-08-07 14:34:15 +0000708 return NULL;
709 out_len_left = out_len;
Christian Heimes72b710a2008-05-26 13:28:38 +0000710 out_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000711
712 /*
713 ** We need two macros here to get/put bytes and handle
714 ** end-of-buffer for input and output strings.
715 */
716#define INBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000717 do { \
718 if ( --in_len < 0 ) { \
719 PyErr_SetString(Incomplete, ""); \
720 Py_DECREF(rv); \
721 return NULL; \
722 } \
723 b = *in_data++; \
724 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000725
Jack Jansen72781191995-08-07 14:34:15 +0000726#define OUTBYTE(b) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000727 do { \
728 if ( --out_len_left < 0 ) { \
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000729 if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
Christian Heimes72b710a2008-05-26 13:28:38 +0000730 if (_PyBytes_Resize(&rv, 2*out_len) < 0) \
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000731 { Py_DECREF(rv); return NULL; } \
Christian Heimes72b710a2008-05-26 13:28:38 +0000732 out_data = (unsigned char *)PyBytes_AS_STRING(rv) \
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000733 + out_len; \
734 out_len_left = out_len-1; \
735 out_len = out_len * 2; \
736 } \
737 *out_data++ = b; \
738 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000739
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000740 /*
741 ** Handle first byte separately (since we have to get angry
742 ** in case of an orphaned RLE code).
743 */
744 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000745
746 if (in_byte == RUNCHAR) {
747 INBYTE(in_repeat);
748 if (in_repeat != 0) {
749 /* Note Error, not Incomplete (which is at the end
750 ** of the string only). This is a programmer error.
751 */
752 PyErr_SetString(Error, "Orphaned RLE code at start");
753 Py_DECREF(rv);
754 return NULL;
755 }
756 OUTBYTE(RUNCHAR);
757 } else {
758 OUTBYTE(in_byte);
759 }
Tim Peters934c1a12002-07-02 22:24:50 +0000760
Jack Jansen72781191995-08-07 14:34:15 +0000761 while( in_len > 0 ) {
762 INBYTE(in_byte);
763
764 if (in_byte == RUNCHAR) {
765 INBYTE(in_repeat);
766 if ( in_repeat == 0 ) {
767 /* Just an escaped RUNCHAR value */
768 OUTBYTE(RUNCHAR);
769 } else {
770 /* Pick up value and output a sequence of it */
771 in_byte = out_data[-1];
772 while ( --in_repeat > 0 )
773 OUTBYTE(in_byte);
774 }
775 } else {
776 /* Normal byte */
777 OUTBYTE(in_byte);
778 }
779 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000780 if (_PyBytes_Resize(&rv,
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000781 (out_data -
Christian Heimes72b710a2008-05-26 13:28:38 +0000782 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
Guido van Rossum0e225aa2007-05-22 20:24:57 +0000783 Py_DECREF(rv);
784 rv = NULL;
785 }
Jack Jansen72781191995-08-07 14:34:15 +0000786 return rv;
787}
788
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000789PyDoc_STRVAR(doc_crc_hqx,
790"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
Jack Jansen72781191995-08-07 14:34:15 +0000791
792static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000793binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000794{
795 unsigned char *bin_data;
796 unsigned int crc;
Thomas Woutersf98db652006-03-01 21:37:32 +0000797 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000798
Guido van Rossum43713e52000-02-29 13:59:29 +0000799 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
Jack Jansen72781191995-08-07 14:34:15 +0000800 return NULL;
801
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000802 while(len-- > 0) {
Jack Jansen72781191995-08-07 14:34:15 +0000803 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
804 }
805
806 return Py_BuildValue("i", crc);
807}
808
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000809PyDoc_STRVAR(doc_crc32,
810"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000811
Christian Heimes1dc54002008-03-24 02:19:29 +0000812#ifdef USE_ZLIB_CRC32
813/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
814static PyObject *
815binascii_crc32(PyObject *self, PyObject *args)
816{
Christian Heimescc47b052008-03-25 14:56:36 +0000817 unsigned int crc32val = 0; /* crc32(0L, Z_NULL, 0) */
Christian Heimes1dc54002008-03-24 02:19:29 +0000818 Byte *buf;
Neal Norwitz4027bf82008-03-24 04:59:05 +0000819 Py_ssize_t len;
Christian Heimescc47b052008-03-25 14:56:36 +0000820 int signed_val;
821
Christian Heimes1dc54002008-03-24 02:19:29 +0000822 if (!PyArg_ParseTuple(args, "s#|I:crc32", &buf, &len, &crc32val))
823 return NULL;
Christian Heimescc47b052008-03-25 14:56:36 +0000824 signed_val = crc32(crc32val, buf, len);
825 return PyLong_FromUnsignedLong(signed_val & 0xffffffffU);
Christian Heimes1dc54002008-03-24 02:19:29 +0000826}
827#else /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000828/* Crc - 32 BIT ANSI X3.66 CRC checksum files
829 Also known as: ISO 3307
830**********************************************************************|
831* *|
832* Demonstration program to compute the 32-bit CRC used as the frame *|
833* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
834* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
835* protocol). The 32-bit FCS was added via the Federal Register, *|
836* 1 June 1982, p.23798. I presume but don't know for certain that *|
837* this polynomial is or will be included in CCITT V.41, which *|
838* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
839* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
840* errors by a factor of 10^-5 over 16-bit FCS. *|
841* *|
842**********************************************************************|
843
844 Copyright (C) 1986 Gary S. Brown. You may use this program, or
845 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000846
Tim Peters934c1a12002-07-02 22:24:50 +0000847 First, the polynomial itself and its table of feedback terms. The
848 polynomial is
849 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
850 Note that we take it "backwards" and put the highest-order term in
851 the lowest-order bit. The X^32 term is "implied"; the LSB is the
852 X^31 term, etc. The X^0 term (usually shown as "+1") results in
853 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000854
Tim Peters934c1a12002-07-02 22:24:50 +0000855 Note that the usual hardware shift register implementation, which
856 is what we're using (we're merely optimizing it by doing eight-bit
857 chunks at a time) shifts bits into the lowest-order term. In our
858 implementation, that means shifting towards the right. Why do we
859 do it this way? Because the calculated CRC must be transmitted in
860 order from highest-order term to lowest-order term. UARTs transmit
861 characters in order from LSB to MSB. By storing the CRC this way,
862 we hand it to the UART in the order low-byte to high-byte; the UART
 sends each low-bit to high-bit; and the result is transmission bit
864 by bit from highest- to lowest-order term without requiring any bit
865 shuffling on our part. Reception works similarly.
866
867 The feedback terms table consists of 256, 32-bit entries. Notes:
868
869 1. The table can be generated at runtime if desired; code to do so
870 is shown later. It might not be obvious, but the feedback
871 terms simply represent the results of eight shift/xor opera-
872 tions for all combinations of data and CRC register values.
873
874 2. The CRC accumulation logic is the same for all CRC polynomials,
875 be they sixteen or thirty-two bits wide. You simply choose the
876 appropriate table. Alternatively, because the table can be
877 generated at runtime, you can start by generating the table for
878 the polynomial in question and use exactly the same "updcrc",
879 if your application needn't simultaneously handle two CRC
880 polynomials. (Note, however, that XMODEM is strange.)
881
882 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
883 of course, 32-bit entries work OK if the high 16 bits are zero.
884
885 4. The values must be right-shifted by eight bits by the "updcrc"
886 logic; the shift must be unsigned (bring in zeroes). On some
887 hardware you could probably optimize the shift in assembler by
888 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000889********************************************************************/
890
/* CRC-32 feedback terms, one entry per possible byte value.  The table is
 * only ever read, so it is declared const: it can live in read-only
 * storage and cannot be modified by accident. */
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
945
946static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000947binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000948{ /* By Jim Ahlstrom; All rights transferred to CNRI */
949 unsigned char *bin_data;
Gregory P. Smith27275032008-03-20 06:20:09 +0000950 unsigned int crc = 0; /* initial value of CRC */
Thomas Woutersf98db652006-03-01 21:37:32 +0000951 Py_ssize_t len;
Gregory P. Smith27275032008-03-20 06:20:09 +0000952 unsigned int result;
Tim Peters934c1a12002-07-02 22:24:50 +0000953
Gregory P. Smith27275032008-03-20 06:20:09 +0000954 if ( !PyArg_ParseTuple(args, "s#|I:crc32", &bin_data, &len, &crc) )
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000955 return NULL;
956
Tim Peters934c1a12002-07-02 22:24:50 +0000957 crc = ~ crc;
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000958 while (len-- > 0) {
Gregory P. Smith27275032008-03-20 06:20:09 +0000959 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000960 /* Note: (crc >> 8) MUST zero fill on left */
Gregory P. Smith27275032008-03-20 06:20:09 +0000961 }
Tim Petersa98011c2002-07-02 20:20:08 +0000962
Gregory P. Smith27275032008-03-20 06:20:09 +0000963 result = (crc ^ 0xFFFFFFFF);
964 return PyLong_FromUnsignedLong(result & 0xffffffff);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000965}
Christian Heimes1dc54002008-03-24 02:19:29 +0000966#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000967
Barry Warsawe977c212000-08-15 06:07:13 +0000968
969static PyObject *
970binascii_hexlify(PyObject *self, PyObject *args)
971{
972 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000973 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +0000974 PyObject *retval;
975 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +0000976 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +0000977
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000978 if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
Barry Warsawe977c212000-08-15 06:07:13 +0000979 return NULL;
980
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000981 assert(arglen >= 0);
982 if (arglen > PY_SSIZE_T_MAX / 2)
983 return PyErr_NoMemory();
984
Christian Heimes72b710a2008-05-26 13:28:38 +0000985 retval = PyBytes_FromStringAndSize(NULL, arglen*2);
Barry Warsawe977c212000-08-15 06:07:13 +0000986 if (!retval)
987 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000988 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +0000989
990 /* make hex version of string, taken from shamodule.c */
991 for (i=j=0; i < arglen; i++) {
992 char c;
993 c = (argbuf[i] >> 4) & 0xf;
994 c = (c>9) ? c+'a'-10 : c + '0';
995 retbuf[j++] = c;
996 c = argbuf[i] & 0xf;
997 c = (c>9) ? c+'a'-10 : c + '0';
998 retbuf[j++] = c;
999 }
1000 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +00001001}
1002
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001003PyDoc_STRVAR(doc_hexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001004"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
1005\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001006This function is also available as \"hexlify()\".");
Barry Warsawe977c212000-08-15 06:07:13 +00001007
1008
/* Convert one hexadecimal digit character (either case) to its value
 * 0..15.  Returns -1 when c is not a hexadecimal digit. */
static int
to_int(int c)
{
    int lowered;

    if (isdigit(c))
        return c - '0';

    lowered = isupper(c) ? tolower(c) : c;
    if (lowered >= 'a' && lowered <= 'f')
        return lowered - 'a' + 10;

    return -1;
}
1022
1023
1024static PyObject *
1025binascii_unhexlify(PyObject *self, PyObject *args)
1026{
1027 char* argbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +00001028 Py_ssize_t arglen;
Barry Warsawe977c212000-08-15 06:07:13 +00001029 PyObject *retval;
1030 char* retbuf;
Thomas Woutersf98db652006-03-01 21:37:32 +00001031 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +00001032
1033 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
1034 return NULL;
1035
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001036 assert(arglen >= 0);
1037
Barry Warsaw16168472000-08-15 06:59:58 +00001038 /* XXX What should we do about strings with an odd length? Should
1039 * we add an implicit leading zero, or a trailing zero? For now,
1040 * raise an exception.
Barry Warsawe977c212000-08-15 06:07:13 +00001041 */
1042 if (arglen % 2) {
Guido van Rossum4581ae52007-05-22 21:56:47 +00001043 PyErr_SetString(Error, "Odd-length string");
Barry Warsawe977c212000-08-15 06:07:13 +00001044 return NULL;
1045 }
1046
Christian Heimes72b710a2008-05-26 13:28:38 +00001047 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
Barry Warsawe977c212000-08-15 06:07:13 +00001048 if (!retval)
1049 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001050 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001051
1052 for (i=j=0; i < arglen; i += 2) {
1053 int top = to_int(Py_CHARMASK(argbuf[i]));
1054 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1055 if (top == -1 || bot == -1) {
Guido van Rossum4581ae52007-05-22 21:56:47 +00001056 PyErr_SetString(Error,
Barry Warsaw16168472000-08-15 06:59:58 +00001057 "Non-hexadecimal digit found");
Barry Warsawe977c212000-08-15 06:07:13 +00001058 goto finally;
1059 }
1060 retbuf[j++] = (top << 4) + bot;
1061 }
1062 return retval;
1063
1064 finally:
1065 Py_DECREF(retval);
1066 return NULL;
1067}
1068
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001069PyDoc_STRVAR(doc_unhexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001070"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1071\n\
1072hexstr must contain an even number of hex digits (upper or lower case).\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001073This function is also available as \"unhexlify()\"");
Barry Warsawe977c212000-08-15 06:07:13 +00001074
/* Value of each 7-bit ASCII character as a hexadecimal digit, or -1 if
 * the character is not one.  Read-only, hence const. */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* The table has 128 entries: callers must pass a value below 128. */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Line-length threshold used when inserting quoted-printable soft breaks */
#define MAXLINESIZE 76
1089
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001090PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001091
Tim Peters934c1a12002-07-02 22:24:50 +00001092static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001093binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1094{
Thomas Wouters7087f782006-03-01 23:10:05 +00001095 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001096 char ch;
1097 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001098 Py_ssize_t datalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001099 PyObject *rv;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001100 static char *kwlist[] = {"data", "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001101 int header = 0;
1102
Tim Peters934c1a12002-07-02 22:24:50 +00001103 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001104 &datalen, &header))
1105 return NULL;
1106
Barry Warsaw23164a52004-05-11 02:05:11 +00001107 /* We allocate the output same size as input, this is overkill.
1108 * The previous implementation used calloc() so we'll zero out the
1109 * memory here too, since PyMem_Malloc() does not guarantee that.
1110 */
1111 odata = (unsigned char *) PyMem_Malloc(datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001112 if (odata == NULL) {
1113 PyErr_NoMemory();
1114 return NULL;
1115 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001116 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001117
1118 in = out = 0;
1119 while (in < datalen) {
1120 if (data[in] == '=') {
1121 in++;
1122 if (in >= datalen) break;
1123 /* Soft line breaks */
Thomas Wouters89f507f2006-12-13 04:49:30 +00001124 if ((data[in] == '\n') || (data[in] == '\r')) {
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001125 if (data[in] != '\n') {
1126 while (in < datalen && data[in] != '\n') in++;
1127 }
1128 if (in < datalen) in++;
1129 }
1130 else if (data[in] == '=') {
1131 /* broken case from broken python qp */
1132 odata[out++] = '=';
1133 in++;
1134 }
Tim Peters934c1a12002-07-02 22:24:50 +00001135 else if (((data[in] >= 'A' && data[in] <= 'F') ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001136 (data[in] >= 'a' && data[in] <= 'f') ||
1137 (data[in] >= '0' && data[in] <= '9')) &&
1138 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1139 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1140 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1141 /* hexval */
1142 ch = hexval(data[in]) << 4;
1143 in++;
1144 ch |= hexval(data[in]);
1145 in++;
1146 odata[out++] = ch;
1147 }
1148 else {
1149 odata[out++] = '=';
1150 }
1151 }
1152 else if (header && data[in] == '_') {
1153 odata[out++] = ' ';
1154 in++;
1155 }
1156 else {
1157 odata[out] = data[in];
1158 in++;
1159 out++;
1160 }
1161 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001162 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001163 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001164 return NULL;
1165 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001166 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001167 return rv;
1168}
1169
/* Write the two uppercase hexadecimal digits of ch to s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0xf];
    s[1] = digits[ch & 0xf];
    return 0;
}
1180
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001181PyDoc_STRVAR(doc_b2a_qp,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001182"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1183 Encode a string using quoted-printable encoding. \n\
1184\n\
1185On encoding, when istext is set, newlines are not encoded, and white \n\
1186space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001187both encoded. When quotetabs is set, space and tabs are encoded.");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001188
1189/* XXX: This is ridiculously complicated to be backward compatible
1190 * (mostly) with the quopri module. It doesn't re-create the quopri
1191 * module bug where text ending in CRLF has the CR encoded */
Tim Peters934c1a12002-07-02 22:24:50 +00001192static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001193binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1194{
Thomas Wouters7087f782006-03-01 23:10:05 +00001195 Py_ssize_t in, out;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001196 unsigned char *data, *odata;
Thomas Woutersf98db652006-03-01 21:37:32 +00001197 Py_ssize_t datalen = 0, odatalen = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001198 PyObject *rv;
1199 unsigned int linelen = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001200 static char *kwlist[] = {"data", "quotetabs", "istext",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001201 "header", NULL};
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001202 int istext = 1;
1203 int quotetabs = 0;
1204 int header = 0;
1205 unsigned char ch;
1206 int crlf = 0;
1207 unsigned char *p;
1208
Tim Peters934c1a12002-07-02 22:24:50 +00001209 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001210 &datalen, &quotetabs, &istext, &header))
1211 return NULL;
1212
1213 /* See if this string is using CRLF line ends */
1214 /* XXX: this function has the side effect of converting all of
1215 * the end of lines to be the same depending on this detection
1216 * here */
Walter Dörwald0925e412007-05-09 18:23:50 +00001217 p = (unsigned char *) memchr(data, '\n', datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001218 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1219 crlf = 1;
1220
1221 /* First, scan to see how many characters need to be encoded */
1222 in = 0;
1223 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001224 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001225 (data[in] == '=') ||
1226 (header && data[in] == '_') ||
Guido van Rossumd8faa362007-04-27 19:54:29 +00001227 ((data[in] == '.') && (linelen == 0) &&
1228 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001229 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1230 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001231 ((data[in] < 33) &&
1232 (data[in] != '\r') && (data[in] != '\n') &&
Guido van Rossumd8faa362007-04-27 19:54:29 +00001233 (quotetabs ||
1234 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001235 {
1236 if ((linelen + 3) >= MAXLINESIZE) {
1237 linelen = 0;
1238 if (crlf)
1239 odatalen += 3;
1240 else
1241 odatalen += 2;
1242 }
1243 linelen += 3;
1244 odatalen += 3;
1245 in++;
1246 }
1247 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001248 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001249 ((data[in] == '\n') ||
1250 ((in+1 < datalen) && (data[in] == '\r') &&
1251 (data[in+1] == '\n'))))
1252 {
1253 linelen = 0;
1254 /* Protect against whitespace on end of line */
1255 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1256 odatalen += 2;
1257 if (crlf)
1258 odatalen += 2;
1259 else
1260 odatalen += 1;
1261 if (data[in] == '\r')
1262 in += 2;
1263 else
1264 in++;
1265 }
1266 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001267 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001268 (data[in+1] != '\n') &&
1269 (linelen + 1) >= MAXLINESIZE) {
1270 linelen = 0;
1271 if (crlf)
1272 odatalen += 3;
1273 else
1274 odatalen += 2;
1275 }
1276 linelen++;
1277 odatalen++;
1278 in++;
1279 }
1280 }
1281 }
1282
Barry Warsaw23164a52004-05-11 02:05:11 +00001283 /* We allocate the output same size as input, this is overkill.
1284 * The previous implementation used calloc() so we'll zero out the
1285 * memory here too, since PyMem_Malloc() does not guarantee that.
1286 */
1287 odata = (unsigned char *) PyMem_Malloc(odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001288 if (odata == NULL) {
1289 PyErr_NoMemory();
1290 return NULL;
1291 }
Martin v. Löwisb600fe92004-07-27 15:03:53 +00001292 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001293
1294 in = out = linelen = 0;
1295 while (in < datalen) {
Tim Peters934c1a12002-07-02 22:24:50 +00001296 if ((data[in] > 126) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001297 (data[in] == '=') ||
1298 (header && data[in] == '_') ||
Guido van Rossumd8faa362007-04-27 19:54:29 +00001299 ((data[in] == '.') && (linelen == 0) &&
1300 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001301 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1302 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
Tim Peters934c1a12002-07-02 22:24:50 +00001303 ((data[in] < 33) &&
1304 (data[in] != '\r') && (data[in] != '\n') &&
Guido van Rossumd8faa362007-04-27 19:54:29 +00001305 (quotetabs ||
1306 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001307 {
1308 if ((linelen + 3 )>= MAXLINESIZE) {
1309 odata[out++] = '=';
1310 if (crlf) odata[out++] = '\r';
1311 odata[out++] = '\n';
1312 linelen = 0;
1313 }
1314 odata[out++] = '=';
1315 to_hex(data[in], &odata[out]);
1316 out += 2;
1317 in++;
1318 linelen += 3;
1319 }
1320 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001321 if (istext &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001322 ((data[in] == '\n') ||
1323 ((in+1 < datalen) && (data[in] == '\r') &&
1324 (data[in+1] == '\n'))))
1325 {
1326 linelen = 0;
1327 /* Protect against whitespace on end of line */
1328 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1329 ch = odata[out-1];
1330 odata[out-1] = '=';
1331 to_hex(ch, &odata[out]);
1332 out += 2;
1333 }
Tim Peters934c1a12002-07-02 22:24:50 +00001334
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001335 if (crlf) odata[out++] = '\r';
1336 odata[out++] = '\n';
1337 if (data[in] == '\r')
1338 in += 2;
1339 else
1340 in++;
1341 }
1342 else {
Tim Peters934c1a12002-07-02 22:24:50 +00001343 if ((in + 1 != datalen) &&
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001344 (data[in+1] != '\n') &&
1345 (linelen + 1) >= MAXLINESIZE) {
1346 odata[out++] = '=';
1347 if (crlf) odata[out++] = '\r';
1348 odata[out++] = '\n';
1349 linelen = 0;
1350 }
1351 linelen++;
1352 if (header && data[in] == ' ') {
1353 odata[out++] = '_';
1354 in++;
1355 }
1356 else {
1357 odata[out++] = data[in++];
1358 }
1359 }
1360 }
1361 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001362 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
Barry Warsaw23164a52004-05-11 02:05:11 +00001363 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001364 return NULL;
1365 }
Barry Warsaw23164a52004-05-11 02:05:11 +00001366 PyMem_Free(odata);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001367 return rv;
1368}
Barry Warsawe977c212000-08-15 06:07:13 +00001369
Jack Jansen72781191995-08-07 14:34:15 +00001370/* List of functions defined in the module */
1371
1372static struct PyMethodDef binascii_module_methods[] = {
Barry Warsawe977c212000-08-15 06:07:13 +00001373 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1374 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1375 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1376 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1377 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1378 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1379 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1380 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1381 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1382 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1383 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1384 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1385 doc_rledecode_hqx},
1386 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1387 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
Tim Peters934c1a12002-07-02 22:24:50 +00001388 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001389 doc_a2b_qp},
Tim Peters934c1a12002-07-02 22:24:50 +00001390 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001391 doc_b2a_qp},
Barry Warsawe977c212000-08-15 06:07:13 +00001392 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001393};
1394
1395
/* Initialization function for the module (*must* be called PyInit_binascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001397PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001398
Martin v. Löwis1a214512008-06-11 05:26:20 +00001399
1400static struct PyModuleDef binasciimodule = {
1401 PyModuleDef_HEAD_INIT,
1402 "binascii",
1403 doc_binascii,
1404 -1,
1405 binascii_module_methods,
1406 NULL,
1407 NULL,
1408 NULL,
1409 NULL
1410};
1411
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001412PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001413PyInit_binascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001414{
Guido van Rossumfe096d22007-08-23 18:31:24 +00001415 PyObject *m, *d;
Jack Jansen72781191995-08-07 14:34:15 +00001416
1417 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001418 m = PyModule_Create(&binasciimodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001419 if (m == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001420 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +00001421
1422 d = PyModule_GetDict(m);
Jack Jansen72781191995-08-07 14:34:15 +00001423
Guido van Rossum4581ae52007-05-22 21:56:47 +00001424 Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001425 PyDict_SetItemString(d, "Error", Error);
Guido van Rossum7dbb48a1997-10-08 15:26:07 +00001426 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
Jack Jansen72781191995-08-07 14:34:15 +00001427 PyDict_SetItemString(d, "Incomplete", Incomplete);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001428 if (PyErr_Occurred()) {
1429 Py_DECREF(m);
1430 m = NULL;
1431 }
1432 return m;
Jack Jansen72781191995-08-07 14:34:15 +00001433}