blob: dc4fef542de4192b09508a71df0d0d13dd878b80 [file] [log] [blame]
Jack Jansen72781191995-08-07 14:34:15 +00001/*
2** Routines to represent binary data in ASCII and vice-versa
3**
4** This module currently supports the following encodings:
5** uuencode:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00006** each line encodes 45 bytes (except possibly the last)
7** First char encodes (binary) length, rest data
8** each char encodes 6 bits, as follows:
9** binary: 01234567 abcdefgh ijklmnop
10** ascii: 012345 67abcd efghij klmnop
11** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12** short binary data is zero-extended (so the bits are always in the
13** right place), this does *not* reflect in the length.
Jack Jansen84bbc2e1995-10-04 16:38:44 +000014** base64:
15** Line breaks are insignificant, but lines are at most 76 chars
16** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17** is done via a table.
18** Short binary data is filled (in ASCII) with '='.
Jack Jansen72781191995-08-07 14:34:15 +000019** hqx:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000020** File starts with introductory text, real data starts and ends
21** with colons.
22** Data consists of three similar parts: info, datafork, resourcefork.
23** Each part is protected (at the end) with a 16-bit crc
24** The binary data is run-length encoded, and then ascii-fied:
25** binary: 01234567 abcdefgh ijklmnop
26** ascii: 012345 67abcd efghij klmnop
27** ASCII encoding is table-driven, see the code.
28** Short binary data results in the runt ascii-byte being output with
29** the bits in the right place.
Jack Jansen72781191995-08-07 14:34:15 +000030**
31** While I was reading dozens of programs that encode or decode the formats
32** here (documentation? hihi:-) I have formulated Jansen's Observation:
33**
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000034** Programs that encode binary data in ASCII are written in
35** such a style that they are as unreadable as possible. Devices used
36** include unnecessary global variables, burying important tables
37** in unrelated sourcefiles, putting functions in include files,
38** using seemingly-descriptive variable names for different purposes,
39** calls to empty subroutines and a host of others.
Jack Jansen72781191995-08-07 14:34:15 +000040**
41** I have attempted to break with this tradition, but I guess that that
42** does make the performance sub-optimal. Oh well, too bad...
43**
44** Jack Jansen, CWI, July 1995.
Tim Peters934c1a12002-07-02 22:24:50 +000045**
** Added support for quoted-printable encoding, based on RFC 1521 et al.
** Quoted-printable encoding specifies that non-printable characters (anything
** below 32 or above 126) be encoded as =XX, where XX is the hexadecimal value
** of the character.  It also specifies some other behavior to enable 8-bit data
** in a mail message with little difficulty (maximum line sizes, protecting
** some cases of whitespace, etc).
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000052**
53** Brandon Long, September 2001.
Jack Jansen72781191995-08-07 14:34:15 +000054*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
Christian Heimes1dc54002008-03-24 02:19:29 +000059#ifdef USE_ZLIB_CRC32
60#include "zlib.h"
61#endif
Jack Jansen72781191995-08-07 14:34:15 +000062
63static PyObject *Error;
64static PyObject *Incomplete;
65
/*
** hqx lookup table, ascii->binary.
**
** Indexed by the raw input byte.  A valid hqx character maps to its
** 6-bit value (0x00..0x3F); every other byte maps to one of the
** sentinels DONE, SKIP or FAIL defined below.  The table is read-only.
*/

/* Marker byte used by the hqx run-length coding routines. */
#define RUNCHAR 0x90

#define DONE 0x7F   /* ':'  - terminates the hqx data          */
#define SKIP 0x7E   /* '\n' and '\r' - silently ignored        */
#define FAIL 0x7D   /* any byte that is not legal in hqx data  */

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*      space  !     "     #     $     %     &     '    */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*       (     )     *     +     ,     -     .     /    */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*       0     1     2     3     4     5     6     7    */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*       8     9     :     ;     <     =     >     ?    */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       @     A     B     C     D     E     F     G    */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*       H     I     J     K     L     M     N     O    */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*       P     Q     R     S     T     U     V     W    */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*       X     Y     Z     [     \     ]     ^     _    */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*       `     a     b     c     d     e     f     g    */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*       h     i     j     k     l     m     n     o    */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*       p     q     r     s     t     u     v     w    */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       x     y     z     {     |     }     ~     ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/* Bytes 0x80..0xFF are never legal. */
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
126
/*
** hqx alphabet, binary->ascii: entry i is the character that encodes
** the 6-bit value i.  Read-only.
*/
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000129
/*
** base64 lookup table, ascii->binary.
**
** Indexed by a 7-bit character code (128 entries).  A character of the
** base64 alphabet maps to its 6-bit value; -1 marks every other
** character.  Read-only.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/*
** base64 alphabet, binary->ascii: entry i is the character that encodes
** the 6-bit value i.  Read-only.
*/
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000148
149
150
/*
** 16-bit CRC lookup table for the hqx checksum, one entry per byte value.
** Entry i is the CRC of the single byte i for generator polynomial 0x1021,
** processed most-significant bit first.  Read-only.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
185
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000186PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
Jack Jansen72781191995-08-07 14:34:15 +0000187
188static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000189binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000190{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 Py_buffer pascii;
192 unsigned char *ascii_data, *bin_data;
193 int leftbits = 0;
194 unsigned char this_ch;
195 unsigned int leftchar = 0;
196 PyObject *rv;
197 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 if ( !PyArg_ParseTuple(args, "y*:a2b_uu", &pascii) )
200 return NULL;
201 ascii_data = pascii.buf;
202 ascii_len = pascii.len;
Jack Jansen72781191995-08-07 14:34:15 +0000203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 assert(ascii_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000205
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000206 /* First byte: binary data length (in bytes) */
207 bin_len = (*ascii_data++ - ' ') & 077;
208 ascii_len--;
Jack Jansen72781191995-08-07 14:34:15 +0000209
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000210 /* Allocate the buffer */
211 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) {
212 PyBuffer_Release(&pascii);
213 return NULL;
214 }
215 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000216
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000217 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
218 /* XXX is it really best to add NULs if there's no more data */
219 this_ch = (ascii_len > 0) ? *ascii_data : 0;
220 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
221 /*
222 ** Whitespace. Assume some spaces got eaten at
223 ** end-of-line. (We check this later)
224 */
225 this_ch = 0;
226 } else {
227 /* Check the character for legality
228 ** The 64 in stead of the expected 63 is because
229 ** there are a few uuencodes out there that use
230 ** '`' as zero instead of space.
231 */
232 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
233 PyErr_SetString(Error, "Illegal char");
234 PyBuffer_Release(&pascii);
235 Py_DECREF(rv);
236 return NULL;
237 }
238 this_ch = (this_ch - ' ') & 077;
239 }
240 /*
241 ** Shift it in on the low end, and see if there's
242 ** a byte ready for output.
243 */
244 leftchar = (leftchar << 6) | (this_ch);
245 leftbits += 6;
246 if ( leftbits >= 8 ) {
247 leftbits -= 8;
248 *bin_data++ = (leftchar >> leftbits) & 0xff;
249 leftchar &= ((1 << leftbits) - 1);
250 bin_len--;
251 }
252 }
253 /*
254 ** Finally, check that if there's anything left on the line
255 ** that it's whitespace only.
256 */
257 while( ascii_len-- > 0 ) {
258 this_ch = *ascii_data++;
259 /* Extra '`' may be written as padding in some cases */
260 if ( this_ch != ' ' && this_ch != ' '+64 &&
261 this_ch != '\n' && this_ch != '\r' ) {
262 PyErr_SetString(Error, "Trailing garbage");
263 PyBuffer_Release(&pascii);
264 Py_DECREF(rv);
265 return NULL;
266 }
267 }
268 PyBuffer_Release(&pascii);
269 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000270}
271
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000272PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000273
Jack Jansen72781191995-08-07 14:34:15 +0000274static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000275binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000276{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 Py_buffer pbin;
278 unsigned char *ascii_data, *bin_data;
279 int leftbits = 0;
280 unsigned char this_ch;
281 unsigned int leftchar = 0;
282 PyObject *rv;
283 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 if ( !PyArg_ParseTuple(args, "y*:b2a_uu", &pbin) )
286 return NULL;
287 bin_data = pbin.buf;
288 bin_len = pbin.len;
289 if ( bin_len > 45 ) {
290 /* The 45 is a limit that appears in all uuencode's */
291 PyErr_SetString(Error, "At most 45 bytes at once");
292 PyBuffer_Release(&pbin);
293 return NULL;
294 }
Jack Jansen72781191995-08-07 14:34:15 +0000295
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 /* We're lazy and allocate to much (fixed up later) */
297 if ( (rv=PyBytes_FromStringAndSize(NULL, 2 + (bin_len+2)/3*4)) == NULL ) {
298 PyBuffer_Release(&pbin);
299 return NULL;
300 }
301 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000302
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 /* Store the length */
304 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000305
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000306 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
307 /* Shift the data (or padding) into our buffer */
308 if ( bin_len > 0 ) /* Data */
309 leftchar = (leftchar << 8) | *bin_data;
310 else /* Padding */
311 leftchar <<= 8;
312 leftbits += 8;
Jack Jansen72781191995-08-07 14:34:15 +0000313
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 /* See if there are 6-bit groups ready */
315 while ( leftbits >= 6 ) {
316 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
317 leftbits -= 6;
318 *ascii_data++ = this_ch + ' ';
319 }
320 }
321 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000322
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000323 if (_PyBytes_Resize(&rv,
324 (ascii_data -
325 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
326 Py_DECREF(rv);
327 rv = NULL;
328 }
329 PyBuffer_Release(&pbin);
330 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000331}
332
Guido van Rossum2db4f471999-10-19 19:05:14 +0000333
334static int
Thomas Woutersf98db652006-03-01 21:37:32 +0000335binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000336{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000337 /* Finds & returns the (num+1)th
338 ** valid character for base64, or -1 if none.
339 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000340
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 int ret = -1;
342 unsigned char c, b64val;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 while ((slen > 0) && (ret == -1)) {
345 c = *s;
346 b64val = table_a2b_base64[c & 0x7f];
347 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
348 if (num == 0)
349 ret = *s;
350 num--;
351 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000352
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 s++;
354 slen--;
355 }
356 return ret;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000357}
358
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000359PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000360
361static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000362binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 Py_buffer pascii;
365 unsigned char *ascii_data, *bin_data;
366 int leftbits = 0;
367 unsigned char this_ch;
368 unsigned int leftchar = 0;
369 PyObject *rv;
370 Py_ssize_t ascii_len, bin_len;
371 int quad_pos = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000372
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000373 if ( !PyArg_ParseTuple(args, "y*:a2b_base64", &pascii) )
374 return NULL;
375 ascii_data = pascii.buf;
376 ascii_len = pascii.len;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 assert(ascii_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 if (ascii_len > PY_SSIZE_T_MAX - 3) {
381 PyBuffer_Release(&pascii);
382 return PyErr_NoMemory();
383 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000385 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 /* Allocate the buffer */
388 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) {
389 PyBuffer_Release(&pascii);
390 return NULL;
391 }
392 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
393 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000394
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000395 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
396 this_ch = *ascii_data;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000397
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000398 if (this_ch > 0x7f ||
399 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
400 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 /* Check for pad sequences and ignore
403 ** the invalid ones.
404 */
405 if (this_ch == BASE64_PAD) {
406 if ( (quad_pos < 2) ||
407 ((quad_pos == 2) &&
408 (binascii_find_valid(ascii_data, ascii_len, 1)
409 != BASE64_PAD)) )
410 {
411 continue;
412 }
413 else {
414 /* A pad sequence means no more input.
415 ** We've already interpreted the data
416 ** from the quad at this point.
417 */
418 leftbits = 0;
419 break;
420 }
421 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000423 this_ch = table_a2b_base64[*ascii_data];
424 if ( this_ch == (unsigned char) -1 )
425 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000426
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000427 /*
428 ** Shift it in on the low end, and see if there's
429 ** a byte ready for output.
430 */
431 quad_pos = (quad_pos + 1) & 0x03;
432 leftchar = (leftchar << 6) | (this_ch);
433 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000434
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000435 if ( leftbits >= 8 ) {
436 leftbits -= 8;
437 *bin_data++ = (leftchar >> leftbits) & 0xff;
438 bin_len++;
439 leftchar &= ((1 << leftbits) - 1);
440 }
441 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 if (leftbits != 0) {
444 PyBuffer_Release(&pascii);
445 PyErr_SetString(Error, "Incorrect padding");
446 Py_DECREF(rv);
447 return NULL;
448 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 /* And set string size correctly. If the result string is empty
451 ** (because the input was all invalid) return the shared empty
452 ** string instead; _PyBytes_Resize() won't do this for us.
453 */
454 if (bin_len > 0) {
455 if (_PyBytes_Resize(&rv, bin_len) < 0) {
456 Py_DECREF(rv);
457 rv = NULL;
458 }
459 }
460 else {
461 Py_DECREF(rv);
462 rv = PyBytes_FromStringAndSize("", 0);
463 }
464 PyBuffer_Release(&pascii);
465 return rv;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000466}
467
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000468PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000469
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000470static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000471binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000472{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000473 Py_buffer pbuf;
474 unsigned char *ascii_data, *bin_data;
475 int leftbits = 0;
476 unsigned char this_ch;
477 unsigned int leftchar = 0;
478 PyObject *rv;
479 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000480
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000481 if ( !PyArg_ParseTuple(args, "y*:b2a_base64", &pbuf) )
482 return NULL;
483 bin_data = pbuf.buf;
484 bin_len = pbuf.len;
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000485
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000486 assert(bin_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000488 if ( bin_len > BASE64_MAXBIN ) {
489 PyErr_SetString(Error, "Too much data for base64 line");
490 PyBuffer_Release(&pbuf);
491 return NULL;
492 }
Tim Peters934c1a12002-07-02 22:24:50 +0000493
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 /* We're lazy and allocate too much (fixed up later).
495 "+3" leaves room for up to two pad characters and a trailing
496 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
497 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL ) {
498 PyBuffer_Release(&pbuf);
499 return NULL;
500 }
501 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000502
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000503 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
504 /* Shift the data into our buffer */
505 leftchar = (leftchar << 8) | *bin_data;
506 leftbits += 8;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 /* See if there are 6-bit groups ready */
509 while ( leftbits >= 6 ) {
510 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
511 leftbits -= 6;
512 *ascii_data++ = table_b2a_base64[this_ch];
513 }
514 }
515 if ( leftbits == 2 ) {
516 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
517 *ascii_data++ = BASE64_PAD;
518 *ascii_data++ = BASE64_PAD;
519 } else if ( leftbits == 4 ) {
520 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
521 *ascii_data++ = BASE64_PAD;
522 }
523 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000524
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 if (_PyBytes_Resize(&rv,
526 (ascii_data -
527 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
528 Py_DECREF(rv);
529 rv = NULL;
530 }
531 PyBuffer_Release(&pbuf);
532 return rv;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000533}
534
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000535PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
Jack Jansen72781191995-08-07 14:34:15 +0000536
537static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000538binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000539{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000540 Py_buffer pascii;
541 unsigned char *ascii_data, *bin_data;
542 int leftbits = 0;
543 unsigned char this_ch;
544 unsigned int leftchar = 0;
545 PyObject *rv;
546 Py_ssize_t len;
547 int done = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000548
Florent Xiclunaf1046ca2010-07-27 21:20:15 +0000549 if ( !PyArg_ParseTuple(args, "y*:a2b_hqx", &pascii) )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000550 return NULL;
551 ascii_data = pascii.buf;
552 len = pascii.len;
Jack Jansen72781191995-08-07 14:34:15 +0000553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000554 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000556 if (len > PY_SSIZE_T_MAX - 2) {
557 PyBuffer_Release(&pascii);
558 return PyErr_NoMemory();
559 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000560
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000561 /* Allocate a string that is too big (fixed later)
562 Add two to the initial length to prevent interning which
563 would preclude subsequent resizing. */
564 if ( (rv=PyBytes_FromStringAndSize(NULL, len+2)) == NULL ) {
565 PyBuffer_Release(&pascii);
566 return NULL;
567 }
568 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 for( ; len > 0 ; len--, ascii_data++ ) {
571 /* Get the byte and look it up */
572 this_ch = table_a2b_hqx[*ascii_data];
573 if ( this_ch == SKIP )
574 continue;
575 if ( this_ch == FAIL ) {
576 PyErr_SetString(Error, "Illegal char");
577 PyBuffer_Release(&pascii);
578 Py_DECREF(rv);
579 return NULL;
580 }
581 if ( this_ch == DONE ) {
582 /* The terminating colon */
583 done = 1;
584 break;
585 }
Jack Jansen72781191995-08-07 14:34:15 +0000586
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000587 /* Shift it into the buffer and see if any bytes are ready */
588 leftchar = (leftchar << 6) | (this_ch);
589 leftbits += 6;
590 if ( leftbits >= 8 ) {
591 leftbits -= 8;
592 *bin_data++ = (leftchar >> leftbits) & 0xff;
593 leftchar &= ((1 << leftbits) - 1);
594 }
595 }
Tim Peters934c1a12002-07-02 22:24:50 +0000596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000597 if ( leftbits && !done ) {
598 PyErr_SetString(Incomplete,
599 "String has incomplete number of bytes");
600 PyBuffer_Release(&pascii);
601 Py_DECREF(rv);
602 return NULL;
603 }
604 if (_PyBytes_Resize(&rv,
605 (bin_data -
606 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
607 Py_DECREF(rv);
608 rv = NULL;
609 }
610 if (rv) {
611 PyObject *rrv = Py_BuildValue("Oi", rv, done);
612 PyBuffer_Release(&pascii);
613 Py_DECREF(rv);
614 return rrv;
615 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000616
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000617 PyBuffer_Release(&pascii);
618 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000619}
620
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000621PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
Jack Jansen72781191995-08-07 14:34:15 +0000622
623static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000624binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000625{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000626 Py_buffer pbuf;
627 unsigned char *in_data, *out_data;
628 PyObject *rv;
629 unsigned char ch;
630 Py_ssize_t in, inend, len;
Tim Peters934c1a12002-07-02 22:24:50 +0000631
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000632 if ( !PyArg_ParseTuple(args, "y*:rlecode_hqx", &pbuf) )
633 return NULL;
634 in_data = pbuf.buf;
635 len = pbuf.len;
Jack Jansen72781191995-08-07 14:34:15 +0000636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000637 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000639 if (len > PY_SSIZE_T_MAX / 2 - 2) {
640 PyBuffer_Release(&pbuf);
641 return PyErr_NoMemory();
642 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000643
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000644 /* Worst case: output is twice as big as input (fixed later) */
645 if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) {
646 PyBuffer_Release(&pbuf);
647 return NULL;
648 }
649 out_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 for( in=0; in<len; in++) {
652 ch = in_data[in];
653 if ( ch == RUNCHAR ) {
654 /* RUNCHAR. Escape it. */
655 *out_data++ = RUNCHAR;
656 *out_data++ = 0;
657 } else {
658 /* Check how many following are the same */
659 for(inend=in+1;
660 inend<len && in_data[inend] == ch &&
661 inend < in+255;
662 inend++) ;
663 if ( inend - in > 3 ) {
664 /* More than 3 in a row. Output RLE. */
665 *out_data++ = ch;
666 *out_data++ = RUNCHAR;
Antoine Pitrou40455752010-08-15 18:51:10 +0000667 *out_data++ = (unsigned char) (inend-in);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 in = inend-1;
669 } else {
670 /* Less than 3. Output the byte itself */
671 *out_data++ = ch;
672 }
673 }
674 }
675 if (_PyBytes_Resize(&rv,
676 (out_data -
677 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
678 Py_DECREF(rv);
679 rv = NULL;
680 }
681 PyBuffer_Release(&pbuf);
682 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000683}
684
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000685PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
Tim Peters934c1a12002-07-02 22:24:50 +0000686
Jack Jansen72781191995-08-07 14:34:15 +0000687static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000688binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000689{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 Py_buffer pbin;
691 unsigned char *ascii_data, *bin_data;
692 int leftbits = 0;
693 unsigned char this_ch;
694 unsigned int leftchar = 0;
695 PyObject *rv;
696 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000697
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000698 if ( !PyArg_ParseTuple(args, "y*:b2a_hqx", &pbin) )
699 return NULL;
700 bin_data = pbin.buf;
701 len = pbin.len;
Jack Jansen72781191995-08-07 14:34:15 +0000702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000703 assert(len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000705 if (len > PY_SSIZE_T_MAX / 2 - 2) {
706 PyBuffer_Release(&pbin);
707 return PyErr_NoMemory();
708 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000709
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000710 /* Allocate a buffer that is at least large enough */
711 if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) {
712 PyBuffer_Release(&pbin);
713 return NULL;
714 }
715 ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 for( ; len > 0 ; len--, bin_data++ ) {
718 /* Shift into our buffer, and output any 6bits ready */
719 leftchar = (leftchar << 8) | *bin_data;
720 leftbits += 8;
721 while ( leftbits >= 6 ) {
722 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
723 leftbits -= 6;
724 *ascii_data++ = table_b2a_hqx[this_ch];
725 }
726 }
727 /* Output a possible runt byte */
728 if ( leftbits ) {
729 leftchar <<= (6-leftbits);
730 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
731 }
732 if (_PyBytes_Resize(&rv,
733 (ascii_data -
734 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
735 Py_DECREF(rv);
736 rv = NULL;
737 }
738 PyBuffer_Release(&pbin);
739 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000740}
741
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000742PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
Tim Peters934c1a12002-07-02 22:24:50 +0000743
Jack Jansen72781191995-08-07 14:34:15 +0000744static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000745binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000746{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000747 Py_buffer pin;
748 unsigned char *in_data, *out_data;
749 unsigned char in_byte, in_repeat;
750 PyObject *rv;
751 Py_ssize_t in_len, out_len, out_len_left;
Jack Jansen72781191995-08-07 14:34:15 +0000752
Florent Xiclunaf1046ca2010-07-27 21:20:15 +0000753 if ( !PyArg_ParseTuple(args, "y*:rledecode_hqx", &pin) )
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000754 return NULL;
755 in_data = pin.buf;
756 in_len = pin.len;
Jack Jansen72781191995-08-07 14:34:15 +0000757
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000758 assert(in_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000759
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000760 /* Empty string is a special case */
761 if ( in_len == 0 ) {
762 PyBuffer_Release(&pin);
763 return PyBytes_FromStringAndSize("", 0);
764 }
765 else if (in_len > PY_SSIZE_T_MAX / 2) {
766 PyBuffer_Release(&pin);
767 return PyErr_NoMemory();
768 }
Jack Jansen72781191995-08-07 14:34:15 +0000769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000770 /* Allocate a buffer of reasonable size. Resized when needed */
771 out_len = in_len*2;
772 if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL ) {
773 PyBuffer_Release(&pin);
774 return NULL;
775 }
776 out_len_left = out_len;
777 out_data = (unsigned char *)PyBytes_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000779 /*
780 ** We need two macros here to get/put bytes and handle
781 ** end-of-buffer for input and output strings.
782 */
Jack Jansen72781191995-08-07 14:34:15 +0000783#define INBYTE(b) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000784 do { \
785 if ( --in_len < 0 ) { \
786 PyErr_SetString(Incomplete, ""); \
787 Py_DECREF(rv); \
788 PyBuffer_Release(&pin); \
789 return NULL; \
790 } \
791 b = *in_data++; \
792 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000793
Jack Jansen72781191995-08-07 14:34:15 +0000794#define OUTBYTE(b) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000795 do { \
796 if ( --out_len_left < 0 ) { \
797 if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
798 if (_PyBytes_Resize(&rv, 2*out_len) < 0) \
799 { Py_DECREF(rv); PyBuffer_Release(&pin); return NULL; } \
800 out_data = (unsigned char *)PyBytes_AS_STRING(rv) \
801 + out_len; \
802 out_len_left = out_len-1; \
803 out_len = out_len * 2; \
804 } \
805 *out_data++ = b; \
806 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 /*
809 ** Handle first byte separately (since we have to get angry
810 ** in case of an orphaned RLE code).
811 */
812 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000813
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000814 if (in_byte == RUNCHAR) {
815 INBYTE(in_repeat);
816 if (in_repeat != 0) {
817 /* Note Error, not Incomplete (which is at the end
818 ** of the string only). This is a programmer error.
819 */
820 PyErr_SetString(Error, "Orphaned RLE code at start");
821 PyBuffer_Release(&pin);
822 Py_DECREF(rv);
823 return NULL;
824 }
825 OUTBYTE(RUNCHAR);
826 } else {
827 OUTBYTE(in_byte);
828 }
Tim Peters934c1a12002-07-02 22:24:50 +0000829
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000830 while( in_len > 0 ) {
831 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000832
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000833 if (in_byte == RUNCHAR) {
834 INBYTE(in_repeat);
835 if ( in_repeat == 0 ) {
836 /* Just an escaped RUNCHAR value */
837 OUTBYTE(RUNCHAR);
838 } else {
839 /* Pick up value and output a sequence of it */
840 in_byte = out_data[-1];
841 while ( --in_repeat > 0 )
842 OUTBYTE(in_byte);
843 }
844 } else {
845 /* Normal byte */
846 OUTBYTE(in_byte);
847 }
848 }
849 if (_PyBytes_Resize(&rv,
850 (out_data -
851 (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
852 Py_DECREF(rv);
853 rv = NULL;
854 }
855 PyBuffer_Release(&pin);
856 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000857}
858
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000859PyDoc_STRVAR(doc_crc_hqx,
860"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
Jack Jansen72781191995-08-07 14:34:15 +0000861
862static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000863binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000864{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000865 Py_buffer pin;
866 unsigned char *bin_data;
867 unsigned int crc;
868 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000869
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 if ( !PyArg_ParseTuple(args, "y*i:crc_hqx", &pin, &crc) )
871 return NULL;
872 bin_data = pin.buf;
873 len = pin.len;
Jack Jansen72781191995-08-07 14:34:15 +0000874
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000875 while(len-- > 0) {
876 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
877 }
Jack Jansen72781191995-08-07 14:34:15 +0000878
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000879 PyBuffer_Release(&pin);
880 return Py_BuildValue("i", crc);
Jack Jansen72781191995-08-07 14:34:15 +0000881}
882
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000883PyDoc_STRVAR(doc_crc32,
884"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000885
Christian Heimes1dc54002008-03-24 02:19:29 +0000886#ifdef USE_ZLIB_CRC32
887/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
888static PyObject *
889binascii_crc32(PyObject *self, PyObject *args)
890{
Christian Heimescc47b052008-03-25 14:56:36 +0000891 unsigned int crc32val = 0; /* crc32(0L, Z_NULL, 0) */
Martin v. Löwis423be952008-08-13 15:53:07 +0000892 Py_buffer pbuf;
Christian Heimes1dc54002008-03-24 02:19:29 +0000893 Byte *buf;
Neal Norwitz4027bf82008-03-24 04:59:05 +0000894 Py_ssize_t len;
Christian Heimescc47b052008-03-25 14:56:36 +0000895 int signed_val;
896
Martin v. Löwis15b16a32008-12-02 06:00:15 +0000897 if (!PyArg_ParseTuple(args, "y*|I:crc32", &pbuf, &crc32val))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000898 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000899 buf = (Byte*)pbuf.buf;
900 len = pbuf.len;
Christian Heimescc47b052008-03-25 14:56:36 +0000901 signed_val = crc32(crc32val, buf, len);
Martin v. Löwis423be952008-08-13 15:53:07 +0000902 PyBuffer_Release(&pbuf);
Christian Heimescc47b052008-03-25 14:56:36 +0000903 return PyLong_FromUnsignedLong(signed_val & 0xffffffffU);
Christian Heimes1dc54002008-03-24 02:19:29 +0000904}
905#else /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000906/* Crc - 32 BIT ANSI X3.66 CRC checksum files
907 Also known as: ISO 3307
908**********************************************************************|
909* *|
910* Demonstration program to compute the 32-bit CRC used as the frame *|
911* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
912* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
913* protocol). The 32-bit FCS was added via the Federal Register, *|
914* 1 June 1982, p.23798. I presume but don't know for certain that *|
915* this polynomial is or will be included in CCITT V.41, which *|
916* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
917* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
918* errors by a factor of 10^-5 over 16-bit FCS. *|
919* *|
920**********************************************************************|
921
922 Copyright (C) 1986 Gary S. Brown. You may use this program, or
923 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000924
Tim Peters934c1a12002-07-02 22:24:50 +0000925 First, the polynomial itself and its table of feedback terms. The
926 polynomial is
927 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
928 Note that we take it "backwards" and put the highest-order term in
929 the lowest-order bit. The X^32 term is "implied"; the LSB is the
930 X^31 term, etc. The X^0 term (usually shown as "+1") results in
931 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000932
Tim Peters934c1a12002-07-02 22:24:50 +0000933 Note that the usual hardware shift register implementation, which
934 is what we're using (we're merely optimizing it by doing eight-bit
935 chunks at a time) shifts bits into the lowest-order term. In our
936 implementation, that means shifting towards the right. Why do we
937 do it this way? Because the calculated CRC must be transmitted in
938 order from highest-order term to lowest-order term. UARTs transmit
939 characters in order from LSB to MSB. By storing the CRC this way,
940 we hand it to the UART in the order low-byte to high-byte; the UART
    sends each low-bit to high-bit; and the result is transmission bit
942 by bit from highest- to lowest-order term without requiring any bit
943 shuffling on our part. Reception works similarly.
944
945 The feedback terms table consists of 256, 32-bit entries. Notes:
946
947 1. The table can be generated at runtime if desired; code to do so
948 is shown later. It might not be obvious, but the feedback
949 terms simply represent the results of eight shift/xor opera-
950 tions for all combinations of data and CRC register values.
951
952 2. The CRC accumulation logic is the same for all CRC polynomials,
953 be they sixteen or thirty-two bits wide. You simply choose the
954 appropriate table. Alternatively, because the table can be
955 generated at runtime, you can start by generating the table for
956 the polynomial in question and use exactly the same "updcrc",
957 if your application needn't simultaneously handle two CRC
958 polynomials. (Note, however, that XMODEM is strange.)
959
960 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
961 of course, 32-bit entries work OK if the high 16 bits are zero.
962
963 4. The values must be right-shifted by eight bits by the "updcrc"
964 logic; the shift must be unsigned (bring in zeroes). On some
965 hardware you could probably optimize the shift in assembler by
966 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000967********************************************************************/
968
/* Feedback terms for the reflected CRC-32 polynomial 0xedb88320, one entry
 * per possible byte value.  The table is never written, so it is declared
 * const. */
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
1023
1024static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +00001025binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001026{ /* By Jim Ahlstrom; All rights transferred to CNRI */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001027 Py_buffer pbin;
1028 unsigned char *bin_data;
1029 unsigned int crc = 0; /* initial value of CRC */
1030 Py_ssize_t len;
1031 unsigned int result;
Tim Peters934c1a12002-07-02 22:24:50 +00001032
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001033 if ( !PyArg_ParseTuple(args, "y*|I:crc32", &pbin, &crc) )
1034 return NULL;
1035 bin_data = pbin.buf;
1036 len = pbin.len;
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001037
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 crc = ~ crc;
1039 while (len-- > 0) {
1040 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1041 /* Note: (crc >> 8) MUST zero fill on left */
1042 }
Tim Petersa98011c2002-07-02 20:20:08 +00001043
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001044 result = (crc ^ 0xFFFFFFFF);
1045 PyBuffer_Release(&pbin);
1046 return PyLong_FromUnsignedLong(result & 0xffffffff);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001047}
Christian Heimes1dc54002008-03-24 02:19:29 +00001048#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001049
Barry Warsawe977c212000-08-15 06:07:13 +00001050
1051static PyObject *
1052binascii_hexlify(PyObject *self, PyObject *args)
1053{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001054 Py_buffer parg;
1055 char* argbuf;
1056 Py_ssize_t arglen;
1057 PyObject *retval;
1058 char* retbuf;
1059 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +00001060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001061 if (!PyArg_ParseTuple(args, "y*:b2a_hex", &parg))
1062 return NULL;
1063 argbuf = parg.buf;
1064 arglen = parg.len;
Barry Warsawe977c212000-08-15 06:07:13 +00001065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 assert(arglen >= 0);
1067 if (arglen > PY_SSIZE_T_MAX / 2) {
1068 PyBuffer_Release(&parg);
1069 return PyErr_NoMemory();
1070 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 retval = PyBytes_FromStringAndSize(NULL, arglen*2);
1073 if (!retval) {
1074 PyBuffer_Release(&parg);
1075 return NULL;
1076 }
1077 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001079 /* make hex version of string, taken from shamodule.c */
1080 for (i=j=0; i < arglen; i++) {
Victor Stinnerf5cff562011-10-14 02:13:11 +02001081 unsigned char c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 c = (argbuf[i] >> 4) & 0xf;
Victor Stinnerf5cff562011-10-14 02:13:11 +02001083 retbuf[j++] = Py_hexdigits[c];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 c = argbuf[i] & 0xf;
Victor Stinnerf5cff562011-10-14 02:13:11 +02001085 retbuf[j++] = Py_hexdigits[c];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 }
1087 PyBuffer_Release(&parg);
1088 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +00001089}
1090
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001091PyDoc_STRVAR(doc_hexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001092"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
1093\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001094This function is also available as \"hexlify()\".");
Barry Warsawe977c212000-08-15 06:07:13 +00001095
1096
/*
 * Return the numeric value (0..15) of the hexadecimal digit `c`, accepting
 * both upper- and lower-case letters, or -1 if `c` is not a hex digit.
 *
 * Only explicit ASCII range comparisons are used.  The C library's
 * isdigit() may be locale dependent and accept bytes outside '0'..'9',
 * which would make `c - '0'` yield a bogus value instead of -1.
 */
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
1110
1111
1112static PyObject *
1113binascii_unhexlify(PyObject *self, PyObject *args)
1114{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 Py_buffer parg;
1116 char* argbuf;
1117 Py_ssize_t arglen;
1118 PyObject *retval;
1119 char* retbuf;
1120 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +00001121
Florent Xiclunaf1046ca2010-07-27 21:20:15 +00001122 if (!PyArg_ParseTuple(args, "y*:a2b_hex", &parg))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 return NULL;
1124 argbuf = parg.buf;
1125 arglen = parg.len;
Barry Warsawe977c212000-08-15 06:07:13 +00001126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001127 assert(arglen >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001128
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 /* XXX What should we do about strings with an odd length? Should
1130 * we add an implicit leading zero, or a trailing zero? For now,
1131 * raise an exception.
1132 */
1133 if (arglen % 2) {
1134 PyBuffer_Release(&parg);
1135 PyErr_SetString(Error, "Odd-length string");
1136 return NULL;
1137 }
Barry Warsawe977c212000-08-15 06:07:13 +00001138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
1140 if (!retval) {
1141 PyBuffer_Release(&parg);
1142 return NULL;
1143 }
1144 retbuf = PyBytes_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 for (i=j=0; i < arglen; i += 2) {
1147 int top = to_int(Py_CHARMASK(argbuf[i]));
1148 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1149 if (top == -1 || bot == -1) {
1150 PyErr_SetString(Error,
1151 "Non-hexadecimal digit found");
1152 goto finally;
1153 }
1154 retbuf[j++] = (top << 4) + bot;
1155 }
1156 PyBuffer_Release(&parg);
1157 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +00001158
1159 finally:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 PyBuffer_Release(&parg);
1161 Py_DECREF(retval);
1162 return NULL;
Barry Warsawe977c212000-08-15 06:07:13 +00001163}
1164
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001165PyDoc_STRVAR(doc_unhexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001166"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1167\n\
1168hexstr must contain an even number of hex digits (upper or lower case).\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001169This function is also available as \"unhexlify()\"");
Barry Warsawe977c212000-08-15 06:07:13 +00001170
/* Value of each 7-bit ASCII character as a hexadecimal digit, or -1 when
 * the character is not one.  Read-only, hence const. */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up a hex digit value.  The table has only 128 entries, so callers
 * must make sure c is below 128 before using this macro. */
#define hexval(c) (table_hex[(unsigned int)(c)])

/* Longest encoded line produced by the quoted-printable encoder. */
#define MAXLINESIZE 76
1185
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001186PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001187
Tim Peters934c1a12002-07-02 22:24:50 +00001188static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001189binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1190{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 Py_ssize_t in, out;
1192 char ch;
1193 Py_buffer pdata;
1194 unsigned char *data, *odata;
1195 Py_ssize_t datalen = 0;
1196 PyObject *rv;
1197 static char *kwlist[] = {"data", "header", NULL};
1198 int header = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001199
Florent Xiclunaf1046ca2010-07-27 21:20:15 +00001200 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i", kwlist, &pdata,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 &header))
1202 return NULL;
1203 data = pdata.buf;
1204 datalen = pdata.len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001205
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 /* We allocate the output same size as input, this is overkill.
1207 * The previous implementation used calloc() so we'll zero out the
1208 * memory here too, since PyMem_Malloc() does not guarantee that.
1209 */
1210 odata = (unsigned char *) PyMem_Malloc(datalen);
1211 if (odata == NULL) {
1212 PyBuffer_Release(&pdata);
1213 PyErr_NoMemory();
1214 return NULL;
1215 }
1216 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001217
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001218 in = out = 0;
1219 while (in < datalen) {
1220 if (data[in] == '=') {
1221 in++;
1222 if (in >= datalen) break;
1223 /* Soft line breaks */
1224 if ((data[in] == '\n') || (data[in] == '\r')) {
1225 if (data[in] != '\n') {
1226 while (in < datalen && data[in] != '\n') in++;
1227 }
1228 if (in < datalen) in++;
1229 }
1230 else if (data[in] == '=') {
1231 /* broken case from broken python qp */
1232 odata[out++] = '=';
1233 in++;
1234 }
1235 else if (((data[in] >= 'A' && data[in] <= 'F') ||
1236 (data[in] >= 'a' && data[in] <= 'f') ||
1237 (data[in] >= '0' && data[in] <= '9')) &&
1238 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1239 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1240 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1241 /* hexval */
1242 ch = hexval(data[in]) << 4;
1243 in++;
1244 ch |= hexval(data[in]);
1245 in++;
1246 odata[out++] = ch;
1247 }
1248 else {
1249 odata[out++] = '=';
1250 }
1251 }
1252 else if (header && data[in] == '_') {
1253 odata[out++] = ' ';
1254 in++;
1255 }
1256 else {
1257 odata[out] = data[in];
1258 in++;
1259 out++;
1260 }
1261 }
1262 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1263 PyBuffer_Release(&pdata);
1264 PyMem_Free(odata);
1265 return NULL;
1266 }
1267 PyBuffer_Release(&pdata);
1268 PyMem_Free(odata);
1269 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001270}
1271
/*
 * Write the two upper-case hexadecimal digits of `ch` to s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.  Always
 * returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0f];
    s[1] = digits[ch & 0x0f];
    return 0;
}
1282
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001283PyDoc_STRVAR(doc_b2a_qp,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001284"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1285 Encode a string using quoted-printable encoding. \n\
1286\n\
1287On encoding, when istext is set, newlines are not encoded, and white \n\
1288space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001289both encoded. When quotetabs is set, space and tabs are encoded.");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001290
1291/* XXX: This is ridiculously complicated to be backward compatible
1292 * (mostly) with the quopri module. It doesn't re-create the quopri
1293 * module bug where text ending in CRLF has the CR encoded */
Tim Peters934c1a12002-07-02 22:24:50 +00001294static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001295binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1296{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 Py_ssize_t in, out;
1298 Py_buffer pdata;
1299 unsigned char *data, *odata;
1300 Py_ssize_t datalen = 0, odatalen = 0;
1301 PyObject *rv;
1302 unsigned int linelen = 0;
1303 static char *kwlist[] = {"data", "quotetabs", "istext",
1304 "header", NULL};
1305 int istext = 1;
1306 int quotetabs = 0;
1307 int header = 0;
1308 unsigned char ch;
1309 int crlf = 0;
1310 unsigned char *p;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001311
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|iii", kwlist, &pdata,
1313 &quotetabs, &istext, &header))
1314 return NULL;
1315 data = pdata.buf;
1316 datalen = pdata.len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001317
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001318 /* See if this string is using CRLF line ends */
1319 /* XXX: this function has the side effect of converting all of
1320 * the end of lines to be the same depending on this detection
1321 * here */
1322 p = (unsigned char *) memchr(data, '\n', datalen);
1323 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1324 crlf = 1;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001325
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001326 /* First, scan to see how many characters need to be encoded */
1327 in = 0;
1328 while (in < datalen) {
1329 if ((data[in] > 126) ||
1330 (data[in] == '=') ||
1331 (header && data[in] == '_') ||
1332 ((data[in] == '.') && (linelen == 0) &&
1333 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1334 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1335 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1336 ((data[in] < 33) &&
1337 (data[in] != '\r') && (data[in] != '\n') &&
Senthil Kumaran922e9042010-11-09 09:59:13 +00001338 (quotetabs || ((data[in] != '\t') && (data[in] != ' ')))))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001339 {
1340 if ((linelen + 3) >= MAXLINESIZE) {
1341 linelen = 0;
1342 if (crlf)
1343 odatalen += 3;
1344 else
1345 odatalen += 2;
1346 }
1347 linelen += 3;
1348 odatalen += 3;
1349 in++;
1350 }
1351 else {
1352 if (istext &&
1353 ((data[in] == '\n') ||
1354 ((in+1 < datalen) && (data[in] == '\r') &&
1355 (data[in+1] == '\n'))))
1356 {
1357 linelen = 0;
1358 /* Protect against whitespace on end of line */
1359 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1360 odatalen += 2;
1361 if (crlf)
1362 odatalen += 2;
1363 else
1364 odatalen += 1;
1365 if (data[in] == '\r')
1366 in += 2;
1367 else
1368 in++;
1369 }
1370 else {
1371 if ((in + 1 != datalen) &&
1372 (data[in+1] != '\n') &&
1373 (linelen + 1) >= MAXLINESIZE) {
1374 linelen = 0;
1375 if (crlf)
1376 odatalen += 3;
1377 else
1378 odatalen += 2;
1379 }
1380 linelen++;
1381 odatalen++;
1382 in++;
1383 }
1384 }
1385 }
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 /* We allocate the output same size as input, this is overkill.
1388 * The previous implementation used calloc() so we'll zero out the
1389 * memory here too, since PyMem_Malloc() does not guarantee that.
1390 */
1391 odata = (unsigned char *) PyMem_Malloc(odatalen);
1392 if (odata == NULL) {
1393 PyBuffer_Release(&pdata);
1394 PyErr_NoMemory();
1395 return NULL;
1396 }
1397 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 in = out = linelen = 0;
1400 while (in < datalen) {
1401 if ((data[in] > 126) ||
1402 (data[in] == '=') ||
1403 (header && data[in] == '_') ||
1404 ((data[in] == '.') && (linelen == 0) &&
1405 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1406 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1407 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1408 ((data[in] < 33) &&
1409 (data[in] != '\r') && (data[in] != '\n') &&
1410 (quotetabs ||
1411 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1412 {
1413 if ((linelen + 3 )>= MAXLINESIZE) {
1414 odata[out++] = '=';
1415 if (crlf) odata[out++] = '\r';
1416 odata[out++] = '\n';
1417 linelen = 0;
1418 }
1419 odata[out++] = '=';
1420 to_hex(data[in], &odata[out]);
1421 out += 2;
1422 in++;
1423 linelen += 3;
1424 }
1425 else {
1426 if (istext &&
1427 ((data[in] == '\n') ||
1428 ((in+1 < datalen) && (data[in] == '\r') &&
1429 (data[in+1] == '\n'))))
1430 {
1431 linelen = 0;
1432 /* Protect against whitespace on end of line */
1433 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1434 ch = odata[out-1];
1435 odata[out-1] = '=';
1436 to_hex(ch, &odata[out]);
1437 out += 2;
1438 }
Tim Peters934c1a12002-07-02 22:24:50 +00001439
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 if (crlf) odata[out++] = '\r';
1441 odata[out++] = '\n';
1442 if (data[in] == '\r')
1443 in += 2;
1444 else
1445 in++;
1446 }
1447 else {
1448 if ((in + 1 != datalen) &&
1449 (data[in+1] != '\n') &&
1450 (linelen + 1) >= MAXLINESIZE) {
1451 odata[out++] = '=';
1452 if (crlf) odata[out++] = '\r';
1453 odata[out++] = '\n';
1454 linelen = 0;
1455 }
1456 linelen++;
1457 if (header && data[in] == ' ') {
1458 odata[out++] = '_';
1459 in++;
1460 }
1461 else {
1462 odata[out++] = data[in++];
1463 }
1464 }
1465 }
1466 }
1467 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1468 PyBuffer_Release(&pdata);
1469 PyMem_Free(odata);
1470 return NULL;
1471 }
1472 PyBuffer_Release(&pdata);
1473 PyMem_Free(odata);
1474 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001475}
Barry Warsawe977c212000-08-15 06:07:13 +00001476
Jack Jansen72781191995-08-07 14:34:15 +00001477/* List of functions defined in the module */
1478
1479static struct PyMethodDef binascii_module_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1481 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1482 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1483 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1484 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1485 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1486 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1487 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1488 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1489 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1490 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1491 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1492 doc_rledecode_hqx},
1493 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1494 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
1495 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1496 doc_a2b_qp},
1497 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1498 doc_b2a_qp},
1499 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001500};
1501
1502
/* Initialization function for the module (*must* be called PyInit_binascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001504PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001505
Martin v. Löwis1a214512008-06-11 05:26:20 +00001506
1507static struct PyModuleDef binasciimodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 PyModuleDef_HEAD_INIT,
1509 "binascii",
1510 doc_binascii,
1511 -1,
1512 binascii_module_methods,
1513 NULL,
1514 NULL,
1515 NULL,
1516 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001517};
1518
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001519PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001520PyInit_binascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001521{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001522 PyObject *m, *d;
Jack Jansen72781191995-08-07 14:34:15 +00001523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 /* Create the module and add the functions */
1525 m = PyModule_Create(&binasciimodule);
1526 if (m == NULL)
1527 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +00001528
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 d = PyModule_GetDict(m);
Jack Jansen72781191995-08-07 14:34:15 +00001530
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1532 PyDict_SetItemString(d, "Error", Error);
1533 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1534 PyDict_SetItemString(d, "Incomplete", Incomplete);
1535 if (PyErr_Occurred()) {
1536 Py_DECREF(m);
1537 m = NULL;
1538 }
1539 return m;
Jack Jansen72781191995-08-07 14:34:15 +00001540}