blob: 1e785fa8faaa5fd43a36f722f0f8791b5b7454c9 [file] [log] [blame]
Jack Jansen72781191995-08-07 14:34:15 +00001/*
2** Routines to represent binary data in ASCII and vice-versa
3**
4** This module currently supports the following encodings:
5** uuencode:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00006** each line encodes 45 bytes (except possibly the last)
7** First char encodes (binary) length, rest data
8** each char encodes 6 bits, as follows:
9** binary: 01234567 abcdefgh ijklmnop
10** ascii: 012345 67abcd efghij klmnop
11** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12** short binary data is zero-extended (so the bits are always in the
13** right place), this does *not* reflect in the length.
Jack Jansen84bbc2e1995-10-04 16:38:44 +000014** base64:
15** Line breaks are insignificant, but lines are at most 76 chars
16** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17** is done via a table.
18** Short binary data is filled (in ASCII) with '='.
Jack Jansen72781191995-08-07 14:34:15 +000019** hqx:
Antoine Pitrouc83ea132010-05-09 14:46:46 +000020** File starts with introductory text, real data starts and ends
21** with colons.
22** Data consists of three similar parts: info, datafork, resourcefork.
23** Each part is protected (at the end) with a 16-bit crc
24** The binary data is run-length encoded, and then ascii-fied:
25** binary: 01234567 abcdefgh ijklmnop
26** ascii: 012345 67abcd efghij klmnop
27** ASCII encoding is table-driven, see the code.
28** Short binary data results in the runt ascii-byte being output with
29** the bits in the right place.
Jack Jansen72781191995-08-07 14:34:15 +000030**
31** While I was reading dozens of programs that encode or decode the formats
32** here (documentation? hihi:-) I have formulated Jansen's Observation:
33**
Antoine Pitrouc83ea132010-05-09 14:46:46 +000034** Programs that encode binary data in ASCII are written in
35** such a style that they are as unreadable as possible. Devices used
36** include unnecessary global variables, burying important tables
37** in unrelated sourcefiles, putting functions in include files,
38** using seemingly-descriptive variable names for different purposes,
39** calls to empty subroutines and a host of others.
Jack Jansen72781191995-08-07 14:34:15 +000040**
41** I have attempted to break with this tradition, but I guess that that
42** does make the performance sub-optimal. Oh well, too bad...
43**
44** Jack Jansen, CWI, July 1995.
Tim Peters934c1a12002-07-02 22:24:50 +000045**
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000046** Added support for quoted-printable encoding, based on rfc 1521 et al
Tim Peters934c1a12002-07-02 22:24:50 +000047** quoted-printable encoding specifies that non printable characters (anything
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000048** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49** of the character. It also specifies some other behavior to enable 8bit data
Tim Peters934c1a12002-07-02 22:24:50 +000050** in a mail message with little difficulty (maximum line sizes, protecting
51** some cases of whitespace, etc).
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000052**
53** Brandon Long, September 2001.
Jack Jansen72781191995-08-07 14:34:15 +000054*/
55
Thomas Wouters9c544482006-03-01 21:59:44 +000056#define PY_SSIZE_T_CLEAN
Jack Jansen72781191995-08-07 14:34:15 +000057
58#include "Python.h"
Gregory P. Smith440ca772008-03-24 00:08:01 +000059#ifdef USE_ZLIB_CRC32
60#include "zlib.h"
61#endif
Jack Jansen72781191995-08-07 14:34:15 +000062
63static PyObject *Error;
64static PyObject *Incomplete;
65
/*
** hqx lookup table, ascii->binary.
**
** Indexed by the input byte.  An entry is either the 6-bit value the
** character stands for (0x00..0x3F) or one of the sentinels below.
** The table is read-only lookup data, hence const.
*/

#define RUNCHAR 0x90

#define DONE 0x7F   /* ':' -- terminates the data */
#define SKIP 0x7E   /* '\n' and '\r' -- ignored */
#define FAIL 0x7D   /* any character that is not part of the alphabet */

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*       (     )     *     +     ,     -     .     /    */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*       0     1     2     3     4     5     6     7    */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*       8     9     :     ;     <     =     >     ?    */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       @     A     B     C     D     E     F     G    */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*       H     I     J     K     L     M     N     O    */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*       P     Q     R     S     T     U     V     W    */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*       X     Y     Z     [     \     ]     ^     _    */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*       `     a     b     c     d     e     f     g    */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*       h     i     j     k     l     m     n     o    */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*       p     q     r     s     t     u     v     w    */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       x     y     z     {     |     }     ~    ^?    */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
126
/* hqx alphabet, binary->ascii: entry i is the character that encodes the
** 6-bit value i.  Read-only lookup data, hence const.
*/
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
Jack Jansen72781191995-08-07 14:34:15 +0000129
/* base64 lookup table, ascii->binary, covering the 7-bit ASCII range only
** (128 entries; callers must not index it with bytes above 0x7f).
** -1 marks a character outside the alphabet.  Read-only lookup data,
** hence const.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
140
141#define BASE64_PAD '='
Guido van Rossum355bc0c2001-10-30 03:00:52 +0000142
143/* Max binary chunk size; limited only by available memory */
Gregory P. Smith9d534572008-06-11 07:41:16 +0000144#define BASE64_MAXBIN (PY_SSIZE_T_MAX/2 - sizeof(PyStringObject) - 3)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000145
/* base64 alphabet, binary->ascii: entry i is the character that encodes
** the 6-bit value i.  Read-only lookup data, hence const.
*/
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000148
149
150
/* 16-bit CRC lookup table: entry i is the CRC of the single byte i for
** the polynomial 0x1021, processed most-significant bit first.
** Read-only lookup data, hence const.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
185
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000186PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
Jack Jansen72781191995-08-07 14:34:15 +0000187
188static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000189binascii_a2b_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000190{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 Py_buffer pascii;
192 unsigned char *ascii_data, *bin_data;
193 int leftbits = 0;
194 unsigned char this_ch;
195 unsigned int leftchar = 0;
196 PyObject *rv;
197 Py_ssize_t ascii_len, bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000198
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000199 if ( !PyArg_ParseTuple(args, "s*:a2b_uu", &pascii) )
200 return NULL;
201 ascii_data = pascii.buf;
202 ascii_len = pascii.len;
Jack Jansen72781191995-08-07 14:34:15 +0000203
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000204 assert(ascii_len >= 0);
Gregory P. Smith9d534572008-06-11 07:41:16 +0000205
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000206 /* First byte: binary data length (in bytes) */
207 bin_len = (*ascii_data++ - ' ') & 077;
208 ascii_len--;
Jack Jansen72781191995-08-07 14:34:15 +0000209
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000210 /* Allocate the buffer */
211 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) {
212 PyBuffer_Release(&pascii);
213 return NULL;
214 }
215 bin_data = (unsigned char *)PyString_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000216
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000217 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
218 /* XXX is it really best to add NULs if there's no more data */
219 this_ch = (ascii_len > 0) ? *ascii_data : 0;
220 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
221 /*
222 ** Whitespace. Assume some spaces got eaten at
223 ** end-of-line. (We check this later)
224 */
225 this_ch = 0;
226 } else {
227 /* Check the character for legality
228 ** The 64 in stead of the expected 63 is because
229 ** there are a few uuencodes out there that use
230 ** '`' as zero instead of space.
231 */
232 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
233 PyErr_SetString(Error, "Illegal char");
234 PyBuffer_Release(&pascii);
235 Py_DECREF(rv);
236 return NULL;
237 }
238 this_ch = (this_ch - ' ') & 077;
239 }
240 /*
241 ** Shift it in on the low end, and see if there's
242 ** a byte ready for output.
243 */
244 leftchar = (leftchar << 6) | (this_ch);
245 leftbits += 6;
246 if ( leftbits >= 8 ) {
247 leftbits -= 8;
248 *bin_data++ = (leftchar >> leftbits) & 0xff;
249 leftchar &= ((1 << leftbits) - 1);
250 bin_len--;
251 }
252 }
253 /*
254 ** Finally, check that if there's anything left on the line
255 ** that it's whitespace only.
256 */
257 while( ascii_len-- > 0 ) {
258 this_ch = *ascii_data++;
259 /* Extra '`' may be written as padding in some cases */
260 if ( this_ch != ' ' && this_ch != ' '+64 &&
261 this_ch != '\n' && this_ch != '\r' ) {
262 PyErr_SetString(Error, "Trailing garbage");
263 PyBuffer_Release(&pascii);
264 Py_DECREF(rv);
265 return NULL;
266 }
267 }
268 PyBuffer_Release(&pascii);
269 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000270}
271
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000272PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000273
Jack Jansen72781191995-08-07 14:34:15 +0000274static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000275binascii_b2a_uu(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000276{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000277 Py_buffer pbin;
278 unsigned char *ascii_data, *bin_data;
279 int leftbits = 0;
280 unsigned char this_ch;
281 unsigned int leftchar = 0;
282 PyObject *rv;
283 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000284
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000285 if ( !PyArg_ParseTuple(args, "s*:b2a_uu", &pbin) )
286 return NULL;
287 bin_data = pbin.buf;
288 bin_len = pbin.len;
289 if ( bin_len > 45 ) {
290 /* The 45 is a limit that appears in all uuencode's */
291 PyErr_SetString(Error, "At most 45 bytes at once");
292 PyBuffer_Release(&pbin);
293 return NULL;
294 }
Jack Jansen72781191995-08-07 14:34:15 +0000295
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000296 /* We're lazy and allocate to much (fixed up later) */
297 if ( (rv=PyString_FromStringAndSize(NULL, 2 + (bin_len+2)/3*4)) == NULL ) {
298 PyBuffer_Release(&pbin);
299 return NULL;
300 }
301 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000302
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000303 /* Store the length */
304 *ascii_data++ = ' ' + (bin_len & 077);
Tim Peters934c1a12002-07-02 22:24:50 +0000305
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000306 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
307 /* Shift the data (or padding) into our buffer */
308 if ( bin_len > 0 ) /* Data */
309 leftchar = (leftchar << 8) | *bin_data;
310 else /* Padding */
311 leftchar <<= 8;
312 leftbits += 8;
Jack Jansen72781191995-08-07 14:34:15 +0000313
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000314 /* See if there are 6-bit groups ready */
315 while ( leftbits >= 6 ) {
316 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
317 leftbits -= 6;
318 *ascii_data++ = this_ch + ' ';
319 }
320 }
321 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000322
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000323 /* rv is cleared on error */
324 (void)_PyString_Resize(&rv,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000325 (ascii_data -
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000326 (unsigned char *)PyString_AS_STRING(rv)));
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000327 PyBuffer_Release(&pbin);
328 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000329}
330
Guido van Rossum2db4f471999-10-19 19:05:14 +0000331
332static int
Thomas Woutersf98db652006-03-01 21:37:32 +0000333binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
Guido van Rossum2db4f471999-10-19 19:05:14 +0000334{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000335 /* Finds & returns the (num+1)th
336 ** valid character for base64, or -1 if none.
337 */
Guido van Rossum2db4f471999-10-19 19:05:14 +0000338
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000339 int ret = -1;
340 unsigned char c, b64val;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000341
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000342 while ((slen > 0) && (ret == -1)) {
343 c = *s;
344 b64val = table_a2b_base64[c & 0x7f];
345 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
346 if (num == 0)
347 ret = *s;
348 num--;
349 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000350
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000351 s++;
352 slen--;
353 }
354 return ret;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000355}
356
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000357PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000358
359static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000360binascii_a2b_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000361{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000362 Py_buffer pascii;
363 unsigned char *ascii_data, *bin_data;
364 int leftbits = 0;
365 unsigned char this_ch;
366 unsigned int leftchar = 0;
367 PyObject *rv;
368 Py_ssize_t ascii_len, bin_len;
369 int quad_pos = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000370
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000371 if ( !PyArg_ParseTuple(args, "s*:a2b_base64", &pascii) )
372 return NULL;
373 ascii_data = pascii.buf;
374 ascii_len = pascii.len;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000375
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000376 assert(ascii_len >= 0);
Gregory P. Smith9d534572008-06-11 07:41:16 +0000377
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000378 if (ascii_len > PY_SSIZE_T_MAX - 3) {
379 PyBuffer_Release(&pascii);
380 return PyErr_NoMemory();
381 }
Gregory P. Smith9d534572008-06-11 07:41:16 +0000382
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000383 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000384
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000385 /* Allocate the buffer */
386 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) {
387 PyBuffer_Release(&pascii);
388 return NULL;
389 }
390 bin_data = (unsigned char *)PyString_AS_STRING(rv);
391 bin_len = 0;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000392
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000393 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
394 this_ch = *ascii_data;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000395
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000396 if (this_ch > 0x7f ||
397 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
398 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000399
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000400 /* Check for pad sequences and ignore
401 ** the invalid ones.
402 */
403 if (this_ch == BASE64_PAD) {
404 if ( (quad_pos < 2) ||
405 ((quad_pos == 2) &&
406 (binascii_find_valid(ascii_data, ascii_len, 1)
407 != BASE64_PAD)) )
408 {
409 continue;
410 }
411 else {
412 /* A pad sequence means no more input.
413 ** We've already interpreted the data
414 ** from the quad at this point.
415 */
416 leftbits = 0;
417 break;
418 }
419 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000420
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000421 this_ch = table_a2b_base64[*ascii_data];
422 if ( this_ch == (unsigned char) -1 )
423 continue;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000424
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000425 /*
426 ** Shift it in on the low end, and see if there's
427 ** a byte ready for output.
428 */
429 quad_pos = (quad_pos + 1) & 0x03;
430 leftchar = (leftchar << 6) | (this_ch);
431 leftbits += 6;
Guido van Rossum2db4f471999-10-19 19:05:14 +0000432
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000433 if ( leftbits >= 8 ) {
434 leftbits -= 8;
435 *bin_data++ = (leftchar >> leftbits) & 0xff;
436 bin_len++;
437 leftchar &= ((1 << leftbits) - 1);
438 }
439 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000440
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000441 if (leftbits != 0) {
442 PyBuffer_Release(&pascii);
443 PyErr_SetString(Error, "Incorrect padding");
444 Py_DECREF(rv);
445 return NULL;
446 }
Guido van Rossum2db4f471999-10-19 19:05:14 +0000447
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000448 /* And set string size correctly. If the result string is empty
449 ** (because the input was all invalid) return the shared empty
450 ** string instead; _PyString_Resize() won't do this for us.
451 */
452 if (bin_len > 0) {
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000453 /* rv is cleared on error */
454 (void)_PyString_Resize(&rv, bin_len);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000455 }
456 else {
457 Py_DECREF(rv);
458 rv = PyString_FromStringAndSize("", 0);
459 }
460 PyBuffer_Release(&pascii);
461 return rv;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000462}
463
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000464PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
Tim Peters934c1a12002-07-02 22:24:50 +0000465
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000466static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000467binascii_b2a_base64(PyObject *self, PyObject *args)
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000468{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000469 Py_buffer pbuf;
470 unsigned char *ascii_data, *bin_data;
471 int leftbits = 0;
472 unsigned char this_ch;
473 unsigned int leftchar = 0;
474 PyObject *rv;
475 Py_ssize_t bin_len;
Tim Peters934c1a12002-07-02 22:24:50 +0000476
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000477 if ( !PyArg_ParseTuple(args, "s*:b2a_base64", &pbuf) )
478 return NULL;
479 bin_data = pbuf.buf;
480 bin_len = pbuf.len;
Gregory P. Smith9d534572008-06-11 07:41:16 +0000481
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000482 assert(bin_len >= 0);
Gregory P. Smith9d534572008-06-11 07:41:16 +0000483
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000484 if ( bin_len > BASE64_MAXBIN ) {
485 PyErr_SetString(Error, "Too much data for base64 line");
486 PyBuffer_Release(&pbuf);
487 return NULL;
488 }
Tim Peters934c1a12002-07-02 22:24:50 +0000489
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000490 /* We're lazy and allocate too much (fixed up later).
491 "+3" leaves room for up to two pad characters and a trailing
492 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
493 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL ) {
494 PyBuffer_Release(&pbuf);
495 return NULL;
496 }
497 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000498
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000499 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
500 /* Shift the data into our buffer */
501 leftchar = (leftchar << 8) | *bin_data;
502 leftbits += 8;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000503
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000504 /* See if there are 6-bit groups ready */
505 while ( leftbits >= 6 ) {
506 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
507 leftbits -= 6;
508 *ascii_data++ = table_b2a_base64[this_ch];
509 }
510 }
511 if ( leftbits == 2 ) {
512 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
513 *ascii_data++ = BASE64_PAD;
514 *ascii_data++ = BASE64_PAD;
515 } else if ( leftbits == 4 ) {
516 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
517 *ascii_data++ = BASE64_PAD;
518 }
519 *ascii_data++ = '\n'; /* Append a courtesy newline */
Tim Peters934c1a12002-07-02 22:24:50 +0000520
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000521 /* rv is cleared on error */
522 (void)_PyString_Resize(&rv,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000523 (ascii_data -
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000524 (unsigned char *)PyString_AS_STRING(rv)));
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000525 PyBuffer_Release(&pbuf);
526 return rv;
Jack Jansen84bbc2e1995-10-04 16:38:44 +0000527}
528
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000529PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
Jack Jansen72781191995-08-07 14:34:15 +0000530
531static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000532binascii_a2b_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000533{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000534 Py_buffer pascii;
535 unsigned char *ascii_data, *bin_data;
536 int leftbits = 0;
537 unsigned char this_ch;
538 unsigned int leftchar = 0;
539 PyObject *rv;
540 Py_ssize_t len;
541 int done = 0;
Tim Peters934c1a12002-07-02 22:24:50 +0000542
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000543 if ( !PyArg_ParseTuple(args, "s*:a2b_hqx", &pascii) )
544 return NULL;
545 ascii_data = pascii.buf;
546 len = pascii.len;
Jack Jansen72781191995-08-07 14:34:15 +0000547
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000548 assert(len >= 0);
Gregory P. Smith9d534572008-06-11 07:41:16 +0000549
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000550 if (len > PY_SSIZE_T_MAX - 2) {
551 PyBuffer_Release(&pascii);
552 return PyErr_NoMemory();
553 }
Gregory P. Smith9d534572008-06-11 07:41:16 +0000554
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000555 /* Allocate a string that is too big (fixed later)
556 Add two to the initial length to prevent interning which
557 would preclude subsequent resizing. */
558 if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL ) {
559 PyBuffer_Release(&pascii);
560 return NULL;
561 }
562 bin_data = (unsigned char *)PyString_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000563
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000564 for( ; len > 0 ; len--, ascii_data++ ) {
565 /* Get the byte and look it up */
566 this_ch = table_a2b_hqx[*ascii_data];
567 if ( this_ch == SKIP )
568 continue;
569 if ( this_ch == FAIL ) {
570 PyErr_SetString(Error, "Illegal char");
571 PyBuffer_Release(&pascii);
572 Py_DECREF(rv);
573 return NULL;
574 }
575 if ( this_ch == DONE ) {
576 /* The terminating colon */
577 done = 1;
578 break;
579 }
Jack Jansen72781191995-08-07 14:34:15 +0000580
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000581 /* Shift it into the buffer and see if any bytes are ready */
582 leftchar = (leftchar << 6) | (this_ch);
583 leftbits += 6;
584 if ( leftbits >= 8 ) {
585 leftbits -= 8;
586 *bin_data++ = (leftchar >> leftbits) & 0xff;
587 leftchar &= ((1 << leftbits) - 1);
588 }
589 }
Tim Peters934c1a12002-07-02 22:24:50 +0000590
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000591 if ( leftbits && !done ) {
592 PyErr_SetString(Incomplete,
593 "String has incomplete number of bytes");
594 PyBuffer_Release(&pascii);
595 Py_DECREF(rv);
596 return NULL;
597 }
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000598 /* rv is cleared on error */
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000599 if (_PyString_Resize(&rv,
600 (bin_data -
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000601 (unsigned char *)PyString_AS_STRING(rv))) == 0) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000602 PyObject *rrv = Py_BuildValue("Oi", rv, done);
603 PyBuffer_Release(&pascii);
604 Py_DECREF(rv);
605 return rrv;
606 }
Roger E. Masse5f4ce181997-01-16 17:10:22 +0000607
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000608 PyBuffer_Release(&pascii);
609 return NULL;
Jack Jansen72781191995-08-07 14:34:15 +0000610}
611
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000612PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
Jack Jansen72781191995-08-07 14:34:15 +0000613
614static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000615binascii_rlecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000616{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000617 Py_buffer pbuf;
618 unsigned char *in_data, *out_data;
619 PyObject *rv;
620 unsigned char ch;
621 Py_ssize_t in, inend, len;
Tim Peters934c1a12002-07-02 22:24:50 +0000622
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000623 if ( !PyArg_ParseTuple(args, "s*:rlecode_hqx", &pbuf) )
624 return NULL;
625 in_data = pbuf.buf;
626 len = pbuf.len;
Jack Jansen72781191995-08-07 14:34:15 +0000627
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000628 assert(len >= 0);
Gregory P. Smith9d534572008-06-11 07:41:16 +0000629
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000630 if (len > PY_SSIZE_T_MAX / 2 - 2) {
631 PyBuffer_Release(&pbuf);
632 return PyErr_NoMemory();
633 }
Gregory P. Smith9d534572008-06-11 07:41:16 +0000634
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000635 /* Worst case: output is twice as big as input (fixed later) */
636 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) {
637 PyBuffer_Release(&pbuf);
638 return NULL;
639 }
640 out_data = (unsigned char *)PyString_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000641
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000642 for( in=0; in<len; in++) {
643 ch = in_data[in];
644 if ( ch == RUNCHAR ) {
645 /* RUNCHAR. Escape it. */
646 *out_data++ = RUNCHAR;
647 *out_data++ = 0;
648 } else {
649 /* Check how many following are the same */
650 for(inend=in+1;
651 inend<len && in_data[inend] == ch &&
652 inend < in+255;
653 inend++) ;
654 if ( inend - in > 3 ) {
655 /* More than 3 in a row. Output RLE. */
656 *out_data++ = ch;
657 *out_data++ = RUNCHAR;
658 *out_data++ = inend-in;
659 in = inend-1;
660 } else {
661 /* Less than 3. Output the byte itself */
662 *out_data++ = ch;
663 }
664 }
665 }
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000666 /* rv is cleared on error */
667 (void)_PyString_Resize(&rv,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000668 (out_data -
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000669 (unsigned char *)PyString_AS_STRING(rv)));
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000670 PyBuffer_Release(&pbuf);
671 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000672}
673
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000674PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
Tim Peters934c1a12002-07-02 22:24:50 +0000675
Jack Jansen72781191995-08-07 14:34:15 +0000676static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000677binascii_b2a_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000678{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000679 Py_buffer pbin;
680 unsigned char *ascii_data, *bin_data;
681 int leftbits = 0;
682 unsigned char this_ch;
683 unsigned int leftchar = 0;
684 PyObject *rv;
685 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000686
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000687 if ( !PyArg_ParseTuple(args, "s*:b2a_hqx", &pbin) )
688 return NULL;
689 bin_data = pbin.buf;
690 len = pbin.len;
Jack Jansen72781191995-08-07 14:34:15 +0000691
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000692 assert(len >= 0);
Gregory P. Smith9d534572008-06-11 07:41:16 +0000693
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000694 if (len > PY_SSIZE_T_MAX / 2 - 2) {
695 PyBuffer_Release(&pbin);
696 return PyErr_NoMemory();
697 }
Gregory P. Smith9d534572008-06-11 07:41:16 +0000698
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000699 /* Allocate a buffer that is at least large enough */
700 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) {
701 PyBuffer_Release(&pbin);
702 return NULL;
703 }
704 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
Tim Peters934c1a12002-07-02 22:24:50 +0000705
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000706 for( ; len > 0 ; len--, bin_data++ ) {
707 /* Shift into our buffer, and output any 6bits ready */
708 leftchar = (leftchar << 8) | *bin_data;
709 leftbits += 8;
710 while ( leftbits >= 6 ) {
711 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
712 leftbits -= 6;
713 *ascii_data++ = table_b2a_hqx[this_ch];
714 }
715 }
716 /* Output a possible runt byte */
717 if ( leftbits ) {
718 leftchar <<= (6-leftbits);
719 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
720 }
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000721 /* rv is cleared on error */
722 (void)_PyString_Resize(&rv,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000723 (ascii_data -
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000724 (unsigned char *)PyString_AS_STRING(rv)));
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000725 PyBuffer_Release(&pbin);
726 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000727}
728
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000729PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
Tim Peters934c1a12002-07-02 22:24:50 +0000730
Jack Jansen72781191995-08-07 14:34:15 +0000731static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000732binascii_rledecode_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000733{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000734 Py_buffer pin;
735 unsigned char *in_data, *out_data;
736 unsigned char in_byte, in_repeat;
737 PyObject *rv;
738 Py_ssize_t in_len, out_len, out_len_left;
Jack Jansen72781191995-08-07 14:34:15 +0000739
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000740 if ( !PyArg_ParseTuple(args, "s*:rledecode_hqx", &pin) )
741 return NULL;
742 in_data = pin.buf;
743 in_len = pin.len;
Jack Jansen72781191995-08-07 14:34:15 +0000744
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000745 assert(in_len >= 0);
Gregory P. Smith9d534572008-06-11 07:41:16 +0000746
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000747 /* Empty string is a special case */
748 if ( in_len == 0 ) {
749 PyBuffer_Release(&pin);
750 return PyString_FromStringAndSize("", 0);
751 }
752 else if (in_len > PY_SSIZE_T_MAX / 2) {
753 PyBuffer_Release(&pin);
754 return PyErr_NoMemory();
755 }
Jack Jansen72781191995-08-07 14:34:15 +0000756
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000757 /* Allocate a buffer of reasonable size. Resized when needed */
758 out_len = in_len*2;
759 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL ) {
760 PyBuffer_Release(&pin);
761 return NULL;
762 }
763 out_len_left = out_len;
764 out_data = (unsigned char *)PyString_AS_STRING(rv);
Jack Jansen72781191995-08-07 14:34:15 +0000765
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000766 /*
767 ** We need two macros here to get/put bytes and handle
768 ** end-of-buffer for input and output strings.
769 */
Jack Jansen72781191995-08-07 14:34:15 +0000770#define INBYTE(b) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000771 do { \
772 if ( --in_len < 0 ) { \
773 PyErr_SetString(Incomplete, ""); \
774 Py_DECREF(rv); \
775 PyBuffer_Release(&pin); \
776 return NULL; \
777 } \
778 b = *in_data++; \
779 } while(0)
Tim Peters934c1a12002-07-02 22:24:50 +0000780
Jack Jansen72781191995-08-07 14:34:15 +0000781#define OUTBYTE(b) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000782 do { \
783 if ( --out_len_left < 0 ) { \
784 if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
785 if (_PyString_Resize(&rv, 2*out_len) < 0) \
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000786 { PyBuffer_Release(&pin); return NULL; } \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000787 out_data = (unsigned char *)PyString_AS_STRING(rv) \
788 + out_len; \
789 out_len_left = out_len-1; \
790 out_len = out_len * 2; \
791 } \
792 *out_data++ = b; \
793 } while(0)
Jack Jansen72781191995-08-07 14:34:15 +0000794
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000795 /*
796 ** Handle first byte separately (since we have to get angry
797 ** in case of an orphaned RLE code).
798 */
799 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000800
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000801 if (in_byte == RUNCHAR) {
802 INBYTE(in_repeat);
803 if (in_repeat != 0) {
804 /* Note Error, not Incomplete (which is at the end
805 ** of the string only). This is a programmer error.
806 */
807 PyErr_SetString(Error, "Orphaned RLE code at start");
808 PyBuffer_Release(&pin);
809 Py_DECREF(rv);
810 return NULL;
811 }
812 OUTBYTE(RUNCHAR);
813 } else {
814 OUTBYTE(in_byte);
815 }
Tim Peters934c1a12002-07-02 22:24:50 +0000816
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000817 while( in_len > 0 ) {
818 INBYTE(in_byte);
Jack Jansen72781191995-08-07 14:34:15 +0000819
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000820 if (in_byte == RUNCHAR) {
821 INBYTE(in_repeat);
822 if ( in_repeat == 0 ) {
823 /* Just an escaped RUNCHAR value */
824 OUTBYTE(RUNCHAR);
825 } else {
826 /* Pick up value and output a sequence of it */
827 in_byte = out_data[-1];
828 while ( --in_repeat > 0 )
829 OUTBYTE(in_byte);
830 }
831 } else {
832 /* Normal byte */
833 OUTBYTE(in_byte);
834 }
835 }
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000836 /* rv is cleared on error */
837 (void)_PyString_Resize(&rv,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000838 (out_data -
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000839 (unsigned char *)PyString_AS_STRING(rv)));
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000840 PyBuffer_Release(&pin);
841 return rv;
Jack Jansen72781191995-08-07 14:34:15 +0000842}
843
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000844PyDoc_STRVAR(doc_crc_hqx,
Martin Panter3698bd22016-12-24 07:53:57 +0000845"(data, oldcrc) -> newcrc. Compute CRC-CCITT incrementally");
Jack Jansen72781191995-08-07 14:34:15 +0000846
847static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +0000848binascii_crc_hqx(PyObject *self, PyObject *args)
Jack Jansen72781191995-08-07 14:34:15 +0000849{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000850 Py_buffer pin;
851 unsigned char *bin_data;
852 unsigned int crc;
853 Py_ssize_t len;
Tim Peters934c1a12002-07-02 22:24:50 +0000854
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000855 if ( !PyArg_ParseTuple(args, "s*i:crc_hqx", &pin, &crc) )
856 return NULL;
857 bin_data = pin.buf;
858 len = pin.len;
Jack Jansen72781191995-08-07 14:34:15 +0000859
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000860 while(len-- > 0) {
861 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
862 }
Jack Jansen72781191995-08-07 14:34:15 +0000863
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000864 PyBuffer_Release(&pin);
865 return Py_BuildValue("i", crc);
Jack Jansen72781191995-08-07 14:34:15 +0000866}
867
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000868PyDoc_STRVAR(doc_crc32,
869"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000870
Gregory P. Smith440ca772008-03-24 00:08:01 +0000871#ifdef USE_ZLIB_CRC32
872/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
873static PyObject *
874binascii_crc32(PyObject *self, PyObject *args)
875{
Gregory P. Smith1fa588e2008-03-25 07:31:28 +0000876 unsigned int crc32val = 0; /* crc32(0L, Z_NULL, 0) */
Antoine Pitroufd3a60d2010-01-14 16:27:09 +0000877 Py_buffer pbuf;
Gregory P. Smith440ca772008-03-24 00:08:01 +0000878 Byte *buf;
879 Py_ssize_t len;
880 int signed_val;
881
Antoine Pitroufd3a60d2010-01-14 16:27:09 +0000882 if (!PyArg_ParseTuple(args, "s*|I:crc32", &pbuf, &crc32val))
Martin Panterca56dd42016-09-17 07:54:55 +0000883 return NULL;
Gregory P. Smith440ca772008-03-24 00:08:01 +0000884 /* In Python 2.x we return a signed integer regardless of native platform
885 * long size (the 32bit unsigned long is treated as 32-bit signed and sign
886 * extended into a 64-bit long inside the integer object). 3.0 does the
887 * right thing and returns unsigned. http://bugs.python.org/issue1202 */
Antoine Pitroufd3a60d2010-01-14 16:27:09 +0000888 buf = (Byte*)pbuf.buf;
889 len = pbuf.len;
Gregory P. Smith440ca772008-03-24 00:08:01 +0000890 signed_val = crc32(crc32val, buf, len);
Antoine Pitroufd3a60d2010-01-14 16:27:09 +0000891 PyBuffer_Release(&pbuf);
Gregory P. Smith440ca772008-03-24 00:08:01 +0000892 return PyInt_FromLong(signed_val);
893}
894#else /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000895/* Crc - 32 BIT ANSI X3.66 CRC checksum files
896 Also known as: ISO 3307
897**********************************************************************|
898* *|
899* Demonstration program to compute the 32-bit CRC used as the frame *|
900* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
901* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
902* protocol). The 32-bit FCS was added via the Federal Register, *|
903* 1 June 1982, p.23798. I presume but don't know for certain that *|
904* this polynomial is or will be included in CCITT V.41, which *|
905* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
906* PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
907* errors by a factor of 10^-5 over 16-bit FCS. *|
908* *|
909**********************************************************************|
910
911 Copyright (C) 1986 Gary S. Brown. You may use this program, or
912 code or tables extracted from it, as desired without restriction.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000913
Tim Peters934c1a12002-07-02 22:24:50 +0000914 First, the polynomial itself and its table of feedback terms. The
915 polynomial is
916 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
917 Note that we take it "backwards" and put the highest-order term in
918 the lowest-order bit. The X^32 term is "implied"; the LSB is the
919 X^31 term, etc. The X^0 term (usually shown as "+1") results in
920 the MSB being 1.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000921
Tim Peters934c1a12002-07-02 22:24:50 +0000922 Note that the usual hardware shift register implementation, which
923 is what we're using (we're merely optimizing it by doing eight-bit
924 chunks at a time) shifts bits into the lowest-order term. In our
925 implementation, that means shifting towards the right. Why do we
926 do it this way? Because the calculated CRC must be transmitted in
927 order from highest-order term to lowest-order term. UARTs transmit
928 characters in order from LSB to MSB. By storing the CRC this way,
929 we hand it to the UART in the order low-byte to high-byte; the UART
930 sends each low-bit to high-bit; and the result is transmission bit
931 by bit from highest- to lowest-order term without requiring any bit
932 shuffling on our part. Reception works similarly.
933
934 The feedback terms table consists of 256, 32-bit entries. Notes:
935
936 1. The table can be generated at runtime if desired; code to do so
937 is shown later. It might not be obvious, but the feedback
938 terms simply represent the results of eight shift/xor opera-
939 tions for all combinations of data and CRC register values.
940
941 2. The CRC accumulation logic is the same for all CRC polynomials,
942 be they sixteen or thirty-two bits wide. You simply choose the
943 appropriate table. Alternatively, because the table can be
944 generated at runtime, you can start by generating the table for
945 the polynomial in question and use exactly the same "updcrc",
946 if your application needn't simultaneously handle two CRC
947 polynomials. (Note, however, that XMODEM is strange.)
948
949 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
950 of course, 32-bit entries work OK if the high 16 bits are zero.
951
952 4. The values must be right-shifted by eight bits by the "updcrc"
953 logic; the shift must be unsigned (bring in zeroes). On some
954 hardware you could probably optimize the shift in assembler by
955 using byte-swap instructions.
Guido van Rossum7d47c9e2000-02-16 21:11:52 +0000956********************************************************************/
957
/* Byte-wise CRC-32 feedback table: entry i is the result of eight
 * shift/xor steps applied to the value i with the bit-reversed polynomial
 * 0xedb88320 (which is also entry 128).  The table is only ever read, so
 * it is declared const. */
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
1012
1013static PyObject *
Peter Schneider-Kampa788a7f2000-07-10 09:57:19 +00001014binascii_crc32(PyObject *self, PyObject *args)
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001015{ /* By Jim Ahlstrom; All rights transferred to CNRI */
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001016 Py_buffer pbin;
1017 unsigned char *bin_data;
1018 unsigned int crc = 0U; /* initial value of CRC */
1019 Py_ssize_t len;
1020 int result;
Tim Peters934c1a12002-07-02 22:24:50 +00001021
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001022 if ( !PyArg_ParseTuple(args, "s*|I:crc32", &pbin, &crc) )
1023 return NULL;
1024 bin_data = pbin.buf;
1025 len = pbin.len;
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001026
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001027 crc = ~ crc;
1028 while (len-- > 0)
1029 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffU] ^ (crc >> 8);
1030 /* Note: (crc >> 8) MUST zero fill on left */
Tim Petersa98011c2002-07-02 20:20:08 +00001031
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001032 result = (int)(crc ^ 0xFFFFFFFFU);
1033 PyBuffer_Release(&pbin);
1034 return PyInt_FromLong(result);
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001035}
Gregory P. Smith440ca772008-03-24 00:08:01 +00001036#endif /* USE_ZLIB_CRC32 */
Guido van Rossum7d47c9e2000-02-16 21:11:52 +00001037
Barry Warsawe977c212000-08-15 06:07:13 +00001038
1039static PyObject *
1040binascii_hexlify(PyObject *self, PyObject *args)
1041{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001042 Py_buffer parg;
1043 char* argbuf;
1044 Py_ssize_t arglen;
1045 PyObject *retval;
1046 char* retbuf;
1047 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +00001048
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001049 if (!PyArg_ParseTuple(args, "s*:b2a_hex", &parg))
1050 return NULL;
1051 argbuf = parg.buf;
1052 arglen = parg.len;
Barry Warsawe977c212000-08-15 06:07:13 +00001053
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001054 assert(arglen >= 0);
1055 if (arglen > PY_SSIZE_T_MAX / 2) {
1056 PyBuffer_Release(&parg);
1057 return PyErr_NoMemory();
1058 }
Gregory P. Smith9d534572008-06-11 07:41:16 +00001059
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001060 retval = PyString_FromStringAndSize(NULL, arglen*2);
1061 if (!retval) {
1062 PyBuffer_Release(&parg);
1063 return NULL;
1064 }
1065 retbuf = PyString_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001066
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001067 /* make hex version of string, taken from shamodule.c */
1068 for (i=j=0; i < arglen; i++) {
1069 char c;
1070 c = (argbuf[i] >> 4) & 0xf;
1071 c = (c>9) ? c+'a'-10 : c + '0';
1072 retbuf[j++] = c;
1073 c = argbuf[i] & 0xf;
1074 c = (c>9) ? c+'a'-10 : c + '0';
1075 retbuf[j++] = c;
1076 }
1077 PyBuffer_Release(&parg);
1078 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +00001079}
1080
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001081PyDoc_STRVAR(doc_hexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001082"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
1083\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001084This function is also available as \"hexlify()\".");
Barry Warsawe977c212000-08-15 06:07:13 +00001085
1086
/*
** Convert one hexadecimal digit character to its numeric value.
**
** c is a character code (callers pass a value already masked to 0..255).
** Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything else.
**
** Only explicit ASCII ranges are tested: isdigit() may accept additional
** characters in some C runtimes/locales, which would make "c - '0'" yield
** a value outside 0..9.
*/
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
1100
1101
1102static PyObject *
1103binascii_unhexlify(PyObject *self, PyObject *args)
1104{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001105 Py_buffer parg;
1106 char* argbuf;
1107 Py_ssize_t arglen;
1108 PyObject *retval;
1109 char* retbuf;
1110 Py_ssize_t i, j;
Barry Warsawe977c212000-08-15 06:07:13 +00001111
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001112 if (!PyArg_ParseTuple(args, "s*:a2b_hex", &parg))
1113 return NULL;
1114 argbuf = parg.buf;
1115 arglen = parg.len;
Barry Warsawe977c212000-08-15 06:07:13 +00001116
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001117 assert(arglen >= 0);
Gregory P. Smith9d534572008-06-11 07:41:16 +00001118
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001119 /* XXX What should we do about strings with an odd length? Should
1120 * we add an implicit leading zero, or a trailing zero? For now,
1121 * raise an exception.
1122 */
1123 if (arglen % 2) {
1124 PyBuffer_Release(&parg);
1125 PyErr_SetString(PyExc_TypeError, "Odd-length string");
1126 return NULL;
1127 }
Barry Warsawe977c212000-08-15 06:07:13 +00001128
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001129 retval = PyString_FromStringAndSize(NULL, (arglen/2));
1130 if (!retval) {
1131 PyBuffer_Release(&parg);
1132 return NULL;
1133 }
1134 retbuf = PyString_AS_STRING(retval);
Barry Warsawe977c212000-08-15 06:07:13 +00001135
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001136 for (i=j=0; i < arglen; i += 2) {
1137 int top = to_int(Py_CHARMASK(argbuf[i]));
1138 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1139 if (top == -1 || bot == -1) {
1140 PyErr_SetString(PyExc_TypeError,
1141 "Non-hexadecimal digit found");
1142 goto finally;
1143 }
1144 retbuf[j++] = (top << 4) + bot;
1145 }
1146 PyBuffer_Release(&parg);
1147 return retval;
Barry Warsawe977c212000-08-15 06:07:13 +00001148
1149 finally:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001150 PyBuffer_Release(&parg);
1151 Py_DECREF(retval);
1152 return NULL;
Barry Warsawe977c212000-08-15 06:07:13 +00001153}
1154
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001155PyDoc_STRVAR(doc_unhexlify,
Barry Warsawe977c212000-08-15 06:07:13 +00001156"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1157\n\
1158hexstr must contain an even number of hex digits (upper or lower case).\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001159This function is also available as \"unhexlify()\"");
Barry Warsawe977c212000-08-15 06:07:13 +00001160
/* Value of each 7-bit ASCII character as a hexadecimal digit, or -1 if the
 * character is not one ('0'-'9', 'A'-'F' and 'a'-'f' are the only hits).
 * The table is only ever read, so it is declared const. */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up the hex value of c.  There is no bounds check: the caller must
 * make sure c is in 0..127 before using this. */
#define hexval(c) table_hex[(unsigned int)(c)]
1173
1174#define MAXLINESIZE 76
1175
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001176PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001177
Tim Peters934c1a12002-07-02 22:24:50 +00001178static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001179binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1180{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001181 Py_ssize_t in, out;
1182 char ch;
1183 Py_buffer pdata;
1184 unsigned char *data, *odata;
1185 Py_ssize_t datalen = 0;
1186 PyObject *rv;
1187 static char *kwlist[] = {"data", "header", NULL};
1188 int header = 0;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001189
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001190 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i", kwlist, &pdata,
1191 &header))
1192 return NULL;
1193 data = pdata.buf;
1194 datalen = pdata.len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001195
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001196 /* We allocate the output same size as input, this is overkill.
1197 * The previous implementation used calloc() so we'll zero out the
1198 * memory here too, since PyMem_Malloc() does not guarantee that.
1199 */
1200 odata = (unsigned char *) PyMem_Malloc(datalen);
1201 if (odata == NULL) {
1202 PyBuffer_Release(&pdata);
1203 PyErr_NoMemory();
1204 return NULL;
1205 }
1206 memset(odata, 0, datalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001207
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001208 in = out = 0;
1209 while (in < datalen) {
1210 if (data[in] == '=') {
1211 in++;
1212 if (in >= datalen) break;
1213 /* Soft line breaks */
1214 if ((data[in] == '\n') || (data[in] == '\r')) {
1215 if (data[in] != '\n') {
1216 while (in < datalen && data[in] != '\n') in++;
1217 }
1218 if (in < datalen) in++;
1219 }
1220 else if (data[in] == '=') {
1221 /* broken case from broken python qp */
1222 odata[out++] = '=';
1223 in++;
1224 }
Serhiy Storchaka12c88552016-09-14 16:36:15 +03001225 else if ((in + 1 < datalen) &&
1226 ((data[in] >= 'A' && data[in] <= 'F') ||
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001227 (data[in] >= 'a' && data[in] <= 'f') ||
1228 (data[in] >= '0' && data[in] <= '9')) &&
1229 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1230 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1231 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1232 /* hexval */
1233 ch = hexval(data[in]) << 4;
1234 in++;
1235 ch |= hexval(data[in]);
1236 in++;
1237 odata[out++] = ch;
1238 }
1239 else {
1240 odata[out++] = '=';
1241 }
1242 }
1243 else if (header && data[in] == '_') {
1244 odata[out++] = ' ';
1245 in++;
1246 }
1247 else {
1248 odata[out] = data[in];
1249 in++;
1250 out++;
1251 }
1252 }
1253 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1254 PyBuffer_Release(&pdata);
1255 PyMem_Free(odata);
1256 return NULL;
1257 }
1258 PyBuffer_Release(&pdata);
1259 PyMem_Free(odata);
1260 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001261}
1262
/*
** Write the two uppercase hexadecimal digits of ch to s[0] (high nibble)
** and s[1] (low nibble).  s must have room for two bytes; no terminator
** is written.  Always returns 0.
*/
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0f];
    s[1] = digits[ch & 0x0f];
    return 0;
}
1273
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001274PyDoc_STRVAR(doc_b2a_qp,
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001275"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1276 Encode a string using quoted-printable encoding. \n\
1277\n\
1278On encoding, when istext is set, newlines are not encoded, and white \n\
1279space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001280both encoded. When quotetabs is set, space and tabs are encoded.");
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001281
1282/* XXX: This is ridiculously complicated to be backward compatible
1283 * (mostly) with the quopri module. It doesn't re-create the quopri
1284 * module bug where text ending in CRLF has the CR encoded */
Tim Peters934c1a12002-07-02 22:24:50 +00001285static PyObject*
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001286binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1287{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001288 Py_ssize_t in, out;
1289 Py_buffer pdata;
1290 unsigned char *data, *odata;
1291 Py_ssize_t datalen = 0, odatalen = 0;
1292 PyObject *rv;
1293 unsigned int linelen = 0;
1294 static char *kwlist[] = {"data", "quotetabs", "istext",
1295 "header", NULL};
1296 int istext = 1;
1297 int quotetabs = 0;
1298 int header = 0;
1299 unsigned char ch;
1300 int crlf = 0;
1301 unsigned char *p;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001302
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001303 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|iii", kwlist, &pdata,
1304 &quotetabs, &istext, &header))
1305 return NULL;
1306 data = pdata.buf;
1307 datalen = pdata.len;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001308
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001309 /* See if this string is using CRLF line ends */
1310 /* XXX: this function has the side effect of converting all of
1311 * the end of lines to be the same depending on this detection
1312 * here */
1313 p = (unsigned char *) memchr(data, '\n', datalen);
1314 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1315 crlf = 1;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001316
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001317 /* First, scan to see how many characters need to be encoded */
1318 in = 0;
1319 while (in < datalen) {
Benjamin Peterson6c08d962016-08-13 18:33:33 -07001320 Py_ssize_t delta = 0;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001321 if ((data[in] > 126) ||
1322 (data[in] == '=') ||
1323 (header && data[in] == '_') ||
1324 ((data[in] == '.') && (linelen == 0) &&
Serhiy Storchaka12c88552016-09-14 16:36:15 +03001325 (in + 1 == datalen || data[in+1] == '\n' ||
1326 data[in+1] == '\r' || data[in+1] == 0)) ||
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001327 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1328 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1329 ((data[in] < 33) &&
1330 (data[in] != '\r') && (data[in] != '\n') &&
1331 (quotetabs ||
1332 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1333 {
1334 if ((linelen + 3) >= MAXLINESIZE) {
1335 linelen = 0;
1336 if (crlf)
Benjamin Peterson6c08d962016-08-13 18:33:33 -07001337 delta += 3;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001338 else
Benjamin Peterson6c08d962016-08-13 18:33:33 -07001339 delta += 2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001340 }
1341 linelen += 3;
Benjamin Peterson6c08d962016-08-13 18:33:33 -07001342 delta += 3;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001343 in++;
1344 }
1345 else {
1346 if (istext &&
1347 ((data[in] == '\n') ||
1348 ((in+1 < datalen) && (data[in] == '\r') &&
1349 (data[in+1] == '\n'))))
1350 {
1351 linelen = 0;
1352 /* Protect against whitespace on end of line */
1353 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
Benjamin Peterson6c08d962016-08-13 18:33:33 -07001354 delta += 2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001355 if (crlf)
Benjamin Peterson6c08d962016-08-13 18:33:33 -07001356 delta += 2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001357 else
Benjamin Peterson6c08d962016-08-13 18:33:33 -07001358 delta += 1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001359 if (data[in] == '\r')
1360 in += 2;
1361 else
1362 in++;
1363 }
1364 else {
1365 if ((in + 1 != datalen) &&
1366 (data[in+1] != '\n') &&
1367 (linelen + 1) >= MAXLINESIZE) {
1368 linelen = 0;
1369 if (crlf)
Benjamin Peterson6c08d962016-08-13 18:33:33 -07001370 delta += 3;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001371 else
Benjamin Peterson6c08d962016-08-13 18:33:33 -07001372 delta += 2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001373 }
1374 linelen++;
Benjamin Peterson6c08d962016-08-13 18:33:33 -07001375 delta++;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001376 in++;
1377 }
1378 }
Benjamin Peterson6c08d962016-08-13 18:33:33 -07001379 if (PY_SSIZE_T_MAX - delta < odatalen) {
1380 PyBuffer_Release(&pdata);
1381 PyErr_NoMemory();
1382 return NULL;
1383 }
1384 odatalen += delta;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001385 }
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001386
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001387 /* We allocate the output same size as input, this is overkill.
1388 * The previous implementation used calloc() so we'll zero out the
1389 * memory here too, since PyMem_Malloc() does not guarantee that.
1390 */
1391 odata = (unsigned char *) PyMem_Malloc(odatalen);
1392 if (odata == NULL) {
1393 PyBuffer_Release(&pdata);
1394 PyErr_NoMemory();
1395 return NULL;
1396 }
1397 memset(odata, 0, odatalen);
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001398
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001399 in = out = linelen = 0;
1400 while (in < datalen) {
1401 if ((data[in] > 126) ||
1402 (data[in] == '=') ||
1403 (header && data[in] == '_') ||
1404 ((data[in] == '.') && (linelen == 0) &&
Serhiy Storchaka12c88552016-09-14 16:36:15 +03001405 (in + 1 == datalen || data[in+1] == '\n' ||
1406 data[in+1] == '\r' || data[in+1] == 0)) ||
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001407 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1408 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1409 ((data[in] < 33) &&
1410 (data[in] != '\r') && (data[in] != '\n') &&
Serhiy Storchaka12c88552016-09-14 16:36:15 +03001411 (quotetabs || ((data[in] != '\t') && (data[in] != ' ')))))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001412 {
1413 if ((linelen + 3 )>= MAXLINESIZE) {
1414 odata[out++] = '=';
1415 if (crlf) odata[out++] = '\r';
1416 odata[out++] = '\n';
1417 linelen = 0;
1418 }
1419 odata[out++] = '=';
1420 to_hex(data[in], &odata[out]);
1421 out += 2;
1422 in++;
1423 linelen += 3;
1424 }
1425 else {
1426 if (istext &&
1427 ((data[in] == '\n') ||
1428 ((in+1 < datalen) && (data[in] == '\r') &&
1429 (data[in+1] == '\n'))))
1430 {
1431 linelen = 0;
1432 /* Protect against whitespace on end of line */
1433 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1434 ch = odata[out-1];
1435 odata[out-1] = '=';
1436 to_hex(ch, &odata[out]);
1437 out += 2;
1438 }
Tim Peters934c1a12002-07-02 22:24:50 +00001439
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001440 if (crlf) odata[out++] = '\r';
1441 odata[out++] = '\n';
1442 if (data[in] == '\r')
1443 in += 2;
1444 else
1445 in++;
1446 }
1447 else {
1448 if ((in + 1 != datalen) &&
1449 (data[in+1] != '\n') &&
1450 (linelen + 1) >= MAXLINESIZE) {
1451 odata[out++] = '=';
1452 if (crlf) odata[out++] = '\r';
1453 odata[out++] = '\n';
1454 linelen = 0;
1455 }
1456 linelen++;
1457 if (header && data[in] == ' ') {
1458 odata[out++] = '_';
1459 in++;
1460 }
1461 else {
1462 odata[out++] = data[in++];
1463 }
1464 }
1465 }
1466 }
1467 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1468 PyBuffer_Release(&pdata);
1469 PyMem_Free(odata);
1470 return NULL;
1471 }
1472 PyBuffer_Release(&pdata);
1473 PyMem_Free(odata);
1474 return rv;
Martin v. Löwis16dc7f42001-09-30 20:32:11 +00001475}
Barry Warsawe977c212000-08-15 06:07:13 +00001476
Jack Jansen72781191995-08-07 14:34:15 +00001477/* List of functions defined in the module */
1478
1479static struct PyMethodDef binascii_module_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001480 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1481 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1482 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1483 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1484 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1485 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1486 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1487 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1488 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1489 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1490 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1491 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1492 doc_rledecode_hqx},
1493 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1494 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
1495 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1496 doc_a2b_qp},
1497 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1498 doc_b2a_qp},
1499 {NULL, NULL} /* sentinel */
Jack Jansen72781191995-08-07 14:34:15 +00001500};
1501
1502
1503/* Initialization function for the module (*must* be called initbinascii) */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001504PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
Jack Jansen72781191995-08-07 14:34:15 +00001505
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001506PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001507initbinascii(void)
Jack Jansen72781191995-08-07 14:34:15 +00001508{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001509 PyObject *m, *d, *x;
Jack Jansen72781191995-08-07 14:34:15 +00001510
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001511 /* Create the module and add the functions */
1512 m = Py_InitModule("binascii", binascii_module_methods);
1513 if (m == NULL)
1514 return;
Jack Jansen72781191995-08-07 14:34:15 +00001515
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001516 d = PyModule_GetDict(m);
1517 x = PyString_FromString(doc_binascii);
1518 PyDict_SetItemString(d, "__doc__", x);
1519 Py_XDECREF(x);
Jack Jansen72781191995-08-07 14:34:15 +00001520
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001521 Error = PyErr_NewException("binascii.Error", NULL, NULL);
1522 PyDict_SetItemString(d, "Error", Error);
1523 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1524 PyDict_SetItemString(d, "Incomplete", Incomplete);
Jack Jansen72781191995-08-07 14:34:15 +00001525}