Another stab at SF 576327: zipfile when sizeof(long) == 8. binascii_crc32(): The previous patch forced this to return the same result across platforms. This patch deals with the fact that, on a 64-bit box, the *entry* value may have "unexpected" bits in the high four bytes. Bugfix candidate.

commit: 934c1a1c6b13ba27baf75d206a842cefda99f771 [log] [tgz]
author: Tim Peters <tim.peters@gmail.com> Tue Jul 02 22:24:50 2002 +0000
committer: Tim Peters <tim.peters@gmail.com> Tue Jul 02 22:24:50 2002 +0000
tree: 76ea85507b772200006ee373d43eb00216abb9af
parent: aab713bdf7380c1b618655cfd1b284c83a9b5d58 [diff]
diff --git a/Modules/binascii.c b/Modules/binascii.c
index 66644e1..ec07a71 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c

@@ -42,13 +42,13 @@
 ** does make the performance sub-optimal. Oh well, too bad...
 **
 ** Jack Jansen, CWI, July 1995.
-** 
+**
 ** Added support for quoted-printable encoding, based on rfc 1521 et al
-** quoted-printable encoding specifies that non printable characters (anything 
+** quoted-printable encoding specifies that non printable characters (anything
 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
 ** of the character.  It also specifies some other behavior to enable 8bit data
-** in a mail message with little difficulty (maximum line sizes, protecting 
-** some cases of whitespace, etc).    
+** in a mail message with little difficulty (maximum line sizes, protecting
+** some cases of whitespace, etc).
 **
 ** Brandon Long, September 2001.
 */
@@ -190,7 +190,7 @@
 	unsigned int leftchar = 0;
 	PyObject *rv;
 	int ascii_len, bin_len;
-	
+
 	if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
 		return NULL;
 
@@ -202,7 +202,7 @@
 	if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
 		return NULL;
 	bin_data = (unsigned char *)PyString_AsString(rv);
-	
+
 	for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
 		this_ch = *ascii_data;
 		if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
@@ -255,7 +255,7 @@
 }
 
 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
-	
+
 static PyObject *
 binascii_b2a_uu(PyObject *self, PyObject *args)
 {
@@ -265,7 +265,7 @@
 	unsigned int leftchar = 0;
 	PyObject *rv;
 	int bin_len;
-	
+
 	if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
 		return NULL;
 	if ( bin_len > 45 ) {
@@ -281,7 +281,7 @@
 
 	/* Store the length */
 	*ascii_data++ = ' ' + (bin_len & 077);
-	
+
 	for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
 		/* Shift the data (or padding) into our buffer */
 		if ( bin_len > 0 )	/* Data */
@@ -298,7 +298,7 @@
 		}
 	}
 	*ascii_data++ = '\n';	/* Append a courtesy newline */
-	
+
 	_PyString_Resize(&rv, (ascii_data -
 			       (unsigned char *)PyString_AsString(rv)));
 	return rv;
@@ -308,7 +308,7 @@
 static int
 binascii_find_valid(unsigned char *s, int slen, int num)
 {
-	/* Finds & returns the (num+1)th 
+	/* Finds & returns the (num+1)th
 	** valid character for base64, or -1 if none.
 	*/
 
@@ -342,7 +342,7 @@
 	PyObject *rv;
 	int ascii_len, bin_len;
 	int quad_pos = 0;
-	
+
 	if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
 		return NULL;
 
@@ -418,7 +418,7 @@
 }
 
 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
-	
+
 static PyObject *
 binascii_b2a_base64(PyObject *self, PyObject *args)
 {
@@ -428,14 +428,14 @@
 	unsigned int leftchar = 0;
 	PyObject *rv;
 	int bin_len;
-	
+
 	if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
 		return NULL;
 	if ( bin_len > BASE64_MAXBIN ) {
 		PyErr_SetString(Error, "Too much data for base64 line");
 		return NULL;
 	}
-	
+
 	/* We're lazy and allocate too much (fixed up later).
 	   "+3" leaves room for up to two pad characters and a trailing
 	   newline.  Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
@@ -462,9 +462,9 @@
 	} else if ( leftbits == 4 ) {
 		*ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
 		*ascii_data++ = BASE64_PAD;
-	} 
+	}
 	*ascii_data++ = '\n';	/* Append a courtesy newline */
-	
+
 	_PyString_Resize(&rv, (ascii_data -
 			       (unsigned char *)PyString_AsString(rv)));
 	return rv;
@@ -482,7 +482,7 @@
 	PyObject *rv;
 	int len;
 	int done = 0;
-	
+
 	if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
 		return NULL;
 
@@ -516,7 +516,7 @@
 			leftchar &= ((1 << leftbits) - 1);
 		}
 	}
-	
+
 	if ( leftbits && !done ) {
 		PyErr_SetString(Incomplete,
 				"String has incomplete number of bytes");
@@ -543,7 +543,7 @@
 	PyObject *rv;
 	unsigned char ch;
 	int in, inend, len;
-	
+
 	if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
 		return NULL;
 
@@ -551,7 +551,7 @@
 	if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL )
 		return NULL;
 	out_data = (unsigned char *)PyString_AsString(rv);
-	
+
 	for( in=0; in<len; in++) {
 		ch = in_data[in];
 		if ( ch == RUNCHAR ) {
@@ -582,7 +582,7 @@
 }
 
 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
-	
+
 static PyObject *
 binascii_b2a_hqx(PyObject *self, PyObject *args)
 {
@@ -592,7 +592,7 @@
 	unsigned int leftchar = 0;
 	PyObject *rv;
 	int len;
-	
+
 	if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
 		return NULL;
 
@@ -600,7 +600,7 @@
 	if ( (rv=PyString_FromStringAndSize(NULL, len*2)) == NULL )
 		return NULL;
 	ascii_data = (unsigned char *)PyString_AsString(rv);
-	
+
 	for( ; len > 0 ; len--, bin_data++ ) {
 		/* Shift into our buffer, and output any 6bits ready */
 		leftchar = (leftchar << 8) | *bin_data;
@@ -622,7 +622,7 @@
 }
 
 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
-	
+
 static PyObject *
 binascii_rledecode_hqx(PyObject *self, PyObject *args)
 {
@@ -658,7 +658,7 @@
 		 } \
 		 b = *in_data++; \
 	} while(0)
-	    
+
 #define OUTBYTE(b) \
 	do { \
 		 if ( --out_len_left < 0 ) { \
@@ -692,7 +692,7 @@
 	} else {
 		OUTBYTE(in_byte);
 	}
-	
+
 	while( in_len > 0 ) {
 		INBYTE(in_byte);
 
@@ -726,7 +726,7 @@
 	unsigned char *bin_data;
 	unsigned int crc;
 	int len;
-	
+
 	if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
 		return NULL;
 
@@ -758,49 +758,49 @@
 
  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
  code or tables extracted from it, as desired without restriction.
- 
- First, the polynomial itself and its table of feedback terms.  The  
- polynomial is                                                       
- X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0 
- Note that we take it "backwards" and put the highest-order term in  
- the lowest-order bit.  The X^32 term is "implied"; the LSB is the   
- X^31 term, etc.  The X^0 term (usually shown as "+1") results in    
- the MSB being 1.                                                    
 
- Note that the usual hardware shift register implementation, which   
- is what we're using (we're merely optimizing it by doing eight-bit  
- chunks at a time) shifts bits into the lowest-order term.  In our   
- implementation, that means shifting towards the right.  Why do we   
- do it this way?  Because the calculated CRC must be transmitted in  
- order from highest-order term to lowest-order term.  UARTs transmit 
- characters in order from LSB to MSB.  By storing the CRC this way,  
- we hand it to the UART in the order low-byte to high-byte; the UART 
- sends each low-bit to hight-bit; and the result is transmission bit 
- by bit from highest- to lowest-order term without requiring any bit 
- shuffling on our part.  Reception works similarly.                  
+ First, the polynomial itself and its table of feedback terms.  The
+ polynomial is
+ X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
+ Note that we take it "backwards" and put the highest-order term in
+ the lowest-order bit.  The X^32 term is "implied"; the LSB is the
+ X^31 term, etc.  The X^0 term (usually shown as "+1") results in
+ the MSB being 1.
 
- The feedback terms table consists of 256, 32-bit entries.  Notes:   
-                                                                     
-  1. The table can be generated at runtime if desired; code to do so 
-     is shown later.  It might not be obvious, but the feedback      
-     terms simply represent the results of eight shift/xor opera-    
-     tions for all combinations of data and CRC register values.     
-                                                                     
-  2. The CRC accumulation logic is the same for all CRC polynomials, 
-     be they sixteen or thirty-two bits wide.  You simply choose the 
-     appropriate table.  Alternatively, because the table can be     
-     generated at runtime, you can start by generating the table for 
-     the polynomial in question and use exactly the same "updcrc",   
-     if your application needn't simultaneously handle two CRC       
-     polynomials.  (Note, however, that XMODEM is strange.)          
-                                                                     
-  3. For 16-bit CRCs, the table entries need be only 16 bits wide;   
-     of course, 32-bit entries work OK if the high 16 bits are zero. 
-                                                                     
-  4. The values must be right-shifted by eight bits by the "updcrc"  
-     logic; the shift must be unsigned (bring in zeroes).  On some   
-     hardware you could probably optimize the shift in assembler by  
-     using byte-swap instructions.                                   
+ Note that the usual hardware shift register implementation, which
+ is what we're using (we're merely optimizing it by doing eight-bit
+ chunks at a time) shifts bits into the lowest-order term.  In our
+ implementation, that means shifting towards the right.  Why do we
+ do it this way?  Because the calculated CRC must be transmitted in
+ order from highest-order term to lowest-order term.  UARTs transmit
+ characters in order from LSB to MSB.  By storing the CRC this way,
+ we hand it to the UART in the order low-byte to high-byte; the UART
+ sends each low-bit to hight-bit; and the result is transmission bit
+ by bit from highest- to lowest-order term without requiring any bit
+ shuffling on our part.  Reception works similarly.
+
+ The feedback terms table consists of 256, 32-bit entries.  Notes:
+
+  1. The table can be generated at runtime if desired; code to do so
+     is shown later.  It might not be obvious, but the feedback
+     terms simply represent the results of eight shift/xor opera-
+     tions for all combinations of data and CRC register values.
+
+  2. The CRC accumulation logic is the same for all CRC polynomials,
+     be they sixteen or thirty-two bits wide.  You simply choose the
+     appropriate table.  Alternatively, because the table can be
+     generated at runtime, you can start by generating the table for
+     the polynomial in question and use exactly the same "updcrc",
+     if your application needn't simultaneously handle two CRC
+     polynomials.  (Note, however, that XMODEM is strange.)
+
+  3. For 16-bit CRCs, the table entries need be only 16 bits wide;
+     of course, 32-bit entries work OK if the high 16 bits are zero.
+
+  4. The values must be right-shifted by eight bits by the "updcrc"
+     logic; the shift must be unsigned (bring in zeroes).  On some
+     hardware you could probably optimize the shift in assembler by
+     using byte-swap instructions.
 ********************************************************************/
 
 static unsigned long crc_32_tab[256] = {
@@ -865,23 +865,29 @@
 	unsigned long crc = 0UL;	/* initial value of CRC */
 	int len;
 	long result;
-	
+
 	if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
 		return NULL;
 
-	crc = crc ^ 0xFFFFFFFFUL;
-	while(len--)
+	crc = ~ crc;
+#if SIZEOF_LONG > 4
+	/* only want the trailing 32 bits */
+	crc &= 0xFFFFFFFFUL;
+#endif
+	while (len--)
 		crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
 		/* Note:  (crc >> 8) MUST zero fill on left */
 
 	result = (long)(crc ^ 0xFFFFFFFFUL);
-	/* If long is > 32 bits, extend the sign bit.  This is one way to
-	 * ensure the result is the same across platforms.  The other way
-	 * would be to return an unbounded long, but the evidence suggests
-	 * that lots of code outside this treats the result as if it were
-	 * a signed 4-byte integer.
+#if SIZEOF_LONG > 4
+	/* Extend the sign bit.  This is one way to ensure the result is the
+	 * same across platforms.  The other way would be to return an
+	 * unbounded unsigned long, but the evidence suggests that lots of
+	 * code outside this treats the result as if it were a signed 4-byte
+	 * integer.
 	 */
 	result |= -(result & (1L << 31));
+#endif
 	return PyInt_FromLong(result);
 }
 
@@ -929,7 +935,7 @@
 
 
 static int
-to_int(int c) 
+to_int(int c)
 {
 	if (isdigit(c))
 		return c - '0';
@@ -1011,7 +1017,7 @@
 
 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
 
-static PyObject* 
+static PyObject*
 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
 {
 	unsigned int in, out;
@@ -1022,7 +1028,7 @@
 	static char *kwlist[] = {"data", "header", NULL};
 	int header = 0;
 
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data, 
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
 	      &datalen, &header))
 		return NULL;
 
@@ -1040,7 +1046,7 @@
 			in++;
 			if (in >= datalen) break;
 			/* Soft line breaks */
-			if ((data[in] == '\n') || (data[in] == '\r') || 
+			if ((data[in] == '\n') || (data[in] == '\r') ||
 			    (data[in] == ' ') || (data[in] == '\t')) {
 				if (data[in] != '\n') {
 					while (in < datalen && data[in] != '\n') in++;
@@ -1052,7 +1058,7 @@
 				odata[out++] = '=';
 				in++;
 			}
-			else if (((data[in] >= 'A' && data[in] <= 'F') || 
+			else if (((data[in] >= 'A' && data[in] <= 'F') ||
 			          (data[in] >= 'a' && data[in] <= 'f') ||
 				  (data[in] >= '0' && data[in] <= '9')) &&
 			         ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
@@ -1087,7 +1093,7 @@
 	return rv;
 }
 
-static int 
+static int
 to_hex (unsigned char ch, unsigned char *s)
 {
 	unsigned int uvalue = ch;
@@ -1109,7 +1115,7 @@
 /* XXX: This is ridiculously complicated to be backward compatible
  * (mostly) with the quopri module.  It doesn't re-create the quopri
  * module bug where text ending in CRLF has the CR encoded */
-static PyObject* 
+static PyObject*
 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
 {
 	unsigned int in, out;
@@ -1125,7 +1131,7 @@
 	int crlf = 0;
 	unsigned char *p;
 
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data, 
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
 	      &datalen, &quotetabs, &istext, &header))
 		return NULL;
 
@@ -1140,14 +1146,14 @@
 	/* First, scan to see how many characters need to be encoded */
 	in = 0;
 	while (in < datalen) {
-		if ((data[in] > 126) || 
+		if ((data[in] > 126) ||
 		    (data[in] == '=') ||
 		    (header && data[in] == '_') ||
 		    ((data[in] == '.') && (linelen == 1)) ||
 		    (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
 		    ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
-		    ((data[in] < 33) && 
-		     (data[in] != '\r') && (data[in] != '\n') && 
+		    ((data[in] < 33) &&
+		     (data[in] != '\r') && (data[in] != '\n') &&
 		     (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
 		{
 			if ((linelen + 3) >= MAXLINESIZE) {
@@ -1162,7 +1168,7 @@
 			in++;
 		}
 		else {
-		  	if (istext && 
+		  	if (istext &&
 			    ((data[in] == '\n') ||
 			     ((in+1 < datalen) && (data[in] == '\r') &&
 			     (data[in+1] == '\n'))))
@@ -1181,7 +1187,7 @@
 					in++;
 			}
 			else {
-				if ((in + 1 != datalen) && 
+				if ((in + 1 != datalen) &&
 				    (data[in+1] != '\n') &&
 				    (linelen + 1) >= MAXLINESIZE) {
 					linelen = 0;
@@ -1206,14 +1212,14 @@
 
 	in = out = linelen = 0;
 	while (in < datalen) {
-		if ((data[in] > 126) || 
+		if ((data[in] > 126) ||
 		    (data[in] == '=') ||
 		    (header && data[in] == '_') ||
 		    ((data[in] == '.') && (linelen == 1)) ||
 		    (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
 		    ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
-		    ((data[in] < 33) && 
-		     (data[in] != '\r') && (data[in] != '\n') && 
+		    ((data[in] < 33) &&
+		     (data[in] != '\r') && (data[in] != '\n') &&
 		     (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
 		{
 			if ((linelen + 3 )>= MAXLINESIZE) {
@@ -1229,7 +1235,7 @@
 			linelen += 3;
 		}
 		else {
-		  	if (istext && 
+		  	if (istext &&
 			    ((data[in] == '\n') ||
 			     ((in+1 < datalen) && (data[in] == '\r') &&
 			     (data[in+1] == '\n'))))
@@ -1242,7 +1248,7 @@
 					to_hex(ch, &odata[out]);
 					out += 2;
 				}
-					
+
 				if (crlf) odata[out++] = '\r';
 				odata[out++] = '\n';
 				if (data[in] == '\r')
@@ -1251,7 +1257,7 @@
 					in++;
 			}
 			else {
-				if ((in + 1 != datalen) && 
+				if ((in + 1 != datalen) &&
 				    (data[in+1] != '\n') &&
 				    (linelen + 1) >= MAXLINESIZE) {
 					odata[out++] = '=';
@@ -1296,9 +1302,9 @@
 	 doc_rledecode_hqx},
 	{"crc_hqx",    binascii_crc_hqx,    METH_VARARGS, doc_crc_hqx},
 	{"crc32",      binascii_crc32,      METH_VARARGS, doc_crc32},
-	{"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS, 
+	{"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
 	  doc_a2b_qp},
-	{"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS, 
+	{"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
           doc_b2a_qp},
 	{NULL, NULL}			     /* sentinel */
 };
commit	934c1a1c6b13ba27baf75d206a842cefda99f771	[log] [tgz]
author	Tim Peters <tim.peters@gmail.com>	Tue Jul 02 22:24:50 2002 +0000
committer	Tim Peters <tim.peters@gmail.com>	Tue Jul 02 22:24:50 2002 +0000
tree	76ea85507b772200006ee373d43eb00216abb9af
parent	aab713bdf7380c1b618655cfd1b284c83a9b5d58 [diff]