Make test_zipfile pass.
The zipfile module now does all I/O in binary mode using bytes.
(Maybe we should support wrapping a TextIOWrapper around it
when text mode reading is requested?)
Even the password is a bytes array now.
Had to fix py_compile.py to use bytes while I was at it.
The _struct module needed a patch to support bytes, str8 and str
for the 's' and 'p' formats.
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 1e180fc..6cff722 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1,5 +1,7 @@
 """
 Read and write ZIP files.
+
+XXX references to utf-8 need further investigation.
 """
 import struct, os, time, sys
 import binascii, io
@@ -33,15 +35,15 @@
 
 # Here are some struct module formats for reading headers
 structEndArchive = "<4s4H2lH"     # 9 items, end of archive, 22 bytes
-stringEndArchive = "PK\005\006"   # magic number for end of archive record
+stringEndArchive = b"PK\005\006"   # magic number for end of archive record
 structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
-stringCentralDir = "PK\001\002"   # magic number for central directory
+stringCentralDir = b"PK\001\002"   # magic number for central directory
 structFileHeader = "<4s2B4HlLL2H"  # 12 items, file header record, 30 bytes
-stringFileHeader = "PK\003\004"   # magic number for file header
+stringFileHeader = b"PK\003\004"   # magic number for file header
 structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
-stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
+stringEndArchive64Locator = b"PK\x06\x07" # magic token for locator header
 structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
-stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
+stringEndArchive64 = b"PK\x06\x06" # magic token for Zip64 header
 
 
 # indexes of entries in the central directory structure
@@ -82,7 +84,7 @@
 def is_zipfile(filename):
     """Quickly see if file is a ZIP file by checking the magic number."""
     try:
-        fpin = open(filename, "rb")
+        fpin = io.open(filename, "rb")
         endrec = _EndRecData(fpin)
         fpin.close()
         if endrec:
@@ -206,8 +208,8 @@
         self.date_time = date_time      # year, month, day, hour, min, sec
         # Standard values:
         self.compress_type = ZIP_STORED # Type of compression for the file
-        self.comment = ""               # Comment for each file
-        self.extra = ""                 # ZIP extra data
+        self.comment = b""              # Comment for each file
+        self.extra = b""                # ZIP extra data
         if sys.platform == 'win32':
             self.create_system = 0          # System which created ZIP archive
         else:
@@ -257,7 +259,7 @@
                  self.compress_type, dostime, dosdate, CRC,
                  compress_size, file_size,
                  len(self.filename), len(extra))
-        return header + self.filename + extra
+        return header + self.filename.encode("utf-8") + extra
 
     def _decodeExtra(self):
         # Try to decode the extra field.
@@ -331,7 +333,7 @@
 
     def _crc32(self, ch, crc):
         """Compute the CRC32 primitive on one byte."""
-        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
+        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
 
     def __init__(self, pwd):
         self.key0 = 305419896
@@ -344,20 +346,13 @@
         self.key0 = self._crc32(c, self.key0)
         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
         self.key1 = (self.key1 * 134775813 + 1) & 4294967295
-        self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
+        self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
 
     def __call__(self, c):
         """Decrypt a single character."""
-        # XXX When this is called with a byte instead of a char, ord()
-        # isn't needed.  Don't die in that case.  In the future we should
-        # just leave this out, once we're always using bytes.
-        try:
-            c = ord(c)
-        except TypeError:
-            pass
+        assert isinstance(c, int)
         k = self.key2 | 2
         c = c ^ (((k * (k^1)) >> 8) & 255)
-        c = chr(c)
         self._UpdateKeys(c)
         return c
 
@@ -370,13 +365,13 @@
         self.fileobj = fileobj
         self.decrypter = decrypt
         self.bytes_read = 0
-        self.rawbuffer = ''
-        self.readbuffer = ''
-        self.linebuffer = ''
+        self.rawbuffer = b''
+        self.readbuffer = b''
+        self.linebuffer = b''
         self.eof = False
         self.univ_newlines = False
-        self.nlSeps = ("\n", )
-        self.lastdiscard = ''
+        self.nlSeps = (b"\n", )
+        self.lastdiscard = b''
 
         self.compress_type = zipinfo.compress_type
         self.compress_size = zipinfo.compress_size
@@ -394,9 +389,9 @@
         self.univ_newlines = univ_newlines
 
         # pick line separator char(s) based on universal newlines flag
-        self.nlSeps = ("\n", )
+        self.nlSeps = (b"\n", )
         if self.univ_newlines:
-            self.nlSeps = ("\r\n", "\r", "\n")
+            self.nlSeps = (b"\r\n", b"\r", b"\n")
 
     def __iter__(self):
         return self
@@ -417,7 +412,7 @@
             # ugly check for cases where half of an \r\n pair was
             # read on the last pass, and the \r was discarded.  In this
             # case we just throw away the \n at the start of the buffer.
-            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
+            if (self.lastdiscard, self.linebuffer[0]) == (b'\r', b'\n'):
                 self.linebuffer = self.linebuffer[1:]
 
             for sep in self.nlSeps:
@@ -435,7 +430,7 @@
         if size < 0:
             size = sys.maxint
         elif size == 0:
-            return ''
+            return b''
 
         # check for a newline already in buffer
         nl, nllen = self._checkfornewline()
@@ -461,7 +456,7 @@
             # so return current buffer
             if nl < 0:
                 s = self.linebuffer
-                self.linebuffer = ''
+                self.linebuffer = b''
                 return s
 
         buf = self.linebuffer[:nl]
@@ -470,7 +465,7 @@
 
         # line is always returned with \n as newline char (except possibly
         # for a final incomplete line in the file, which is handled above).
-        return buf + "\n"
+        return buf + b"\n"
 
     def readlines(self, sizehint = -1):
         """Return a list with all (following) lines. The sizehint parameter
@@ -516,18 +511,23 @@
 
         # try to read from file (if necessary)
         if bytesToRead > 0:
-            bytes = self.fileobj.read(bytesToRead)
-            self.bytes_read += len(bytes)
-            self.rawbuffer += bytes
+            data = self.fileobj.read(bytesToRead)
+            self.bytes_read += len(data)
+            try:
+                self.rawbuffer += data
+            except:
+                print(repr(self.fileobj), repr(self.rawbuffer),
+                      repr(data))
+                raise
 
             # handle contents of raw buffer
             if self.rawbuffer:
                 newdata = self.rawbuffer
-                self.rawbuffer = ''
+                self.rawbuffer = b''
 
                 # decrypt new data if we were given an object to handle that
                 if newdata and self.decrypter is not None:
-                    newdata = ''.join(map(self.decrypter, newdata))
+                    newdata = bytes(map(self.decrypter, newdata))
 
                 # decompress newly read data if necessary
                 if newdata and self.compress_type == ZIP_DEFLATED:
@@ -546,13 +546,13 @@
 
         # return what the user asked for
         if size is None or len(self.readbuffer) <= size:
-            bytes = self.readbuffer
-            self.readbuffer = ''
+            data = self.readbuffer
+            self.readbuffer = b''
         else:
-            bytes = self.readbuffer[:size]
+            data = self.readbuffer[:size]
             self.readbuffer = self.readbuffer[size:]
 
-        return bytes
+        return data
 
 
 class ZipFile:
@@ -593,15 +593,16 @@
 
         # Check if we were passed a file-like object
         if isinstance(file, basestring):
+            # No, it's a filename
             self._filePassed = 0
             self.filename = file
             modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
             try:
-                self.fp = open(file, modeDict[mode])
+                self.fp = io.open(file, modeDict[mode])
             except IOError:
                 if mode == 'a':
                     mode = key = 'w'
-                    self.fp = open(file, modeDict[mode])
+                    self.fp = io.open(file, modeDict[mode])
                 else:
                     raise
         else:
@@ -661,7 +662,7 @@
         self.start_dir = offset_cd + concat
         fp.seek(self.start_dir, 0)
         data = fp.read(size_cd)
-        fp = io.StringIO(data)
+        fp = io.BytesIO(data)
         total = 0
         while total < size_cd:
             centdir = fp.read(46)
@@ -673,7 +674,7 @@
                 print(centdir)
             filename = fp.read(centdir[_CD_FILENAME_LENGTH])
             # Create ZipInfo instance to store file information
-            x = ZipInfo(filename)
+            x = ZipInfo(str(filename))
             x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
             x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
             total = (total + centdir[_CD_FILENAME_LENGTH]
@@ -708,12 +709,16 @@
         archive."""
         return self.filelist
 
-    def printdir(self):
+    def printdir(self, file=None):
         """Print a table of contents for the zip file."""
-        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
+        if file is None:
+            file = sys.stdout
+        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
+              file=file)
         for zinfo in self.filelist:
             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
-            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))
+            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
+                  file=file)
 
     def testzip(self):
         """Read all the files and check the CRC."""
@@ -730,6 +735,7 @@
 
     def setpassword(self, pwd):
         """Set default password for encrypted files."""
+        assert isinstance(pwd, bytes)
         self.pwd = pwd
 
     def read(self, name, pwd=None):
@@ -749,7 +755,7 @@
         if self._filePassed:
             zef_file = self.fp
         else:
-            zef_file = open(self.filename, 'rb')
+            zef_file = io.open(self.filename, 'rb')
 
         # Get info object for name
         zinfo = self.getinfo(name)
@@ -768,9 +774,9 @@
         if fheader[_FH_EXTRA_FIELD_LENGTH]:
             zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
 
-        if fname != zinfo.orig_filename:
+        if fname != zinfo.orig_filename.encode("utf-8"):
             raise BadZipfile, \
-                      'File name in directory "%s" and header "%s" differ.' % (
+                      'File name in directory %r and header %r differ.' % (
                           zinfo.orig_filename, fname)
 
         # check for encrypted flag & handle password
@@ -790,7 +796,7 @@
             #  and is used to check the correctness of the password.
             bytes = zef_file.read(12)
             h = map(zd, bytes[0:12])
-            if ord(h[11]) != ((zinfo.CRC>>24)&255):
+            if h[11] != ((zinfo.CRC>>24) & 255):
                 raise RuntimeError, "Bad password for file %s" % name
 
         # build and return a ZipExtFile
@@ -852,7 +858,7 @@
 
         self._writecheck(zinfo)
         self._didModify = True
-        fp = open(filename, "rb")
+        fp = io.open(filename, "rb")
         # Must overwrite CRC and sizes with correct data later
         zinfo.CRC = CRC = 0
         zinfo.compress_size = compress_size = 0
@@ -982,7 +988,7 @@
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset)
                 self.fp.write(centdir)
-                self.fp.write(zinfo.filename)
+                self.fp.write(zinfo.filename.encode("utf-8"))
                 self.fp.write(extra_data)
                 self.fp.write(zinfo.comment)
 
@@ -1163,7 +1169,7 @@
             tgtdir = os.path.dirname(tgt)
             if not os.path.exists(tgtdir):
                 os.makedirs(tgtdir)
-            fp = open(tgt, 'wb')
+            fp = io.open(tgt, 'wb')
             fp.write(zf.read(path))
             fp.close()
         zf.close()