Merged revisions 70523 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r70523 | lars.gustaebel | 2009-03-22 21:09:33 +0100 (Sun, 22 Mar 2009) | 5 lines

  Issue #5068: Fixed the tarfile._BZ2Proxy.read() method that would loop
  forever on incomplete input. That caused tarfile.open() to hang when used
  with mode 'r' or 'r:bz2' and a fileobj argument that contained no data or
  partial bzip2 compressed data.
........
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 4264da3..3a3d2c9 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -639,12 +639,11 @@
     def read(self, size):
         x = len(self.buf)
         while x < size:
-            try:
-                raw = self.fileobj.read(self.blocksize)
-                data = self.bz2obj.decompress(raw)
-                self.buf += data
-            except EOFError:
+            raw = self.fileobj.read(self.blocksize)
+            if not raw:
                 break
+            data = self.bz2obj.decompress(raw)
+            self.buf += data
             x += len(data)
 
         buf = self.buf[:size]
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index affbeeb..f4c0035 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1121,6 +1121,30 @@
 class Bz2StreamWriteTest(StreamWriteTest):
     mode = "w|bz2"
 
+class Bz2PartialReadTest(unittest.TestCase):
+    # Issue5068: The _BZ2Proxy.read() method loops forever
+    # on an empty or partial bzipped file.
+
+    def _test_partial_input(self, mode):
+        class MyBytesIO(io.BytesIO):
+            hit_eof = False
+            def read(self, n):
+                if self.hit_eof:
+                    raise AssertionError("infinite loop detected in tarfile.open()")
+                self.hit_eof = self.tell() == len(self.getvalue())
+                return super(MyBytesIO, self).read(n)
+
+        data = bz2.compress(tarfile.TarInfo("foo").tobuf())
+        for x in range(len(data) + 1):
+            tarfile.open(fileobj=MyBytesIO(data[:x]), mode=mode)
+
+    def test_partial_input(self):
+        self._test_partial_input("r")
+
+    def test_partial_input_bz2(self):
+        self._test_partial_input("r:bz2")
+
+
 def test_main():
     if not os.path.exists(TEMPDIR):
         os.mkdir(TEMPDIR)
@@ -1178,6 +1202,7 @@
             Bz2StreamReadTest,
             Bz2WriteTest,
             Bz2StreamWriteTest,
+            Bz2PartialReadTest,
         ]
 
     try:
diff --git a/Misc/NEWS b/Misc/NEWS
index 0d3137f..460a36e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -32,6 +32,11 @@
 Library
 -------
 
+- Issue #5068: Fixed the tarfile._BZ2Proxy.read() method that would loop
+  forever on incomplete input. That caused tarfile.open() to hang when used
+  with mode 'r' or 'r:bz2' and a fileobj argument that contained no data or
+  partial bzip2 compressed data.
+
 - Issue #2110: Add support for thousands separator and 'n' type
   specifier to Decimal.__format__