Patch #1230446: tarfile.py: fix ExFileObject so that read() and tell()
work correctly together with readline().

Will backport to 2.5.
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index ccbfdde..46031e1 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -628,64 +628,158 @@
 #------------------------
 # Extraction file object
 #------------------------
-class ExFileObject(object):
-    """File-like object for reading an archive member.
-       Is returned by TarFile.extractfile(). Support for
-       sparse files included.
+class _FileInFile(object):
+    """A thin wrapper around an existing file object that
+       provides a part of its data as an individual file
+       object.
     """
 
-    def __init__(self, tarfile, tarinfo):
-        self.fileobj = tarfile.fileobj
-        self.name    = tarinfo.name
-        self.mode    = "r"
-        self.closed  = False
-        self.offset  = tarinfo.offset_data
-        self.size    = tarinfo.size
-        self.pos     = 0L
-        self.linebuffer = ""
-        if tarinfo.issparse():
-            self.sparse = tarinfo.sparse
-            self.read = self._readsparse
-        else:
-            self.read = self._readnormal
+    def __init__(self, fileobj, offset, size, sparse=None):
+        self.fileobj = fileobj
+        self.offset = offset
+        self.size = size
+        self.sparse = sparse
+        self.position = 0
 
-    def __read(self, size):
-        """Overloadable read method.
+    def tell(self):
+        """Return the current file position.
         """
+        return self.position
+
+    def seek(self, position):
+        """Seek to a position in the file.
+        """
+        self.position = position
+
+    def read(self, size=None):
+        """Read data from the file.
+        """
+        if size is None:
+            size = self.size - self.position
+        else:
+            size = min(size, self.size - self.position)
+
+        if self.sparse is None:
+            return self.readnormal(size)
+        else:
+            return self.readsparse(size)
+
+    def readnormal(self, size):
+        """Read operation for regular files.
+        """
+        self.fileobj.seek(self.offset + self.position)
+        self.position += size
         return self.fileobj.read(size)
 
-    def readline(self, size=-1):
-        """Read a line with approx. size. If size is negative,
-           read a whole line. readline() and read() must not
-           be mixed up (!).
+    def readsparse(self, size):
+        """Read operation for sparse files.
         """
-        if size < 0:
-            size = sys.maxint
+        data = []
+        while size > 0:
+            buf = self.readsparsesection(size)
+            if not buf:
+                break
+            size -= len(buf)
+            data.append(buf)
+        return "".join(data)
 
-        nl = self.linebuffer.find("\n")
-        if nl >= 0:
-            nl = min(nl, size)
+    def readsparsesection(self, size):
+        """Read a single section of a sparse file.
+        """
+        section = self.sparse.find(self.position)
+
+        if section is None:
+            return ""
+
+        size = min(size, section.offset + section.size - self.position)
+
+        if isinstance(section, _data):
+            realpos = section.realpos + self.position - section.offset
+            self.fileobj.seek(self.offset + realpos)
+            self.position += size
+            return self.fileobj.read(size)
         else:
-            size -= len(self.linebuffer)
-            while (nl < 0 and size > 0):
-                buf = self.read(min(size, 100))
-                if not buf:
+            self.position += size
+            return NUL * size
+#class _FileInFile
+
+
+class ExFileObject(object):
+    """File-like object for reading an archive member.
+       Is returned by TarFile.extractfile().
+    """
+    blocksize = 1024
+
+    def __init__(self, tarfile, tarinfo):
+        self.fileobj = _FileInFile(tarfile.fileobj,
+                                   tarinfo.offset_data,
+                                   tarinfo.size,
+                                   getattr(tarinfo, "sparse", None))
+        self.name = tarinfo.name
+        self.mode = "r"
+        self.closed = False
+        self.size = tarinfo.size
+
+        self.position = 0
+        self.buffer = ""
+
+    def read(self, size=None):
+        """Read at most size bytes from the file. If size is not
+           present or None, read all data until EOF is reached.
+        """
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+
+        buf = ""
+        if self.buffer:
+            if size is None:
+                buf = self.buffer
+                self.buffer = ""
+            else:
+                buf = self.buffer[:size]
+                self.buffer = self.buffer[size:]
+
+        if size is None:
+            buf += self.fileobj.read()
+        else:
+            buf += self.fileobj.read(size - len(buf))
+
+        self.position += len(buf)
+        return buf
+
+    def readline(self, size=-1):
+        """Read one entire line from the file. If size is present
+           and non-negative, return a string with at most that
+           size, which may be an incomplete line.
+        """
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+
+        if "\n" in self.buffer:
+            pos = self.buffer.find("\n") + 1
+        else:
+            buffers = [self.buffer]
+            while True:
+                buf = self.fileobj.read(self.blocksize)
+                buffers.append(buf)
+                if not buf or "\n" in buf:
+                    self.buffer = "".join(buffers)
+                    pos = self.buffer.find("\n") + 1
+                    if pos == 0:
+                        # no newline found.
+                        pos = len(self.buffer)
                     break
-                self.linebuffer += buf
-                size -= len(buf)
-                nl = self.linebuffer.find("\n")
-            if nl == -1:
-                s = self.linebuffer
-                self.linebuffer = ""
-                return s
-        buf = self.linebuffer[:nl]
-        self.linebuffer = self.linebuffer[nl + 1:]
-        while buf[-1:] == "\r":
-            buf = buf[:-1]
-        return buf + "\n"
+
+        if size != -1:
+            pos = min(size, pos)
+
+        buf = self.buffer[:pos]
+        self.buffer = self.buffer[pos:]
+        self.position += len(buf)
+        return buf
 
     def readlines(self):
-        """Return a list with all (following) lines.
+        """Return a list with all remaining lines.
         """
         result = []
         while True:
@@ -694,74 +788,34 @@
             result.append(line)
         return result
 
-    def _readnormal(self, size=None):
-        """Read operation for regular files.
-        """
-        if self.closed:
-            raise ValueError("file is closed")
-        self.fileobj.seek(self.offset + self.pos)
-        bytesleft = self.size - self.pos
-        if size is None:
-            bytestoread = bytesleft
-        else:
-            bytestoread = min(size, bytesleft)
-        self.pos += bytestoread
-        return self.__read(bytestoread)
-
-    def _readsparse(self, size=None):
-        """Read operation for sparse files.
-        """
-        if self.closed:
-            raise ValueError("file is closed")
-
-        if size is None:
-            size = self.size - self.pos
-
-        data = []
-        while size > 0:
-            buf = self._readsparsesection(size)
-            if not buf:
-                break
-            size -= len(buf)
-            data.append(buf)
-        return "".join(data)
-
-    def _readsparsesection(self, size):
-        """Read a single section of a sparse file.
-        """
-        section = self.sparse.find(self.pos)
-
-        if section is None:
-            return ""
-
-        toread = min(size, section.offset + section.size - self.pos)
-        if isinstance(section, _data):
-            realpos = section.realpos + self.pos - section.offset
-            self.pos += toread
-            self.fileobj.seek(self.offset + realpos)
-            return self.__read(toread)
-        else:
-            self.pos += toread
-            return NUL * toread
-
     def tell(self):
         """Return the current file position.
         """
-        return self.pos
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
 
-    def seek(self, pos, whence=0):
+        return self.position
+
+    def seek(self, pos, whence=os.SEEK_SET):
         """Seek to a position in the file.
         """
-        self.linebuffer = ""
-        if whence == 0:
-            self.pos = min(max(pos, 0), self.size)
-        if whence == 1:
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+
+        if whence == os.SEEK_SET:
+            self.position = min(max(pos, 0), self.size)
+        elif whence == os.SEEK_CUR:
             if pos < 0:
-                self.pos = max(self.pos + pos, 0)
+                self.position = max(self.position + pos, 0)
             else:
-                self.pos = min(self.pos + pos, self.size)
-        if whence == 2:
-            self.pos = max(min(self.size + pos, self.size), 0)
+                self.position = min(self.position + pos, self.size)
+        elif whence == os.SEEK_END:
+            self.position = max(min(self.size + pos, self.size), 0)
+        else:
+            raise ValueError("Invalid argument")
+
+        self.buffer = ""
+        self.fileobj.seek(self.position)
 
     def close(self):
         """Close the file object.
@@ -769,20 +823,13 @@
         self.closed = True
 
     def __iter__(self):
-        """Get an iterator over the file object.
+        """Get an iterator over the file's lines.
         """
-        if self.closed:
-            raise ValueError("I/O operation on closed file")
-        return self
-
-    def next(self):
-        """Get the next item from the file iterator.
-        """
-        result = self.readline()
-        if not result:
-            raise StopIteration
-        return result
-
+        while True:
+            line = self.readline()
+            if not line:
+                break
+            yield line
 #class ExFileObject
 
 #------------------