Issue #5006: Better handling of Unicode byte-order marks (BOM) in the io library. This means, for example, that opening a UTF-16 text file in append mode doesn't add a BOM at the end of the file if the file isn't empty.

commit: 8243ddb6ca5c0f78764a28f044f0c0284774d317 [log] [tgz]
author: Victor Stinner <victor.stinner@haypocalc.com> Wed Jul 28 01:58:41 2010 +0000
committer: Victor Stinner <victor.stinner@haypocalc.com> Wed Jul 28 01:58:41 2010 +0000
tree: ca6b3fc1c3b76df1ebd7b0a4d575dd49470f4b56
parent: 082a65ab1fd8c54df7a128d0e813061db9bfbba8 [diff] [blame]
diff --git a/Lib/io.py b/Lib/io.py
index 1458b47..9013c58 100644
--- a/Lib/io.py
+++ b/Lib/io.py

@@ -1440,6 +1440,15 @@
         self._snapshot = None  # info for reconstructing decoder state
         self._seekable = self._telling = self.buffer.seekable()
 
+        if self._seekable and self.writable():
+            position = self.buffer.tell()
+            if position != 0:
+                try:
+                    self._get_encoder().setstate(0)
+                except LookupError:
+                    # Sometimes the encoder doesn't exist
+                    pass
+
     # self._snapshot is either None, or a tuple (dec_flags, next_input)
     # where dec_flags is the second (integer) item of the decoder state
     # and next_input is the chunk of input bytes that comes next after the
@@ -1726,6 +1735,17 @@
                 raise IOError("can't restore logical file position")
             self._decoded_chars_used = chars_to_skip
 
+        # Finally, reset the encoder (merely useful for proper BOM handling)
+        try:
+            encoder = self._encoder or self._get_encoder()
+        except LookupError:
+            # Sometimes the encoder doesn't exist
+            pass
+        else:
+            if cookie != 0:
+                encoder.setstate(0)
+            else:
+                encoder.reset()
         return cookie
 
     def read(self, n=None):
commit	8243ddb6ca5c0f78764a28f044f0c0284774d317	[log] [tgz]
author	Victor Stinner <victor.stinner@haypocalc.com>	Wed Jul 28 01:58:41 2010 +0000
committer	Victor Stinner <victor.stinner@haypocalc.com>	Wed Jul 28 01:58:41 2010 +0000
tree	ca6b3fc1c3b76df1ebd7b0a4d575dd49470f4b56
parent	082a65ab1fd8c54df7a128d0e813061db9bfbba8 [diff] [blame]