Issue #5006: Better handling of Unicode byte-order marks (BOM) in the io library. This means, for example, that opening a UTF-16 text file in append mode doesn't add a BOM at the end of the file if the file isn't empty.

commit: 8243ddb6ca5c0f78764a28f044f0c0284774d317 [log] [tgz]
author: Victor Stinner <victor.stinner@haypocalc.com> Wed Jul 28 01:58:41 2010 +0000
committer: Victor Stinner <victor.stinner@haypocalc.com> Wed Jul 28 01:58:41 2010 +0000
tree: ca6b3fc1c3b76df1ebd7b0a4d575dd49470f4b56
parent: 082a65ab1fd8c54df7a128d0e813061db9bfbba8 [diff] [blame]
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index aebe67b..5cfa472 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py

@@ -799,6 +799,37 @@
         self.assertEquals(d.decode(b'oiabcd'), '')
         self.assertEquals(d.decode(b'', 1), 'abcd.')
 
+    def test_append_bom(self):
+        # The BOM is not written again when appending to a non-empty file
+        filename = test_support.TESTFN
+        for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
+            with io.open(filename, 'w', encoding=charset) as f:
+                f.write('aaa')
+                pos = f.tell()
+            with io.open(filename, 'rb') as f:
+                self.assertEquals(f.read(), 'aaa'.encode(charset))
+
+            with io.open(filename, 'a', encoding=charset) as f:
+                f.write('xxx')
+            with io.open(filename, 'rb') as f:
+                self.assertEquals(f.read(), 'aaaxxx'.encode(charset))
+
+    def test_seek_bom(self):
+        # Same test, but when seeking manually
+        filename = test_support.TESTFN
+        for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
+            with io.open(filename, 'w', encoding=charset) as f:
+                f.write('aaa')
+                pos = f.tell()
+            with io.open(filename, 'r+', encoding=charset) as f:
+                f.seek(pos)
+                f.write('zzz')
+                f.seek(0)
+                f.write('bbb')
+            with io.open(filename, 'rb') as f:
+                self.assertEquals(f.read(), 'bbbzzz'.encode(charset))
+
+
 class TextIOWrapperTest(unittest.TestCase):
 
     def setUp(self):
commit	8243ddb6ca5c0f78764a28f044f0c0284774d317	[log] [tgz]
author	Victor Stinner <victor.stinner@haypocalc.com>	Wed Jul 28 01:58:41 2010 +0000
committer	Victor Stinner <victor.stinner@haypocalc.com>	Wed Jul 28 01:58:41 2010 +0000
tree	ca6b3fc1c3b76df1ebd7b0a4d575dd49470f4b56
parent	082a65ab1fd8c54df7a128d0e813061db9bfbba8 [diff] [blame]