| Benjamin Peterson | 90f5ba5 | 2010-03-11 22:53:45 +0000 | [diff] [blame] | 1 | #! /usr/bin/env python3 | 
| Andrew M. Kuchling | a6f68e1 | 2005-06-09 14:12:36 +0000 | [diff] [blame] | 2 | """Test script for the gzip module. | 
 | 3 | """ | 
 | 4 |  | 
 | 5 | import unittest | 
| Benjamin Peterson | ee8712c | 2008-05-20 21:35:26 +0000 | [diff] [blame] | 6 | from test import support | 
| Christian Heimes | 05e8be1 | 2008-02-23 18:30:17 +0000 | [diff] [blame] | 7 | import os | 
| Antoine Pitrou | b1f8835 | 2010-01-03 22:37:40 +0000 | [diff] [blame] | 8 | import io | 
| Antoine Pitrou | 42db3ef | 2009-01-04 21:37:59 +0000 | [diff] [blame] | 9 | import struct | 
| Ezio Melotti | 78ea202 | 2009-09-12 18:41:20 +0000 | [diff] [blame] | 10 | gzip = support.import_module('gzip') | 
| Andrew M. Kuchling | 605ebdd | 1999-03-25 21:50:27 +0000 | [diff] [blame] | 11 |  | 
# Sample payloads used throughout the tests.  Both are fragments of C source
# text; the exact bytes matter because test_metadata checks a hard-coded
# CRC32 of data1.
data1 = (
    b"  int length=DEFAULTALLOC, err = Z_OK;\n"
    b"  PyObject *RetVal;\n"
    b"  int flushmode = Z_FINISH;\n"
    b"  unsigned long start_total_out;\n"
    b"\n"
)

data2 = (
    b"/* zlibmodule.c -- gzip-compatible data compression */\n"
    b"/* See http://www.gzip.org/zlib/\n"
    b"/* See http://www.winimage.com/zLibDll for Windows */\n"
)
 | 23 |  | 
| Andrew M. Kuchling | 605ebdd | 1999-03-25 21:50:27 +0000 | [diff] [blame] | 24 |  | 
class UnseekableIO(io.BytesIO):
    """A BytesIO variant that reports itself as non-seekable.

    seekable() returns False, and both seek() and tell() raise
    io.UnsupportedOperation.
    """

    def seekable(self):
        """Always report that positioning is unavailable."""
        return False

    def seek(self, *_ignored):
        """Refuse to reposition the stream, whatever arguments are given."""
        raise io.UnsupportedOperation

    def tell(self):
        """Refuse to report the current stream position."""
        raise io.UnsupportedOperation
 | 34 |  | 
 | 35 |  | 
class BaseTest(unittest.TestCase):
    """Common fixture: a scratch file that is removed before and after each test."""

    # Path of the scratch file every test reads from and writes to.
    filename = support.TESTFN

    def _discard_scratch_file(self):
        """Remove the scratch file via support.unlink()."""
        support.unlink(self.filename)

    def setUp(self):
        self._discard_scratch_file()

    def tearDown(self):
        self._discard_scratch_file()
| Andrew M. Kuchling | 605ebdd | 1999-03-25 21:50:27 +0000 | [diff] [blame] | 44 |  | 
| Andrew M. Kuchling | 85ab738 | 2000-07-29 20:18:34 +0000 | [diff] [blame] | 45 |  | 
class TestGzip(BaseTest):
    """Tests for gzip.GzipFile and the compress()/decompress() shortcuts."""

    def test_write(self):
        """Write data1 * 50 to the scratch file.

        Several other tests call this method directly to create the file
        they then read back.
        """
        payload = data1 * 50
        with gzip.GzipFile(self.filename, 'wb') as f:
            f.write(payload)

            # Try flush and fileno.
            f.flush()
            f.fileno()
            if hasattr(os, 'fsync'):
                os.fsync(f.fileno())
            f.close()

        # Test multiple close() calls.
        f.close()

    def test_read(self):
        """A plain read() returns everything test_write() stored."""
        self.test_write()
        with gzip.GzipFile(self.filename, 'r') as f:
            contents = f.read()
        self.assertEqual(contents, data1 * 50)

    def test_read1(self):
        """read1() yields the whole payload and keeps tell() in step."""
        self.test_write()
        chunks = []
        total = 0
        with gzip.GzipFile(self.filename, 'r') as f:
            # read1() hands back b'' once the data is exhausted.
            for chunk in iter(f.read1, b''):
                chunks.append(chunk)
                total += len(chunk)
                # Check that position was updated correctly (see issue10791).
                self.assertEqual(f.tell(), total)
        self.assertEqual(b''.join(chunks), data1 * 50)

    def test_io_on_closed_object(self):
        """I/O on a closed GzipFile raises ValueError, like file objects do."""
        # Write to a file, open it for reading, then close it.
        self.test_write()
        reader = gzip.GzipFile(self.filename, 'r')
        reader.close()
        self.assertRaises(ValueError, reader.read, 1)
        self.assertRaises(ValueError, reader.seek, 0)
        self.assertRaises(ValueError, reader.tell)

        # Open the file for writing, then close it.
        writer = gzip.GzipFile(self.filename, 'w')
        writer.close()
        self.assertRaises(ValueError, writer.write, b'')
        self.assertRaises(ValueError, writer.flush)

    def test_append(self):
        """Data appended in 'ab' mode is read back after the original data."""
        self.test_write()
        with gzip.GzipFile(self.filename, 'ab') as f:
            f.write(data2 * 15)

        with gzip.GzipFile(self.filename, 'rb') as f:
            combined = f.read()
        self.assertEqual(combined, (data1 * 50) + (data2 * 15))

    def test_many_append(self):
        """Read a file built from 201 separately written one-byte members."""
        # Bug #1074261 was triggered when reading a file that contained
        # many, many members.  Create such a file and verify that reading it
        # works.
        with gzip.GzipFile(self.filename, 'wb', 9) as f:
            f.write(b'a')
        for _ in range(200):
            with gzip.GzipFile(self.filename, "ab", 9) as f:  # append
                f.write(b'a')

        # Read the file back in 8192-byte requests until read() returns b"".
        with gzip.GzipFile(self.filename, "rb") as zgfile:
            contents = b"".join(iter(lambda: zgfile.read(8192), b""))
        self.assertEqual(contents, b'a' * 201)

    def test_buffered_reader(self):
        """Iterate lines through an io.BufferedReader wrapped around GzipFile."""
        # Issue #7471: a GzipFile can be wrapped in a BufferedReader for
        # performance.
        self.test_write()

        with gzip.GzipFile(self.filename, 'rb') as f, \
                io.BufferedReader(f) as r:
            lines = list(r)

        self.assertEqual(lines, 50 * data1.splitlines(keepends=True))

    def test_readline(self):
        """readline(n) never returns more than n bytes, for n cycling 0..49."""
        self.test_write()

        with gzip.GzipFile(self.filename, 'rb') as f:
            limit = 0
            while True:
                line = f.readline(limit)
                # An empty result only signals the end when limit is non-zero.
                if limit != 0 and not line:
                    break
                self.assertLessEqual(len(line), limit)
                limit = (limit + 1) % 50

    def test_readlines(self):
        """Call readlines() with and without a size hint."""
        self.test_write()

        with gzip.GzipFile(self.filename, 'rb') as f:
            f.readlines()

        with gzip.GzipFile(self.filename, 'rb') as f:
            # Keep asking with a hint of 150 until an empty list comes back.
            while f.readlines(150) != []:
                pass

    def test_seek_read(self):
        """Seek back over each line, re-read its start, then seek forward."""
        self.test_write()

        with gzip.GzipFile(self.filename) as f:
            while True:
                line_start = f.tell()
                line = f.readline()
                if not line:
                    break
                line_end = f.tell()
                f.seek(line_start)  # negative seek
                amount = min(len(line), 10)
                reread = f.read(amount)
                self.assertEqual(line[:amount], reread)
                f.seek(line_end)  # positive seek

    def test_seek_whence(self):
        """seek(10, whence=1) after reading 10 bytes lands on offset 20."""
        self.test_write()

        with gzip.GzipFile(self.filename) as f:
            f.read(10)
            f.seek(10, whence=1)
            chunk = f.read(10)
        self.assertEqual(chunk, data1[20:30])

    def test_seek_write(self):
        """Alternate seek() and write() on a file opened for writing."""
        with gzip.GzipFile(self.filename, 'w') as f:
            for offset in range(0, 256, 16):
                f.seek(offset)
                f.write(b'GZ\n')

    def test_mode(self):
        """Opening with mode 'r' gives an underlying file whose mode is 'rb'."""
        self.test_write()
        with gzip.GzipFile(self.filename, 'r') as f:
            self.assertEqual(f.myfileobj.mode, 'rb')

    def test_1647484(self):
        """GzipFile has a name attribute equal to the filename it was given."""
        for mode in ('wb', 'rb'):
            with gzip.GzipFile(self.filename, mode) as f:
                self.assertTrue(hasattr(f, "name"))
                self.assertEqual(f.name, self.filename)

    def test_paddedfile_getattr(self):
        """The fileobj of a reading GzipFile also exposes the filename."""
        self.test_write()
        with gzip.GzipFile(self.filename, 'rb') as f:
            self.assertTrue(hasattr(f.fileobj, "name"))
            self.assertEqual(f.fileobj.name, self.filename)

    def test_mtime(self):
        """An mtime given when writing is available as .mtime after reading."""
        stamp = 123456789
        with gzip.GzipFile(self.filename, 'w', mtime=stamp) as writer:
            writer.write(data1)
        with gzip.GzipFile(self.filename) as reader:
            recovered = reader.read()
            self.assertEqual(recovered, data1)
            self.assertTrue(hasattr(reader, 'mtime'))
            self.assertEqual(reader.mtime, stamp)

    def test_metadata(self):
        """Check the raw header and trailer bytes of a written file."""
        stamp = 123456789

        with gzip.GzipFile(self.filename, 'w', mtime=stamp) as writer:
            writer.write(data1)

        with open(self.filename, 'rb') as raw:
            # see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html
            # Fixed-size header fields, in file order: (width, expected bytes).
            fixed_fields = (
                (2, b'\x1f\x8b'),                # gzip ID
                (1, b'\x08'),                    # deflate
                (1, b'\x08'),                    # only the FNAME flag is set
                (4, struct.pack('<i', stamp)),   # mtime, little-endian
                (1, b'\x02'),                    # maximum compression
                (1, b'\xff'),                    # OS "unknown" (OS-independent)
            )
            for width, expected in fixed_fields:
                self.assertEqual(raw.read(width), expected)

            # Since the FNAME flag is set, the zero-terminated filename follows.
            # RFC 1952 specifies that this is the name of the input file, if any.
            # However, the gzip module defaults to storing the name of the output
            # file in this field.
            name_field = self.filename.encode('Latin-1') + b'\x00'
            self.assertEqual(raw.read(len(name_field)), name_field)

            # Since no other flags were set, the header ends here.
            # Rather than process the compressed data, let's seek to the trailer.
            raw.seek(os.stat(self.filename).st_size - 8)

            # CRC32 of uncompressed data [data1]
            self.assertEqual(raw.read(4), b'\xaf\xd7d\x83')
            # Uncompressed size
            self.assertEqual(raw.read(4), struct.pack('<i', len(data1)))

    def test_with_open(self):
        """GzipFile supports the context management protocol."""
        with gzip.GzipFile(self.filename, "wb") as f:
            f.write(b"xxx")

        # Entering an already closed file must raise ValueError.
        closed = gzip.GzipFile(self.filename, "rb")
        closed.close()
        with self.assertRaises(ValueError):
            with closed:
                pass

        # An exception raised inside the with body must propagate.
        with self.assertRaises(ZeroDivisionError):
            with gzip.GzipFile(self.filename, "wb") as f:
                1/0

    def test_zero_padded_file(self):
        """Zero bytes appended after the gzip data do not affect read()."""
        payload = data1 * 50
        with gzip.GzipFile(self.filename, "wb") as f:
            f.write(payload)

        # Pad the file with zeroes
        with open(self.filename, "ab") as f:
            f.write(b"\x00" * 50)

        with gzip.GzipFile(self.filename, "rb") as f:
            recovered = f.read()
            self.assertEqual(recovered, payload, "Incorrect data in file")

    def test_non_seekable_file(self):
        """Round-trip data through file objects that refuse seek() and tell()."""
        uncompressed = data1 * 50
        sink = UnseekableIO()
        with gzip.GzipFile(fileobj=sink, mode="wb") as f:
            f.write(uncompressed)
        source = UnseekableIO(sink.getvalue())
        with gzip.GzipFile(fileobj=source, mode="rb") as f:
            self.assertEqual(f.read(), uncompressed)

    def test_peek(self):
        """peek() returns exactly what the following read() returns."""
        uncompressed = data1 * 200
        with gzip.GzipFile(self.filename, "wb") as f:
            f.write(uncompressed)

        def sizes():
            # Endlessly repeat the peek sizes 5, 15, 25, 35, 45.
            while True:
                yield from range(5, 50, 10)

        with gzip.GzipFile(self.filename, "rb") as f:
            f.max_read_chunk = 33
            consumed = 0
            for size in sizes():
                peeked = f.peek(size)
                if peeked == b'':
                    break
                self.assertEqual(f.read(len(peeked)), peeked)
                consumed += len(peeked)
            self.assertEqual(f.read(100), b'')
            self.assertEqual(consumed, len(uncompressed))

    def test_textio_readlines(self):
        """readlines() works on a TextIOWrapper wrapped around a GzipFile."""
        # Issue #10791: TextIOWrapper.readlines() fails when wrapping GzipFile.
        expected = (data1 * 50).decode("ascii").splitlines(keepends=True)
        self.test_write()
        with gzip.GzipFile(self.filename, 'r') as f, \
                io.TextIOWrapper(f, encoding="ascii") as t:
            self.assertEqual(t.readlines(), expected)

    def test_fileobj_from_fdopen(self):
        """A GzipFile can be opened for writing on an os.fdopen() file."""
        # Issue #13781: Opening a GzipFile for writing fails when using a
        # fileobj created with os.fdopen().
        fd = os.open(self.filename, os.O_WRONLY | os.O_CREAT)
        with os.fdopen(fd, "wb") as raw, \
                gzip.GzipFile(fileobj=raw, mode="w"):
            pass

    def test_bytes_filename(self):
        """GzipFile accepts the filename as bytes as well as str."""
        str_filename = self.filename
        try:
            bytes_filename = str_filename.encode("ascii")
        except UnicodeEncodeError:
            self.skipTest("Temporary file name needs to be ASCII")
        payload = data1 * 50
        with gzip.GzipFile(bytes_filename, "wb") as f:
            f.write(payload)
        # Read back through the bytes name first, then through the str name
        # as a sanity check that we are actually operating on the right file.
        for name in (bytes_filename, str_filename):
            with gzip.GzipFile(name, "rb") as f:
                self.assertEqual(f.read(), payload)

    # Testing compress/decompress shortcut functions

    def test_compress(self):
        """compress() output is bytes that GzipFile reads back unchanged."""
        level_args = [(), (1,), (6,), (9,)]
        for data in [data1, data2]:
            for args in level_args:
                datac = gzip.compress(data, *args)
                self.assertIs(type(datac), bytes)
                with gzip.GzipFile(fileobj=io.BytesIO(datac), mode="rb") as f:
                    self.assertEqual(f.read(), data)

    def test_decompress(self):
        """decompress() inverts both GzipFile output and compress()."""
        for data in (data1, data2):
            sink = io.BytesIO()
            with gzip.GzipFile(fileobj=sink, mode="wb") as f:
                f.write(data)
            self.assertEqual(gzip.decompress(sink.getvalue()), data)
            # Roundtrip with compress
            self.assertEqual(gzip.decompress(gzip.compress(data)), data)
 | 391 |  | 
| Nadeem Vawda | 7e12620 | 2012-05-06 15:04:01 +0200 | [diff] [blame] | 392 |  | 
class TestOpen(BaseTest):
    """Tests for the gzip.open() convenience function."""

    def _decompressed_file(self):
        """Return the scratch file's raw contents run through gzip.decompress()."""
        with open(self.filename, "rb") as raw:
            return gzip.decompress(raw.read())

    def _check_binary_roundtrip(self, suffix):
        """Write, read and append bytes using modes 'w', 'r', 'a' + suffix."""
        uncompressed = data1 * 50
        with gzip.open(self.filename, "w" + suffix) as f:
            f.write(uncompressed)
        self.assertEqual(self._decompressed_file(), uncompressed)
        with gzip.open(self.filename, "r" + suffix) as f:
            self.assertEqual(f.read(), uncompressed)
        with gzip.open(self.filename, "a" + suffix) as f:
            f.write(uncompressed)
        self.assertEqual(self._decompressed_file(), uncompressed * 2)

    def test_binary_modes(self):
        """Round-trip bytes with the explicit modes 'wb', 'rb' and 'ab'."""
        self._check_binary_roundtrip("b")

    def test_implicit_binary_modes(self):
        """Round-trip bytes with modes that have no "b" or "t" in them."""
        self._check_binary_roundtrip("")

    def test_text_modes(self):
        """Round-trip str data with the modes 'wt', 'rt' and 'at'."""
        text = data1.decode("ascii") * 50
        # What is expected on disk: "\n" replaced by os.linesep.
        text_on_disk = text.replace("\n", os.linesep)
        with gzip.open(self.filename, "wt") as f:
            f.write(text)
        self.assertEqual(self._decompressed_file().decode("ascii"),
                         text_on_disk)
        with gzip.open(self.filename, "rt") as f:
            self.assertEqual(f.read(), text)
        with gzip.open(self.filename, "at") as f:
            f.write(text)
        self.assertEqual(self._decompressed_file().decode("ascii"),
                         text_on_disk * 2)

    def test_fileobj(self):
        """gzip.open() accepts a file object in place of a filename."""
        uncompressed_bytes = data1 * 50
        uncompressed_str = uncompressed_bytes.decode("ascii")
        compressed = gzip.compress(uncompressed_bytes)
        for mode in ("r", "rb"):
            with gzip.open(io.BytesIO(compressed), mode) as f:
                self.assertEqual(f.read(), uncompressed_bytes)
        with gzip.open(io.BytesIO(compressed), "rt") as f:
            self.assertEqual(f.read(), uncompressed_str)

    def test_bad_params(self):
        """Invalid parameter combinations are rejected."""
        # A float is not an acceptable filename.
        self.assertRaises(TypeError, gzip.open, 123.456)
        # "b" and "t" together in one mode string.
        self.assertRaises(ValueError, gzip.open, self.filename, "wbt")
        # Text-only keyword arguments combined with a binary mode.
        text_only_options = (
            {"encoding": "utf-8"},
            {"errors": "ignore"},
            {"newline": "\n"},
        )
        for option in text_only_options:
            self.assertRaises(ValueError, gzip.open, self.filename, "rb",
                              **option)

    def test_encoding(self):
        """Round-trip text with a non-default encoding (utf-16)."""
        text = data1.decode("ascii") * 50
        text_on_disk = text.replace("\n", os.linesep)
        with gzip.open(self.filename, "wt", encoding="utf-16") as f:
            f.write(text)
        self.assertEqual(self._decompressed_file().decode("utf-16"),
                         text_on_disk)
        with gzip.open(self.filename, "rt", encoding="utf-16") as f:
            self.assertEqual(f.read(), text)

    def test_encoding_error_handler(self):
        """errors="ignore" drops a byte that is not valid ASCII."""
        with gzip.open(self.filename, "wb") as f:
            f.write(b"foo\xffbar")
        reader = gzip.open(self.filename, "rt", encoding="ascii",
                           errors="ignore")
        with reader as f:
            self.assertEqual(f.read(), "foobar")

    def test_newline(self):
        """Test with explicit newline (universal newline mode disabled)."""
        text = data1.decode("ascii") * 50
        with gzip.open(self.filename, "wt", newline="\n") as f:
            f.write(text)
        # With newline="\r" the whole text comes back as a single line.
        with gzip.open(self.filename, "rt", newline="\r") as f:
            self.assertEqual(f.readlines(), [text])
 | 492 |  | 
def test_main(verbose=None):
    """Run both test classes through support.run_unittest().

    The verbose argument is accepted but not used by this function.
    """
    test_classes = (TestGzip, TestOpen)
    support.run_unittest(*test_classes)


if __name__ == "__main__":
    # Executed as a script: run the whole suite.
    test_main(verbose=True)