blob: 971306182e13dbde10c6b248bf2bf0a001ee3019 [file] [log] [blame]
Andrew M. Kuchlinga6f68e12005-06-09 14:12:36 +00001"""Test script for the gzip module.
2"""
3
4import unittest
5from test import test_support
Christian Heimesc5f05e42008-02-23 17:40:11 +00006import os
Antoine Pitrou673ddf92010-01-03 22:29:56 +00007import io
Antoine Pitrouf0d2c3f2009-01-04 21:29:23 +00008import struct
Ezio Melotti1036a7f2009-09-12 14:43:43 +00009gzip = test_support.import_module('gzip')
Andrew M. Kuchling605ebdd1999-03-25 21:50:27 +000010
# Sample payloads written into the gzip files by the tests below.
# test_metadata compares against a hard-coded CRC32 and len(data1), so the
# exact bytes of data1 matter.
# NOTE(review): leading whitespace inside these literals may have been
# collapsed in this view -- confirm against the CRC32 asserted in
# test_metadata before relying on it.
data1 = """ int length=DEFAULTALLOC, err = Z_OK;
 PyObject *RetVal;
 int flushmode = Z_FINISH;
 unsigned long start_total_out;

"""

# Second payload; test_append writes it after data1 to check append mode.
data2 = """/* zlibmodule.c -- gzip-compatible data compression */
/* See http://www.gzip.org/zlib/
/* See http://www.winimage.com/zLibDll for Windows */
"""
22
Andrew M. Kuchling605ebdd1999-03-25 21:50:27 +000023
class TestGzip(unittest.TestCase):
    """Read, write, seek, header-metadata and error-path tests for gzip.

    Most read-oriented tests call ``self.test_write()`` first so that a
    compressed copy of ``data1 * 50`` exists at ``self.filename``.
    """

    filename = test_support.TESTFN

    def setUp(self):
        # Make sure no stale fixture from an earlier test is lying around.
        test_support.unlink(self.filename)

    def tearDown(self):
        test_support.unlink(self.filename)

    @test_support.requires_unicode
    def test_unicode_filename(self):
        # A unicode path must be usable for both writing and reading.
        unicode_filename = test_support.TESTFN_UNICODE
        # setUp/tearDown only unlink self.filename; remove this path as well
        # so the test does not leave a file behind when the two differ.
        self.addCleanup(test_support.unlink, unicode_filename)
        with gzip.GzipFile(unicode_filename, "wb") as f:
            f.write(data1 * 50)
        with gzip.GzipFile(unicode_filename, "rb") as f:
            self.assertEqual(f.read(), data1 * 50)
        # Sanity check that we are actually operating on the right file.
        with open(unicode_filename, 'rb') as fobj, \
                gzip.GzipFile(fileobj=fobj, mode="rb") as f:
            self.assertEqual(f.read(), data1 * 50)

    def test_write(self):
        # Also used as a fixture builder by most of the other tests.
        with gzip.GzipFile(self.filename, 'wb') as f:
            f.write(data1 * 50)

            # Try flush and fileno.
            f.flush()
            f.fileno()
            if hasattr(os, 'fsync'):
                os.fsync(f.fileno())
            f.close()

        # Test multiple close() calls.
        f.close()

    def test_read(self):
        self.test_write()
        # Try reading.
        with gzip.GzipFile(self.filename, 'r') as f:
            d = f.read()
        self.assertEqual(d, data1 * 50)

    def test_read_universal_newlines(self):
        # Issue #5148: Reading breaks when mode contains 'U'.
        self.test_write()
        with gzip.GzipFile(self.filename, 'rU') as f:
            d = f.read()
        self.assertEqual(d, data1 * 50)

    def test_io_on_closed_object(self):
        # Test that I/O operations on closed GzipFile objects raise a
        # ValueError, just like the corresponding functions on file objects.

        # Write to a file, open it for reading, then close it.
        self.test_write()
        f = gzip.GzipFile(self.filename, 'r')
        f.close()
        with self.assertRaises(ValueError):
            f.read(1)
        with self.assertRaises(ValueError):
            f.seek(0)
        with self.assertRaises(ValueError):
            f.tell()
        # Open the file for writing, then close it.
        f = gzip.GzipFile(self.filename, 'w')
        f.close()
        with self.assertRaises(ValueError):
            f.write('')
        with self.assertRaises(ValueError):
            f.flush()

    def test_append(self):
        self.test_write()
        # Append to the previous file
        with gzip.GzipFile(self.filename, 'ab') as f:
            f.write(data2 * 15)

        with gzip.GzipFile(self.filename, 'rb') as f:
            d = f.read()
        self.assertEqual(d, (data1 * 50) + (data2 * 15))

    def test_many_append(self):
        # Bug #1074261 was triggered when reading a file that contained
        # many, many members.  Create such a file and verify that reading it
        # works.
        with gzip.open(self.filename, 'wb', 9) as f:
            f.write('a')
        for _ in range(200):
            with gzip.open(self.filename, "ab", 9) as f:  # append
                f.write('a')

        # Try reading the file in fixed-size chunks until read() returns
        # an empty string, then glue the chunks together once.
        with gzip.open(self.filename, "rb") as zgfile:
            contents = "".join(iter(lambda: zgfile.read(8192), ""))
        self.assertEqual(contents, 'a' * 201)

    def test_buffered_reader(self):
        # Issue #7471: a GzipFile can be wrapped in a BufferedReader for
        # performance.
        self.test_write()

        with gzip.GzipFile(self.filename, 'rb') as f:
            with io.BufferedReader(f) as r:
                lines = [line for line in r]

        self.assertEqual(lines, 50 * data1.splitlines(True))

    def test_readline(self):
        self.test_write()
        # Try .readline() with varying line lengths

        with gzip.GzipFile(self.filename, 'rb') as f:
            line_length = 0
            while True:
                line = f.readline(line_length)
                # An empty result only signals end-of-file when a non-zero
                # size limit was requested.
                if not line and line_length != 0:
                    break
                self.assertLessEqual(len(line), line_length)
                line_length = (line_length + 1) % 50

    def test_readlines(self):
        self.test_write()
        # Try .readlines()

        with gzip.GzipFile(self.filename, 'rb') as f:
            f.readlines()

        # Repeated calls with a size hint must eventually return [].
        with gzip.GzipFile(self.filename, 'rb') as f:
            while True:
                lines = f.readlines(150)
                if lines == []:
                    break

    def test_seek_read(self):
        self.test_write()
        # Try seek, read test

        with gzip.GzipFile(self.filename) as f:
            while True:
                oldpos = f.tell()
                line1 = f.readline()
                if not line1:
                    break
                newpos = f.tell()
                f.seek(oldpos)  # negative seek
                # Re-read at most the first 10 characters of the line.
                amount = min(len(line1), 10)
                line2 = f.read(amount)
                self.assertEqual(line1[:amount], line2)
                f.seek(newpos)  # positive seek

    def test_seek_whence(self):
        self.test_write()
        # Try seek(whence=1), read test

        with gzip.GzipFile(self.filename) as f:
            f.read(10)
            f.seek(10, whence=1)
            y = f.read(10)
        self.assertEqual(y, data1[20:30])

    def test_seek_write(self):
        # Try seek, write test
        with gzip.GzipFile(self.filename, 'w') as f:
            for pos in range(0, 256, 16):
                f.seek(pos)
                f.write('GZ\n')

    def test_mode(self):
        self.test_write()
        with gzip.GzipFile(self.filename, 'r') as f:
            self.assertEqual(f.myfileobj.mode, 'rb')

    def test_1647484(self):
        # 'wb' must come first: it creates the file that 'rb' then opens.
        for mode in ('wb', 'rb'):
            with gzip.GzipFile(self.filename, mode) as f:
                self.assertTrue(hasattr(f, "name"))
                self.assertEqual(f.name, self.filename)

    def test_mtime(self):
        mtime = 123456789
        with gzip.GzipFile(self.filename, 'w', mtime=mtime) as fWrite:
            fWrite.write(data1)
        with gzip.GzipFile(self.filename) as fRead:
            dataRead = fRead.read()
            self.assertEqual(dataRead, data1)
            self.assertTrue(hasattr(fRead, 'mtime'))
            self.assertEqual(fRead.mtime, mtime)

    def test_metadata(self):
        mtime = 123456789

        with gzip.GzipFile(self.filename, 'w', mtime=mtime) as fWrite:
            fWrite.write(data1)

        # Read the raw (still compressed) file and check header and trailer
        # fields byte by byte.
        with open(self.filename, 'rb') as fRead:
            # see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html

            idBytes = fRead.read(2)
            self.assertEqual(idBytes, '\x1f\x8b')  # gzip ID

            cmByte = fRead.read(1)
            self.assertEqual(cmByte, '\x08')  # deflate

            flagsByte = fRead.read(1)
            self.assertEqual(flagsByte, '\x08')  # only the FNAME flag is set

            mtimeBytes = fRead.read(4)
            self.assertEqual(mtimeBytes,
                             struct.pack('<i', mtime))  # little-endian

            xflByte = fRead.read(1)
            self.assertEqual(xflByte, '\x02')  # maximum compression

            osByte = fRead.read(1)
            self.assertEqual(osByte, '\xff')  # OS "unknown" (OS-independent)

            # Since the FNAME flag is set, the zero-terminated filename
            # follows.  RFC 1952 specifies that this is the name of the input
            # file, if any.  However, the gzip module defaults to storing the
            # name of the output file in this field.
            expected = self.filename.encode('Latin-1') + '\x00'
            nameBytes = fRead.read(len(expected))
            self.assertEqual(nameBytes, expected)

            # Since no other flags were set, the header ends here.
            # Rather than process the compressed data, let's seek to the
            # trailer.
            fRead.seek(os.stat(self.filename).st_size - 8)

            crc32Bytes = fRead.read(4)  # CRC32 of uncompressed data [data1]
            self.assertEqual(crc32Bytes, '\xaf\xd7d\x83')

            isizeBytes = fRead.read(4)
            self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))

    def test_with_open(self):
        # GzipFile supports the context management protocol
        with gzip.GzipFile(self.filename, "wb") as f:
            f.write(b"xxx")
        f = gzip.GzipFile(self.filename, "rb")
        f.close()
        # Entering an already-closed file must raise.
        with self.assertRaises(ValueError):
            with f:
                pass
        # An exception raised inside the block must propagate out of it.
        with self.assertRaises(ZeroDivisionError):
            with gzip.GzipFile(self.filename, "wb") as f:
                1 // 0

    def test_zero_padded_file(self):
        with gzip.GzipFile(self.filename, "wb") as f:
            f.write(data1 * 50)

        # Pad the file with zeroes
        with open(self.filename, "ab") as f:
            f.write("\x00" * 50)

        with gzip.GzipFile(self.filename, "rb") as f:
            d = f.read()
            self.assertEqual(d, data1 * 50, "Incorrect data in file")

    def test_fileobj_from_fdopen(self):
        # Issue #13781: Creating a GzipFile using a fileobj from os.fdopen()
        # should not embed the fake filename "<fdopen>" in the output file.
        fd = os.open(self.filename, os.O_WRONLY | os.O_CREAT)
        with os.fdopen(fd, "wb") as f:
            with gzip.GzipFile(fileobj=f, mode="w") as g:
                self.assertEqual(g.name, "")

    def test_read_with_extra(self):
        # Gzip data with an extra field
        gzdata = (b'\x1f\x8b\x08\x04\xb2\x17cQ\x02\xff'
                  b'\x05\x00Extra'
                  b'\x0bI-.\x01\x002\xd1Mx\x04\x00\x00\x00')
        with gzip.GzipFile(fileobj=io.BytesIO(gzdata)) as f:
            self.assertEqual(f.read(), b'Test')
Serhiy Storchaka353e54e2013-01-22 17:13:26 +0200309
def test_main(verbose=None):
    """Run the TestGzip cases through test_support.run_unittest.

    ``verbose`` is accepted for callers that pass it but is not used here.
    """
    test_classes = (TestGzip,)
    test_support.run_unittest(*test_classes)
312
# Allow this module to be executed directly as a script.
if __name__ == "__main__":
    test_main(verbose=True)