blob: aa56ed3ab509516d703f563df37e38adc30e0a1b [file] [log] [blame]
"""Test script for the gzip module.
"""
3
4import unittest
5from test import test_support
Christian Heimesc5f05e42008-02-23 17:40:11 +00006import os
Antoine Pitrou673ddf92010-01-03 22:29:56 +00007import io
Antoine Pitrouf0d2c3f2009-01-04 21:29:23 +00008import struct
Ezio Melotti1036a7f2009-09-12 14:43:43 +00009gzip = test_support.import_module('gzip')
Andrew M. Kuchling605ebdd1999-03-25 21:50:27 +000010
# Sample payloads written to and read back from the gzip files under test.
# NOTE(review): this text was recovered from a whitespace-collapsed extract,
# so the leading whitespace inside data1 may differ from the original file.
# test_metadata compares against a hard-coded CRC32 of data1 -- confirm the
# exact bytes against upstream before relying on that check.
data1 = """ int length=DEFAULTALLOC, err = Z_OK;
 PyObject *RetVal;
 int flushmode = Z_FINISH;
 unsigned long start_total_out;

"""

data2 = """/* zlibmodule.c -- gzip-compatible data compression */
/* See http://www.gzip.org/zlib/
/* See http://www.winimage.com/zLibDll for Windows */
"""
22
Andrew M. Kuchling605ebdd1999-03-25 21:50:27 +000023
class TestGzip(unittest.TestCase):
    """Round-trip, seeking, metadata and error-handling tests for gzip.GzipFile.

    Every test works on the scratch file named by ``test_support.TESTFN``;
    the file is removed before and after each test.
    """

    filename = test_support.TESTFN

    def setUp(self):
        """Remove any stale scratch file left over from a previous run."""
        test_support.unlink(self.filename)

    def tearDown(self):
        """Remove the scratch file created by the test."""
        test_support.unlink(self.filename)

    def test_write(self):
        """Write data, then exercise flush(), fileno(), fsync and close()."""
        with gzip.GzipFile(self.filename, 'wb') as f:
            f.write(data1 * 50)

            # Try flush and fileno.
            f.flush()
            f.fileno()
            if hasattr(os, 'fsync'):
                os.fsync(f.fileno())
            f.close()

        # Test multiple close() calls.
        f.close()

    def test_read(self):
        """Read back in one call what test_write() wrote."""
        self.test_write()
        # Try reading.
        with gzip.GzipFile(self.filename, 'r') as f:
            d = f.read()
        self.assertEqual(d, data1 * 50)

    def test_read_universal_newlines(self):
        """Reading must work when the mode string contains 'U'."""
        # Issue #5148: Reading breaks when mode contains 'U'.
        self.test_write()
        with gzip.GzipFile(self.filename, 'rU') as f:
            d = f.read()
        self.assertEqual(d, data1 * 50)

    def test_io_on_closed_object(self):
        """I/O on a closed GzipFile raises ValueError."""
        # Test that I/O operations on closed GzipFile objects raise a
        # ValueError, just like the corresponding functions on file objects.

        # Write to a file, open it for reading, then close it.
        self.test_write()
        f = gzip.GzipFile(self.filename, 'r')
        f.close()
        with self.assertRaises(ValueError):
            f.read(1)
        with self.assertRaises(ValueError):
            f.seek(0)
        with self.assertRaises(ValueError):
            f.tell()
        # Open the file for writing, then close it.
        f = gzip.GzipFile(self.filename, 'w')
        f.close()
        with self.assertRaises(ValueError):
            f.write('')
        with self.assertRaises(ValueError):
            f.flush()

    def test_append(self):
        """Data appended in 'ab' mode is read back after the original data."""
        self.test_write()
        # Append to the previous file
        with gzip.GzipFile(self.filename, 'ab') as f:
            f.write(data2 * 15)

        with gzip.GzipFile(self.filename, 'rb') as f:
            d = f.read()
        self.assertEqual(d, (data1 * 50) + (data2 * 15))

    def test_many_append(self):
        """A file built from 201 one-byte appends reads back as 201 bytes."""
        # Bug #1074261 was triggered when reading a file that contained
        # many, many members.  Create such a file and verify that reading it
        # works.
        with gzip.open(self.filename, 'wb', 9) as f:
            f.write('a')
        for i in range(0, 200):
            with gzip.open(self.filename, "ab", 9) as f:  # append
                f.write('a')

        # Try reading the file
        chunks = []
        with gzip.open(self.filename, "rb") as zgfile:
            while True:
                ztxt = zgfile.read(8192)
                if not ztxt:
                    break
                # Collect pieces and join once instead of repeated +=.
                chunks.append(ztxt)
        contents = "".join(chunks)
        self.assertEqual(contents, 'a' * 201)

    def test_buffered_reader(self):
        """A GzipFile can be wrapped in io.BufferedReader and iterated."""
        # Issue #7471: a GzipFile can be wrapped in a BufferedReader for
        # performance.
        self.test_write()

        with gzip.GzipFile(self.filename, 'rb') as f:
            with io.BufferedReader(f) as r:
                lines = list(r)

        self.assertEqual(lines, 50 * data1.splitlines(True))

    def test_readline(self):
        """readline(size) never returns more than `size` characters."""
        self.test_write()
        # Try .readline() with varying line lengths

        with gzip.GzipFile(self.filename, 'rb') as f:
            line_length = 0
            while True:
                L = f.readline(line_length)
                # An empty result only signals EOF when a non-zero size was
                # requested; with size 0 an empty result is expected.
                if not L and line_length != 0:
                    break
                self.assertLessEqual(len(L), line_length)
                line_length = (line_length + 1) % 50

    def test_readlines(self):
        """readlines() works with and without a size hint."""
        self.test_write()
        # Try .readlines()

        with gzip.GzipFile(self.filename, 'rb') as f:
            L = f.readlines()

        with gzip.GzipFile(self.filename, 'rb') as f:
            while True:
                L = f.readlines(150)
                if L == []:
                    break

    def test_seek_read(self):
        """Seeking backwards and forwards yields consistent reads."""
        self.test_write()
        # Try seek, read test

        with gzip.GzipFile(self.filename) as f:
            while True:
                oldpos = f.tell()
                line1 = f.readline()
                if not line1:
                    break
                newpos = f.tell()
                f.seek(oldpos)  # negative seek
                amount = min(len(line1), 10)
                line2 = f.read(amount)
                self.assertEqual(line1[:amount], line2)
                f.seek(newpos)  # positive seek

    def test_seek_whence(self):
        """seek(offset, whence=1) moves relative to the current position."""
        self.test_write()
        # Try seek(whence=1), read test

        with gzip.GzipFile(self.filename) as f:
            f.read(10)
            f.seek(10, whence=1)
            y = f.read(10)
        self.assertEqual(y, data1[20:30])

    def test_seek_write(self):
        """Forward seeks are accepted on a file opened for writing."""
        # Try seek, write test
        with gzip.GzipFile(self.filename, 'w') as f:
            for pos in range(0, 256, 16):
                f.seek(pos)
                f.write('GZ\n')

    def test_mode(self):
        """Mode 'r' opens the underlying file object in binary mode 'rb'."""
        self.test_write()
        with gzip.GzipFile(self.filename, 'r') as f:
            self.assertEqual(f.myfileobj.mode, 'rb')

    def test_1647484(self):
        """GzipFile exposes a `name` attribute in both write and read mode."""
        for mode in ('wb', 'rb'):
            with gzip.GzipFile(self.filename, mode) as f:
                self.assertTrue(hasattr(f, "name"))
                self.assertEqual(f.name, self.filename)

    def test_mtime(self):
        """An explicit mtime given on write is available after reading."""
        mtime = 123456789
        with gzip.GzipFile(self.filename, 'w', mtime=mtime) as fWrite:
            fWrite.write(data1)
        with gzip.GzipFile(self.filename) as fRead:
            dataRead = fRead.read()
            self.assertEqual(dataRead, data1)
            self.assertTrue(hasattr(fRead, 'mtime'))
            self.assertEqual(fRead.mtime, mtime)

    def test_metadata(self):
        """Check the raw gzip header and trailer bytes against RFC 1952."""
        mtime = 123456789

        with gzip.GzipFile(self.filename, 'w', mtime=mtime) as fWrite:
            fWrite.write(data1)

        with open(self.filename, 'rb') as fRead:
            # see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html

            idBytes = fRead.read(2)
            self.assertEqual(idBytes, '\x1f\x8b')  # gzip ID

            cmByte = fRead.read(1)
            self.assertEqual(cmByte, '\x08')  # deflate

            flagsByte = fRead.read(1)
            self.assertEqual(flagsByte, '\x08')  # only the FNAME flag is set

            mtimeBytes = fRead.read(4)
            self.assertEqual(mtimeBytes, struct.pack('<i', mtime))  # little-endian

            xflByte = fRead.read(1)
            self.assertEqual(xflByte, '\x02')  # maximum compression

            osByte = fRead.read(1)
            self.assertEqual(osByte, '\xff')  # OS "unknown" (OS-independent)

            # Since the FNAME flag is set, the zero-terminated filename follows.
            # RFC 1952 specifies that this is the name of the input file, if any.
            # However, the gzip module defaults to storing the name of the output
            # file in this field.
            expected = self.filename.encode('Latin-1') + '\x00'
            nameBytes = fRead.read(len(expected))
            self.assertEqual(nameBytes, expected)

            # Since no other flags were set, the header ends here.
            # Rather than process the compressed data, let's seek to the trailer.
            fRead.seek(os.stat(self.filename).st_size - 8)

            crc32Bytes = fRead.read(4)  # CRC32 of uncompressed data [data1]
            self.assertEqual(crc32Bytes, '\xaf\xd7d\x83')

            isizeBytes = fRead.read(4)
            self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))

    def test_with_open(self):
        """GzipFile supports the context management protocol."""
        with gzip.GzipFile(self.filename, "wb") as f:
            f.write(b"xxx")
        f = gzip.GzipFile(self.filename, "rb")
        f.close()
        # __enter__ on a closed file must raise.
        with self.assertRaises(ValueError):
            with f:
                pass
        # An exception raised inside the block must reach the caller.
        with self.assertRaises(ZeroDivisionError):
            with gzip.GzipFile(self.filename, "wb") as f:
                1 // 0

    def test_zero_padded_file(self):
        """Trailing zero bytes after the gzip data do not affect reading."""
        with gzip.GzipFile(self.filename, "wb") as f:
            f.write(data1 * 50)

        # Pad the file with zeroes
        with open(self.filename, "ab") as f:
            f.write("\x00" * 50)

        with gzip.GzipFile(self.filename, "rb") as f:
            d = f.read()
            self.assertEqual(d, data1 * 50, "Incorrect data in file")

    def test_fileobj_from_fdopen(self):
        """A fileobj from os.fdopen() gives the GzipFile an empty name."""
        # Issue #13781: Creating a GzipFile using a fileobj from os.fdopen()
        # should not embed the fake filename "<fdopen>" in the output file.
        fd = os.open(self.filename, os.O_WRONLY | os.O_CREAT)
        with os.fdopen(fd, "wb") as f:
            with gzip.GzipFile(fileobj=f, mode="w") as g:
                self.assertEqual(g.name, "")

    def test_read_with_extra(self):
        """A member whose header carries an extra field is still readable."""
        # Gzip data with an extra field
        gzdata = (b'\x1f\x8b\x08\x04\xb2\x17cQ\x02\xff'
                  b'\x05\x00Extra'
                  b'\x0bI-.\x01\x002\xd1Mx\x04\x00\x00\x00')
        with gzip.GzipFile(fileobj=io.BytesIO(gzdata)) as f:
            self.assertEqual(f.read(), b'Test')
Serhiy Storchaka353e54e2013-01-22 17:13:26 +0200298
def test_main(verbose=None):
    """Run the TestGzip suite through test_support.run_unittest().

    `verbose` is accepted for the caller's convenience but is not used here.
    """
    test_support.run_unittest(TestGzip)
301
# Run the suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main(verbose=True)