blob: b417044bcbff039094e929f644700d4954f29e58 [file] [log] [blame]
Andrew M. Kuchlinga6f68e12005-06-09 14:12:36 +00001"""Test script for the gzip module.
2"""
3
4import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00005from test import support
Christian Heimes05e8be12008-02-23 18:30:17 +00006import os
Antoine Pitroub1f88352010-01-03 22:37:40 +00007import io
Antoine Pitrou42db3ef2009-01-04 21:37:59 +00008import struct
Ezio Melotti78ea2022009-09-12 18:41:20 +00009gzip = support.import_module('gzip')
Andrew M. Kuchling605ebdd1999-03-25 21:50:27 +000010
Walter Dörwald5b1284d2007-06-06 16:43:59 +000011data1 = b""" int length=DEFAULTALLOC, err = Z_OK;
Andrew M. Kuchling605ebdd1999-03-25 21:50:27 +000012 PyObject *RetVal;
13 int flushmode = Z_FINISH;
14 unsigned long start_total_out;
15
16"""
17
Walter Dörwald5b1284d2007-06-06 16:43:59 +000018data2 = b"""/* zlibmodule.c -- gzip-compatible data compression */
Neal Norwitz014f1032004-07-29 03:55:56 +000019/* See http://www.gzip.org/zlib/
Andrew M. Kuchling605ebdd1999-03-25 21:50:27 +000020/* See http://www.winimage.com/zLibDll for Windows */
21"""
22
Andrew M. Kuchling605ebdd1999-03-25 21:50:27 +000023
class UnseekableIO(io.BytesIO):
    """In-memory bytes stream that rejects every positioning operation.

    Used to check that GzipFile can work on top of a file object that
    offers neither seek() nor tell().
    """

    def seekable(self):
        """Report that the stream does not support random access."""
        return False

    def seek(self, *args):
        """Always raise io.UnsupportedOperation, whatever the arguments."""
        raise io.UnsupportedOperation

    def tell(self):
        """Always raise io.UnsupportedOperation."""
        raise io.UnsupportedOperation
33
34
class BaseTest(unittest.TestCase):
    """Common fixture: every test starts and ends without the scratch file."""

    # Path of the scratch file shared by all tests in the subclasses.
    filename = support.TESTFN

    def _remove_scratch_file(self):
        """Delete the scratch file via support.unlink()."""
        support.unlink(self.filename)

    def setUp(self):
        """Remove the scratch file before the test runs."""
        self._remove_scratch_file()

    def tearDown(self):
        """Remove the scratch file after the test has run."""
        self._remove_scratch_file()
Andrew M. Kuchling605ebdd1999-03-25 21:50:27 +000043
Andrew M. Kuchling85ab7382000-07-29 20:18:34 +000044
class TestGzip(BaseTest):
    """Tests for gzip.GzipFile and the compress()/decompress() shortcuts."""

    def write_and_read_back(self, data, mode='b'):
        """Write bytes-like *data* to the scratch file and verify the round trip.

        Checks that write() reports len(bytes(data)) and that reading the
        file back yields bytes(data).  *mode* is appended to 'w' and 'r'
        to form the GzipFile mode strings.
        """
        b_data = bytes(data)
        with gzip.GzipFile(self.filename, 'w' + mode) as f:
            written = f.write(data)
        self.assertEqual(written, len(b_data))
        with gzip.GzipFile(self.filename, 'r' + mode) as f:
            self.assertEqual(f.read(), b_data)

    def test_write(self):
        """Write data1 * 50; also used as set-up by most of the read tests."""
        with gzip.GzipFile(self.filename, 'wb') as f:
            f.write(data1 * 50)

            # Try flush and fileno.
            f.flush()
            f.fileno()
            if hasattr(os, 'fsync'):
                os.fsync(f.fileno())
            f.close()

        # Test multiple close() calls.
        f.close()

    # The following test_write_xy methods test that write accepts
    # the corresponding bytes-like object type as input
    # and that the data written equals bytes(xy) in all cases.
    def test_write_memoryview(self):
        self.write_and_read_back(memoryview(data1 * 50))
        m = memoryview(bytes(range(256)))
        data = m.cast('B', shape=[8, 8, 4])
        self.write_and_read_back(data)

    def test_write_bytearray(self):
        self.write_and_read_back(bytearray(data1 * 50))

    def test_write_incompatible_type(self):
        # Test that non-bytes-like types raise TypeError.
        # Issue #21560: attempts to write incompatible types
        # should not affect the state of the fileobject
        with gzip.GzipFile(self.filename, 'wb') as f:
            with self.assertRaises(TypeError):
                f.write('a')
            with self.assertRaises(TypeError):
                f.write([1])
            f.write(data1)
        with gzip.GzipFile(self.filename, 'rb') as f:
            self.assertEqual(f.read(), data1)

    def test_read(self):
        self.test_write()
        # Try reading.
        with gzip.GzipFile(self.filename, 'r') as f:
            d = f.read()
        self.assertEqual(d, data1 * 50)

    def test_read1(self):
        self.test_write()
        blocks = []
        nread = 0
        with gzip.GzipFile(self.filename, 'r') as f:
            while True:
                d = f.read1()
                if not d:
                    break
                blocks.append(d)
                nread += len(d)
                # Check that position was updated correctly (see issue10791).
                self.assertEqual(f.tell(), nread)
        self.assertEqual(b''.join(blocks), data1 * 50)

    def test_io_on_closed_object(self):
        # Test that I/O operations on closed GzipFile objects raise a
        # ValueError, just like the corresponding functions on file objects.

        # Write to a file, open it for reading, then close it.
        self.test_write()
        f = gzip.GzipFile(self.filename, 'r')
        f.close()
        with self.assertRaises(ValueError):
            f.read(1)
        with self.assertRaises(ValueError):
            f.seek(0)
        with self.assertRaises(ValueError):
            f.tell()
        # Open the file for writing, then close it.
        f = gzip.GzipFile(self.filename, 'w')
        f.close()
        with self.assertRaises(ValueError):
            f.write(b'')
        with self.assertRaises(ValueError):
            f.flush()

    def test_append(self):
        self.test_write()
        # Append to the previous file
        with gzip.GzipFile(self.filename, 'ab') as f:
            f.write(data2 * 15)

        with gzip.GzipFile(self.filename, 'rb') as f:
            d = f.read()
        self.assertEqual(d, (data1 * 50) + (data2 * 15))

    def test_many_append(self):
        # Bug #1074261 was triggered when reading a file that contained
        # many, many members.  Create such a file and verify that reading it
        # works.
        with gzip.GzipFile(self.filename, 'wb', 9) as f:
            f.write(b'a')
        for _ in range(200):
            with gzip.GzipFile(self.filename, "ab", 9) as f:  # append
                f.write(b'a')

        # Try reading the file in 8192-byte requests until read() returns b''.
        with gzip.GzipFile(self.filename, "rb") as zgfile:
            contents = b"".join(iter(lambda: zgfile.read(8192), b""))
        self.assertEqual(contents, b'a' * 201)

    def test_exclusive_write(self):
        with gzip.GzipFile(self.filename, 'xb') as f:
            f.write(data1 * 50)
        with gzip.GzipFile(self.filename, 'rb') as f:
            self.assertEqual(f.read(), data1 * 50)
        # The file exists now, so a second exclusive open must fail.
        with self.assertRaises(FileExistsError):
            gzip.GzipFile(self.filename, 'xb')

    def test_buffered_reader(self):
        # Issue #7471: a GzipFile can be wrapped in a BufferedReader for
        # performance.
        self.test_write()

        with gzip.GzipFile(self.filename, 'rb') as f:
            with io.BufferedReader(f) as r:
                lines = [line for line in r]

        self.assertEqual(lines, 50 * data1.splitlines(keepends=True))

    def test_readline(self):
        self.test_write()
        # Try .readline() with varying line lengths

        with gzip.GzipFile(self.filename, 'rb') as f:
            line_length = 0
            while True:
                line = f.readline(line_length)
                # An empty result only means end-of-file when a non-zero
                # amount of data was requested.
                if not line and line_length != 0:
                    break
                self.assertLessEqual(len(line), line_length)
                line_length = (line_length + 1) % 50

    def test_readlines(self):
        self.test_write()
        # Try .readlines()

        with gzip.GzipFile(self.filename, 'rb') as f:
            lines = f.readlines()
        self.assertEqual(b''.join(lines), data1 * 50)

        # Same again, but fetching the lines in batches via a size hint.
        batched = []
        with gzip.GzipFile(self.filename, 'rb') as f:
            while True:
                batch = f.readlines(150)
                if batch == []:
                    break
                batched.extend(batch)
        self.assertEqual(b''.join(batched), data1 * 50)

    def test_seek_read(self):
        self.test_write()
        # Try seek, read test

        with gzip.GzipFile(self.filename) as f:
            while True:
                oldpos = f.tell()
                line1 = f.readline()
                if not line1:
                    break
                newpos = f.tell()
                f.seek(oldpos)  # negative seek
                # Re-read at most the first 10 bytes of the line.
                amount = min(len(line1), 10)
                line2 = f.read(amount)
                self.assertEqual(line1[:amount], line2)
                f.seek(newpos)  # positive seek

    def test_seek_whence(self):
        self.test_write()
        # Try seek(whence=1), read test

        with gzip.GzipFile(self.filename) as f:
            f.read(10)
            f.seek(10, whence=1)
            y = f.read(10)
        self.assertEqual(y, data1[20:30])

    def test_seek_write(self):
        # Try seek, write test
        marker = b'GZ\n'
        expected = bytearray()
        with gzip.GzipFile(self.filename, 'w') as f:
            for pos in range(0, 256, 16):
                f.seek(pos)
                f.write(marker)
                # GzipFile is expected to fill the gap created by a forward
                # seek in write mode with zero bytes; mirror that here.
                expected += b'\x00' * (pos - len(expected)) + marker
        with gzip.GzipFile(self.filename, 'rb') as f:
            self.assertEqual(f.read(), bytes(expected))

    def test_mode(self):
        self.test_write()
        with gzip.GzipFile(self.filename, 'r') as f:
            self.assertEqual(f.myfileobj.mode, 'rb')
        # 'x' refuses to overwrite, so the file has to go first.
        support.unlink(self.filename)
        with gzip.GzipFile(self.filename, 'x') as f:
            self.assertEqual(f.myfileobj.mode, 'xb')

    def test_1647484(self):
        # A GzipFile exposes the name it was opened with, in both modes.
        for mode in ('wb', 'rb'):
            with gzip.GzipFile(self.filename, mode) as f:
                self.assertTrue(hasattr(f, "name"))
                self.assertEqual(f.name, self.filename)

    def test_paddedfile_getattr(self):
        self.test_write()
        with gzip.GzipFile(self.filename, 'rb') as f:
            self.assertTrue(hasattr(f.fileobj, "name"))
            self.assertEqual(f.fileobj.name, self.filename)

    def test_mtime(self):
        mtime = 123456789
        with gzip.GzipFile(self.filename, 'w', mtime=mtime) as fWrite:
            fWrite.write(data1)
        with gzip.GzipFile(self.filename) as fRead:
            dataRead = fRead.read()
            self.assertEqual(dataRead, data1)
            self.assertTrue(hasattr(fRead, 'mtime'))
            self.assertEqual(fRead.mtime, mtime)

    def test_metadata(self):
        mtime = 123456789

        with gzip.GzipFile(self.filename, 'w', mtime=mtime) as fWrite:
            fWrite.write(data1)

        with open(self.filename, 'rb') as fRead:
            # see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html

            idBytes = fRead.read(2)
            self.assertEqual(idBytes, b'\x1f\x8b')  # gzip ID

            cmByte = fRead.read(1)
            self.assertEqual(cmByte, b'\x08')  # deflate

            flagsByte = fRead.read(1)
            self.assertEqual(flagsByte, b'\x08')  # only the FNAME flag is set

            mtimeBytes = fRead.read(4)
            self.assertEqual(mtimeBytes, struct.pack('<i', mtime))  # little-endian

            xflByte = fRead.read(1)
            self.assertEqual(xflByte, b'\x02')  # maximum compression

            osByte = fRead.read(1)
            self.assertEqual(osByte, b'\xff')  # OS "unknown" (OS-independent)

            # Since the FNAME flag is set, the zero-terminated filename follows.
            # RFC 1952 specifies that this is the name of the input file, if any.
            # However, the gzip module defaults to storing the name of the output
            # file in this field.
            expected = self.filename.encode('Latin-1') + b'\x00'
            nameBytes = fRead.read(len(expected))
            self.assertEqual(nameBytes, expected)

            # Since no other flags were set, the header ends here.
            # Rather than process the compressed data, let's seek to the trailer.
            fRead.seek(os.stat(self.filename).st_size - 8)

            crc32Bytes = fRead.read(4)  # CRC32 of uncompressed data [data1]
            self.assertEqual(crc32Bytes, b'\xaf\xd7d\x83')

            isizeBytes = fRead.read(4)
            self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))

    def test_with_open(self):
        # GzipFile supports the context management protocol
        with gzip.GzipFile(self.filename, "wb") as f:
            f.write(b"xxx")

        # Entering an already-closed file must be rejected.
        f = gzip.GzipFile(self.filename, "rb")
        f.close()
        with self.assertRaises(
                ValueError,
                msg="__enter__ on a closed file didn't raise an exception"):
            with f:
                pass

        # An exception raised inside the block must propagate to the caller,
        # and the file must have been closed on the way out.
        with self.assertRaises(ZeroDivisionError,
                               msg="1/0 didn't raise an exception"):
            with gzip.GzipFile(self.filename, "wb") as f:
                1/0
        self.assertTrue(f.closed)

    def test_zero_padded_file(self):
        with gzip.GzipFile(self.filename, "wb") as f:
            f.write(data1 * 50)

        # Pad the file with zeroes
        with open(self.filename, "ab") as f:
            f.write(b"\x00" * 50)

        with gzip.GzipFile(self.filename, "rb") as f:
            d = f.read()
            self.assertEqual(d, data1 * 50, "Incorrect data in file")

    def test_non_seekable_file(self):
        # Compress into, then decompress from, a stream without seek()/tell().
        uncompressed = data1 * 50
        buf = UnseekableIO()
        with gzip.GzipFile(fileobj=buf, mode="wb") as f:
            f.write(uncompressed)
        compressed = buf.getvalue()
        buf = UnseekableIO(compressed)
        with gzip.GzipFile(fileobj=buf, mode="rb") as f:
            self.assertEqual(f.read(), uncompressed)

    def test_peek(self):
        uncompressed = data1 * 200
        with gzip.GzipFile(self.filename, "wb") as f:
            f.write(uncompressed)

        def sizes():
            # Endlessly cycle through the peek sizes 5, 15, 25, 35, 45.
            while True:
                for n in range(5, 50, 10):
                    yield n

        with gzip.GzipFile(self.filename, "rb") as f:
            f.max_read_chunk = 33
            nread = 0
            for n in sizes():
                s = f.peek(n)
                if s == b'':
                    break
                # Whatever peek() showed must be exactly what read() returns.
                self.assertEqual(f.read(len(s)), s)
                nread += len(s)
            self.assertEqual(f.read(100), b'')
            self.assertEqual(nread, len(uncompressed))

    def test_textio_readlines(self):
        # Issue #10791: TextIOWrapper.readlines() fails when wrapping GzipFile.
        lines = (data1 * 50).decode("ascii").splitlines(keepends=True)
        self.test_write()
        with gzip.GzipFile(self.filename, 'r') as f:
            with io.TextIOWrapper(f, encoding="ascii") as t:
                self.assertEqual(t.readlines(), lines)

    def test_fileobj_from_fdopen(self):
        # Issue #13781: Opening a GzipFile for writing fails when using a
        # fileobj created with os.fdopen().
        fd = os.open(self.filename, os.O_WRONLY | os.O_CREAT)
        with os.fdopen(fd, "wb") as f:
            with gzip.GzipFile(fileobj=f, mode="w") as g:
                pass

    def test_bytes_filename(self):
        str_filename = self.filename
        try:
            bytes_filename = str_filename.encode("ascii")
        except UnicodeEncodeError:
            self.skipTest("Temporary file name needs to be ASCII")
        with gzip.GzipFile(bytes_filename, "wb") as f:
            f.write(data1 * 50)
        with gzip.GzipFile(bytes_filename, "rb") as f:
            self.assertEqual(f.read(), data1 * 50)
        # Sanity check that we are actually operating on the right file.
        with gzip.GzipFile(str_filename, "rb") as f:
            self.assertEqual(f.read(), data1 * 50)

    # Testing compress/decompress shortcut functions

    def test_compress(self):
        for data in [data1, data2]:
            # Default level plus explicit compression levels 1, 6 and 9.
            for args in [(), (1,), (6,), (9,)]:
                datac = gzip.compress(data, *args)
                self.assertEqual(type(datac), bytes)
                with gzip.GzipFile(fileobj=io.BytesIO(datac), mode="rb") as f:
                    self.assertEqual(f.read(), data)

    def test_decompress(self):
        for data in (data1, data2):
            buf = io.BytesIO()
            with gzip.GzipFile(fileobj=buf, mode="wb") as f:
                f.write(data)
            self.assertEqual(gzip.decompress(buf.getvalue()), data)
            # Roundtrip with compress
            datac = gzip.compress(data)
            self.assertEqual(gzip.decompress(datac), data)

    def test_read_truncated(self):
        data = data1 * 50
        # Drop the CRC (4 bytes) and file size (4 bytes).
        truncated = gzip.compress(data)[:-8]
        with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f:
            self.assertRaises(EOFError, f.read)
        with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f:
            self.assertEqual(f.read(len(data)), data)
            self.assertRaises(EOFError, f.read, 1)
        # Incomplete 10-byte header.
        for i in range(2, 10):
            with gzip.GzipFile(fileobj=io.BytesIO(truncated[:i])) as f:
                self.assertRaises(EOFError, f.read, 1)

    def test_read_with_extra(self):
        # Gzip data with an extra field
        gzdata = (b'\x1f\x8b\x08\x04\xb2\x17cQ\x02\xff'
                  b'\x05\x00Extra'
                  b'\x0bI-.\x01\x002\xd1Mx\x04\x00\x00\x00')
        with gzip.GzipFile(fileobj=io.BytesIO(gzdata)) as f:
            self.assertEqual(f.read(), b'Test')

    def test_prepend_error(self):
        # See issue #20875
        with gzip.open(self.filename, "wb") as f:
            f.write(data1)
        with gzip.open(self.filename, "rb") as f:
            # Passes as long as calling prepend() with no argument does
            # not raise.
            f.fileobj.prepend()
463
class TestOpen(BaseTest):
    """Tests for the module-level gzip.open() function."""

    def _read_decompressed(self):
        """Return the scratch file's raw content run through gzip.decompress()."""
        with open(self.filename, "rb") as f:
            return gzip.decompress(f.read())

    def _check_binary_modes(self, suffix):
        """Exercise write, read, append and exclusive-create in binary mode.

        *suffix* is appended to each of "w", "r", "a" and "x" to build the
        mode strings: "b" for explicit binary modes, "" for implicit ones.
        """
        uncompressed = data1 * 50

        with gzip.open(self.filename, "w" + suffix) as f:
            f.write(uncompressed)
        self.assertEqual(self._read_decompressed(), uncompressed)

        with gzip.open(self.filename, "r" + suffix) as f:
            self.assertEqual(f.read(), uncompressed)

        with gzip.open(self.filename, "a" + suffix) as f:
            f.write(uncompressed)
        self.assertEqual(self._read_decompressed(), uncompressed * 2)

        # Exclusive creation must fail while the file exists and succeed
        # once it has been removed.
        with self.assertRaises(FileExistsError):
            gzip.open(self.filename, "x" + suffix)
        support.unlink(self.filename)
        with gzip.open(self.filename, "x" + suffix) as f:
            f.write(uncompressed)
        self.assertEqual(self._read_decompressed(), uncompressed)

    def test_binary_modes(self):
        self._check_binary_modes("b")

    def test_implicit_binary_modes(self):
        # Test implicit binary modes (no "b" or "t" in mode string).
        self._check_binary_modes("")

    def test_text_modes(self):
        uncompressed = data1.decode("ascii") * 50
        # What is expected in the file once "\n" has been replaced by the
        # platform line separator.
        uncompressed_raw = uncompressed.replace("\n", os.linesep)
        with gzip.open(self.filename, "wt") as f:
            f.write(uncompressed)
        file_data = self._read_decompressed().decode("ascii")
        self.assertEqual(file_data, uncompressed_raw)
        with gzip.open(self.filename, "rt") as f:
            self.assertEqual(f.read(), uncompressed)
        with gzip.open(self.filename, "at") as f:
            f.write(uncompressed)
        file_data = self._read_decompressed().decode("ascii")
        self.assertEqual(file_data, uncompressed_raw * 2)

    def test_fileobj(self):
        # gzip.open() also accepts an existing file object instead of a path.
        uncompressed_bytes = data1 * 50
        uncompressed_str = uncompressed_bytes.decode("ascii")
        compressed = gzip.compress(uncompressed_bytes)
        with gzip.open(io.BytesIO(compressed), "r") as f:
            self.assertEqual(f.read(), uncompressed_bytes)
        with gzip.open(io.BytesIO(compressed), "rb") as f:
            self.assertEqual(f.read(), uncompressed_bytes)
        with gzip.open(io.BytesIO(compressed), "rt") as f:
            self.assertEqual(f.read(), uncompressed_str)

    def test_bad_params(self):
        # Test invalid parameter combinations.
        with self.assertRaises(TypeError):
            gzip.open(123.456)
        with self.assertRaises(ValueError):
            gzip.open(self.filename, "wbt")
        with self.assertRaises(ValueError):
            gzip.open(self.filename, "xbt")
        with self.assertRaises(ValueError):
            gzip.open(self.filename, "rb", encoding="utf-8")
        with self.assertRaises(ValueError):
            gzip.open(self.filename, "rb", errors="ignore")
        with self.assertRaises(ValueError):
            gzip.open(self.filename, "rb", newline="\n")

    def test_encoding(self):
        # Test non-default encoding.
        uncompressed = data1.decode("ascii") * 50
        uncompressed_raw = uncompressed.replace("\n", os.linesep)
        with gzip.open(self.filename, "wt", encoding="utf-16") as f:
            f.write(uncompressed)
        file_data = self._read_decompressed().decode("utf-16")
        self.assertEqual(file_data, uncompressed_raw)
        with gzip.open(self.filename, "rt", encoding="utf-16") as f:
            self.assertEqual(f.read(), uncompressed)

    def test_encoding_error_handler(self):
        # Test with non-default encoding error handler.
        with gzip.open(self.filename, "wb") as f:
            f.write(b"foo\xffbar")
        # The byte 0xff is not ASCII; errors="ignore" should drop it.
        reader = gzip.open(self.filename, "rt", encoding="ascii",
                           errors="ignore")
        with reader as f:
            self.assertEqual(f.read(), "foobar")

    def test_newline(self):
        # Test with explicit newline (universal newline mode disabled).
        uncompressed = data1.decode("ascii") * 50
        with gzip.open(self.filename, "wt", newline="\n") as f:
            f.write(uncompressed)
        # With "\r" as the only line terminator the text, which is written
        # with "\n" line endings, must come back as one single "line".
        with gzip.open(self.filename, "rt", newline="\r") as f:
            self.assertEqual(f.readlines(), [uncompressed])
589
def test_main(verbose=None):
    """Run every test case of this module through support.run_unittest().

    *verbose* is accepted but not used by this function.
    """
    test_cases = (TestGzip, TestOpen)
    support.run_unittest(*test_cases)


if __name__ == "__main__":
    test_main(verbose=True)