#!/usr/bin/env python3
# Demo program for zlib; it compresses or decompresses files, but *doesn't*
# delete the original. This doesn't support all of gzip's options.
#
# The 'gzip' module in the standard library provides a more complete
# implementation of gzip-format files.
7
import os
import sys
import zlib
Guido van Rossum5416a0d1997-05-28 16:13:21 +00009
# Bit masks tested against (or written as) the flag byte of a gzip header.
FTEXT = 1
FHCRC = 2       # a 16-bit header CRC follows the optional header fields
FEXTRA = 4      # a length-prefixed extra field is present
FNAME = 8       # a zero-terminated original file name is present
FCOMMENT = 16   # a zero-terminated comment is present
11
def write32(output, value):
    """Write the low 32 bits of *value* to *output*, lowest byte first.

    output -- binary file object; it receives four one-byte writes.
    value  -- integer; only its least significant 32 bits are emitted.
    """
    for shift in (0, 8, 16, 24):
        output.write(bytes([(value >> shift) & 255]))
Tim Peters182b5ac2004-07-18 06:16:08 +000017
def read32(input):
    """Read four bytes from *input* and return them as an unsigned integer.

    The bytes are interpreted least significant byte first.

    input -- binary file object to read from.

    Raises EOFError if the stream ends before four bytes were read.
    """
    data = b''
    while len(data) < 4:
        chunk = input.read(4 - len(data))
        if not chunk:
            # An empty read means end of file; report it explicitly rather
            # than failing later while decoding a short buffer.
            raise EOFError('Unexpected end of file while reading a '
                           '32-bit value')
        data += chunk
    return int.from_bytes(data, 'little')
24
def compress(filename, input, output):
    """Write the data read from *input* to *output* as one gzip member.

    filename -- path of the file being compressed; it is stat()ed for the
                modification time and its base name is stored in the header.
    input    -- binary file object supplying the uncompressed data.
    output   -- binary file object receiving the gzip stream.
    """
    output.write(b'\037\213\010')        # Write the header, ...
    output.write(bytes([FNAME]))         # ... flag byte ...

    statval = os.stat(filename)          # ... modification time ...
    mtime = statval[8]
    write32(output, mtime)
    output.write(b'\002')                # ... slowest compression alg. ...
    output.write(b'\377')                # ... OS (=unknown) ...
    # The gzip format stores the original name without directory components.
    bfilename = os.fsencode(os.path.basename(filename))
    output.write(bfilename + b'\000')    # ... original filename ...

    crcval = zlib.crc32(b'')
    size = 0
    # Negative wbits: emit a raw deflate stream, since the gzip header and
    # trailer are written by hand here.
    compobj = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
                               zlib.DEF_MEM_LEVEL, 0)
    while True:
        data = input.read(1024)
        if not data:
            break
        crcval = zlib.crc32(data, crcval)
        size += len(data)
        output.write(compobj.compress(data))
    output.write(compobj.flush())
    write32(output, crcval)              # ... the CRC ...
    # Record the number of bytes actually compressed rather than the size
    # reported by stat(), so the trailer always matches the data written.
    # write32() keeps only the low 32 bits.
    write32(output, size)                # and the file size.
Guido van Rossum5416a0d1997-05-28 16:13:21 +000049
def _read_exact(input, count):
    """Return exactly *count* bytes read from *input*.

    Raises EOFError when the stream ends first, so a truncated header is
    reported instead of being parsed as if it were complete.
    """
    data = input.read(count)
    if len(data) != count:
        raise EOFError('Truncated gzip header')
    return data


def _skip_zero_terminated(input):
    """Read and discard bytes from *input* up to and including a NUL byte.

    Raises EOFError if the stream ends before the terminator is found;
    read() keeps returning b'' at end of file, so waiting for a NUL that
    never comes would otherwise loop forever.
    """
    while True:
        s = input.read(1)
        if s == b'\0':
            return
        if not s:
            raise EOFError('Truncated gzip header')


def decompress(input, output):
    """Decompress the gzip data read from *input* and write it to *output*.

    input  -- seekable binary file object positioned at the start of the
              gzip data; it is read to the end and then repositioned to
              re-read the 8-byte trailer.
    output -- binary file object receiving the decompressed bytes.

    Prints a message and exits if the magic number or the compression
    method is not recognised.  A CRC or length mismatch against the trailer
    is only reported with a printed message.  Raises EOFError if the header
    ends prematurely.
    """
    magic = input.read(2)
    if magic != b'\037\213':
        print('Not a gzipped file')
        sys.exit(0)
    if _read_exact(input, 1)[0] != 8:
        print('Unknown compression method')
        sys.exit(0)
    flag = _read_exact(input, 1)[0]
    _read_exact(input, 4+1+1)           # Discard modification time,
                                        # extra flags, and OS byte.
    if flag & FEXTRA:
        # Read & discard the extra field, if present; its length is a
        # 16-bit value stored low byte first.
        xlen = int.from_bytes(_read_exact(input, 2), 'little')
        _read_exact(input, xlen)
    if flag & FNAME:
        # Read and discard a null-terminated string containing the filename
        _skip_zero_terminated(input)
    if flag & FCOMMENT:
        # Read and discard a null-terminated string containing a comment
        _skip_zero_terminated(input)
    if flag & FHCRC:
        _read_exact(input, 2)           # Read & discard the 16-bit header CRC

    decompobj = zlib.decompressobj(-zlib.MAX_WBITS)
    crcval = zlib.crc32(b'')
    length = 0
    while True:
        data = input.read(1024)
        if not data:
            break
        decompdata = decompobj.decompress(data)
        output.write(decompdata)
        length += len(decompdata)
        crcval = zlib.crc32(decompdata, crcval)

    decompdata = decompobj.flush()
    output.write(decompdata)
    length += len(decompdata)
    crcval = zlib.crc32(decompdata, crcval)

    # We've read to the end of the file, so we have to rewind in order
    # to reread the 8 bytes containing the CRC and the file size.  The
    # decompressor is smart and knows when to stop, so feeding it
    # extra data is harmless.
    input.seek(-8, 2)
    stored_crc = read32(input)
    isize = read32(input)
    if stored_crc != crcval:
        print('CRC check failed.')
    if isize != length:
        print('Incorrect length of data produced')
Guido van Rossum5416a0d1997-05-28 16:13:21 +0000107
def main():
    """Compress or decompress the single file named on the command line.

    A name ending in '.gz' is decompressed into the same name without that
    suffix; any other name is compressed into the name with '.gz' appended.
    The input file is left in place.
    """
    if len(sys.argv) != 2:
        print('Usage: minigzip.py <filename>')
        print(' The file will be compressed or decompressed.')
        sys.exit(0)

    filename = sys.argv[1]
    if filename.endswith('.gz'):
        compressing = False
        outputname = filename[:-3]
    else:
        compressing = True
        outputname = filename + '.gz'

    # The with statement closes both files even when compress() or
    # decompress() raises or calls sys.exit().
    with open(filename, 'rb') as infile, open(outputname, 'wb') as outfile:
        if compressing:
            compress(filename, infile, outfile)
        else:
            decompress(infile, outfile)
132
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()