blob: 87fed4ae57abaa0aa74d3f4d55ed63f746adadf9 [file] [log] [blame]
#!/usr/bin/env python
# Demo program for zlib; it compresses or decompresses files, but *doesn't*
# delete the original.  This doesn't support all of gzip's options.
#
# The 'gzip' module in the standard library provides a more complete
# implementation of gzip-format files.
7
import os
import struct
import sys
import zlib
Guido van Rossum5416a0d1997-05-28 16:13:21 +00009
10FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
11
def write32(output, value):
    """Write *value* to *output* as a 32-bit little-endian unsigned integer.

    output -- binary file-like object with a write() method.
    value  -- integer; only its low 32 bits are written, so a negative
              value is emitted in two's-complement form.
    """
    # struct.pack() returns a byte string on both Python 2 and Python 3;
    # chr() would produce text on Python 3, which a binary stream rejects.
    output.write(struct.pack('<L', value & 0xFFFFFFFF))
Tim Peters182b5ac2004-07-18 06:16:08 +000017
def read32(input):
    """Read a 32-bit little-endian unsigned integer from *input*.

    input -- binary file-like object with a read() method.

    Returns the decoded value as a non-negative integer.
    Raises EOFError if fewer than four bytes could be read.
    """
    raw = input.read(4)
    if len(raw) != 4:
        # Report a short read explicitly instead of failing inside ord().
        raise EOFError('expected 4 bytes, got %d' % len(raw))
    return struct.unpack('<L', raw)[0]
24
def compress(filename, input, output):
    """Compress the data read from *input* into *output* in gzip format.

    filename -- path of the file being compressed; it is stat()ed for its
                modification time and its base name is stored in the header.
    input    -- binary file object the uncompressed data is read from.
    output   -- binary file object that receives the gzip stream.
    """
    # Header: magic number and compression method (8 = deflate), ...
    output.write(b'\037\213\010')
    # ... flag byte (only the original-name field is present) ...
    output.write(struct.pack('B', FNAME))

    # ... modification time ...
    mtime = int(os.stat(filename).st_mtime)
    write32(output, mtime)
    output.write(b'\002')               # ... slowest compression alg. ...
    output.write(b'\377')               # ... OS (=unknown) ...

    # ... original filename, without directory components and
    # zero-terminated.  RFC 1952 specifies ISO 8859-1 for this field.
    name = os.path.basename(filename)
    if not isinstance(name, bytes):
        name = name.encode('latin-1', 'replace')
    output.write(name + b'\000')

    crcval = zlib.crc32(b"")
    size = 0
    # Negative window bits select a raw deflate stream: the gzip header
    # and trailer are written by hand in this function.
    compobj = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
                               zlib.DEF_MEM_LEVEL, 0)
    while True:
        data = input.read(1024)
        if not data:
            break
        # Count what is actually compressed rather than trusting stat(),
        # so the trailer always describes the data that was written.
        size += len(data)
        crcval = zlib.crc32(data, crcval)
        output.write(compobj.compress(data))
    output.write(compobj.flush())

    # Trailer: both fields are 32-bit unsigned; zlib.crc32() may return a
    # signed value on Python 2, and the size is stored modulo 2**32.
    write32(output, crcval & 0xFFFFFFFF)    # ... the CRC ...
    write32(output, size & 0xFFFFFFFF)      # and the file size.
Guido van Rossum5416a0d1997-05-28 16:13:21 +000048
def _read_exactly(input, count):
    """Return exactly *count* bytes from *input* as a bytearray.

    Raises EOFError if the stream ends before *count* bytes are read.
    """
    data = input.read(count)
    if len(data) != count:
        raise EOFError('Truncated gzip file')
    # A bytearray indexes as integers on both Python 2 and Python 3.
    return bytearray(data)


def _skip_zero_terminated(input):
    """Read and discard bytes from *input* through the next zero byte.

    Raises EOFError if the stream ends before a zero byte is found.
    """
    while True:
        char = input.read(1)
        if not char:
            # read() keeps returning an empty string at end of file, so
            # without this check a truncated header would loop forever.
            raise EOFError('Truncated gzip file')
        if char == b'\0':
            break


def decompress(input, output):
    """Decompress the gzip stream in *input*, writing the data to *output*.

    input  -- seekable binary file object positioned at the start of the
              gzip data.
    output -- binary file object that receives the decompressed data.

    Prints a message and exits with status 1 if the magic number or the
    compression method is not recognized.  Prints a warning (without
    exiting) if the stored CRC or length does not match the output.
    Raises EOFError if the header is truncated.
    """
    magic = input.read(2)
    if magic != b'\037\213':
        sys.stdout.write('Not a gzipped file\n')
        sys.exit(1)
    if _read_exactly(input, 1)[0] != 8:
        sys.stdout.write('Unknown compression method\n')
        sys.exit(1)
    flag = _read_exactly(input, 1)[0]
    _read_exactly(input, 4 + 1 + 1)     # Discard modification time,
                                        # extra flags, and OS byte.
    if flag & FEXTRA:
        # Read & discard the extra field, if present
        xlen_bytes = _read_exactly(input, 2)
        xlen = xlen_bytes[0] + 256 * xlen_bytes[1]
        _read_exactly(input, xlen)
    if flag & FNAME:
        # Read and discard a null-terminated string containing the filename
        _skip_zero_terminated(input)
    if flag & FCOMMENT:
        # Read and discard a null-terminated string containing a comment
        _skip_zero_terminated(input)
    if flag & FHCRC:
        _read_exactly(input, 2)         # Read & discard the 16-bit header CRC

    # Negative window bits: the payload is a raw deflate stream.
    decompobj = zlib.decompressobj(-zlib.MAX_WBITS)
    crcval = zlib.crc32(b"")
    length = 0
    while True:
        data = input.read(1024)
        if not data:
            break
        decompdata = decompobj.decompress(data)
        output.write(decompdata)
        length += len(decompdata)
        crcval = zlib.crc32(decompdata, crcval)

    decompdata = decompobj.flush()
    output.write(decompdata)
    length += len(decompdata)
    crcval = zlib.crc32(decompdata, crcval)

    # We've read to the end of the file, so we have to rewind in order
    # to reread the 8 bytes containing the CRC and the file size.  The
    # decompressor is smart and knows when to stop, so feeding it
    # extra data is harmless.
    input.seek(-8, 2)
    crc32 = read32(input)
    isize = read32(input)
    # read32() yields unsigned values, while zlib.crc32() may return a
    # signed one on Python 2; the stored size is modulo 2**32.  Mask both
    # computed values before comparing.
    if crc32 != (crcval & 0xFFFFFFFF):
        sys.stdout.write('CRC check failed.\n')
    if isize != (length & 0xFFFFFFFF):
        sys.stdout.write('Incorrect length of data produced\n')
Guido van Rossum5416a0d1997-05-28 16:13:21 +0000106
def main():
    """Compress or decompress the file named on the command line.

    A name ending in '.gz' is decompressed into the name without that
    suffix; any other name is compressed into the name plus '.gz'.  The
    original file is left in place.  Exits with status 2 after printing a
    usage message if the argument count is wrong.
    """
    if len(sys.argv) != 2:
        sys.stdout.write('Usage: minigzip.py <filename>\n')
        sys.stdout.write(' The file will be compressed or decompressed.\n')
        sys.exit(2)

    filename = sys.argv[1]
    compressing = not filename.endswith('.gz')
    if compressing:
        outputname = filename + '.gz'
    else:
        outputname = filename[:-3]

    # The with-blocks close both files even when compress()/decompress()
    # raises or calls sys.exit().
    with open(filename, 'rb') as infile:
        with open(outputname, 'wb') as outfile:
            if compressing:
                compress(filename, infile, outfile)
            else:
                decompress(infile, outfile)
131
132if __name__ == '__main__':
133 main()