Benjamin Peterson | 90f5ba5 | 2010-03-11 22:53:45 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Guido van Rossum | 5416a0d | 1997-05-28 16:13:21 +0000 | [diff] [blame] | 2 | # Demo program for zlib; it compresses or decompresses files, but *doesn't* |
| 3 | # delete the original. This doesn't support all of gzip's options. |
Thomas Wouters | 73e5a5b | 2006-06-08 15:35:45 +0000 | [diff] [blame] | 4 | # |
| 5 | # The 'gzip' module in the standard library provides a more complete |
| 6 | # implementation of gzip-format files. |
| 7 | |
| 8 | import zlib, sys, os |
Guido van Rossum | 5416a0d | 1997-05-28 16:13:21 +0000 | [diff] [blame] | 9 | |
# Bit masks for the gzip header flag byte (FLG).
FTEXT = 1 << 0      # 1
FHCRC = 1 << 1      # 2; decompress() skips a 16-bit header CRC when set
FEXTRA = 1 << 2     # 4; decompress() skips a length-prefixed extra field when set
FNAME = 1 << 3      # 8; a null-terminated original filename follows the header
FCOMMENT = 1 << 4   # 16; a null-terminated comment follows the header
| 11 | |
def write32(output, value):
    """Write *value* to *output* as a 32-bit little-endian integer.

    Only the low 32 bits of *value* are emitted, so values of 2**32 or
    more wrap around and negative values are written in two's-complement
    form.  The four bytes are written with a single ``output.write`` call.

    output -- a binary file-like object with a ``write`` method.
    value  -- the integer to write.
    """
    output.write((value & 0xFFFFFFFF).to_bytes(4, 'little'))
Tim Peters | 182b5ac | 2004-07-18 06:16:08 +0000 | [diff] [blame] | 17 | |
def read32(input):
    """Read a 32-bit little-endian unsigned integer from *input*.

    input -- a binary file-like object with a ``read`` method.

    Returns the decoded integer.  Raises EOFError if fewer than four
    bytes could be read, instead of failing with an obscure TypeError
    from ``ord(b'')`` as a byte-at-a-time read would.
    """
    raw = input.read(4)
    if len(raw) != 4:
        raise EOFError('Unexpected end of file while reading a 32-bit value')
    return int.from_bytes(raw, 'little')
| 24 | |
def compress(filename, input, output):
    """Compress the data read from *input* and write a gzip member to *output*.

    filename -- path of the file being compressed; it is stat()ed for the
                modification time and its base name is stored in the header.
    input    -- binary file-like object to read the uncompressed data from.
    output   -- binary file-like object to write the gzip stream to.

    Raises OSError if *filename* cannot be stat()ed.
    """
    output.write(b'\037\213\010')        # Write the header, ...
    output.write(bytes([FNAME]))         # ... flag byte ...

    statval = os.stat(filename)          # ... modification time ...
    write32(output, int(statval.st_mtime))
    output.write(b'\002')                # ... slowest compression alg. ...
    output.write(b'\377')                # ... OS (=unknown) ...

    # ... original filename.  RFC 1952 asks for the name with directory
    # components removed; os.fsencode() also copes with names containing
    # surrogate escapes, which a plain str.encode() would reject.
    bfilename = os.fsencode(os.path.basename(filename))
    output.write(bfilename + b'\000')

    crcval = zlib.crc32(b'')
    size = 0
    # Negative wbits selects a raw deflate stream, since the gzip header
    # and trailer are written by hand here.
    compobj = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
                               zlib.DEF_MEM_LEVEL, 0)
    while True:
        data = input.read(1024)
        if not data:
            break
        crcval = zlib.crc32(data, crcval)
        size += len(data)
        output.write(compobj.compress(data))
    output.write(compobj.flush())
    write32(output, crcval)              # ... the CRC ...
    # ... and the size, counted from the bytes actually compressed rather
    # than taken from stat(), so it always matches the data (modulo 2**32).
    write32(output, size & 0xFFFFFFFF)
Guido van Rossum | 5416a0d | 1997-05-28 16:13:21 +0000 | [diff] [blame] | 49 | |
def _read_exact(stream, count):
    """Read exactly *count* bytes from *stream* or raise EOFError."""
    data = stream.read(count)
    if len(data) != count:
        raise EOFError('Compressed file ended before the end of the '
                       'gzip header was reached')
    return data


def _skip_cstring(stream):
    """Read and discard bytes up to and including the next null byte."""
    # _read_exact() raises at end of file, so a truncated header can no
    # longer make this loop spin forever on empty reads.
    while _read_exact(stream, 1) != b'\0':
        pass


def decompress(input, output):
    """Decompress the gzip member in *input* and write the data to *output*.

    input  -- seekable binary file-like object holding the gzip stream.
    output -- binary file-like object to write the decompressed data to.

    Prints a message and exits if the magic number or the compression
    method is not recognised.  Prints a warning if the CRC or the length
    stored in the trailer does not match the decompressed data.  Raises
    EOFError if the header is truncated.
    """
    magic = input.read(2)
    if magic != b'\037\213':
        print('Not a gzipped file')
        sys.exit(0)

    # Rest of the fixed header: method, flags, modification time (4 bytes),
    # extra flags and OS byte.  Only the method and the flags are used.
    header = _read_exact(input, 8)
    if header[0] != 8:
        print('Unknown compression method')
        sys.exit(0)
    flag = header[1]

    if flag & FEXTRA:
        # Read & discard the extra field, if present
        xlen = int.from_bytes(_read_exact(input, 2), 'little')
        _read_exact(input, xlen)
    if flag & FNAME:
        # Read and discard a null-terminated string containing the filename
        _skip_cstring(input)
    if flag & FCOMMENT:
        # Read and discard a null-terminated string containing a comment
        _skip_cstring(input)
    if flag & FHCRC:
        _read_exact(input, 2)   # Read & discard the 16-bit header CRC

    # Negative wbits: the data is a raw deflate stream, because the gzip
    # header has already been consumed above.
    decompobj = zlib.decompressobj(-zlib.MAX_WBITS)
    crcval = zlib.crc32(b'')
    length = 0
    while True:
        data = input.read(1024)
        if not data:
            break
        decompdata = decompobj.decompress(data)
        output.write(decompdata)
        length += len(decompdata)
        crcval = zlib.crc32(decompdata, crcval)

    decompdata = decompobj.flush()
    output.write(decompdata)
    length += len(decompdata)
    crcval = zlib.crc32(decompdata, crcval)

    # We've read to the end of the file, so we have to rewind in order
    # to reread the 8 bytes containing the CRC and the file size.  The
    # decompressor is smart and knows when to stop, so feeding it
    # extra data is harmless.
    input.seek(-8, 2)
    crc32 = read32(input)
    isize = read32(input)
    if crc32 != crcval:
        print('CRC check failed.')
    if isize != length:
        print('Incorrect length of data produced')
Guido van Rossum | 5416a0d | 1997-05-28 16:13:21 +0000 | [diff] [blame] | 107 | |
def main():
    """Compress or decompress the file named on the command line.

    A name ending in '.gz' is decompressed into the name without that
    suffix; any other name is compressed into the name plus '.gz'.  The
    original file is left in place.  Prints a usage message and exits
    unless exactly one argument is given.
    """
    if len(sys.argv) != 2:
        print('Usage: minigzip.py <filename>')
        print(' The file will be compressed or decompressed.')
        sys.exit(0)

    filename = sys.argv[1]
    if filename.endswith('.gz'):
        compressing = False
        outputname = filename[:-3]
    else:
        compressing = True
        outputname = filename + '.gz'

    # The with statement closes both files even when compress() or
    # decompress() raises or calls sys.exit().
    with open(filename, 'rb') as infile, open(outputname, 'wb') as outfile:
        if compressing:
            compress(filename, infile, outfile)
        else:
            decompress(infile, outfile)
| 132 | |
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()