Guido van Rossum | 9a4da08 | 1999-03-12 19:07:59 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
Guido van Rossum | 5416a0d | 1997-05-28 16:13:21 +0000 | [diff] [blame] | 2 | # Demo program for zlib; it compresses or decompresses files, but *doesn't* |
| 3 | # delete the original. This doesn't support all of gzip's options. |
Thomas Wouters | 73e5a5b | 2006-06-08 15:35:45 +0000 | [diff] [blame] | 4 | # |
| 5 | # The 'gzip' module in the standard library provides a more complete |
| 6 | # implementation of gzip-format files. |
| 7 | |
| 8 | import zlib, sys, os |
Guido van Rossum | 5416a0d | 1997-05-28 16:13:21 +0000 | [diff] [blame] | 9 | |
# Bit masks for the flag byte of a gzip header; decompress() tests the flag
# byte against these to decide which optional header fields to skip.
FTEXT = 1 << 0
FHCRC = 1 << 1
FEXTRA = 1 << 2
FNAME = 1 << 3
FCOMMENT = 1 << 4
| 11 | |
def write32(output, value):
    """Write *value* to *output* as a 32-bit little-endian integer.

    output -- binary file-like object with a write() method.
    value  -- integer to store; only its low 32 bits are written, so a
              negative number comes out in two's-complement form.
    """
    # The stream is binary, so emit bytes; the one-character strings that
    # chr() produces are rejected by binary files.
    output.write((value & 0xFFFFFFFF).to_bytes(4, 'little'))
Tim Peters | 182b5ac | 2004-07-18 06:16:08 +0000 | [diff] [blame] | 17 | |
def read32(input):
    """Read four single bytes from *input* and combine them into an integer.

    The first byte read is the least significant one (little-endian order).
    """
    # One read(1) per byte, in order; each byte is shifted into its place.
    return sum(ord(input.read(1)) << shift for shift in (0, 8, 16, 24))
| 24 | |
def compress(filename, input, output):
    """Compress *input* into *output* as a single gzip member.

    filename -- path of the file being compressed; it is stat()ed for the
                modification time and its base name is stored in the header.
    input    -- binary file-like object supplying the uncompressed data.
    output   -- binary file-like object that receives the gzip stream.
    """
    mtime = int(os.stat(filename).st_mtime)
    # RFC 1952 stores the original name without directory components,
    # encoded as ISO 8859-1 and terminated by a zero byte.
    stored_name = os.path.basename(filename).encode('latin-1', 'replace')

    output.write(b''.join((
        b'\037\213\010',                             # Magic number and method
        bytes([FNAME]),                              # ... flag byte ...
        (mtime & 0xFFFFFFFF).to_bytes(4, 'little'),  # ... modification time ...
        b'\002',                                     # ... slowest compression alg. ...
        b'\377',                                     # ... OS (=unknown) ...
        stored_name + b'\000',                       # ... original filename ...
    )))

    crcval = zlib.crc32(b'')
    size = 0
    # Negative window bits ask zlib for a raw deflate stream, because the
    # gzip header and trailer are written by hand here.
    compobj = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
                               zlib.DEF_MEM_LEVEL, 0)
    while True:
        data = input.read(1024)
        if not data:
            break
        # Count what was actually read instead of trusting the stat() size,
        # which can be stale if the file changed in the meantime.
        size += len(data)
        crcval = zlib.crc32(data, crcval)
        output.write(compobj.compress(data))
    output.write(compobj.flush())

    # Trailer: the CRC, then the uncompressed size modulo 2**32.
    output.write((crcval & 0xFFFFFFFF).to_bytes(4, 'little'))
    output.write((size & 0xFFFFFFFF).to_bytes(4, 'little'))
Guido van Rossum | 5416a0d | 1997-05-28 16:13:21 +0000 | [diff] [blame] | 48 | |
def _skip_cstring(stream):
    """Discard bytes from *stream* up to and including the next zero byte.

    Also returns at end of file, so a truncated header cannot loop forever.
    """
    while True:
        byte = stream.read(1)
        if not byte or byte == b'\0':
            return


def decompress(input, output):
    """Decompress one gzip member from *input* and write it to *output*.

    input  -- binary file-like object positioned at the start of a gzip file.
    output -- binary file-like object that receives the decompressed data.

    Prints a message and exits with status 1 if the header is not a gzip
    header or names a compression method other than deflate.  A CRC or
    length mismatch against the trailer is reported with a message but does
    not stop the program.  Corrupt deflate data raises zlib.error.
    """
    if input.read(2) != b'\037\213':
        print('Not a gzipped file')
        sys.exit(1)
    if input.read(1) != b'\010':
        print('Unknown compression method')
        sys.exit(1)
    flag_byte = input.read(1)
    if not flag_byte:
        # The file ends inside the fixed part of the header.
        print('Not a gzipped file')
        sys.exit(1)
    flag = flag_byte[0]
    input.read(4+1+1)                   # Discard modification time,
                                        # extra flags, and OS byte.
    if flag & FEXTRA:
        # Read & discard the extra field, if present
        xlen = int.from_bytes(input.read(2), 'little')
        input.read(xlen)
    if flag & FNAME:
        # Read and discard a null-terminated string containing the filename
        _skip_cstring(input)
    if flag & FCOMMENT:
        # Read and discard a null-terminated string containing a comment
        _skip_cstring(input)
    if flag & FHCRC:
        input.read(2)                   # Read & discard the 16-bit header CRC

    # Negative window bits select a raw deflate stream: the gzip header was
    # consumed above and the trailer is checked by hand below.
    decompobj = zlib.decompressobj(-zlib.MAX_WBITS)
    crcval = zlib.crc32(b'')
    length = 0
    while not decompobj.eof:
        data = input.read(1024)
        if not data:
            break
        decompdata = decompobj.decompress(data)
        output.write(decompdata)
        length += len(decompdata)
        crcval = zlib.crc32(decompdata, crcval)

    decompdata = decompobj.flush()
    output.write(decompdata)
    length += len(decompdata)
    crcval = zlib.crc32(decompdata, crcval)

    # The 8 bytes after the deflate data hold the CRC and the file size.
    # Whatever the decompressor was fed past the end of the stream is kept
    # in unused_data; read the remainder from the file if the last chunk
    # stopped short.  No seeking is needed.
    trailer = decompobj.unused_data
    if len(trailer) < 8:
        trailer += input.read(8 - len(trailer))
    if len(trailer) >= 8:
        crc32 = int.from_bytes(trailer[0:4], 'little')
        isize = int.from_bytes(trailer[4:8], 'little')
    else:
        # Truncated file: there is nothing to compare against, so both
        # checks below report a failure.
        crc32 = isize = None
    if crc32 != crcval:
        print('CRC check failed.')
    # The stored size is the uncompressed length modulo 2**32.
    if isize != (length & 0xFFFFFFFF):
        print('Incorrect length of data produced')
Guido van Rossum | 5416a0d | 1997-05-28 16:13:21 +0000 | [diff] [blame] | 106 | |
def main():
    """Compress or decompress the single file named on the command line.

    A name ending in '.gz' is decompressed into the same name without that
    suffix; any other file is compressed into '<name>.gz'.  The input file
    is never deleted.  Exits with status 2 when the argument count is wrong.
    """
    if len(sys.argv) != 2:
        print('Usage: minigzip.py <filename>')
        print(' The file will be compressed or decompressed.')
        # A usage error is a failure; report it through the exit status.
        sys.exit(2)

    filename = sys.argv[1]
    compressing = not filename.endswith('.gz')
    if compressing:
        outputname = filename + '.gz'
    else:
        outputname = filename[:-3]

    # The with statement closes both files even if (de)compression raises.
    with open(filename, 'rb') as source, open(outputname, 'wb') as sink:
        if compressing:
            compress(filename, source, sink)
        else:
            decompress(source, sink)
| 131 | |
# Run the command-line driver only when this file is executed as a script.
if __name__ == "__main__":
    main()