blob: 78ee1d3c1526834ce912b9d53590d1342fbd138b [file] [log] [blame]
Erik Andersene49d5ec2000-02-08 19:58:47 +00001/* vi: set sw=4 ts=4: */
Erik Andersen61677fe2000-04-13 01:18:56 +00002/*
3 * Gzip implementation for busybox
4 *
5 * Based on GNU gzip Copyright (C) 1992-1993 Jean-loup Gailly.
6 *
7 * Originally adjusted for busybox by Sven Rudolph <sr1@inf.tu-dresden.de>
8 * based on gzip sources
9 *
10 * Adjusted further by Erik Andersen <andersen@lineo.com>, <andersee@debian.org>
11 * to support files as well as stdin/stdout, and to generally behave itself wrt
12 * command line handling.
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 * General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 *
28 */
Eric Andersenb052b471999-11-18 00:21:59 +000029
30#include "internal.h"
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000031
Eric Andersenb052b471999-11-18 00:21:59 +000032static const char gunzip_usage[] =
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000033 "gunzip [OPTION]... FILE\n"
34#ifndef BB_FEATURE_TRIVIAL_HELP
35 "\nUncompress FILE (or standard input if FILE is '-').\n\n"
Erik Andersene49d5ec2000-02-08 19:58:47 +000036 "Options:\n"
37
38 "\t-c\tWrite output to standard output\n"
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000039 "\t-t\tTest compressed file integrity\n"
40#endif
41 ;
Eric Andersenb052b471999-11-18 00:21:59 +000042
Erik Andersen61677fe2000-04-13 01:18:56 +000043
44 /* These defines are very important for BusyBox. Without these,
45 * huge chunks of ram are pre-allocated making the BusyBox bss
46 * size Freaking Huge(tm), which is a bad thing.*/
47#define SMALL_MEM
48#define DYN_ALLOC
49
Erik Andersen61677fe2000-04-13 01:18:56 +000050#define BB_DECLARE_EXTERN
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000051#define bb_need_memory_exhausted
52#define bb_need_name_too_long
Erik Andersen61677fe2000-04-13 01:18:56 +000053#include "messages.c"
54
55
Eric Andersenb052b471999-11-18 00:21:59 +000056/* gzip (GNU zip) -- compress files with zip algorithm and 'compress' interface
57 * Copyright (C) 1992-1993 Jean-loup Gailly
58 * The unzip code was written and put in the public domain by Mark Adler.
59 * Portions of the lzw code are derived from the public domain 'compress'
60 * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies,
61 * Ken Turkowski, Dave Mack and Peter Jannesen.
62 *
63 * See the license_msg below and the file COPYING for the software license.
64 * See the file algorithm.doc for the compression algorithms and file formats.
65 */
66
67#if 0
Erik Andersene49d5ec2000-02-08 19:58:47 +000068static char *license_msg[] = {
69 " Copyright (C) 1992-1993 Jean-loup Gailly",
70 " This program is free software; you can redistribute it and/or modify",
71 " it under the terms of the GNU General Public License as published by",
72 " the Free Software Foundation; either version 2, or (at your option)",
73 " any later version.",
74 "",
75 " This program is distributed in the hope that it will be useful,",
76 " but WITHOUT ANY WARRANTY; without even the implied warranty of",
77 " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the",
78 " GNU General Public License for more details.",
79 "",
80 " You should have received a copy of the GNU General Public License",
81 " along with this program; if not, write to the Free Software",
82 " Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.",
83 0
84};
Eric Andersenb052b471999-11-18 00:21:59 +000085#endif
86
87/* Compress files with zip algorithm and 'compress' interface.
88 * See usage() and help() functions below for all options.
89 * Outputs:
90 * file.gz: compressed file with same mode, owner, and utimes
91 * or stdout with -c option or if stdin used as input.
92 * If the output file name had to be truncated, the original name is kept
93 * in the compressed file.
94 * On MSDOS, file.tmp -> file.tmz. On VMS, file.tmp -> file.tmp-gz.
95 *
96 * Using gz on MSDOS would create too many file name conflicts. For
97 * example, foo.txt -> foo.tgz (.tgz must be reserved as shorthand for
98 * tar.gz). Similarly, foo.dir and foo.doc would both be mapped to foo.dgz.
99 * I also considered 12345678.txt -> 12345txt.gz but this truncates the name
100 * too heavily. There is no ideal solution given the MSDOS 8+3 limitation.
101 *
102 * For the meaning of all compilation flags, see comments in Makefile.in.
103 */
104
105#include <ctype.h>
106#include <sys/types.h>
107#include <signal.h>
108#include <sys/stat.h>
109#include <errno.h>
110
111/* #include "tailor.h" */
112
113/* tailor.h -- target dependent definitions
114 * Copyright (C) 1992-1993 Jean-loup Gailly.
115 * This is free software; you can redistribute it and/or modify it under the
116 * terms of the GNU General Public License, see the file COPYING.
117 */
118
119/* The target dependent definitions should be defined here only.
120 * The target dependent functions should be defined in tailor.c.
121 */
122
123#define RECORD_IO 0
124
125#define get_char() get_byte()
126#define put_char(c) put_byte(c)
127
Eric Andersenb052b471999-11-18 00:21:59 +0000128
129/* I don't like nested includes, but the string and io functions are used
130 * too often
131 */
132#include <stdio.h>
133#if !defined(NO_STRING_H) || defined(STDC_HEADERS)
134# include <string.h>
135# if !defined(STDC_HEADERS) && !defined(NO_MEMORY_H) && !defined(__GNUC__)
136# include <memory.h>
137# endif
Erik Andersen61677fe2000-04-13 01:18:56 +0000138# define memzero(s, n) memset ((void *)(s), 0, (n))
Eric Andersenb052b471999-11-18 00:21:59 +0000139#else
140# include <strings.h>
Erik Andersene49d5ec2000-02-08 19:58:47 +0000141# define strchr index
Eric Andersenb052b471999-11-18 00:21:59 +0000142# define strrchr rindex
Erik Andersene49d5ec2000-02-08 19:58:47 +0000143# define memcpy(d, s, n) bcopy((s), (d), (n))
144# define memcmp(s1, s2, n) bcmp((s1), (s2), (n))
Eric Andersenb052b471999-11-18 00:21:59 +0000145# define memzero(s, n) bzero((s), (n))
146#endif
147
148#ifndef RETSIGTYPE
149# define RETSIGTYPE void
150#endif
151
152#define local static
153
Erik Andersene49d5ec2000-02-08 19:58:47 +0000154typedef unsigned char uch;
Eric Andersenb052b471999-11-18 00:21:59 +0000155typedef unsigned short ush;
Erik Andersene49d5ec2000-02-08 19:58:47 +0000156typedef unsigned long ulg;
Eric Andersenb052b471999-11-18 00:21:59 +0000157
158/* Return codes from gzip */
159#define OK 0
160#define ERROR 1
161#define WARNING 2
162
163/* Compression methods (see algorithm.doc) */
164#define DEFLATED 8
165
Erik Andersene49d5ec2000-02-08 19:58:47 +0000166extern int method; /* compression method */
Eric Andersenb052b471999-11-18 00:21:59 +0000167
168/* To save memory for 16 bit systems, some arrays are overlaid between
169 * the various modules:
170 * deflate: prev+head window d_buf l_buf outbuf
171 * unlzw: tab_prefix tab_suffix stack inbuf outbuf
172 * inflate: window inbuf
173 * unpack: window inbuf prefix_len
174 * unlzh: left+right window c_table inbuf c_len
175 * For compression, input is done in window[]. For decompression, output
176 * is done in window except for unlzw.
177 */
178
179#ifndef INBUFSIZ
180# ifdef SMALL_MEM
Erik Andersene49d5ec2000-02-08 19:58:47 +0000181# define INBUFSIZ 0x2000 /* input buffer size */
Eric Andersenb052b471999-11-18 00:21:59 +0000182# else
Erik Andersene49d5ec2000-02-08 19:58:47 +0000183# define INBUFSIZ 0x8000 /* input buffer size */
Eric Andersenb052b471999-11-18 00:21:59 +0000184# endif
185#endif
Erik Andersene49d5ec2000-02-08 19:58:47 +0000186#define INBUF_EXTRA 64 /* required by unlzw() */
Eric Andersenb052b471999-11-18 00:21:59 +0000187
188#ifndef OUTBUFSIZ
189# ifdef SMALL_MEM
Erik Andersene49d5ec2000-02-08 19:58:47 +0000190# define OUTBUFSIZ 8192 /* output buffer size */
Eric Andersenb052b471999-11-18 00:21:59 +0000191# else
Erik Andersene49d5ec2000-02-08 19:58:47 +0000192# define OUTBUFSIZ 16384 /* output buffer size */
Eric Andersenb052b471999-11-18 00:21:59 +0000193# endif
194#endif
Erik Andersene49d5ec2000-02-08 19:58:47 +0000195#define OUTBUF_EXTRA 2048 /* required by unlzw() */
Eric Andersenb052b471999-11-18 00:21:59 +0000196
197#define SMALL_MEM
198
199#ifndef DIST_BUFSIZE
200# ifdef SMALL_MEM
Erik Andersene49d5ec2000-02-08 19:58:47 +0000201# define DIST_BUFSIZE 0x2000 /* buffer for distances, see trees.c */
Eric Andersenb052b471999-11-18 00:21:59 +0000202# else
Erik Andersene49d5ec2000-02-08 19:58:47 +0000203# define DIST_BUFSIZE 0x8000 /* buffer for distances, see trees.c */
Eric Andersenb052b471999-11-18 00:21:59 +0000204# endif
205#endif
206
207/*#define DYN_ALLOC*/
208
209#ifdef DYN_ALLOC
210# define EXTERN(type, array) extern type * array
211# define DECLARE(type, array, size) type * array
212# define ALLOC(type, array, size) { \
213 array = (type*)calloc((size_t)(((size)+1L)/2), 2*sizeof(type)); \
Erik Andersen7ab9c7e2000-05-12 19:41:47 +0000214 if (array == NULL) errorMsg(memory_exhausted, "gunzip"); \
Eric Andersenb052b471999-11-18 00:21:59 +0000215 }
216# define FREE(array) {if (array != NULL) free(array), array=NULL;}
217#else
218# define EXTERN(type, array) extern type array[]
219# define DECLARE(type, array, size) type array[size]
220# define ALLOC(type, array, size)
221# define FREE(array)
222#endif
223
Erik Andersene49d5ec2000-02-08 19:58:47 +0000224EXTERN(uch, inbuf); /* input buffer */
225EXTERN(uch, outbuf); /* output buffer */
226EXTERN(ush, d_buf); /* buffer for distances, see trees.c */
227EXTERN(uch, window); /* Sliding window and suffix table (unlzw) */
Eric Andersenb052b471999-11-18 00:21:59 +0000228#define tab_suffix window
229#ifndef MAXSEG_64K
Erik Andersene49d5ec2000-02-08 19:58:47 +0000230# define tab_prefix prev /* hash link (see deflate.c) */
231# define head (prev+WSIZE) /* hash head (see deflate.c) */
232EXTERN(ush, tab_prefix); /* prefix code (see unlzw.c) */
Eric Andersenb052b471999-11-18 00:21:59 +0000233#else
234# define tab_prefix0 prev
235# define head tab_prefix1
Erik Andersene49d5ec2000-02-08 19:58:47 +0000236EXTERN(ush, tab_prefix0); /* prefix for even codes */
237EXTERN(ush, tab_prefix1); /* prefix for odd codes */
Eric Andersenb052b471999-11-18 00:21:59 +0000238#endif
239
Erik Andersene49d5ec2000-02-08 19:58:47 +0000240extern unsigned insize; /* valid bytes in inbuf */
241extern unsigned inptr; /* index of next byte to be processed in inbuf */
242extern unsigned outcnt; /* bytes in output buffer */
Eric Andersenb052b471999-11-18 00:21:59 +0000243
Erik Andersene49d5ec2000-02-08 19:58:47 +0000244extern long bytes_in; /* number of input bytes */
245extern long bytes_out; /* number of output bytes */
246extern long header_bytes; /* number of bytes in gzip header */
Eric Andersenb052b471999-11-18 00:21:59 +0000247
Erik Andersene49d5ec2000-02-08 19:58:47 +0000248extern long ifile_size; /* input file size, -1 for devices (debug only) */
Eric Andersenb052b471999-11-18 00:21:59 +0000249
Erik Andersene49d5ec2000-02-08 19:58:47 +0000250typedef int file_t; /* Do not use stdio */
251
252#define NO_FILE (-1) /* in memory compression */
Eric Andersenb052b471999-11-18 00:21:59 +0000253
254
Erik Andersene49d5ec2000-02-08 19:58:47 +0000255#define GZIP_MAGIC "\037\213" /* Magic header for gzip files, 1F 8B */
Eric Andersenb052b471999-11-18 00:21:59 +0000256
257/* gzip flag byte */
Erik Andersene49d5ec2000-02-08 19:58:47 +0000258#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */
259#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
260#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
261#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
262#define COMMENT 0x10 /* bit 4 set: file comment present */
263#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */
264#define RESERVED 0xC0 /* bit 6,7: reserved */
Eric Andersenb052b471999-11-18 00:21:59 +0000265
266#ifndef WSIZE
Erik Andersene49d5ec2000-02-08 19:58:47 +0000267# define WSIZE 0x8000 /* window size--must be a power of two, and */
268#endif /* at least 32K for zip's deflate method */
Eric Andersenb052b471999-11-18 00:21:59 +0000269
270#define MIN_MATCH 3
271#define MAX_MATCH 258
272/* The minimum and maximum match lengths */
273
274#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
275/* Minimum amount of lookahead, except at the end of the input file.
276 * See deflate.c for comments about the MIN_MATCH+1.
277 */
278
279#define MAX_DIST (WSIZE-MIN_LOOKAHEAD)
280/* In order to simplify the code, particularly on 16 bit machines, match
281 * distances are limited to MAX_DIST instead of WSIZE.
282 */
283
Erik Andersene49d5ec2000-02-08 19:58:47 +0000284extern int exit_code; /* program exit code */
285extern int verbose; /* be verbose (-v) */
286extern int level; /* compression level */
287extern int test; /* check .z file integrity */
288extern int save_orig_name; /* set if original name must be saved */
Eric Andersenb052b471999-11-18 00:21:59 +0000289
290#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf(0))
291#define try_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf(1))
292
293/* put_byte is used for the compressed output, put_ubyte for the
294 * uncompressed output. However unlzw() uses window for its
295 * suffix table instead of its output buffer, so it does not use put_ubyte
296 * (to be cleaned up).
297 */
298#define put_byte(c) {outbuf[outcnt++]=(uch)(c); if (outcnt==OUTBUFSIZ)\
299 flush_outbuf();}
300#define put_ubyte(c) {window[outcnt++]=(uch)(c); if (outcnt==WSIZE)\
301 flush_window();}
302
303/* Output a 16 bit value, lsb first */
304#define put_short(w) \
305{ if (outcnt < OUTBUFSIZ-2) { \
306 outbuf[outcnt++] = (uch) ((w) & 0xff); \
307 outbuf[outcnt++] = (uch) ((ush)(w) >> 8); \
308 } else { \
309 put_byte((uch)((w) & 0xff)); \
310 put_byte((uch)((ush)(w) >> 8)); \
311 } \
312}
313
314/* Output a 32 bit value to the bit stream, lsb first */
315#define put_long(n) { \
316 put_short((n) & 0xffff); \
317 put_short(((ulg)(n)) >> 16); \
318}
319
Erik Andersene49d5ec2000-02-08 19:58:47 +0000320#define seekable() 0 /* force sequential output */
321#define translate_eol 0 /* no option -a yet */
Eric Andersenb052b471999-11-18 00:21:59 +0000322
Erik Andersene49d5ec2000-02-08 19:58:47 +0000323#define tolow(c) (isupper(c) ? (c)-'A'+'a' : (c)) /* force to lower case */
Eric Andersenb052b471999-11-18 00:21:59 +0000324
325/* Macros for getting two-byte and four-byte header values */
326#define SH(p) ((ush)(uch)((p)[0]) | ((ush)(uch)((p)[1]) << 8))
327#define LG(p) ((ulg)(SH(p)) | ((ulg)(SH((p)+2)) << 16))
328
329/* Diagnostic functions */
330#ifdef DEBUG
Erik Andersen9ffdaa62000-02-11 21:55:04 +0000331# define Assert(cond,msg) {if(!(cond)) errorMsg(msg);}
Eric Andersenb052b471999-11-18 00:21:59 +0000332# define Trace(x) fprintf x
333# define Tracev(x) {if (verbose) fprintf x ;}
334# define Tracevv(x) {if (verbose>1) fprintf x ;}
335# define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
336# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
337#else
338# define Assert(cond,msg)
339# define Trace(x)
340# define Tracev(x)
341# define Tracevv(x)
342# define Tracec(c,x)
343# define Tracecv(c,x)
344#endif
345
346#define WARN(msg) {fprintf msg ; \
347 if (exit_code == OK) exit_code = WARNING;}
348
349 /* in unzip.c */
Erik Andersen61677fe2000-04-13 01:18:56 +0000350extern int unzip (int in, int out);
Eric Andersenb052b471999-11-18 00:21:59 +0000351
352 /* in gzip.c */
Erik Andersen61677fe2000-04-13 01:18:56 +0000353RETSIGTYPE abort_gzip (void);
Eric Andersenb052b471999-11-18 00:21:59 +0000354
Erik Andersene49d5ec2000-02-08 19:58:47 +0000355 /* in deflate.c */
Erik Andersen61677fe2000-04-13 01:18:56 +0000356void lm_init (int pack_level, ush * flags);
357ulg deflate (void);
Eric Andersenb052b471999-11-18 00:21:59 +0000358
Erik Andersene49d5ec2000-02-08 19:58:47 +0000359 /* in trees.c */
Erik Andersen61677fe2000-04-13 01:18:56 +0000360void ct_init (ush * attr, int *method);
361int ct_tally (int dist, int lc);
362ulg flush_block (char *buf, ulg stored_len, int eof);
Eric Andersenb052b471999-11-18 00:21:59 +0000363
Erik Andersene49d5ec2000-02-08 19:58:47 +0000364 /* in bits.c */
Erik Andersen61677fe2000-04-13 01:18:56 +0000365void bi_init (file_t zipfile);
366void send_bits (int value, int length);
367unsigned bi_reverse (unsigned value, int length);
368void bi_windup (void);
369void copy_block (char *buf, unsigned len, int header);
Eric Andersenb052b471999-11-18 00:21:59 +0000370
371 /* in util.c: */
Erik Andersen61677fe2000-04-13 01:18:56 +0000372extern ulg updcrc (uch * s, unsigned n);
373extern void clear_bufs (void);
Erik Andersen330fd2b2000-05-19 05:35:19 +0000374static int fill_inbuf (int eof_ok);
Erik Andersen61677fe2000-04-13 01:18:56 +0000375extern void flush_outbuf (void);
Erik Andersen330fd2b2000-05-19 05:35:19 +0000376static void flush_window (void);
Erik Andersen61677fe2000-04-13 01:18:56 +0000377extern void write_buf (int fd, void * buf, unsigned cnt);
Erik Andersene49d5ec2000-02-08 19:58:47 +0000378
Eric Andersenb052b471999-11-18 00:21:59 +0000379#ifndef __linux__
Erik Andersen330fd2b2000-05-19 05:35:19 +0000380static char *basename (char *fname);
Erik Andersene49d5ec2000-02-08 19:58:47 +0000381#endif /* not __linux__ */
Erik Andersen330fd2b2000-05-19 05:35:19 +0000382void read_error_msg (void);
383void write_error_msg (void);
Eric Andersenb052b471999-11-18 00:21:59 +0000384
385 /* in inflate.c */
Erik Andersen330fd2b2000-05-19 05:35:19 +0000386static int inflate (void);
Eric Andersenb052b471999-11-18 00:21:59 +0000387
388/* #include "lzw.h" */
389
390/* lzw.h -- define the lzw functions.
391 * Copyright (C) 1992-1993 Jean-loup Gailly.
392 * This is free software; you can redistribute it and/or modify it under the
393 * terms of the GNU General Public License, see the file COPYING.
394 */
395
396#if !defined(OF) && defined(lint)
397# include "gzip.h"
398#endif
399
400#ifndef BITS
401# define BITS 16
402#endif
Erik Andersene49d5ec2000-02-08 19:58:47 +0000403#define INIT_BITS 9 /* Initial number of bits per code */
Eric Andersenb052b471999-11-18 00:21:59 +0000404
Erik Andersene49d5ec2000-02-08 19:58:47 +0000405#define LZW_MAGIC "\037\235" /* Magic header for lzw files, 1F 9D */
Eric Andersenb052b471999-11-18 00:21:59 +0000406
Erik Andersene49d5ec2000-02-08 19:58:47 +0000407#define BIT_MASK 0x1f /* Mask for 'number of compression bits' */
Eric Andersenb052b471999-11-18 00:21:59 +0000408/* Mask 0x20 is reserved to mean a fourth header byte, and 0x40 is free.
409 * It's a pity that old uncompress does not check bit 0x20. That makes
410 * extension of the format actually undesirable because old compress
411 * would just crash on the new format instead of giving a meaningful
412 * error message. It does check the number of bits, but it's more
413 * helpful to say "unsupported format, get a new version" than
414 * "can only handle 16 bits".
415 */
416
417#define BLOCK_MODE 0x80
418/* Block compression: if table is full and compression rate is dropping,
419 * clear the dictionary.
420 */
421
Erik Andersene49d5ec2000-02-08 19:58:47 +0000422#define LZW_RESERVED 0x60 /* reserved bits */
Eric Andersenb052b471999-11-18 00:21:59 +0000423
Erik Andersene49d5ec2000-02-08 19:58:47 +0000424#define CLEAR 256 /* flush the dictionary */
425#define FIRST (CLEAR+1) /* first free entry */
Eric Andersenb052b471999-11-18 00:21:59 +0000426
Erik Andersene49d5ec2000-02-08 19:58:47 +0000427extern int maxbits; /* max bits per code for LZW */
428extern int block_mode; /* block compress mode -C compatible with 2.0 */
Eric Andersenb052b471999-11-18 00:21:59 +0000429
Erik Andersen61677fe2000-04-13 01:18:56 +0000430extern int lzw (int in, int out);
431extern int unlzw (int in, int out);
Eric Andersenb052b471999-11-18 00:21:59 +0000432
433
434/* #include "revision.h" */
435
436/* revision.h -- define the version number
437 * Copyright (C) 1992-1993 Jean-loup Gailly.
438 * This is free software; you can redistribute it and/or modify it under the
439 * terms of the GNU General Public License, see the file COPYING.
440 */
441
442#define VERSION "1.2.4"
443#define PATCHLEVEL 0
444#define REVDATE "18 Aug 93"
445
446/* This version does not support compression into old compress format: */
447#ifdef LZW
448# undef LZW
449#endif
450
Eric Andersenb052b471999-11-18 00:21:59 +0000451#include <time.h>
452#include <fcntl.h>
453#include <unistd.h>
Eric Andersenb052b471999-11-18 00:21:59 +0000454#include <stdlib.h>
Eric Andersenb052b471999-11-18 00:21:59 +0000455#if defined(DIRENT)
456# include <dirent.h>
Erik Andersene49d5ec2000-02-08 19:58:47 +0000457typedef struct dirent dir_type;
458
Eric Andersenb052b471999-11-18 00:21:59 +0000459# define NLENGTH(dirent) ((int)strlen((dirent)->d_name))
460# define DIR_OPT "DIRENT"
461#else
462# define NLENGTH(dirent) ((dirent)->d_namlen)
463# ifdef SYSDIR
464# include <sys/dir.h>
Erik Andersene49d5ec2000-02-08 19:58:47 +0000465typedef struct direct dir_type;
466
Eric Andersenb052b471999-11-18 00:21:59 +0000467# define DIR_OPT "SYSDIR"
468# else
469# ifdef SYSNDIR
470# include <sys/ndir.h>
Erik Andersene49d5ec2000-02-08 19:58:47 +0000471typedef struct direct dir_type;
472
Eric Andersenb052b471999-11-18 00:21:59 +0000473# define DIR_OPT "SYSNDIR"
474# else
475# ifdef NDIR
476# include <ndir.h>
Erik Andersene49d5ec2000-02-08 19:58:47 +0000477typedef struct direct dir_type;
478
Eric Andersenb052b471999-11-18 00:21:59 +0000479# define DIR_OPT "NDIR"
480# else
481# define NO_DIR
482# define DIR_OPT "NO_DIR"
483# endif
484# endif
485# endif
486#endif
Eric Andersenb052b471999-11-18 00:21:59 +0000487#if !defined(S_ISDIR) && defined(S_IFDIR)
488# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
489#endif
490#if !defined(S_ISREG) && defined(S_IFREG)
491# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
492#endif
Erik Andersen61677fe2000-04-13 01:18:56 +0000493typedef RETSIGTYPE(*sig_type) (int);
Eric Andersenb052b471999-11-18 00:21:59 +0000494
495#ifndef O_BINARY
Erik Andersene49d5ec2000-02-08 19:58:47 +0000496# define O_BINARY 0 /* creation mode for open() */
Eric Andersenb052b471999-11-18 00:21:59 +0000497#endif
498
499#ifndef O_CREAT
500 /* Pure BSD system? */
501# include <sys/file.h>
502# ifndef O_CREAT
503# define O_CREAT FCREAT
504# endif
505# ifndef O_EXCL
506# define O_EXCL FEXCL
507# endif
508#endif
509
510#ifndef S_IRUSR
511# define S_IRUSR 0400
512#endif
513#ifndef S_IWUSR
514# define S_IWUSR 0200
515#endif
Erik Andersene49d5ec2000-02-08 19:58:47 +0000516#define RW_USER (S_IRUSR | S_IWUSR) /* creation mode for open() */
Eric Andersenb052b471999-11-18 00:21:59 +0000517
Erik Andersene49d5ec2000-02-08 19:58:47 +0000518#ifndef MAX_PATH_LEN /* max pathname length */
Erik Andersen4f3f7572000-04-28 00:18:56 +0000519# ifdef BUFSIZ
520# define MAX_PATH_LEN BUFSIZ
Erik Andersenfac10d72000-02-07 05:29:42 +0000521# else
522# define MAX_PATH_LEN 1024
523# endif
Eric Andersenb052b471999-11-18 00:21:59 +0000524#endif
525
526#ifndef SEEK_END
527# define SEEK_END 2
528#endif
529
530#ifdef NO_OFF_T
Erik Andersene49d5ec2000-02-08 19:58:47 +0000531typedef long off_t;
Erik Andersen61677fe2000-04-13 01:18:56 +0000532off_t lseek (int fd, off_t offset, int whence);
Eric Andersenb052b471999-11-18 00:21:59 +0000533#endif
534
535
536 /* global buffers */
537
Erik Andersene49d5ec2000-02-08 19:58:47 +0000538DECLARE(uch, inbuf, INBUFSIZ + INBUF_EXTRA);
539DECLARE(uch, outbuf, OUTBUFSIZ + OUTBUF_EXTRA);
540DECLARE(ush, d_buf, DIST_BUFSIZE);
541DECLARE(uch, window, 2L * WSIZE);
Eric Andersenb052b471999-11-18 00:21:59 +0000542#ifndef MAXSEG_64K
Erik Andersene49d5ec2000-02-08 19:58:47 +0000543DECLARE(ush, tab_prefix, 1L << BITS);
Eric Andersenb052b471999-11-18 00:21:59 +0000544#else
Erik Andersene49d5ec2000-02-08 19:58:47 +0000545DECLARE(ush, tab_prefix0, 1L << (BITS - 1));
546DECLARE(ush, tab_prefix1, 1L << (BITS - 1));
Eric Andersenb052b471999-11-18 00:21:59 +0000547#endif
548
549 /* local variables */
550
Erik Andersene49d5ec2000-02-08 19:58:47 +0000551int test_mode = 0; /* check file integrity option */
552int foreground; /* set if program run in foreground */
553int maxbits = BITS; /* max bits per code for LZW */
554int method = DEFLATED; /* compression method */
555int exit_code = OK; /* program exit code */
556int last_member; /* set for .zip and .Z files */
557int part_nb; /* number of parts in .gz file */
558long ifile_size; /* input file size, -1 for devices (debug only) */
Eric Andersenb052b471999-11-18 00:21:59 +0000559
Erik Andersene49d5ec2000-02-08 19:58:47 +0000560long bytes_in; /* number of input bytes */
561long bytes_out; /* number of output bytes */
562long total_in = 0; /* input bytes for all files */
563long total_out = 0; /* output bytes for all files */
564struct stat istat; /* status for input file */
565int ifd; /* input file descriptor */
566int ofd; /* output file descriptor */
567unsigned insize; /* valid bytes in inbuf */
568unsigned inptr; /* index of next byte to be processed in inbuf */
569unsigned outcnt; /* bytes in output buffer */
Eric Andersenb052b471999-11-18 00:21:59 +0000570
Erik Andersene49d5ec2000-02-08 19:58:47 +0000571long header_bytes; /* number of bytes in gzip header */
Eric Andersenb052b471999-11-18 00:21:59 +0000572
573/* local functions */
574
Erik Andersen61677fe2000-04-13 01:18:56 +0000575local int get_method (int in);
Eric Andersenb052b471999-11-18 00:21:59 +0000576
577#define strequ(s1, s2) (strcmp((s1),(s2)) == 0)
578
579/* ======================================================================== */
Erik Andersene49d5ec2000-02-08 19:58:47 +0000580int gunzip_main(int argc, char **argv)
Eric Andersenb052b471999-11-18 00:21:59 +0000581{
Erik Andersene49d5ec2000-02-08 19:58:47 +0000582 int file_count; /* number of files to precess */
583 int to_stdout = 0;
584 int fromstdin = 0;
585 int result;
586 int inFileNum;
587 int outFileNum;
588 int delInputFile = 0;
589 struct stat statBuf;
590 char *delFileName;
591 char ifname[MAX_PATH_LEN + 1]; /* input file name */
592 char ofname[MAX_PATH_LEN + 1]; /* output file name */
Eric Andersenb052b471999-11-18 00:21:59 +0000593
Erik Andersen59b9e872000-05-10 05:05:45 +0000594 if (strcmp(*argv, "zcat") == 0) {
Erik Andersene49d5ec2000-02-08 19:58:47 +0000595 to_stdout = 1;
Erik Andersen59b9e872000-05-10 05:05:45 +0000596 if (argc == 1) {
597 fromstdin = 1;
598 }
599 }
Erik Andersene49d5ec2000-02-08 19:58:47 +0000600
601 /* Parse any options */
602 while (--argc > 0 && **(++argv) == '-') {
603 if (*((*argv) + 1) == '\0') {
604 fromstdin = 1;
605 to_stdout = 1;
606 }
607 while (*(++(*argv))) {
608 switch (**argv) {
609 case 'c':
610 to_stdout = 1;
611 break;
612 case 't':
613 test_mode = 1;
614 break;
615
616 default:
617 usage(gunzip_usage);
618 }
619 }
620 }
621
622 foreground = signal(SIGINT, SIG_IGN) != SIG_IGN;
623 if (foreground) {
624 (void) signal(SIGINT, (sig_type) abort_gzip);
625 }
Eric Andersenb052b471999-11-18 00:21:59 +0000626#ifdef SIGTERM
Erik Andersene49d5ec2000-02-08 19:58:47 +0000627 if (signal(SIGTERM, SIG_IGN) != SIG_IGN) {
628 (void) signal(SIGTERM, (sig_type) abort_gzip);
629 }
Eric Andersenb052b471999-11-18 00:21:59 +0000630#endif
631#ifdef SIGHUP
Erik Andersene49d5ec2000-02-08 19:58:47 +0000632 if (signal(SIGHUP, SIG_IGN) != SIG_IGN) {
633 (void) signal(SIGHUP, (sig_type) abort_gzip);
634 }
Eric Andersenb052b471999-11-18 00:21:59 +0000635#endif
636
Erik Andersene49d5ec2000-02-08 19:58:47 +0000637 file_count = argc - optind;
Eric Andersenb052b471999-11-18 00:21:59 +0000638
Erik Andersene49d5ec2000-02-08 19:58:47 +0000639 /* Allocate all global buffers (for DYN_ALLOC option) */
640 ALLOC(uch, inbuf, INBUFSIZ + INBUF_EXTRA);
641 ALLOC(uch, outbuf, OUTBUFSIZ + OUTBUF_EXTRA);
642 ALLOC(ush, d_buf, DIST_BUFSIZE);
643 ALLOC(uch, window, 2L * WSIZE);
Eric Andersenb052b471999-11-18 00:21:59 +0000644#ifndef MAXSEG_64K
Erik Andersene49d5ec2000-02-08 19:58:47 +0000645 ALLOC(ush, tab_prefix, 1L << BITS);
Eric Andersenb052b471999-11-18 00:21:59 +0000646#else
Erik Andersene49d5ec2000-02-08 19:58:47 +0000647 ALLOC(ush, tab_prefix0, 1L << (BITS - 1));
648 ALLOC(ush, tab_prefix1, 1L << (BITS - 1));
Eric Andersenb052b471999-11-18 00:21:59 +0000649#endif
650
Erik Andersene49d5ec2000-02-08 19:58:47 +0000651 if (fromstdin == 1) {
652 strcpy(ofname, "stdin");
Eric Andersenb052b471999-11-18 00:21:59 +0000653
Erik Andersene49d5ec2000-02-08 19:58:47 +0000654 inFileNum = fileno(stdin);
655 ifile_size = -1L; /* convention for unknown size */
Eric Andersenb052b471999-11-18 00:21:59 +0000656 } else {
Erik Andersene49d5ec2000-02-08 19:58:47 +0000657 /* Open up the input file */
658 if (*argv == '\0')
659 usage(gunzip_usage);
660 if (strlen(*argv) > MAX_PATH_LEN) {
661 fprintf(stderr, name_too_long, "gunzip");
Erik Andersen61677fe2000-04-13 01:18:56 +0000662 exit(WARNING);
Erik Andersene49d5ec2000-02-08 19:58:47 +0000663 }
664 strcpy(ifname, *argv);
665
666 /* Open input fille */
667 inFileNum = open(ifname, O_RDONLY);
668 if (inFileNum < 0) {
669 perror(ifname);
Erik Andersen61677fe2000-04-13 01:18:56 +0000670 exit(WARNING);
Erik Andersene49d5ec2000-02-08 19:58:47 +0000671 }
672 /* Get the time stamp on the input file. */
673 result = stat(ifname, &statBuf);
674 if (result < 0) {
675 perror(ifname);
Erik Andersen61677fe2000-04-13 01:18:56 +0000676 exit(WARNING);
Erik Andersene49d5ec2000-02-08 19:58:47 +0000677 }
678 ifile_size = statBuf.st_size;
Eric Andersenb052b471999-11-18 00:21:59 +0000679 }
680
Erik Andersene49d5ec2000-02-08 19:58:47 +0000681 if (to_stdout == 1) {
682 /* And get to work */
683 strcpy(ofname, "stdout");
684 outFileNum = fileno(stdout);
685
686 clear_bufs(); /* clear input and output buffers */
687 part_nb = 0;
688
689 /* Actually do the compression/decompression. */
690 unzip(inFileNum, outFileNum);
691
692 } else if (test_mode) {
693 /* Actually do the compression/decompression. */
694 unzip(inFileNum, 2);
695 } else {
696 char *pos;
697
698 /* And get to work */
699 if (strlen(ifname) > MAX_PATH_LEN - 4) {
700 fprintf(stderr, name_too_long, "gunzip");
Erik Andersen61677fe2000-04-13 01:18:56 +0000701 exit(WARNING);
Erik Andersene49d5ec2000-02-08 19:58:47 +0000702 }
703 strcpy(ofname, ifname);
704 pos = strstr(ofname, ".gz");
705 if (pos != NULL) {
706 *pos = '\0';
707 delInputFile = 1;
708 } else {
709 pos = strstr(ofname, ".tgz");
710 if (pos != NULL) {
711 *pos = '\0';
712 strcat(pos, ".tar");
713 delInputFile = 1;
714 }
715 }
716
717 /* Open output fille */
Erik Andersen4d1d0111999-12-17 18:44:15 +0000718#if (__GLIBC__ >= 2) && (__GLIBC_MINOR__ >= 1)
Erik Andersene49d5ec2000-02-08 19:58:47 +0000719 outFileNum = open(ofname, O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW);
Erik Andersen4d1d0111999-12-17 18:44:15 +0000720#else
Erik Andersene49d5ec2000-02-08 19:58:47 +0000721 outFileNum = open(ofname, O_RDWR | O_CREAT | O_EXCL);
Erik Andersen4d1d0111999-12-17 18:44:15 +0000722#endif
Erik Andersene49d5ec2000-02-08 19:58:47 +0000723 if (outFileNum < 0) {
724 perror(ofname);
Erik Andersen61677fe2000-04-13 01:18:56 +0000725 exit(WARNING);
Erik Andersene49d5ec2000-02-08 19:58:47 +0000726 }
727 /* Set permissions on the file */
728 fchmod(outFileNum, statBuf.st_mode);
729
730 clear_bufs(); /* clear input and output buffers */
731 part_nb = 0;
732
733 /* Actually do the compression/decompression. */
734 result = unzip(inFileNum, outFileNum);
735
736 close(outFileNum);
737 close(inFileNum);
738 /* Delete the original file */
739 if (result == OK)
740 delFileName = ifname;
741 else
742 delFileName = ofname;
743
744 if (delInputFile == 1 && unlink(delFileName) < 0) {
745 perror(delFileName);
746 exit(FALSE);
747 }
Eric Andersenb052b471999-11-18 00:21:59 +0000748 }
Erik Andersen61677fe2000-04-13 01:18:56 +0000749 exit(exit_code);
Eric Andersenb052b471999-11-18 00:21:59 +0000750}
751
752
753/* ========================================================================
754 * Check the magic number of the input file and update ofname if an
755 * original name was given and to_stdout is not set.
756 * Return the compression method, -1 for error, -2 for warning.
757 * Set inptr to the offset of the next byte to be processed.
758 * Updates time_stamp if there is one and --no-time is not used.
759 * This function may be called repeatedly for an input file consisting
760 * of several contiguous gzip'ed members.
761 * IN assertions: there is at least one remaining compressed member.
762 * If the member is a zip file, it must be the only one.
763 */
764local int get_method(in)
Erik Andersene49d5ec2000-02-08 19:58:47 +0000765int in; /* input file descriptor */
Eric Andersenb052b471999-11-18 00:21:59 +0000766{
Erik Andersene49d5ec2000-02-08 19:58:47 +0000767 uch flags; /* compression flags */
768 char magic[2]; /* magic header */
Eric Andersenb052b471999-11-18 00:21:59 +0000769
Erik Andersene49d5ec2000-02-08 19:58:47 +0000770 magic[0] = (char) get_byte();
771 magic[1] = (char) get_byte();
772 method = -1; /* unknown yet */
773 part_nb++; /* number of parts in gzip file */
774 header_bytes = 0;
775 last_member = RECORD_IO;
776 /* assume multiple members in gzip file except for record oriented I/O */
Eric Andersenb052b471999-11-18 00:21:59 +0000777
Erik Andersene49d5ec2000-02-08 19:58:47 +0000778 if (memcmp(magic, GZIP_MAGIC, 2) == 0) {
Eric Andersenb052b471999-11-18 00:21:59 +0000779
Erik Andersene49d5ec2000-02-08 19:58:47 +0000780 method = (int) get_byte();
781 if (method != DEFLATED) {
782 fprintf(stderr,
783 "unknown method %d -- get newer version of gzip\n",
784 method);
785 exit_code = ERROR;
786 return -1;
787 }
788 flags = (uch) get_byte();
Eric Andersenb052b471999-11-18 00:21:59 +0000789
Erik Andersene49d5ec2000-02-08 19:58:47 +0000790 (ulg) get_byte(); /* Ignore time stamp */
791 (ulg) get_byte();
792 (ulg) get_byte();
793 (ulg) get_byte();
Eric Andersenb052b471999-11-18 00:21:59 +0000794
Erik Andersene49d5ec2000-02-08 19:58:47 +0000795 (void) get_byte(); /* Ignore extra flags for the moment */
796 (void) get_byte(); /* Ignore OS type for the moment */
Eric Andersenb052b471999-11-18 00:21:59 +0000797
Erik Andersene49d5ec2000-02-08 19:58:47 +0000798 if ((flags & EXTRA_FIELD) != 0) {
799 unsigned len = (unsigned) get_byte();
Eric Andersenb052b471999-11-18 00:21:59 +0000800
Erik Andersene49d5ec2000-02-08 19:58:47 +0000801 len |= ((unsigned) get_byte()) << 8;
802
803 while (len--)
804 (void) get_byte();
805 }
806
807 /* Discard original name if any */
808 if ((flags & ORIG_NAME) != 0) {
809 while (get_char() != 0) /* null */
810 ;
811 }
812
813 /* Discard file comment if any */
814 if ((flags & COMMENT) != 0) {
815 while (get_char() != 0) /* null */
816 ;
817 }
818 if (part_nb == 1) {
819 header_bytes = inptr + 2 * sizeof(long); /* include crc and size */
820 }
821
Eric Andersenb052b471999-11-18 00:21:59 +0000822 }
823
Erik Andersene49d5ec2000-02-08 19:58:47 +0000824 if (method >= 0)
825 return method;
Eric Andersenb052b471999-11-18 00:21:59 +0000826
Eric Andersenb052b471999-11-18 00:21:59 +0000827 if (part_nb == 1) {
Erik Andersene49d5ec2000-02-08 19:58:47 +0000828 fprintf(stderr, "\nnot in gzip format\n");
829 exit_code = ERROR;
830 return -1;
831 } else {
832 WARN((stderr, "\ndecompression OK, trailing garbage ignored\n"));
833 return -2;
Eric Andersenb052b471999-11-18 00:21:59 +0000834 }
Eric Andersenb052b471999-11-18 00:21:59 +0000835}
836
Eric Andersenb052b471999-11-18 00:21:59 +0000837/* ========================================================================
838 * Signal and error handler.
839 */
840RETSIGTYPE abort_gzip()
841{
Erik Andersen61677fe2000-04-13 01:18:56 +0000842 exit(ERROR);
Eric Andersenb052b471999-11-18 00:21:59 +0000843}
Erik Andersene49d5ec2000-02-08 19:58:47 +0000844
Eric Andersenb052b471999-11-18 00:21:59 +0000845/* unzip.c -- decompress files in gzip or pkzip format.
846 * Copyright (C) 1992-1993 Jean-loup Gailly
847 * This is free software; you can redistribute it and/or modify it under the
848 * terms of the GNU General Public License, see the file COPYING.
849 *
850 * The code in this file is derived from the file funzip.c written
851 * and put in the public domain by Mark Adler.
852 */
853
854/*
855 This version can extract files in gzip or pkzip format.
856 For the latter, only the first entry is extracted, and it has to be
857 either deflated or stored.
858 */
859
860/* #include "crypt.h" */
861
862/* crypt.h (dummy version) -- do not perform encryption
863 * Hardly worth copyrighting :-)
864 */
865
866#ifdef CRYPT
Erik Andersene49d5ec2000-02-08 19:58:47 +0000867# undef CRYPT /* dummy version */
Eric Andersenb052b471999-11-18 00:21:59 +0000868#endif
869
Erik Andersene49d5ec2000-02-08 19:58:47 +0000870#define RAND_HEAD_LEN 12 /* length of encryption random header */
Eric Andersenb052b471999-11-18 00:21:59 +0000871
872#define zencode
873#define zdecode
874
875/* PKZIP header definitions */
Erik Andersene49d5ec2000-02-08 19:58:47 +0000876#define LOCSIG 0x04034b50L /* four-byte lead-in (lsb first) */
877#define LOCFLG 6 /* offset of bit flag */
878#define CRPFLG 1 /* bit for encrypted entry */
879#define EXTFLG 8 /* bit for extended local header */
880#define LOCHOW 8 /* offset of compression method */
881#define LOCTIM 10 /* file mod time (for decryption) */
882#define LOCCRC 14 /* offset of crc */
883#define LOCSIZ 18 /* offset of compressed size */
884#define LOCLEN 22 /* offset of uncompressed length */
885#define LOCFIL 26 /* offset of file name field length */
886#define LOCEXT 28 /* offset of extra field length */
887#define LOCHDR 30 /* size of local header, including sig */
888#define EXTHDR 16 /* size of extended local header, inc sig */
Eric Andersenb052b471999-11-18 00:21:59 +0000889
890
891/* Globals */
892
Erik Andersene49d5ec2000-02-08 19:58:47 +0000893char *key; /* not used--needed to link crypt.c */
894int pkzip = 0; /* set for a pkzip file */
895int ext_header = 0; /* set if extended local header */
Eric Andersenb052b471999-11-18 00:21:59 +0000896
897/* ===========================================================================
898 * Unzip in to out. This routine works on both gzip and pkzip files.
899 *
900 * IN assertions: the buffer inbuf contains already the beginning of
901 * the compressed data, from offsets inptr to insize-1 included.
902 * The magic header has already been checked. The output buffer is cleared.
903 */
904int unzip(in, out)
Erik Andersene49d5ec2000-02-08 19:58:47 +0000905int in, out; /* input and output file descriptors */
Eric Andersenb052b471999-11-18 00:21:59 +0000906{
Erik Andersene49d5ec2000-02-08 19:58:47 +0000907 ulg orig_crc = 0; /* original crc */
908 ulg orig_len = 0; /* original uncompressed length */
909 int n;
910 uch buf[EXTHDR]; /* extended local header */
Eric Andersenb052b471999-11-18 00:21:59 +0000911
Erik Andersene49d5ec2000-02-08 19:58:47 +0000912 ifd = in;
913 ofd = out;
914 method = get_method(ifd);
915 if (method < 0) {
Erik Andersen61677fe2000-04-13 01:18:56 +0000916 exit(exit_code); /* error message already emitted */
Eric Andersenb052b471999-11-18 00:21:59 +0000917 }
918
Erik Andersene49d5ec2000-02-08 19:58:47 +0000919 updcrc(NULL, 0); /* initialize crc */
Eric Andersenb052b471999-11-18 00:21:59 +0000920
Erik Andersene49d5ec2000-02-08 19:58:47 +0000921 if (pkzip && !ext_header) { /* crc and length at the end otherwise */
922 orig_crc = LG(inbuf + LOCCRC);
923 orig_len = LG(inbuf + LOCLEN);
Eric Andersenb052b471999-11-18 00:21:59 +0000924 }
Eric Andersenb052b471999-11-18 00:21:59 +0000925
Erik Andersene49d5ec2000-02-08 19:58:47 +0000926 /* Decompress */
927 if (method == DEFLATED) {
928
929 int res = inflate();
930
931 if (res == 3) {
Erik Andersen7ab9c7e2000-05-12 19:41:47 +0000932 errorMsg(memory_exhausted, "gunzip");
Erik Andersene49d5ec2000-02-08 19:58:47 +0000933 } else if (res != 0) {
Erik Andersen9ffdaa62000-02-11 21:55:04 +0000934 errorMsg("invalid compressed data--format violated");
Erik Andersene49d5ec2000-02-08 19:58:47 +0000935 }
936
937 } else {
Erik Andersen9ffdaa62000-02-11 21:55:04 +0000938 errorMsg("internal error, invalid method");
Eric Andersenb052b471999-11-18 00:21:59 +0000939 }
Eric Andersenb052b471999-11-18 00:21:59 +0000940
Erik Andersene49d5ec2000-02-08 19:58:47 +0000941 /* Get the crc and original length */
942 if (!pkzip) {
943 /* crc32 (see algorithm.doc)
944 * uncompressed input size modulo 2^32
945 */
946 for (n = 0; n < 8; n++) {
947 buf[n] = (uch) get_byte(); /* may cause an error if EOF */
948 }
949 orig_crc = LG(buf);
950 orig_len = LG(buf + 4);
Eric Andersenb052b471999-11-18 00:21:59 +0000951
Erik Andersene49d5ec2000-02-08 19:58:47 +0000952 } else if (ext_header) { /* If extended header, check it */
953 /* signature - 4bytes: 0x50 0x4b 0x07 0x08
954 * CRC-32 value
955 * compressed size 4-bytes
956 * uncompressed size 4-bytes
957 */
958 for (n = 0; n < EXTHDR; n++) {
959 buf[n] = (uch) get_byte(); /* may cause an error if EOF */
960 }
961 orig_crc = LG(buf + 4);
962 orig_len = LG(buf + 12);
963 }
964
965 /* Validate decompression */
966 if (orig_crc != updcrc(outbuf, 0)) {
Erik Andersen9ffdaa62000-02-11 21:55:04 +0000967 errorMsg("invalid compressed data--crc error");
Erik Andersene49d5ec2000-02-08 19:58:47 +0000968 }
969 if (orig_len != (ulg) bytes_out) {
Erik Andersen9ffdaa62000-02-11 21:55:04 +0000970 errorMsg("invalid compressed data--length error");
Erik Andersene49d5ec2000-02-08 19:58:47 +0000971 }
972
973 /* Check if there are more entries in a pkzip file */
974 if (pkzip && inptr + 4 < insize && LG(inbuf + inptr) == LOCSIG) {
975 WARN((stderr, "has more than one entry--rest ignored\n"));
976 }
977 ext_header = pkzip = 0; /* for next file */
978 return OK;
Eric Andersenb052b471999-11-18 00:21:59 +0000979}
Erik Andersene49d5ec2000-02-08 19:58:47 +0000980
Eric Andersenb052b471999-11-18 00:21:59 +0000981/* util.c -- utility functions for gzip support
982 * Copyright (C) 1992-1993 Jean-loup Gailly
983 * This is free software; you can redistribute it and/or modify it under the
984 * terms of the GNU General Public License, see the file COPYING.
985 */
986
987#include <ctype.h>
988#include <errno.h>
989#include <sys/types.h>
990
991#ifdef HAVE_UNISTD_H
992# include <unistd.h>
993#endif
994#ifndef NO_FCNTL_H
995# include <fcntl.h>
996#endif
997
998#if defined(STDC_HEADERS) || !defined(NO_STDLIB_H)
999# include <stdlib.h>
1000#else
Erik Andersene49d5ec2000-02-08 19:58:47 +00001001extern int errno;
Eric Andersenb052b471999-11-18 00:21:59 +00001002#endif
1003
Erik Andersene49d5ec2000-02-08 19:58:47 +00001004static const ulg crc_32_tab[]; /* crc table, defined below */
Eric Andersenb052b471999-11-18 00:21:59 +00001005
1006/* ===========================================================================
1007 * Run a set of bytes through the crc shift register. If s is a NULL
1008 * pointer, then initialize the crc shift register contents instead.
1009 * Return the current crc in either case.
1010 */
1011ulg updcrc(s, n)
Erik Andersene49d5ec2000-02-08 19:58:47 +00001012uch *s; /* pointer to bytes to pump through */
1013unsigned n; /* number of bytes in s[] */
Eric Andersenb052b471999-11-18 00:21:59 +00001014{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001015 register ulg c; /* temporary variable */
Eric Andersenb052b471999-11-18 00:21:59 +00001016
Erik Andersene49d5ec2000-02-08 19:58:47 +00001017 static ulg crc = (ulg) 0xffffffffL; /* shift register contents */
Eric Andersenb052b471999-11-18 00:21:59 +00001018
Erik Andersene49d5ec2000-02-08 19:58:47 +00001019 if (s == NULL) {
1020 c = 0xffffffffL;
1021 } else {
1022 c = crc;
1023 if (n)
1024 do {
1025 c = crc_32_tab[((int) c ^ (*s++)) & 0xff] ^ (c >> 8);
1026 } while (--n);
1027 }
1028 crc = c;
1029 return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */
Eric Andersenb052b471999-11-18 00:21:59 +00001030}
1031
1032/* ===========================================================================
1033 * Clear input and output buffers
1034 */
Erik Andersen330fd2b2000-05-19 05:35:19 +00001035void clear_bufs(void)
Eric Andersenb052b471999-11-18 00:21:59 +00001036{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001037 outcnt = 0;
1038 insize = inptr = 0;
1039 bytes_in = bytes_out = 0L;
Eric Andersenb052b471999-11-18 00:21:59 +00001040}
1041
1042/* ===========================================================================
1043 * Fill the input buffer. This is called only when the buffer is empty.
1044 */
1045int fill_inbuf(eof_ok)
Erik Andersene49d5ec2000-02-08 19:58:47 +00001046int eof_ok; /* set if EOF acceptable as a result */
Eric Andersenb052b471999-11-18 00:21:59 +00001047{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001048 int len;
Eric Andersenb052b471999-11-18 00:21:59 +00001049
Erik Andersene49d5ec2000-02-08 19:58:47 +00001050 /* Read as much as possible */
1051 insize = 0;
1052 errno = 0;
1053 do {
1054 len = read(ifd, (char *) inbuf + insize, INBUFSIZ - insize);
1055 if (len == 0 || len == EOF)
1056 break;
1057 insize += len;
1058 } while (insize < INBUFSIZ);
Eric Andersenb052b471999-11-18 00:21:59 +00001059
Erik Andersene49d5ec2000-02-08 19:58:47 +00001060 if (insize == 0) {
1061 if (eof_ok)
1062 return EOF;
Erik Andersen330fd2b2000-05-19 05:35:19 +00001063 read_error_msg();
Erik Andersene49d5ec2000-02-08 19:58:47 +00001064 }
1065 bytes_in += (ulg) insize;
1066 inptr = 1;
1067 return inbuf[0];
Eric Andersenb052b471999-11-18 00:21:59 +00001068}
1069
1070/* ===========================================================================
1071 * Write the output buffer outbuf[0..outcnt-1] and update bytes_out.
1072 * (used for the compressed data only)
1073 */
1074void flush_outbuf()
1075{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001076 if (outcnt == 0)
1077 return;
Eric Andersenb052b471999-11-18 00:21:59 +00001078
Erik Andersene49d5ec2000-02-08 19:58:47 +00001079 if (!test_mode)
1080 write_buf(ofd, (char *) outbuf, outcnt);
1081 bytes_out += (ulg) outcnt;
1082 outcnt = 0;
Eric Andersenb052b471999-11-18 00:21:59 +00001083}
1084
1085/* ===========================================================================
1086 * Write the output window window[0..outcnt-1] and update crc and bytes_out.
1087 * (Used for the decompressed data only.)
1088 */
1089void flush_window()
1090{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001091 if (outcnt == 0)
1092 return;
1093 updcrc(window, outcnt);
Eric Andersenb052b471999-11-18 00:21:59 +00001094
Erik Andersene49d5ec2000-02-08 19:58:47 +00001095 if (!test_mode)
1096 write_buf(ofd, (char *) window, outcnt);
1097 bytes_out += (ulg) outcnt;
1098 outcnt = 0;
Eric Andersenb052b471999-11-18 00:21:59 +00001099}
1100
1101/* ===========================================================================
1102 * Does the same as write(), but also handles partial pipe writes and checks
1103 * for error return.
1104 */
1105void write_buf(fd, buf, cnt)
Erik Andersene49d5ec2000-02-08 19:58:47 +00001106int fd;
Erik Andersen61677fe2000-04-13 01:18:56 +00001107void * buf;
Erik Andersene49d5ec2000-02-08 19:58:47 +00001108unsigned cnt;
Eric Andersenb052b471999-11-18 00:21:59 +00001109{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001110 unsigned n;
Eric Andersenb052b471999-11-18 00:21:59 +00001111
Erik Andersene49d5ec2000-02-08 19:58:47 +00001112 while ((n = write(fd, buf, cnt)) != cnt) {
1113 if (n == (unsigned) (-1)) {
Erik Andersen330fd2b2000-05-19 05:35:19 +00001114 write_error_msg();
Erik Andersene49d5ec2000-02-08 19:58:47 +00001115 }
1116 cnt -= n;
Erik Andersen61677fe2000-04-13 01:18:56 +00001117 buf = (void *) ((char *) buf + n);
Eric Andersenb052b471999-11-18 00:21:59 +00001118 }
Eric Andersenb052b471999-11-18 00:21:59 +00001119}
1120
1121#if defined(NO_STRING_H) && !defined(STDC_HEADERS)
1122
1123/* Provide missing strspn and strcspn functions. */
1124
1125# ifndef __STDC__
1126# define const
1127# endif
1128
Erik Andersen61677fe2000-04-13 01:18:56 +00001129int strspn (const char *s, const char *accept);
1130int strcspn (const char *s, const char *reject);
Eric Andersenb052b471999-11-18 00:21:59 +00001131
1132/* ========================================================================
1133 * Return the length of the maximum initial segment
1134 * of s which contains only characters in accept.
1135 */
1136int strspn(s, accept)
Erik Andersene49d5ec2000-02-08 19:58:47 +00001137const char *s;
1138const char *accept;
Eric Andersenb052b471999-11-18 00:21:59 +00001139{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001140 register const char *p;
1141 register const char *a;
1142 register int count = 0;
Eric Andersenb052b471999-11-18 00:21:59 +00001143
Erik Andersene49d5ec2000-02-08 19:58:47 +00001144 for (p = s; *p != '\0'; ++p) {
1145 for (a = accept; *a != '\0'; ++a) {
1146 if (*p == *a)
1147 break;
1148 }
1149 if (*a == '\0')
1150 return count;
1151 ++count;
Eric Andersenb052b471999-11-18 00:21:59 +00001152 }
Erik Andersene49d5ec2000-02-08 19:58:47 +00001153 return count;
Eric Andersenb052b471999-11-18 00:21:59 +00001154}
1155
1156/* ========================================================================
1157 * Return the length of the maximum inital segment of s
1158 * which contains no characters from reject.
1159 */
1160int strcspn(s, reject)
Erik Andersene49d5ec2000-02-08 19:58:47 +00001161const char *s;
1162const char *reject;
Eric Andersenb052b471999-11-18 00:21:59 +00001163{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001164 register int count = 0;
Eric Andersenb052b471999-11-18 00:21:59 +00001165
Erik Andersene49d5ec2000-02-08 19:58:47 +00001166 while (*s != '\0') {
1167 if (strchr(reject, *s++) != NULL)
1168 return count;
1169 ++count;
1170 }
1171 return count;
Eric Andersenb052b471999-11-18 00:21:59 +00001172}
1173
Erik Andersene49d5ec2000-02-08 19:58:47 +00001174#endif /* NO_STRING_H */
Eric Andersenb052b471999-11-18 00:21:59 +00001175
1176
1177/* ========================================================================
1178 * Error handlers.
1179 */
Erik Andersen330fd2b2000-05-19 05:35:19 +00001180void read_error_msg()
Eric Andersenb052b471999-11-18 00:21:59 +00001181{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001182 fprintf(stderr, "\n");
1183 if (errno != 0) {
1184 perror("");
1185 } else {
1186 fprintf(stderr, "unexpected end of file\n");
1187 }
1188 abort_gzip();
Eric Andersenb052b471999-11-18 00:21:59 +00001189}
1190
Erik Andersen330fd2b2000-05-19 05:35:19 +00001191void write_error_msg()
Eric Andersenb052b471999-11-18 00:21:59 +00001192{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001193 fprintf(stderr, "\n");
1194 perror("");
1195 abort_gzip();
Eric Andersenb052b471999-11-18 00:21:59 +00001196}
1197
1198
1199/* ========================================================================
Eric Andersenb052b471999-11-18 00:21:59 +00001200 * Table of CRC-32's of all single-byte values (made by makecrc.c)
1201 */
1202static const ulg crc_32_tab[] = {
Erik Andersene49d5ec2000-02-08 19:58:47 +00001203 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
1204 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
1205 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
1206 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
1207 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
1208 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
1209 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
1210 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
1211 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
1212 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
1213 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
1214 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
1215 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
1216 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
1217 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
1218 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
1219 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
1220 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
1221 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
1222 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
1223 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
1224 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
1225 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
1226 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
1227 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
1228 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
1229 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
1230 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
1231 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
1232 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
1233 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
1234 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
1235 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
1236 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
1237 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
1238 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
1239 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
1240 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
1241 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
1242 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
1243 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
1244 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
1245 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
1246 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
1247 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
1248 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
1249 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
1250 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
1251 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
1252 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
1253 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
1254 0x2d02ef8dL
Eric Andersenb052b471999-11-18 00:21:59 +00001255};
Erik Andersene49d5ec2000-02-08 19:58:47 +00001256
Eric Andersenb052b471999-11-18 00:21:59 +00001257/* inflate.c -- Not copyrighted 1992 by Mark Adler
1258 version c10p1, 10 January 1993 */
1259
1260/* You can do whatever you like with this source file, though I would
1261 prefer that if you modify it and redistribute it that you include
1262 comments to that effect with your name and the date. Thank you.
1263 [The history has been moved to the file ChangeLog.]
1264 */
1265
1266/*
1267 Inflate deflated (PKZIP's method 8 compressed) data. The compression
1268 method searches for as much of the current string of bytes (up to a
1269 length of 258) in the previous 32K bytes. If it doesn't find any
1270 matches (of at least length 3), it codes the next byte. Otherwise, it
1271 codes the length of the matched string and its distance backwards from
1272 the current position. There is a single Huffman code that codes both
1273 single bytes (called "literals") and match lengths. A second Huffman
1274 code codes the distance information, which follows a length code. Each
1275 length or distance code actually represents a base value and a number
1276 of "extra" (sometimes zero) bits to get to add to the base value. At
1277 the end of each deflated block is a special end-of-block (EOB) literal/
1278 length code. The decoding process is basically: get a literal/length
1279 code; if EOB then done; if a literal, emit the decoded byte; if a
1280 length then get the distance and emit the referred-to bytes from the
1281 sliding window of previously emitted data.
1282
1283 There are (currently) three kinds of inflate blocks: stored, fixed, and
1284 dynamic. The compressor deals with some chunk of data at a time, and
1285 decides which method to use on a chunk-by-chunk basis. A chunk might
1286 typically be 32K or 64K. If the chunk is uncompressible, then the
1287 "stored" method is used. In this case, the bytes are simply stored as
1288 is, eight bits per byte, with none of the above coding. The bytes are
1289 preceded by a count, since there is no longer an EOB code.
1290
1291 If the data is compressible, then either the fixed or dynamic methods
1292 are used. In the dynamic method, the compressed data is preceded by
1293 an encoding of the literal/length and distance Huffman codes that are
1294 to be used to decode this block. The representation is itself Huffman
1295 coded, and so is preceded by a description of that code. These code
1296 descriptions take up a little space, and so for small blocks, there is
1297 a predefined set of codes, called the fixed codes. The fixed method is
1298 used if the block codes up smaller that way (usually for quite small
1299 chunks), otherwise the dynamic method is used. In the latter case, the
1300 codes are customized to the probabilities in the current block, and so
1301 can code it much better than the pre-determined fixed codes.
1302
1303 The Huffman codes themselves are decoded using a mutli-level table
1304 lookup, in order to maximize the speed of decoding plus the speed of
1305 building the decoding tables. See the comments below that precede the
1306 lbits and dbits tuning parameters.
1307 */
1308
1309
1310/*
1311 Notes beyond the 1.93a appnote.txt:
1312
1313 1. Distance pointers never point before the beginning of the output
1314 stream.
1315 2. Distance pointers can point back across blocks, up to 32k away.
1316 3. There is an implied maximum of 7 bits for the bit length table and
1317 15 bits for the actual data.
1318 4. If only one code exists, then it is encoded using one bit. (Zero
1319 would be more efficient, but perhaps a little confusing.) If two
1320 codes exist, they are coded using one bit each (0 and 1).
1321 5. There is no way of sending zero distance codes--a dummy must be
1322 sent if there are none. (History: a pre 2.0 version of PKZIP would
1323 store blocks with no distance codes, but this was discovered to be
1324 too harsh a criterion.) Valid only for 1.93a. 2.04c does allow
1325 zero distance codes, which is sent as one code of zero bits in
1326 length.
1327 6. There are up to 286 literal/length codes. Code 256 represents the
1328 end-of-block. Note however that the static length tree defines
1329 288 codes just to fill out the Huffman codes. Codes 286 and 287
1330 cannot be used though, since there is no length base or extra bits
1331 defined for them. Similarly, there are up to 30 distance codes.
1332 However, static trees define 32 codes (all 5 bits) to fill out the
1333 Huffman codes, but the last two had better not show up in the data.
1334 7. Unzip can check dynamic Huffman blocks for complete code sets.
1335 The exception is that a single code would not be complete (see #4).
1336 8. The five bits following the block type is really the number of
1337 literal codes sent minus 257.
1338 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
1339 (1+6+6). Therefore, to output three times the length, you output
1340 three codes (1+1+1), whereas to output four times the same length,
1341 you only need two codes (1+3). Hmm.
1342 10. In the tree reconstruction algorithm, Code = Code + Increment
1343 only if BitLength(i) is not zero. (Pretty obvious.)
1344 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19)
1345 12. Note: length code 284 can represent 227-258, but length code 285
1346 really is 258. The last length deserves its own, short code
1347 since it gets used a lot in very redundant files. The length
1348 258 is special since 258 - 3 (the min match length) is 255.
1349 13. The literal/length and distance code bit lengths are read as a
1350 single stream of lengths. It is possible (and advantageous) for
1351 a repeat code (16, 17, or 18) to go across the boundary between
1352 the two sets of lengths.
1353 */
1354
1355#include <sys/types.h>
1356
1357#if defined(STDC_HEADERS) || !defined(NO_STDLIB_H)
1358# include <stdlib.h>
1359#endif
1360
1361
1362#define slide window
1363
1364/* Huffman code lookup table entry--this entry is four bytes for machines
1365 that have 16-bit pointers (e.g. PC's in the small or medium model).
1366 Valid extra bits are 0..13. e == 15 is EOB (end of block), e == 16
1367 means that v is a literal, 16 < e < 32 means that v is a pointer to
1368 the next table, which codes e - 16 bits, and lastly e == 99 indicates
1369 an unused code. If a code with e == 99 is looked up, this implies an
1370 error in the data. */
1371struct huft {
Erik Andersene49d5ec2000-02-08 19:58:47 +00001372 uch e; /* number of extra bits or operation */
1373 uch b; /* number of bits in this code or subcode */
1374 union {
1375 ush n; /* literal, length base, or distance base */
1376 struct huft *t; /* pointer to next level of table */
1377 } v;
Eric Andersenb052b471999-11-18 00:21:59 +00001378};
1379
1380
1381/* Function prototypes */
Erik Andersen61677fe2000-04-13 01:18:56 +00001382int huft_build (unsigned *, unsigned, unsigned, ush *, ush *,
1383 struct huft **, int *);
1384int huft_free (struct huft *);
1385int inflate_codes (struct huft *, struct huft *, int, int);
1386int inflate_stored (void);
1387int inflate_fixed (void);
1388int inflate_dynamic (void);
1389int inflate_block (int *);
1390int inflate (void);
Eric Andersenb052b471999-11-18 00:21:59 +00001391
1392
1393/* The inflate algorithm uses a sliding 32K byte window on the uncompressed
1394 stream to find repeated byte strings. This is implemented here as a
1395 circular buffer. The index is updated simply by incrementing and then
1396 and'ing with 0x7fff (32K-1). */
1397/* It is left to other modules to supply the 32K area. It is assumed
1398 to be usable as if it were declared "uch slide[32768];" or as just
1399 "uch *slide;" and then malloc'ed in the latter case. The definition
1400 must be in unzip.h, included above. */
1401/* unsigned wp; current position in slide */
1402#define wp outcnt
1403#define flush_output(w) (wp=(w),flush_window())
1404
1405/* Tables for deflate from PKZIP's appnote.txt. */
Erik Andersene49d5ec2000-02-08 19:58:47 +00001406static unsigned border[] = { /* Order of the bit length code lengths */
1407 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
1408};
1409static ush cplens[] = { /* Copy lengths for literal codes 257..285 */
1410 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
1411 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0
1412};
1413
1414 /* note: see note #13 above about the 258 in this list. */
1415static ush cplext[] = { /* Extra bits for literal codes 257..285 */
1416 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
1417 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99
1418}; /* 99==invalid */
1419static ush cpdist[] = { /* Copy offsets for distance codes 0..29 */
1420 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
1421 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
1422 8193, 12289, 16385, 24577
1423};
1424static ush cpdext[] = { /* Extra bits for distance codes */
1425 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
1426 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
1427 12, 12, 13, 13
1428};
Eric Andersenb052b471999-11-18 00:21:59 +00001429
1430
1431
1432/* Macros for inflate() bit peeking and grabbing.
1433 The usage is:
1434
1435 NEEDBITS(j)
1436 x = b & mask_bits[j];
1437 DUMPBITS(j)
1438
1439 where NEEDBITS makes sure that b has at least j bits in it, and
1440 DUMPBITS removes the bits from b. The macros use the variable k
1441 for the number of bits in b. Normally, b and k are register
1442 variables for speed, and are initialized at the beginning of a
1443 routine that uses these macros from a global bit buffer and count.
1444
1445 If we assume that EOB will be the longest code, then we will never
1446 ask for bits with NEEDBITS that are beyond the end of the stream.
1447 So, NEEDBITS should not read any more bytes than are needed to
1448 meet the request. Then no bytes need to be "returned" to the buffer
1449 at the end of the last block.
1450
1451 However, this assumption is not true for fixed blocks--the EOB code
1452 is 7 bits, but the other literal/length codes can be 8 or 9 bits.
1453 (The EOB code is shorter than other codes because fixed blocks are
1454 generally short. So, while a block always has an EOB, many other
1455 literal/length codes have a significantly lower probability of
1456 showing up at all.) However, by making the first table have a
1457 lookup of seven bits, the EOB code will be found in that first
1458 lookup, and so will not require that too many bits be pulled from
1459 the stream.
1460 */
1461
Erik Andersene49d5ec2000-02-08 19:58:47 +00001462ulg bb; /* bit buffer */
1463unsigned bk; /* bits in bit buffer */
Eric Andersenb052b471999-11-18 00:21:59 +00001464
1465ush mask_bits[] = {
Erik Andersene49d5ec2000-02-08 19:58:47 +00001466 0x0000,
1467 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
1468 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
Eric Andersenb052b471999-11-18 00:21:59 +00001469};
1470
1471#ifdef CRYPT
Erik Andersene49d5ec2000-02-08 19:58:47 +00001472uch cc;
1473
Eric Andersenb052b471999-11-18 00:21:59 +00001474# define NEXTBYTE() (cc = get_byte(), zdecode(cc), cc)
1475#else
1476# define NEXTBYTE() (uch)get_byte()
1477#endif
1478#define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}}
1479#define DUMPBITS(n) {b>>=(n);k-=(n);}
1480
1481
1482/*
1483 Huffman code decoding is performed using a multi-level table lookup.
1484 The fastest way to decode is to simply build a lookup table whose
1485 size is determined by the longest code. However, the time it takes
1486 to build this table can also be a factor if the data being decoded
1487 is not very long. The most common codes are necessarily the
1488 shortest codes, so those codes dominate the decoding time, and hence
1489 the speed. The idea is you can have a shorter table that decodes the
1490 shorter, more probable codes, and then point to subsidiary tables for
1491 the longer codes. The time it costs to decode the longer codes is
1492 then traded against the time it takes to make longer tables.
1493
1494 This results of this trade are in the variables lbits and dbits
1495 below. lbits is the number of bits the first level table for literal/
1496 length codes can decode in one step, and dbits is the same thing for
1497 the distance codes. Subsequent tables are also less than or equal to
1498 those sizes. These values may be adjusted either when all of the
1499 codes are shorter than that, in which case the longest code length in
1500 bits is used, or when the shortest code is *longer* than the requested
1501 table size, in which case the length of the shortest code in bits is
1502 used.
1503
1504 There are two different values for the two tables, since they code a
1505 different number of possibilities each. The literal/length table
1506 codes 286 possible values, or in a flat code, a little over eight
1507 bits. The distance table codes 30 possible values, or a little less
1508 than five bits, flat. The optimum values for speed end up being
1509 about one bit more than those, so lbits is 8+1 and dbits is 5+1.
1510 The optimum values may differ though from machine to machine, and
1511 possibly even between compilers. Your mileage may vary.
1512 */
1513
1514
Erik Andersene49d5ec2000-02-08 19:58:47 +00001515int lbits = 9; /* bits in base literal/length lookup table */
1516int dbits = 6; /* bits in base distance lookup table */
Eric Andersenb052b471999-11-18 00:21:59 +00001517
1518
1519/* If BMAX needs to be larger than 16, then h and x[] should be ulg. */
Erik Andersene49d5ec2000-02-08 19:58:47 +00001520#define BMAX 16 /* maximum bit length of any code (16 for explode) */
1521#define N_MAX 288 /* maximum number of codes in any set */
Eric Andersenb052b471999-11-18 00:21:59 +00001522
1523
Erik Andersene49d5ec2000-02-08 19:58:47 +00001524unsigned hufts; /* track memory usage */
Eric Andersenb052b471999-11-18 00:21:59 +00001525
1526
1527int huft_build(b, n, s, d, e, t, m)
Erik Andersene49d5ec2000-02-08 19:58:47 +00001528unsigned *b; /* code lengths in bits (all assumed <= BMAX) */
1529unsigned n; /* number of codes (assumed <= N_MAX) */
1530unsigned s; /* number of simple-valued codes (0..s-1) */
1531ush *d; /* list of base values for non-simple codes */
1532ush *e; /* list of extra bits for non-simple codes */
1533struct huft **t; /* result: starting table */
1534int *m; /* maximum lookup bits, returns actual */
1535
Eric Andersenb052b471999-11-18 00:21:59 +00001536/* Given a list of code lengths and a maximum table size, make a set of
1537 tables to decode that set of codes. Return zero on success, one if
1538 the given code set is incomplete (the tables are still built in this
1539 case), two if the input is invalid (all zero length codes or an
1540 oversubscribed set of lengths), and three if not enough memory. */
1541{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001542 unsigned a; /* counter for codes of length k */
1543 unsigned c[BMAX + 1]; /* bit length count table */
1544 unsigned f; /* i repeats in table every f entries */
1545 int g; /* maximum code length */
1546 int h; /* table level */
1547 register unsigned i; /* counter, current code */
1548 register unsigned j; /* counter */
1549 register int k; /* number of bits in current code */
1550 int l; /* bits per table (returned in m) */
1551 register unsigned *p; /* pointer into c[], b[], or v[] */
1552 register struct huft *q; /* points to current table */
1553 struct huft r; /* table entry for structure assignment */
1554 struct huft *u[BMAX]; /* table stack */
1555 unsigned v[N_MAX]; /* values in order of bit length */
1556 register int w; /* bits before this table == (l * h) */
1557 unsigned x[BMAX + 1]; /* bit offsets, then code stack */
1558 unsigned *xp; /* pointer into x */
1559 int y; /* number of dummy codes added */
1560 unsigned z; /* number of entries in current table */
Eric Andersenb052b471999-11-18 00:21:59 +00001561
1562
Erik Andersene49d5ec2000-02-08 19:58:47 +00001563 /* Generate counts for each bit length */
1564 memzero(c, sizeof(c));
1565 p = b;
1566 i = n;
1567 do {
1568 Tracecv(*p,
1569 (stderr,
1570 (n - i >= ' '
1571 && n - i <= '~' ? "%c %d\n" : "0x%x %d\n"), n - i, *p));
1572 c[*p]++; /* assume all entries <= BMAX */
1573 p++; /* Can't combine with above line (Solaris bug) */
1574 } while (--i);
1575 if (c[0] == n) { /* null input--all zero length codes */
1576 *t = (struct huft *) NULL;
1577 *m = 0;
1578 return 0;
1579 }
Eric Andersenb052b471999-11-18 00:21:59 +00001580
1581
Erik Andersene49d5ec2000-02-08 19:58:47 +00001582 /* Find minimum and maximum length, bound *m by those */
1583 l = *m;
1584 for (j = 1; j <= BMAX; j++)
1585 if (c[j])
1586 break;
1587 k = j; /* minimum code length */
1588 if ((unsigned) l < j)
1589 l = j;
1590 for (i = BMAX; i; i--)
1591 if (c[i])
1592 break;
1593 g = i; /* maximum code length */
1594 if ((unsigned) l > i)
1595 l = i;
1596 *m = l;
Eric Andersenb052b471999-11-18 00:21:59 +00001597
1598
Erik Andersene49d5ec2000-02-08 19:58:47 +00001599 /* Adjust last length count to fill out codes, if needed */
1600 for (y = 1 << j; j < i; j++, y <<= 1)
1601 if ((y -= c[j]) < 0)
1602 return 2; /* bad input: more codes than bits */
1603 if ((y -= c[i]) < 0)
1604 return 2;
1605 c[i] += y;
Eric Andersenb052b471999-11-18 00:21:59 +00001606
1607
Erik Andersene49d5ec2000-02-08 19:58:47 +00001608 /* Generate starting offsets into the value table for each length */
1609 x[1] = j = 0;
1610 p = c + 1;
1611 xp = x + 2;
1612 while (--i) { /* note that i == g from above */
1613 *xp++ = (j += *p++);
1614 }
Eric Andersenb052b471999-11-18 00:21:59 +00001615
1616
Erik Andersene49d5ec2000-02-08 19:58:47 +00001617 /* Make a table of values in order of bit lengths */
1618 p = b;
1619 i = 0;
1620 do {
1621 if ((j = *p++) != 0)
1622 v[x[j]++] = i;
1623 } while (++i < n);
Eric Andersenb052b471999-11-18 00:21:59 +00001624
1625
Erik Andersene49d5ec2000-02-08 19:58:47 +00001626 /* Generate the Huffman codes and for each, make the table entries */
1627 x[0] = i = 0; /* first Huffman code is zero */
1628 p = v; /* grab values in bit order */
1629 h = -1; /* no tables yet--level -1 */
1630 w = -l; /* bits decoded == (l * h) */
1631 u[0] = (struct huft *) NULL; /* just to keep compilers happy */
1632 q = (struct huft *) NULL; /* ditto */
1633 z = 0; /* ditto */
Eric Andersenb052b471999-11-18 00:21:59 +00001634
Erik Andersene49d5ec2000-02-08 19:58:47 +00001635 /* go through the bit lengths (k already is bits in shortest code) */
1636 for (; k <= g; k++) {
1637 a = c[k];
1638 while (a--) {
1639 /* here i is the Huffman code of length k bits for value *p */
1640 /* make tables up to required level */
1641 while (k > w + l) {
1642 h++;
1643 w += l; /* previous table always l bits */
Eric Andersenb052b471999-11-18 00:21:59 +00001644
Erik Andersene49d5ec2000-02-08 19:58:47 +00001645 /* compute minimum size table less than or equal to l bits */
1646 z = (z = g - w) > (unsigned) l ? l : z; /* upper limit on table size */
1647 if ((f = 1 << (j = k - w)) > a + 1) { /* try a k-w bit table *//* too few codes for k-w bit table */
1648 f -= a + 1; /* deduct codes from patterns left */
1649 xp = c + k;
1650 while (++j < z) { /* try smaller tables up to z bits */
1651 if ((f <<= 1) <= *++xp)
1652 break; /* enough codes to use up j bits */
1653 f -= *xp; /* else deduct codes from patterns */
1654 }
1655 }
1656 z = 1 << j; /* table entries for j-bit table */
Eric Andersenb052b471999-11-18 00:21:59 +00001657
Erik Andersene49d5ec2000-02-08 19:58:47 +00001658 /* allocate and link in new table */
1659 if (
1660 (q =
1661 (struct huft *) malloc((z + 1) *
1662 sizeof(struct huft))) ==
1663 (struct huft *) NULL) {
1664 if (h)
1665 huft_free(u[0]);
1666 return 3; /* not enough memory */
1667 }
1668 hufts += z + 1; /* track memory usage */
1669 *t = q + 1; /* link to list for huft_free() */
1670 *(t = &(q->v.t)) = (struct huft *) NULL;
1671 u[h] = ++q; /* table starts after link */
Eric Andersenb052b471999-11-18 00:21:59 +00001672
Erik Andersene49d5ec2000-02-08 19:58:47 +00001673 /* connect to last table, if there is one */
1674 if (h) {
1675 x[h] = i; /* save pattern for backing up */
1676 r.b = (uch) l; /* bits to dump before this table */
1677 r.e = (uch) (16 + j); /* bits in this table */
1678 r.v.t = q; /* pointer to this table */
1679 j = i >> (w - l); /* (get around Turbo C bug) */
1680 u[h - 1][j] = r; /* connect to last table */
1681 }
1682 }
Eric Andersenb052b471999-11-18 00:21:59 +00001683
Erik Andersene49d5ec2000-02-08 19:58:47 +00001684 /* set up table entry in r */
1685 r.b = (uch) (k - w);
1686 if (p >= v + n)
1687 r.e = 99; /* out of values--invalid code */
1688 else if (*p < s) {
1689 r.e = (uch) (*p < 256 ? 16 : 15); /* 256 is end-of-block code */
1690 r.v.n = (ush) (*p); /* simple code is just the value */
1691 p++; /* one compiler does not like *p++ */
1692 } else {
1693 r.e = (uch) e[*p - s]; /* non-simple--look up in lists */
1694 r.v.n = d[*p++ - s];
1695 }
Eric Andersenb052b471999-11-18 00:21:59 +00001696
Erik Andersene49d5ec2000-02-08 19:58:47 +00001697 /* fill code-like entries with r */
1698 f = 1 << (k - w);
1699 for (j = i >> w; j < z; j += f)
1700 q[j] = r;
Eric Andersenb052b471999-11-18 00:21:59 +00001701
Erik Andersene49d5ec2000-02-08 19:58:47 +00001702 /* backwards increment the k-bit code i */
1703 for (j = 1 << (k - 1); i & j; j >>= 1)
1704 i ^= j;
1705 i ^= j;
Eric Andersenb052b471999-11-18 00:21:59 +00001706
Erik Andersene49d5ec2000-02-08 19:58:47 +00001707 /* backup over finished tables */
1708 while ((i & ((1 << w) - 1)) != x[h]) {
1709 h--; /* don't need to update q */
1710 w -= l;
1711 }
1712 }
1713 }
Eric Andersenb052b471999-11-18 00:21:59 +00001714
1715
Erik Andersene49d5ec2000-02-08 19:58:47 +00001716 /* Return true (1) if we were given an incomplete table */
1717 return y != 0 && g != 1;
Eric Andersenb052b471999-11-18 00:21:59 +00001718}
1719
1720
1721
1722int huft_free(t)
Erik Andersene49d5ec2000-02-08 19:58:47 +00001723struct huft *t; /* table to free */
1724
Eric Andersenb052b471999-11-18 00:21:59 +00001725/* Free the malloc'ed tables built by huft_build(), which makes a linked
1726 list of the tables it made, with the links in a dummy first entry of
1727 each table. */
1728{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001729 register struct huft *p, *q;
Eric Andersenb052b471999-11-18 00:21:59 +00001730
1731
Erik Andersene49d5ec2000-02-08 19:58:47 +00001732 /* Go through linked list, freeing from the malloced (t[-1]) address. */
1733 p = t;
1734 while (p != (struct huft *) NULL) {
1735 q = (--p)->v.t;
1736 free((char *) p);
1737 p = q;
1738 }
1739 return 0;
Eric Andersenb052b471999-11-18 00:21:59 +00001740}
1741
1742
1743int inflate_codes(tl, td, bl, bd)
Erik Andersene49d5ec2000-02-08 19:58:47 +00001744struct huft *tl, *td; /* literal/length and distance decoder tables */
1745int bl, bd; /* number of bits decoded by tl[] and td[] */
1746
Eric Andersenb052b471999-11-18 00:21:59 +00001747/* inflate (decompress) the codes in a deflated (compressed) block.
1748 Return an error code or zero if it all goes ok. */
1749{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001750 register unsigned e; /* table entry flag/number of extra bits */
1751 unsigned n, d; /* length and index for copy */
1752 unsigned w; /* current window position */
1753 struct huft *t; /* pointer to table entry */
1754 unsigned ml, md; /* masks for bl and bd bits */
1755 register ulg b; /* bit buffer */
1756 register unsigned k; /* number of bits in bit buffer */
Eric Andersenb052b471999-11-18 00:21:59 +00001757
1758
Erik Andersene49d5ec2000-02-08 19:58:47 +00001759 /* make local copies of globals */
1760 b = bb; /* initialize bit buffer */
1761 k = bk;
1762 w = wp; /* initialize window position */
Eric Andersenb052b471999-11-18 00:21:59 +00001763
Erik Andersene49d5ec2000-02-08 19:58:47 +00001764 /* inflate the coded data */
1765 ml = mask_bits[bl]; /* precompute masks for speed */
1766 md = mask_bits[bd];
1767 for (;;) { /* do until end of block */
1768 NEEDBITS((unsigned) bl)
1769 if ((e = (t = tl + ((unsigned) b & ml))->e) > 16)
1770 do {
1771 if (e == 99)
1772 return 1;
1773 DUMPBITS(t->b)
1774 e -= 16;
1775 NEEDBITS(e)
1776 } while ((e = (t = t->v.t + ((unsigned) b & mask_bits[e]))->e)
1777 > 16);
1778 DUMPBITS(t->b)
1779 if (e == 16) { /* then it's a literal */
1780 slide[w++] = (uch) t->v.n;
1781 Tracevv((stderr, "%c", slide[w - 1]));
1782 if (w == WSIZE) {
1783 flush_output(w);
1784 w = 0;
1785 }
1786 } else { /* it's an EOB or a length */
Eric Andersenb052b471999-11-18 00:21:59 +00001787
Erik Andersene49d5ec2000-02-08 19:58:47 +00001788 /* exit if end of block */
1789 if (e == 15)
1790 break;
Eric Andersenb052b471999-11-18 00:21:59 +00001791
Erik Andersene49d5ec2000-02-08 19:58:47 +00001792 /* get length of block to copy */
1793 NEEDBITS(e)
1794 n = t->v.n + ((unsigned) b & mask_bits[e]);
1795 DUMPBITS(e);
Eric Andersenb052b471999-11-18 00:21:59 +00001796
Erik Andersene49d5ec2000-02-08 19:58:47 +00001797 /* decode distance of block to copy */
1798 NEEDBITS((unsigned) bd)
1799 if ((e = (t = td + ((unsigned) b & md))->e) > 16)
1800 do {
1801 if (e == 99)
1802 return 1;
1803 DUMPBITS(t->b)
1804 e -= 16;
1805 NEEDBITS(e)
1806 }
1807 while (
1808 (e =
1809 (t =
1810 t->v.t + ((unsigned) b & mask_bits[e]))->e) >
1811 16);
1812 DUMPBITS(t->b)
1813 NEEDBITS(e)
1814 d = w - t->v.n - ((unsigned) b & mask_bits[e]);
1815 DUMPBITS(e)
1816 Tracevv((stderr, "\\[%d,%d]", w - d, n));
1817
1818 /* do the copy */
1819 do {
1820 n -= (e =
1821 (e =
1822 WSIZE - ((d &= WSIZE - 1) > w ? d : w)) >
1823 n ? n : e);
Eric Andersenb052b471999-11-18 00:21:59 +00001824#if !defined(NOMEMCPY) && !defined(DEBUG)
Erik Andersene49d5ec2000-02-08 19:58:47 +00001825 if (w - d >= e) { /* (this test assumes unsigned comparison) */
1826 memcpy(slide + w, slide + d, e);
1827 w += e;
1828 d += e;
1829 } else /* do it slow to avoid memcpy() overlap */
1830#endif /* !NOMEMCPY */
1831 do {
1832 slide[w++] = slide[d++];
1833 Tracevv((stderr, "%c", slide[w - 1]));
1834 } while (--e);
1835 if (w == WSIZE) {
1836 flush_output(w);
1837 w = 0;
1838 }
1839 } while (n);
1840 }
1841 }
Eric Andersenb052b471999-11-18 00:21:59 +00001842
1843
Erik Andersene49d5ec2000-02-08 19:58:47 +00001844 /* restore the globals from the locals */
1845 wp = w; /* restore global window pointer */
1846 bb = b; /* restore global bit buffer */
1847 bk = k;
Eric Andersenb052b471999-11-18 00:21:59 +00001848
Erik Andersene49d5ec2000-02-08 19:58:47 +00001849 /* done */
1850 return 0;
Eric Andersenb052b471999-11-18 00:21:59 +00001851}
1852
1853
1854
1855int inflate_stored()
1856/* "decompress" an inflated type 0 (stored) block. */
1857{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001858 unsigned n; /* number of bytes in block */
1859 unsigned w; /* current window position */
1860 register ulg b; /* bit buffer */
1861 register unsigned k; /* number of bits in bit buffer */
Eric Andersenb052b471999-11-18 00:21:59 +00001862
1863
Erik Andersene49d5ec2000-02-08 19:58:47 +00001864 /* make local copies of globals */
1865 b = bb; /* initialize bit buffer */
1866 k = bk;
1867 w = wp; /* initialize window position */
Eric Andersenb052b471999-11-18 00:21:59 +00001868
1869
Erik Andersene49d5ec2000-02-08 19:58:47 +00001870 /* go to byte boundary */
1871 n = k & 7;
1872 DUMPBITS(n);
Eric Andersenb052b471999-11-18 00:21:59 +00001873
1874
Erik Andersene49d5ec2000-02-08 19:58:47 +00001875 /* get the length and its complement */
1876 NEEDBITS(16)
1877 n = ((unsigned) b & 0xffff);
1878 DUMPBITS(16)
1879 NEEDBITS(16)
1880 if (n != (unsigned) ((~b) & 0xffff))
1881 return 1; /* error in compressed data */
1882 DUMPBITS(16)
Eric Andersenb052b471999-11-18 00:21:59 +00001883
1884
Erik Andersene49d5ec2000-02-08 19:58:47 +00001885 /* read and output the compressed data */
1886 while (n--) {
1887 NEEDBITS(8)
1888 slide[w++] = (uch) b;
1889 if (w == WSIZE) {
1890 flush_output(w);
1891 w = 0;
1892 }
1893 DUMPBITS(8)
1894 }
Eric Andersenb052b471999-11-18 00:21:59 +00001895
1896
Erik Andersene49d5ec2000-02-08 19:58:47 +00001897 /* restore the globals from the locals */
1898 wp = w; /* restore global window pointer */
1899 bb = b; /* restore global bit buffer */
1900 bk = k;
1901 return 0;
Eric Andersenb052b471999-11-18 00:21:59 +00001902}
1903
1904
1905
1906int inflate_fixed()
1907/* decompress an inflated type 1 (fixed Huffman codes) block. We should
1908 either replace this with a custom decoder, or at least precompute the
1909 Huffman tables. */
1910{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001911 int i; /* temporary variable */
1912 struct huft *tl; /* literal/length code table */
1913 struct huft *td; /* distance code table */
1914 int bl; /* lookup bits for tl */
1915 int bd; /* lookup bits for td */
1916 unsigned l[288]; /* length list for huft_build */
Eric Andersenb052b471999-11-18 00:21:59 +00001917
1918
Erik Andersene49d5ec2000-02-08 19:58:47 +00001919 /* set up literal table */
1920 for (i = 0; i < 144; i++)
1921 l[i] = 8;
1922 for (; i < 256; i++)
1923 l[i] = 9;
1924 for (; i < 280; i++)
1925 l[i] = 7;
1926 for (; i < 288; i++) /* make a complete, but wrong code set */
1927 l[i] = 8;
1928 bl = 7;
1929 if ((i = huft_build(l, 288, 257, cplens, cplext, &tl, &bl)) != 0)
1930 return i;
Eric Andersenb052b471999-11-18 00:21:59 +00001931
1932
Erik Andersene49d5ec2000-02-08 19:58:47 +00001933 /* set up distance table */
1934 for (i = 0; i < 30; i++) /* make an incomplete code set */
1935 l[i] = 5;
1936 bd = 5;
1937 if ((i = huft_build(l, 30, 0, cpdist, cpdext, &td, &bd)) > 1) {
1938 huft_free(tl);
1939 return i;
1940 }
Eric Andersenb052b471999-11-18 00:21:59 +00001941
1942
Erik Andersene49d5ec2000-02-08 19:58:47 +00001943 /* decompress until an end-of-block code */
1944 if (inflate_codes(tl, td, bl, bd))
1945 return 1;
Eric Andersenb052b471999-11-18 00:21:59 +00001946
1947
Erik Andersene49d5ec2000-02-08 19:58:47 +00001948 /* free the decoding tables, return */
1949 huft_free(tl);
1950 huft_free(td);
1951 return 0;
Eric Andersenb052b471999-11-18 00:21:59 +00001952}
1953
1954
1955
1956int inflate_dynamic()
1957/* decompress an inflated type 2 (dynamic Huffman codes) block. */
1958{
Erik Andersene49d5ec2000-02-08 19:58:47 +00001959 int i; /* temporary variables */
1960 unsigned j;
1961 unsigned l; /* last length */
1962 unsigned m; /* mask for bit lengths table */
1963 unsigned n; /* number of lengths to get */
1964 struct huft *tl; /* literal/length code table */
1965 struct huft *td; /* distance code table */
1966 int bl; /* lookup bits for tl */
1967 int bd; /* lookup bits for td */
1968 unsigned nb; /* number of bit length codes */
1969 unsigned nl; /* number of literal/length codes */
1970 unsigned nd; /* number of distance codes */
1971
Eric Andersenb052b471999-11-18 00:21:59 +00001972#ifdef PKZIP_BUG_WORKAROUND
Erik Andersene49d5ec2000-02-08 19:58:47 +00001973 unsigned ll[288 + 32]; /* literal/length and distance code lengths */
Eric Andersenb052b471999-11-18 00:21:59 +00001974#else
Erik Andersene49d5ec2000-02-08 19:58:47 +00001975 unsigned ll[286 + 30]; /* literal/length and distance code lengths */
Eric Andersenb052b471999-11-18 00:21:59 +00001976#endif
Erik Andersene49d5ec2000-02-08 19:58:47 +00001977 register ulg b; /* bit buffer */
1978 register unsigned k; /* number of bits in bit buffer */
Eric Andersenb052b471999-11-18 00:21:59 +00001979
1980
Erik Andersene49d5ec2000-02-08 19:58:47 +00001981 /* make local bit buffer */
1982 b = bb;
1983 k = bk;
Eric Andersenb052b471999-11-18 00:21:59 +00001984
1985
Erik Andersene49d5ec2000-02-08 19:58:47 +00001986 /* read in table lengths */
1987 NEEDBITS(5)
1988 nl = 257 + ((unsigned) b & 0x1f); /* number of literal/length codes */
1989 DUMPBITS(5)
1990 NEEDBITS(5)
1991 nd = 1 + ((unsigned) b & 0x1f); /* number of distance codes */
1992 DUMPBITS(5)
1993 NEEDBITS(4)
1994 nb = 4 + ((unsigned) b & 0xf); /* number of bit length codes */
1995 DUMPBITS(4)
Eric Andersenb052b471999-11-18 00:21:59 +00001996#ifdef PKZIP_BUG_WORKAROUND
Erik Andersene49d5ec2000-02-08 19:58:47 +00001997 if (nl > 288 || nd > 32)
Eric Andersenb052b471999-11-18 00:21:59 +00001998#else
Erik Andersene49d5ec2000-02-08 19:58:47 +00001999 if (nl > 286 || nd > 30)
Eric Andersenb052b471999-11-18 00:21:59 +00002000#endif
Erik Andersene49d5ec2000-02-08 19:58:47 +00002001 return 1; /* bad lengths */
Eric Andersenb052b471999-11-18 00:21:59 +00002002
2003
Erik Andersene49d5ec2000-02-08 19:58:47 +00002004 /* read in bit-length-code lengths */
2005 for (j = 0; j < nb; j++) {
2006 NEEDBITS(3)
2007 ll[border[j]] = (unsigned) b & 7;
2008 DUMPBITS(3)
2009 }
2010 for (; j < 19; j++)
2011 ll[border[j]] = 0;
Eric Andersenb052b471999-11-18 00:21:59 +00002012
2013
Erik Andersene49d5ec2000-02-08 19:58:47 +00002014 /* build decoding table for trees--single level, 7 bit lookup */
2015 bl = 7;
2016 if ((i = huft_build(ll, 19, 19, NULL, NULL, &tl, &bl)) != 0) {
2017 if (i == 1)
2018 huft_free(tl);
2019 return i; /* incomplete code set */
2020 }
Eric Andersenb052b471999-11-18 00:21:59 +00002021
2022
Erik Andersene49d5ec2000-02-08 19:58:47 +00002023 /* read in literal and distance code lengths */
2024 n = nl + nd;
2025 m = mask_bits[bl];
2026 i = l = 0;
2027 while ((unsigned) i < n) {
2028 NEEDBITS((unsigned) bl)
2029 j = (td = tl + ((unsigned) b & m))->b;
2030 DUMPBITS(j)
2031 j = td->v.n;
2032 if (j < 16) /* length of code in bits (0..15) */
2033 ll[i++] = l = j; /* save last length in l */
2034 else if (j == 16) { /* repeat last length 3 to 6 times */
2035 NEEDBITS(2)
2036 j = 3 + ((unsigned) b & 3);
2037 DUMPBITS(2)
2038 if ((unsigned) i + j > n)
2039 return 1;
2040 while (j--)
2041 ll[i++] = l;
2042 } else if (j == 17) { /* 3 to 10 zero length codes */
2043 NEEDBITS(3)
2044 j = 3 + ((unsigned) b & 7);
2045 DUMPBITS(3)
2046 if ((unsigned) i + j > n)
2047 return 1;
2048 while (j--)
2049 ll[i++] = 0;
2050 l = 0;
2051 } else { /* j == 18: 11 to 138 zero length codes */
2052
2053 NEEDBITS(7)
2054 j = 11 + ((unsigned) b & 0x7f);
2055 DUMPBITS(7)
2056 if ((unsigned) i + j > n)
2057 return 1;
2058 while (j--)
2059 ll[i++] = 0;
2060 l = 0;
2061 }
2062 }
Eric Andersenb052b471999-11-18 00:21:59 +00002063
2064
Erik Andersene49d5ec2000-02-08 19:58:47 +00002065 /* free decoding table for trees */
2066 huft_free(tl);
Eric Andersenb052b471999-11-18 00:21:59 +00002067
2068
Erik Andersene49d5ec2000-02-08 19:58:47 +00002069 /* restore the global bit buffer */
2070 bb = b;
2071 bk = k;
Eric Andersenb052b471999-11-18 00:21:59 +00002072
2073
Erik Andersene49d5ec2000-02-08 19:58:47 +00002074 /* build the decoding tables for literal/length and distance codes */
2075 bl = lbits;
2076 if ((i = huft_build(ll, nl, 257, cplens, cplext, &tl, &bl)) != 0) {
2077 if (i == 1) {
2078 fprintf(stderr, " incomplete literal tree\n");
2079 huft_free(tl);
2080 }
2081 return i; /* incomplete code set */
2082 }
2083 bd = dbits;
2084 if ((i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &td, &bd)) != 0) {
2085 if (i == 1) {
2086 fprintf(stderr, " incomplete distance tree\n");
Eric Andersenb052b471999-11-18 00:21:59 +00002087#ifdef PKZIP_BUG_WORKAROUND
Erik Andersene49d5ec2000-02-08 19:58:47 +00002088 i = 0;
2089 }
Eric Andersenb052b471999-11-18 00:21:59 +00002090#else
Erik Andersene49d5ec2000-02-08 19:58:47 +00002091 huft_free(td);
2092 }
2093 huft_free(tl);
2094 return i; /* incomplete code set */
Eric Andersenb052b471999-11-18 00:21:59 +00002095#endif
Erik Andersene49d5ec2000-02-08 19:58:47 +00002096 }
Eric Andersenb052b471999-11-18 00:21:59 +00002097
2098
Erik Andersene49d5ec2000-02-08 19:58:47 +00002099 /* decompress until an end-of-block code */
2100 if (inflate_codes(tl, td, bl, bd))
2101 return 1;
Eric Andersenb052b471999-11-18 00:21:59 +00002102
2103
Erik Andersene49d5ec2000-02-08 19:58:47 +00002104 /* free the decoding tables, return */
2105 huft_free(tl);
2106 huft_free(td);
2107 return 0;
Eric Andersenb052b471999-11-18 00:21:59 +00002108}
2109
2110
2111
2112int inflate_block(e)
Erik Andersene49d5ec2000-02-08 19:58:47 +00002113int *e; /* last block flag */
2114
Eric Andersenb052b471999-11-18 00:21:59 +00002115/* decompress an inflated block */
2116{
Erik Andersene49d5ec2000-02-08 19:58:47 +00002117 unsigned t; /* block type */
2118 register ulg b; /* bit buffer */
2119 register unsigned k; /* number of bits in bit buffer */
Eric Andersenb052b471999-11-18 00:21:59 +00002120
2121
Erik Andersene49d5ec2000-02-08 19:58:47 +00002122 /* make local bit buffer */
2123 b = bb;
2124 k = bk;
Eric Andersenb052b471999-11-18 00:21:59 +00002125
2126
Erik Andersene49d5ec2000-02-08 19:58:47 +00002127 /* read in last block bit */
2128 NEEDBITS(1)
2129 * e = (int) b & 1;
2130 DUMPBITS(1)
Eric Andersenb052b471999-11-18 00:21:59 +00002131
2132
Erik Andersene49d5ec2000-02-08 19:58:47 +00002133 /* read in block type */
2134 NEEDBITS(2)
2135 t = (unsigned) b & 3;
2136 DUMPBITS(2)
Eric Andersenb052b471999-11-18 00:21:59 +00002137
2138
Erik Andersene49d5ec2000-02-08 19:58:47 +00002139 /* restore the global bit buffer */
2140 bb = b;
2141 bk = k;
Eric Andersenb052b471999-11-18 00:21:59 +00002142
2143
Erik Andersene49d5ec2000-02-08 19:58:47 +00002144 /* inflate that block type */
2145 if (t == 2)
2146 return inflate_dynamic();
2147 if (t == 0)
2148 return inflate_stored();
2149 if (t == 1)
2150 return inflate_fixed();
Eric Andersenb052b471999-11-18 00:21:59 +00002151
2152
Erik Andersene49d5ec2000-02-08 19:58:47 +00002153 /* bad block type */
2154 return 2;
Eric Andersenb052b471999-11-18 00:21:59 +00002155}
2156
2157
2158
2159int inflate()
2160/* decompress an inflated entry */
2161{
Erik Andersene49d5ec2000-02-08 19:58:47 +00002162 int e; /* last block flag */
2163 int r; /* result code */
2164 unsigned h; /* maximum struct huft's malloc'ed */
Eric Andersenb052b471999-11-18 00:21:59 +00002165
2166
Erik Andersene49d5ec2000-02-08 19:58:47 +00002167 /* initialize window, bit buffer */
2168 wp = 0;
2169 bk = 0;
2170 bb = 0;
Eric Andersenb052b471999-11-18 00:21:59 +00002171
2172
Erik Andersene49d5ec2000-02-08 19:58:47 +00002173 /* decompress until the last block */
2174 h = 0;
2175 do {
2176 hufts = 0;
2177 if ((r = inflate_block(&e)) != 0)
2178 return r;
2179 if (hufts > h)
2180 h = hufts;
2181 } while (!e);
Eric Andersenb052b471999-11-18 00:21:59 +00002182
Erik Andersene49d5ec2000-02-08 19:58:47 +00002183 /* Undo too much lookahead. The next read will be byte aligned so we
2184 * can discard unused bits in the last meaningful byte.
2185 */
2186 while (bk >= 8) {
2187 bk -= 8;
2188 inptr--;
2189 }
Eric Andersenb052b471999-11-18 00:21:59 +00002190
Erik Andersene49d5ec2000-02-08 19:58:47 +00002191 /* flush out slide */
2192 flush_output(wp);
Eric Andersenb052b471999-11-18 00:21:59 +00002193
2194
Erik Andersene49d5ec2000-02-08 19:58:47 +00002195 /* return success */
Eric Andersenb052b471999-11-18 00:21:59 +00002196#ifdef DEBUG
Erik Andersene49d5ec2000-02-08 19:58:47 +00002197 fprintf(stderr, "<%u> ", h);
2198#endif /* DEBUG */
2199 return 0;
Eric Andersenb052b471999-11-18 00:21:59 +00002200}