Blame - src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java - platform/external/apache-commons-compress

blob: d29e4e7f27e08203eade8941dfdf8774cb46ed42 [file] [log] [blame]

Torsten Curdt	e190c40	2009-01-12 11:29:53 +0000	[diff] [blame]	1	/*
				2	* Licensed to the Apache Software Foundation (ASF) under one or more
				3	* contributor license agreements. See the NOTICE file distributed with
				4	* this work for additional information regarding copyright ownership.
				5	* The ASF licenses this file to You under the Apache License, Version 2.0
				6	* (the "License"); you may not use this file except in compliance with
				7	* the License. You may obtain a copy of the License at
				8	*
				9	* http://www.apache.org/licenses/LICENSE-2.0
				10	*
				11	* Unless required by applicable law or agreed to in writing, software
				12	* distributed under the License is distributed on an "AS IS" BASIS,
				13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	* See the License for the specific language governing permissions and
				15	* limitations under the License.
				16	*
				17	*/
				18	package org.apache.commons.compress.archivers.zip;
				19
				20	import java.io.File;
				21	import java.io.IOException;
				22	import java.io.InputStream;
				23	import java.io.RandomAccessFile;
				24	import java.io.UnsupportedEncodingException;
				25	import java.util.Calendar;
Stefan Bodewig	2ae5e28	2009-02-04 08:45:31 +0000	[diff] [blame]	26	import java.util.Collections;
Torsten Curdt	e190c40	2009-01-12 11:29:53 +0000	[diff] [blame]	27	import java.util.Date;
				28	import java.util.Enumeration;
Stefan Bodewig	2ae5e28	2009-02-04 08:45:31 +0000	[diff] [blame]	29	import java.util.HashMap;
				30	import java.util.Map;
Torsten Curdt	e190c40	2009-01-12 11:29:53 +0000	[diff] [blame]	31	import java.util.zip.Inflater;
				32	import java.util.zip.InflaterInputStream;
				33	import java.util.zip.ZipException;
				34
				35	/**
				36	* Replacement for <code>java.util.zip.ZipFile</code>.
				37	*
				38	* <p>This class adds support for file name encodings other than UTF-8
				39	* (which is required to work on ZIP files created by native zip tools)
				40	* and is able to skip a preamble like the one found in self
				41	* extracting archives. Furthermore it returns instances of
				42	* <code>org.apache.commons.compress.archivers.zip.ZipEntry</code> instead of
				43	* <code>java.util.zip.ZipEntry</code>.</p>
				44	*
				45	* <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
				46	* have to reimplement all methods anyway. Like
				47	* <code>java.util.zip.ZipFile</code>, it uses RandomAccessFile under the
				48	* covers and supports compressed and uncompressed entries.</p>
				49	*
				50	* <p>The method signatures mimic the ones of
				51	* <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
				52	*
				53	* <ul>
				54	* <li>There is no getName method.</li>
				55	* <li>entries has been renamed to getEntries.</li>
				56	* <li>getEntries and getEntry return
				57	* <code>org.apache.commons.compress.archivers.zip.ZipEntry</code> instances.</li>
				58	* <li>close is allowed to throw IOException.</li>
				59	* </ul>
				60	*
				61	*/
				62	public class ZipFile {
				63	private static final int HASH_SIZE = 509;
				64	private static final int SHORT = 2;
				65	private static final int WORD = 4;
				66	private static final int NIBLET_MASK = 0x0f;
				67	private static final int BYTE_SHIFT = 8;
				68	private static final int POS_0 = 0;
				69	private static final int POS_1 = 1;
				70	private static final int POS_2 = 2;
				71	private static final int POS_3 = 3;
				72
				73	/**
				74	* Maps ZipEntrys to Longs, recording the offsets of the local
				75	* file headers.
				76	*/
Stefan Bodewig	2ae5e28	2009-02-04 08:45:31 +0000	[diff] [blame]	77	private final Map entries = new HashMap(HASH_SIZE);
Torsten Curdt	e190c40	2009-01-12 11:29:53 +0000	[diff] [blame]	78
				79	/**
				80	* Maps String to ZipEntrys, name -> actual entry.
				81	*/
Stefan Bodewig	2ae5e28	2009-02-04 08:45:31 +0000	[diff] [blame]	82	private final Map nameMap = new HashMap(HASH_SIZE);
Torsten Curdt	e190c40	2009-01-12 11:29:53 +0000	[diff] [blame]	83
				84	private static final class OffsetEntry {
				85	private long headerOffset = -1;
				86	private long dataOffset = -1;
				87	}
				88
				89	/**
				90	* The encoding to use for filenames and the file comment.
				91	*
				92	* <p>For a list of possible values see <a
				93	* href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
				94	* Defaults to the platform's default character encoding.</p>
				95	*/
				96	private String encoding = null;
				97
				98	/**
				99	* The actual data source.
				100	*/
				101	private RandomAccessFile archive;
				102
				103	/**
				104	* Opens the given file for reading, assuming the platform's
				105	* native encoding for file names.
				106	*
				107	* @param f the archive.
				108	*
				109	* @throws IOException if an error occurs while reading the file.
				110	*/
				111	public ZipFile(File f) throws IOException {
				112	this(f, null);
				113	}
				114
				115	/**
				116	* Opens the given file for reading, assuming the platform's
				117	* native encoding for file names.
				118	*
				119	* @param name name of the archive.
				120	*
				121	* @throws IOException if an error occurs while reading the file.
				122	*/
				123	public ZipFile(String name) throws IOException {
				124	this(new File(name), null);
				125	}
				126
				127	/**
				128	* Opens the given file for reading, assuming the specified
				129	* encoding for file names.
				130	*
				131	* @param name name of the archive.
				132	* @param encoding the encoding to use for file names
				133	*
				134	* @throws IOException if an error occurs while reading the file.
				135	*/
				136	public ZipFile(String name, String encoding) throws IOException {
				137	this(new File(name), encoding);
				138	}
				139
				140	/**
				141	* Opens the given file for reading, assuming the specified
				142	* encoding for file names.
				143	*
				144	* @param f the archive.
				145	* @param encoding the encoding to use for file names
				146	*
				147	* @throws IOException if an error occurs while reading the file.
				148	*/
				149	public ZipFile(File f, String encoding) throws IOException {
				150	this.encoding = encoding;
				151	archive = new RandomAccessFile(f, "r");
Stefan Bodewig	4669f29	2009-02-04 04:56:10 +0000	[diff] [blame]	152	boolean success = false;
Torsten Curdt	e190c40	2009-01-12 11:29:53 +0000	[diff] [blame]	153	try {
				154	populateFromCentralDirectory();
				155	resolveLocalFileHeaderData();
Stefan Bodewig	4669f29	2009-02-04 04:56:10 +0000	[diff] [blame]	156	success = true;
				157	} finally {
				158	if (!success) {
				159	try {
				160	archive.close();
				161	} catch (IOException e2) {
				162	// swallow, throw the original exception instead
				163	}
Torsten Curdt	e190c40	2009-01-12 11:29:53 +0000	[diff] [blame]	164	}
Torsten Curdt	e190c40	2009-01-12 11:29:53 +0000	[diff] [blame]	165	}
				166	}
				167
				168	/**
				169	* The encoding to use for filenames and the file comment.
				170	*
				171	* @return null if using the platform's default character encoding.
				172	*/
				173	public String getEncoding() {
				174	return encoding;
				175	}
				176
				177	/**
				178	* Closes the archive.
				179	* @throws IOException if an error occurs closing the archive.
				180	*/
				181	public void close() throws IOException {
				182	archive.close();
				183	}
				184
				185	/**
				186	* close a zipfile quietly; throw no io fault, do nothing
				187	* on a null parameter
				188	* @param zipfile file to close, can be null
				189	*/
				190	public static void closeQuietly(ZipFile zipfile) {
				191	if (zipfile != null) {
				192	try {
				193	zipfile.close();
				194	} catch (IOException e) {
				195	//ignore
				196	}
				197	}
				198	}
				199
				200	/**
				201	* Returns all entries.
				202	* @return all entries as {@link ZipEntry} instances
				203	*/
				204	public Enumeration getEntries() {
Stefan Bodewig	2ae5e28	2009-02-04 08:45:31 +0000	[diff] [blame]	205	return Collections.enumeration(entries.keySet());
Torsten Curdt	e190c40	2009-01-12 11:29:53 +0000	[diff] [blame]	206	}
				207
				208	/**
				209	* Returns a named entry - or <code>null</code> if no entry by
				210	* that name exists.
				211	* @param name name of the entry.
				212	* @return the ZipEntry corresponding to the given name - or
				213	* <code>null</code> if not present.
				214	*/
				215	public ZipEntry getEntry(String name) {
				216	return (ZipEntry) nameMap.get(name);
				217	}
				218
				219	/**
				220	* Returns an InputStream for reading the contents of the given entry.
				221	* @param ze the entry to get the stream for.
				222	* @return a stream to read the entry from.
				223	* @throws IOException if unable to create an input stream from the zipenty
				224	* @throws ZipException if the zipentry has an unsupported compression method
				225	*/
				226	public InputStream getInputStream(ZipEntry ze)
				227	throws IOException, ZipException {
				228	OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
				229	if (offsetEntry == null) {
				230	return null;
				231	}
				232	long start = offsetEntry.dataOffset;
				233	BoundedInputStream bis =
				234	new BoundedInputStream(start, ze.getCompressedSize());
				235	switch (ze.getMethod()) {
				236	case ZipEntry.STORED:
				237	return bis;
				238	case ZipEntry.DEFLATED:
				239	bis.addDummy();
				240	return new InflaterInputStream(bis, new Inflater(true));
				241	default:
				242	throw new ZipException("Found unsupported compression method "
				243	+ ze.getMethod());
				244	}
				245	}
				246
				247	private static final int CFH_LEN =
				248	/* version made by */ SHORT
				249	/* version needed to extract */ + SHORT
				250	/* general purpose bit flag */ + SHORT
				251	/* compression method */ + SHORT
				252	/* last mod file time */ + SHORT
				253	/* last mod file date */ + SHORT
				254	/* crc-32 */ + WORD
				255	/* compressed size */ + WORD
				256	/* uncompressed size */ + WORD
				257	/* filename length */ + SHORT
				258	/* extra field length */ + SHORT
				259	/* file comment length */ + SHORT
				260	/* disk number start */ + SHORT
				261	/* internal file attributes */ + SHORT
				262	/* external file attributes */ + WORD
				263	/* relative offset of local header */ + WORD;
				264
				265	/**
				266	* Reads the central directory of the given archive and populates
				267	* the internal tables with ZipEntry instances.
				268	*
				269	* <p>The ZipEntrys will know all data that can be obtained from
				270	* the central directory alone, but not the data that requires the
				271	* local file header or additional data to be read.</p>
				272	*/
				273	private void populateFromCentralDirectory()
				274	throws IOException {
				275	positionAtCentralDirectory();
				276
				277	byte[] cfh = new byte[CFH_LEN];
				278
				279	byte[] signatureBytes = new byte[WORD];
				280	archive.readFully(signatureBytes);
				281	long sig = ZipLong.getValue(signatureBytes);
				282	final long cfhSig = ZipLong.getValue(ZipOutputStream.CFH_SIG);
				283	if (sig != cfhSig && startsWithLocalFileHeader()) {
				284	throw new IOException("central directory is empty, can't expand"
				285	+ " corrupt archive.");
				286	}
				287	while (sig == cfhSig) {
				288	archive.readFully(cfh);
				289	int off = 0;
				290	ZipEntry ze = new ZipEntry();
				291
				292	int versionMadeBy = ZipShort.getValue(cfh, off);
				293	off += SHORT;
				294	ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
				295
				296	off += WORD; // skip version info and general purpose byte
				297
				298	ze.setMethod(ZipShort.getValue(cfh, off));
				299	off += SHORT;
				300
				301	// FIXME this is actually not very cpu cycles friendly as we are converting from
				302	// dos to java while the underlying Sun implementation will convert
				303	// from java to dos time for internal storage...
				304	long time = dosToJavaTime(ZipLong.getValue(cfh, off));
				305	ze.setTime(time);
				306	off += WORD;
				307
				308	ze.setCrc(ZipLong.getValue(cfh, off));
				309	off += WORD;
				310
				311	ze.setCompressedSize(ZipLong.getValue(cfh, off));
				312	off += WORD;
				313
				314	ze.setSize(ZipLong.getValue(cfh, off));
				315	off += WORD;
				316
				317	int fileNameLen = ZipShort.getValue(cfh, off);
				318	off += SHORT;
				319
				320	int extraLen = ZipShort.getValue(cfh, off);
				321	off += SHORT;
				322
				323	int commentLen = ZipShort.getValue(cfh, off);
				324	off += SHORT;
				325
				326	off += SHORT; // disk number
				327
				328	ze.setInternalAttributes(ZipShort.getValue(cfh, off));
				329	off += SHORT;
				330
				331	ze.setExternalAttributes(ZipLong.getValue(cfh, off));
				332	off += WORD;
				333
				334	byte[] fileName = new byte[fileNameLen];
				335	archive.readFully(fileName);
				336	ze.setName(getString(fileName));
				337
				338
				339	// LFH offset,
				340	OffsetEntry offset = new OffsetEntry();
				341	offset.headerOffset = ZipLong.getValue(cfh, off);
				342	// data offset will be filled later
				343	entries.put(ze, offset);
				344
				345	nameMap.put(ze.getName(), ze);
				346
Stefan Bodewig	5e5804c	2009-02-05 12:45:23 +0000	[diff] [blame^]	347	int lenToSkip = extraLen;
				348	while (lenToSkip > 0) {
				349	int skipped = archive.skipBytes(lenToSkip);
				350	if (skipped <= 0) {
				351	throw new RuntimeException("failed to skip extra data in"
				352	+ " central directory");
				353	}
				354	lenToSkip -= skipped;
				355	}
Torsten Curdt	e190c40	2009-01-12 11:29:53 +0000	[diff] [blame]	356
				357	byte[] comment = new byte[commentLen];
				358	archive.readFully(comment);
				359	ze.setComment(getString(comment));
				360
				361	archive.readFully(signatureBytes);
				362	sig = ZipLong.getValue(signatureBytes);
				363	}
				364	}
				365
				366	private static final int MIN_EOCD_SIZE =
				367	/* end of central dir signature */ WORD
				368	/* number of this disk */ + SHORT
				369	/* number of the disk with the */
				370	/* start of the central directory */ + SHORT
				371	/* total number of entries in */
				372	/* the central dir on this disk */ + SHORT
				373	/* total number of entries in */
				374	/* the central dir */ + SHORT
				375	/* size of the central directory */ + WORD
				376	/* offset of start of central */
				377	/* directory with respect to */
				378	/* the starting disk number */ + WORD
				379	/* zipfile comment length */ + SHORT;
				380
				381	private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
				382	/* maximum length of zipfile comment */ + 0xFFFF;
				383
				384	private static final int CFD_LOCATOR_OFFSET =
				385	/* end of central dir signature */ WORD
				386	/* number of this disk */ + SHORT
				387	/* number of the disk with the */
				388	/* start of the central directory */ + SHORT
				389	/* total number of entries in */
				390	/* the central dir on this disk */ + SHORT
				391	/* total number of entries in */
				392	/* the central dir */ + SHORT
				393	/* size of the central directory */ + WORD;
				394
				395	/**
				396	* Searches for the "End of central dir record", parses
				397	* it and positions the stream at the first central directory
				398	* record.
				399	*/
				400	private void positionAtCentralDirectory()
				401	throws IOException {
				402	boolean found = false;
				403	long off = archive.length() - MIN_EOCD_SIZE;
				404	long stopSearching = Math.max(0L, archive.length() - MAX_EOCD_SIZE);
				405	if (off >= 0) {
				406	archive.seek(off);
				407	byte[] sig = ZipOutputStream.EOCD_SIG;
				408	int curr = archive.read();
				409	while (off >= stopSearching && curr != -1) {
				410	if (curr == sig[POS_0]) {
				411	curr = archive.read();
				412	if (curr == sig[POS_1]) {
				413	curr = archive.read();
				414	if (curr == sig[POS_2]) {
				415	curr = archive.read();
				416	if (curr == sig[POS_3]) {
				417	found = true;
				418	break;
				419	}
				420	}
				421	}
				422	}
				423	archive.seek(--off);
				424	curr = archive.read();
				425	}
				426	}
				427	if (!found) {
				428	throw new ZipException("archive is not a ZIP archive");
				429	}
				430	archive.seek(off + CFD_LOCATOR_OFFSET);
				431	byte[] cfdOffset = new byte[WORD];
				432	archive.readFully(cfdOffset);
				433	archive.seek(ZipLong.getValue(cfdOffset));
				434	}
				435
				436	/**
				437	* Number of bytes in local file header up to the "length of
				438	* filename" entry.
				439	*/
				440	private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
				441	/* local file header signature */ WORD
				442	/* version needed to extract */ + SHORT
				443	/* general purpose bit flag */ + SHORT
				444	/* compression method */ + SHORT
				445	/* last mod file time */ + SHORT
				446	/* last mod file date */ + SHORT
				447	/* crc-32 */ + WORD
				448	/* compressed size */ + WORD
				449	/* uncompressed size */ + WORD;
				450
				451	/**
				452	* Walks through all recorded entries and adds the data available
				453	* from the local file header.
				454	*
				455	* <p>Also records the offsets for the data to read from the
				456	* entries.</p>
				457	*/
				458	private void resolveLocalFileHeaderData()
				459	throws IOException {
				460	Enumeration e = getEntries();
				461	while (e.hasMoreElements()) {
				462	ZipEntry ze = (ZipEntry) e.nextElement();
				463	OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
				464	long offset = offsetEntry.headerOffset;
				465	archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
				466	byte[] b = new byte[SHORT];
				467	archive.readFully(b);
				468	int fileNameLen = ZipShort.getValue(b);
				469	archive.readFully(b);
				470	int extraFieldLen = ZipShort.getValue(b);
Stefan Bodewig	5e5804c	2009-02-05 12:45:23 +0000	[diff] [blame^]	471	int lenToSkip = fileNameLen;
				472	while (lenToSkip > 0) {
				473	int skipped = archive.skipBytes(lenToSkip);
				474	if (skipped <= 0) {
				475	throw new RuntimeException("failed to skip file name in"
				476	+ " local file header");
				477	}
				478	lenToSkip -= skipped;
				479	}
Torsten Curdt	e190c40	2009-01-12 11:29:53 +0000	[diff] [blame]	480	byte[] localExtraData = new byte[extraFieldLen];
				481	archive.readFully(localExtraData);
				482	ze.setExtra(localExtraData);
				483	/*dataOffsets.put(ze,
				484	new Long(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
				485	+ SHORT + SHORT + fileNameLen + extraFieldLen));
				486	*/
				487	offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
				488	+ SHORT + SHORT + fileNameLen + extraFieldLen;
				489	}
				490	}
				491
				492	/**
				493	* Convert a DOS date/time field to a Date object.
				494	*
				495	* @param zipDosTime contains the stored DOS time.
				496	* @return a Date instance corresponding to the given time.
				497	*/
				498	protected static Date fromDosTime(ZipLong zipDosTime) {
				499	long dosTime = zipDosTime.getValue();
				500	return new Date(dosToJavaTime(dosTime));
				501	}
				502
				503	/*
				504	* Converts DOS time to Java time (number of milliseconds since epoch).
				505	*/
				506	private static long dosToJavaTime(long dosTime) {
				507	Calendar cal = Calendar.getInstance();
				508	// CheckStyle:MagicNumberCheck OFF - no point
				509	cal.set(Calendar.YEAR, (int) ((dosTime >> 25) & 0x7f) + 1980);
				510	cal.set(Calendar.MONTH, (int) ((dosTime >> 21) & 0x0f) - 1);
				511	cal.set(Calendar.DATE, (int) (dosTime >> 16) & 0x1f);
				512	cal.set(Calendar.HOUR_OF_DAY, (int) (dosTime >> 11) & 0x1f);
				513	cal.set(Calendar.MINUTE, (int) (dosTime >> 5) & 0x3f);
				514	cal.set(Calendar.SECOND, (int) (dosTime << 1) & 0x3e);
				515	// CheckStyle:MagicNumberCheck ON
				516	return cal.getTime().getTime();
				517	}
				518
				519
				520	/**
				521	* Retrieve a String from the given bytes using the encoding set
				522	* for this ZipFile.
				523	*
				524	* @param bytes the byte array to transform
				525	* @return String obtained by using the given encoding
				526	* @throws ZipException if the encoding cannot be recognized.
				527	*/
				528	protected String getString(byte[] bytes) throws ZipException {
				529	if (encoding == null) {
				530	return new String(bytes);
				531	} else {
				532	try {
				533	return new String(bytes, encoding);
				534	} catch (UnsupportedEncodingException uee) {
				535	throw new ZipException(uee.getMessage());
				536	}
				537	}
				538	}
				539
				540	/**
				541	* Checks whether the archive starts with a LFH. If it doesn't,
				542	* it may be an empty archive.
				543	*/
				544	private boolean startsWithLocalFileHeader() throws IOException {
				545	archive.seek(0);
				546	final byte[] start = new byte[WORD];
				547	archive.readFully(start);
				548	for (int i = 0; i < start.length; i++) {
				549	if (start[i] != ZipOutputStream.LFH_SIG[i]) {
				550	return false;
				551	}
				552	}
				553	return true;
				554	}
				555
				556	/**
				557	* InputStream that delegates requests to the underlying
				558	* RandomAccessFile, making sure that only bytes from a certain
				559	* range can be read.
				560	*/
				561	private class BoundedInputStream extends InputStream {
				562	private long remaining;
				563	private long loc;
				564	private boolean addDummyByte = false;
				565
				566	BoundedInputStream(long start, long remaining) {
				567	this.remaining = remaining;
				568	loc = start;
				569	}
				570
				571	public int read() throws IOException {
				572	if (remaining-- <= 0) {
				573	if (addDummyByte) {
				574	addDummyByte = false;
				575	return 0;
				576	}
				577	return -1;
				578	}
				579	synchronized (archive) {
				580	archive.seek(loc++);
				581	return archive.read();
				582	}
				583	}
				584
				585	public int read(byte[] b, int off, int len) throws IOException {
				586	if (remaining <= 0) {
				587	if (addDummyByte) {
				588	addDummyByte = false;
				589	b[off] = 0;
				590	return 1;
				591	}
				592	return -1;
				593	}
				594
				595	if (len <= 0) {
				596	return 0;
				597	}
				598
				599	if (len > remaining) {
				600	len = (int) remaining;
				601	}
				602	int ret = -1;
				603	synchronized (archive) {
				604	archive.seek(loc);
				605	ret = archive.read(b, off, len);
				606	}
				607	if (ret > 0) {
				608	loc += ret;
				609	remaining -= ret;
				610	}
				611	return ret;
				612	}
				613
				614	/**
				615	* Inflater needs an extra dummy byte for nowrap - see
				616	* Inflater's javadocs.
				617	*/
				618	void addDummy() {
				619	addDummyByte = true;
				620	}
				621	}
				622
				623	}