ZipFile: Improve performance for ZipFile.<init>
Avoid thousands of JNI transitions in the constructor. We can
quite easily check for invalid entry names and duplicates while
parsing the central directory in native code.
The benchmarks show that the median time for opening a file with 8192
entries decreased from 488ms to 120us.
AFTER:
Trial Report (1 of 3):
Experiment {instrument=runtime, benchmarkMethod=timeZipFileOpen, vm=default, parameters={numEntries=128}}
Results:
runtime(ns): min=112794.57, 1st qu.=113238.72, median=114539.28, mean=119694.66, 3rd qu.=125191.84, max=143145.02
Trial Report (2 of 3):
Experiment {instrument=runtime, benchmarkMethod=timeZipFileOpen, vm=default, parameters={numEntries=1024}}
Results:
runtime(ns): min=112868.04, 1st qu.=113582.70, median=115180.93, mean=117760.32, 3rd qu.=118067.87, max=137760.10
Trial Report (3 of 3):
Experiment {instrument=runtime, benchmarkMethod=timeZipFileOpen, vm=default, parameters={numEntries=8192}}
Results:
runtime(ns): min=112417.27, 1st qu.=114630.16, median=117272.70, mean=119197.74, 3rd qu.=122742.84, max=135330.50
BEFORE:
Trial Report (1 of 3):
Experiment {instrument=runtime, benchmarkMethod=benchmarkZipFileOpen, vm=default, parameters={numEntries=128}}
Results:
runtime(ns): min=9360144.37, 1st qu.=9559712.86, median=9839983.50, mean=9816559.27, 3rd qu.=10057399.81, max=10170918.14
Trial Report (2 of 3):
Experiment {instrument=runtime, benchmarkMethod=benchmarkZipFileOpen, vm=default, parameters={numEntries=1024}}
Results:
runtime(ns): min=60682471.64, 1st qu.=62388598.00, median=64319977.57, mean=66545494.60, 3rd qu.=72114121.64, max=77298980.57
Trial Report (3 of 3):
Experiment {instrument=runtime, benchmarkMethod=benchmarkZipFileOpen, vm=default, parameters={numEntries=8192}}
Results:
runtime(ns): min=485804479.00, 1st qu.=487322656.00, median=488986250.00, mean=489820034.67, 3rd qu.=492383229.50, max=495372604.00
bug: 27145664
Change-Id: I510e923ac54d52082b9eafa0f151ad4f71826333
diff --git a/benchmarks/src/benchmarks/BufferedZipFileBenchmark.java b/benchmarks/src/benchmarks/BufferedZipFileBenchmark.java
index 3c0122f..aecf461 100644
--- a/benchmarks/src/benchmarks/BufferedZipFileBenchmark.java
+++ b/benchmarks/src/benchmarks/BufferedZipFileBenchmark.java
@@ -35,6 +35,7 @@
@BeforeExperiment
protected void setUp() throws Exception {
+ System.setProperty("java.io.tmpdir", "/data/local/tmp");
file = File.createTempFile(getClass().getName(), ".zip");
file.deleteOnExit();
diff --git a/benchmarks/src/benchmarks/ZipFileBenchmark.java b/benchmarks/src/benchmarks/ZipFileBenchmark.java
new file mode 100644
index 0000000..78557e0
--- /dev/null
+++ b/benchmarks/src/benchmarks/ZipFileBenchmark.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package benchmarks;
+
+import com.google.caliper.BeforeExperiment;
+import com.google.caliper.Param;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Enumeration;
+import java.util.Random;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipOutputStream;
+
+
+public class ZipFileBenchmark {
+
+ private File file;
+ @Param({"128", "1024", "8192"}) int numEntries;
+
+ @BeforeExperiment
+ protected void setUp() throws Exception {
+ System.setProperty("java.io.tmpdir", "/data/local/tmp");
+ file = File.createTempFile(getClass().getName(), ".zip");
+ file.deleteOnExit();
+ writeEntries(new ZipOutputStream(new FileOutputStream(file)), numEntries, 0);
+ ZipFile zipFile = new ZipFile(file);
+ for (Enumeration<? extends ZipEntry> e = zipFile.entries(); e.hasMoreElements(); ) {
+ ZipEntry zipEntry = e.nextElement();
+ }
+ zipFile.close();
+ }
+
+ public void timeZipFileOpen(int reps) throws Exception {
+ for (int i = 0; i < reps; ++i) {
+ ZipFile zf = new ZipFile(file);
+ }
+ }
+
+ /**
+ * Compresses the given number of files, each of the given size, into a .zip archive.
+ */
+ protected void writeEntries(ZipOutputStream out, int entryCount, long entrySize)
+ throws IOException {
+ byte[] writeBuffer = new byte[8192];
+ Random random = new Random();
+ try {
+ for (int entry = 0; entry < entryCount; ++entry) {
+ ZipEntry ze = new ZipEntry(Integer.toHexString(entry));
+ ze.setSize(entrySize);
+ out.putNextEntry(ze);
+
+ for (long i = 0; i < entrySize; i += writeBuffer.length) {
+ random.nextBytes(writeBuffer);
+ int byteCount = (int) Math.min(writeBuffer.length, entrySize - i);
+ out.write(writeBuffer, 0, byteCount);
+ }
+
+ out.closeEntry();
+ }
+ } finally {
+ out.close();
+ }
+ }
+}
diff --git a/ojluni/src/main/java/java/util/zip/ZipFile.java b/ojluni/src/main/java/java/util/zip/ZipFile.java
index f9e7268..85a1cef 100755
--- a/ojluni/src/main/java/java/util/zip/ZipFile.java
+++ b/ojluni/src/main/java/java/util/zip/ZipFile.java
@@ -233,22 +233,6 @@
throw new ZipException("No entries");
}
- // Android-changed: Disallow duplicate entry names or entries that have a C
- // string terminator in them.
- Set<String> entryNames = new HashSet<String>();
- while (entries.hasMoreElements()) {
- String entryName = entries.nextElement().getName();
- if (entryNames.contains(entryName)) {
- close();
- throw new ZipException("Duplicate entry name: " + entryName);
- }
- if (entryName.indexOf('\0') != -1) {
- close();
- throw new ZipException("Null in entry name: " + entryName);
- }
- entryNames.add(entryName);
- }
-
guard.open("close");
}
diff --git a/ojluni/src/main/native/zip_util.c b/ojluni/src/main/native/zip_util.c
index 3735334..c79f738 100644
--- a/ojluni/src/main/native/zip_util.c
+++ b/ojluni/src/main/native/zip_util.c
@@ -27,6 +27,7 @@
* Support for reading ZIP/JAR files.
*/
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
@@ -417,6 +418,20 @@
return h;
}
+/*
+ * Returns true if |s| is a valid zip entry name.
+ */
+static bool isValidEntryName(const char *s, int length)
+{
+ while (length-- > 0) {
+ if (*s++ == 0) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
static unsigned int
hash_append(unsigned int hash, char c)
{
@@ -695,8 +710,13 @@
ZIP_FORMAT_ERROR("invalid CEN header (bad header size)");
}
+ const char* entryName = (const char *)cp + CENHDR;
+ if (!isValidEntryName(entryName, nlen)) {
+ ZIP_FORMAT_ERROR("invalid CEN header (invalid entry name)");
+ }
+
/* if the entry is metadata add it to our metadata names */
- if (isMetaName((char *)cp+CENHDR, nlen)) {
+ if (isMetaName(entryName, nlen)) {
if (addMetaName(zip, (char *)cp+CENHDR, nlen) != 0) {
goto Catch;
}
@@ -704,10 +724,30 @@
/* Record the CEN offset and the name hash in our hash cell. */
entries[i].cenpos = cenpos + (cp - cenbuf);
- entries[i].hash = hashN((char *)cp+CENHDR, nlen);
+ entries[i].hash = hashN(entryName, nlen);
+ entries[i].next = ZIP_ENDCHAIN;
/* Add the entry to the hash table */
hsh = entries[i].hash % tablelen;
+
+ /* First check that there are no other entries that have the same name. */
+ int chain = table[hsh];
+ while (chain != ZIP_ENDCHAIN) {
+ const jzcell* cell = &entries[chain];
+ if (cell->hash == entries[i].hash) {
+ const char* cenStart = (const char *) cenbuf + cell->cenpos - cenpos;
+ if (CENNAM(cenStart) == nlen) {
+ const char* chainName = cenStart + CENHDR;
+ if (strncmp(entryName, chainName, nlen) == 0) {
+ ZIP_FORMAT_ERROR("invalid CEN header (duplicate entry)");
+ }
+ }
+ }
+
+ chain = cell->next;
+ }
+
+
entries[i].next = table[hsh];
table[hsh] = i;
}