Fix decoding of filenames in zip files, when read by ZipFile.
Java always writes UTF-8 filenames in zip files, but Harmony cunningly
always reads them back as ISO-8859-1. Fix that, and also set the flag
that says "these filenames are UTF-8" (the RI will do this from Java 7
on).
Note that this patch doesn't actually touch ZipFile because the bug is
in the package-private ZipEntry constructor that's used by
ZipFile.readCentralDirectory.
Bug: http://code.google.com/p/android/issues/detail?id=4690
diff --git a/archive/src/main/java/java/util/zip/ZipConstants.java b/archive/src/main/java/java/util/zip/ZipConstants.java
index 9fbce06..f0b1f69 100644
--- a/archive/src/main/java/java/util/zip/ZipConstants.java
+++ b/archive/src/main/java/java/util/zip/ZipConstants.java
@@ -30,4 +30,25 @@
CENNAM = 28, CENEXT = 30, CENCOM = 32, CENDSK = 34, CENATT = 36,
CENATX = 38, CENOFF = 42, ENDSUB = 8, ENDTOT = 10, ENDSIZ = 12,
ENDOFF = 16, ENDCOM = 20;
+
+ /**
+ * General purpose bit flag, bit 3: a data descriptor follows the entry data.
+ * If this bit is set, the fields crc-32, compressed
+ * size and uncompressed size are set to zero in the
+ * local header. The correct values are put in the
+ * data descriptor immediately following the compressed
+ * data. (Note: PKZIP version 2.04g for DOS only
+ * recognizes this bit for method 8 compression, newer
+ * versions of PKZIP recognize this bit for any
+ * compression method.)
+ */
+ public static final int GPBF_DATA_DESCRIPTOR_FLAG = 1 << 3; // android-added
+
+ /**
+ * General purpose bit flag, bit 11: language encoding flag (EFS).
+ * If this bit is set, the filename and comment fields
+ * for this file must be encoded using UTF-8.
+ * See http://www.pkware.com/documents/casestudies/APPNOTE.TXT
+ */
+ public static final int GPBF_UTF8_FLAG = 1 << 11; // android-added
}
diff --git a/archive/src/main/java/java/util/zip/ZipEntry.java b/archive/src/main/java/java/util/zip/ZipEntry.java
index 75466ce..d68aa2f 100644
--- a/archive/src/main/java/java/util/zip/ZipEntry.java
+++ b/archive/src/main/java/java/util/zip/ZipEntry.java
@@ -400,20 +400,16 @@
}
try {
- /*
- * The actual character set is "IBM Code Page 437". As of
- * Sep 2006, the Zip spec (APPNOTE.TXT) supports UTF-8. When
- * bit 11 of the GP flags field is set, the file name and
- * comment fields are UTF-8.
- *
- * TODO: add correct UTF-8 support.
- */
- name = new String(nameBytes, "ISO-8859-1");
+ // BEGIN android-changed
+ // The RI has always assumed UTF-8. (If GPBF_UTF8_FLAG isn't set, the encoding is
+ // actually IBM-437.)
+ name = new String(nameBytes, "UTF-8");
if (commentBytes != null) {
- comment = new String(commentBytes, "ISO-8859-1");
+ comment = new String(commentBytes, "UTF-8");
} else {
comment = null;
}
+ // END android-changed
} catch (UnsupportedEncodingException uee) {
throw new InternalError(uee.getMessage());
}
diff --git a/archive/src/main/java/java/util/zip/ZipInputStream.java b/archive/src/main/java/java/util/zip/ZipInputStream.java
index 554f5d5..ddebd6f 100644
--- a/archive/src/main/java/java/util/zip/ZipInputStream.java
+++ b/archive/src/main/java/java/util/zip/ZipInputStream.java
@@ -49,8 +49,6 @@
static final int STORED = 0;
- static final int ZIPDataDescriptorFlag = 8;
-
static final int ZIPLocalHeaderVersionNeeded = 20;
private boolean entriesEnd = false;
@@ -236,7 +234,7 @@
throw new ZipException(Messages.getString("archive.22")); //$NON-NLS-1$
}
int flags = getShort(hdrBuf, LOCFLG - LOCVER);
- hasDD = ((flags & ZIPDataDescriptorFlag) == ZIPDataDescriptorFlag);
+ hasDD = ((flags & GPBF_DATA_DESCRIPTOR_FLAG) == GPBF_DATA_DESCRIPTOR_FLAG);
int cetime = getShort(hdrBuf, LOCTIM - LOCVER);
int cemodDate = getShort(hdrBuf, LOCTIM - LOCVER + 2);
int cecompressionMethod = getShort(hdrBuf, LOCHOW - LOCVER);
diff --git a/archive/src/main/java/java/util/zip/ZipOutputStream.java b/archive/src/main/java/java/util/zip/ZipOutputStream.java
index 21b4221..f85e253 100644
--- a/archive/src/main/java/java/util/zip/ZipOutputStream.java
+++ b/archive/src/main/java/java/util/zip/ZipOutputStream.java
@@ -54,8 +54,6 @@
*/
public static final int STORED = 0;
- static final int ZIPDataDescriptorFlag = 8;
-
static final int ZIPLocalHeaderVersionNeeded = 20;
private String comment;
@@ -141,11 +139,12 @@
writeLong(out, currentEntry.size = def.getTotalIn());
}
// Update the CentralDirectory
+ // Mirror the local header flags, UTF-8 bit included. http://www.pkware.com/documents/casestudies/APPNOTE.TXT
+ int flags = (currentEntry.getMethod() == STORED ? 0 : GPBF_DATA_DESCRIPTOR_FLAG) | GPBF_UTF8_FLAG;
writeLong(cDir, CENSIG);
writeShort(cDir, ZIPLocalHeaderVersionNeeded); // Version created
writeShort(cDir, ZIPLocalHeaderVersionNeeded); // Version to extract
- writeShort(cDir, currentEntry.getMethod() == STORED ? 0
- : ZIPDataDescriptorFlag);
+ writeShort(cDir, flags);
writeShort(cDir, currentEntry.getMethod());
writeShort(cDir, currentEntry.time);
writeShort(cDir, currentEntry.modDate);
@@ -283,16 +282,23 @@
if (currentEntry.getMethod() == -1) {
currentEntry.setMethod(compressMethod);
}
+ // BEGIN android-changed
+ // Local file header.
+ // http://www.pkware.com/documents/casestudies/APPNOTE.TXT
+ int flags = currentEntry.getMethod() == STORED ? 0 : GPBF_DATA_DESCRIPTOR_FLAG;
+ // Java always outputs UTF-8 filenames. (Before Java 7, the RI didn't set this flag and used
+ // modified UTF-8. From Java 7, it sets this flag and uses normal UTF-8.)
+ flags |= GPBF_UTF8_FLAG;
writeLong(out, LOCSIG); // Entry header
writeShort(out, ZIPLocalHeaderVersionNeeded); // Extraction version
- writeShort(out, currentEntry.getMethod() == STORED ? 0
- : ZIPDataDescriptorFlag);
+ writeShort(out, flags);
writeShort(out, currentEntry.getMethod());
if (currentEntry.getTime() == -1) {
currentEntry.setTime(System.currentTimeMillis());
}
writeShort(out, currentEntry.time);
writeShort(out, currentEntry.modDate);
+ // END android-changed
if (currentEntry.getMethod() == STORED) {
if (currentEntry.size == -1) {
diff --git a/luni/src/test/java/java/util/zip/ZipEntryTest.java b/luni/src/test/java/java/util/zip/ZipEntryTest.java
new file mode 100644
index 0000000..4c7cccf
--- /dev/null
+++ b/luni/src/test/java/java/util/zip/ZipEntryTest.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package java.util.zip;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.util.Arrays;
+import java.util.List;
+
+import junit.framework.Test;
+import junit.framework.TestSuite;
+
+public class ZipEntryTest extends junit.framework.TestCase {
+    /** Regression test for http://code.google.com/p/android/issues/detail?id=4690 (non-ASCII names). */
+    public void test_utf8FileNames() throws Exception {
+        // Create a zip file containing non-ASCII filenames.
+        File f = File.createTempFile("your", "mum");
+        List<String> filenames = Arrays.asList("us-ascii",
+                "\u043c\u0430\u0440\u0442\u0430", // russian
+                "\u1f00\u03c0\u1f78", // greek
+                "\u30b3\u30f3\u30cb\u30c1\u30cf"); // japanese
+        ZipOutputStream out = new ZipOutputStream(new FileOutputStream(f));
+        for (String filename : filenames) {
+            out.putNextEntry(new ZipEntry(filename));
+            out.closeEntry(); // Empty files are fine.
+        }
+        out.close();
+        // Read it back, and check we find all those names.
+        // This failed when we were mangling the encoding.
+        ZipFile zipFile = new ZipFile(f);
+        for (String filename : filenames) {
+            assertNotNull(filename, zipFile.getEntry(filename));
+        }
+        zipFile.close(); // Release the file handle before reopening the file as a stream.
+        // Check that ZipInputStream works too.
+        ZipInputStream in = new ZipInputStream(new FileInputStream(f));
+        int entryCount = 0;
+        for (ZipEntry entry; (entry = in.getNextEntry()) != null; ++entryCount) {
+            assertTrue(entry.getName(), filenames.contains(entry.getName()));
+        }
+        assertEquals(filenames.size(), entryCount);
+        in.close();
+        f.delete(); // Don't leave the temporary zip behind.
+    }
+}