blob: 2d0ef4e5b23b493ff401dadcbc227546774fd861 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.commons.compress.archivers.zip;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.util.Enumeration;
import java.util.zip.CRC32;
import org.apache.commons.compress.AbstractTestCase;
import org.apache.commons.compress.utils.CharsetNames;
public class UTF8ZipFilesTest extends AbstractTestCase {
private static final String CP437 = "cp437";
private static final String ASCII_TXT = "ascii.txt";
private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt";
private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt";
public void testUtf8FileRoundtripExplicitUnicodeExtra()
throws IOException {
testFileRoundtrip(CharsetNames.UTF_8, true, true);
}
public void testUtf8FileRoundtripNoEFSExplicitUnicodeExtra()
throws IOException {
testFileRoundtrip(CharsetNames.UTF_8, false, true);
}
public void testCP437FileRoundtripExplicitUnicodeExtra()
throws IOException {
testFileRoundtrip(CP437, false, true);
}
public void testASCIIFileRoundtripExplicitUnicodeExtra()
throws IOException {
testFileRoundtrip(CharsetNames.US_ASCII, false, true);
}
public void testUtf8FileRoundtripImplicitUnicodeExtra()
throws IOException {
testFileRoundtrip(CharsetNames.UTF_8, true, false);
}
public void testUtf8FileRoundtripNoEFSImplicitUnicodeExtra()
throws IOException {
testFileRoundtrip(CharsetNames.UTF_8, false, false);
}
public void testCP437FileRoundtripImplicitUnicodeExtra()
throws IOException {
testFileRoundtrip(CP437, false, false);
}
public void testASCIIFileRoundtripImplicitUnicodeExtra()
throws IOException {
testFileRoundtrip(CharsetNames.US_ASCII, false, false);
}
/*
* 7-ZIP created archive, uses EFS to signal UTF-8 filenames.
*
* 7-ZIP doesn't use EFS for strings that can be encoded in CP437
* - which is true for OIL_BARREL_TXT.
*/
public void testRead7ZipArchive() throws IOException, URISyntaxException {
URL zip = getClass().getResource("/utf8-7zip-test.zip");
File archive = new File(new URI(zip.toString()));
ZipFile zf = null;
try {
zf = new ZipFile(archive, CP437, false);
assertNotNull(zf.getEntry(ASCII_TXT));
assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
assertNotNull(zf.getEntry(OIL_BARREL_TXT));
} finally {
ZipFile.closeQuietly(zf);
}
}
public void testRead7ZipArchiveForStream() throws IOException,
URISyntaxException {
URL zip = getClass().getResource("/utf8-7zip-test.zip");
FileInputStream archive =
new FileInputStream(new File(new URI(zip.toString())));
ZipArchiveInputStream zi = null;
try {
zi = new ZipArchiveInputStream(archive, CP437, false);
assertEquals(ASCII_TXT, zi.getNextEntry().getName());
assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
} finally {
if (zi != null) {
zi.close();
}
}
}
/*
* WinZIP created archive, uses Unicode Extra Fields but only in
* the central directory.
*/
public void testReadWinZipArchive() throws IOException, URISyntaxException {
URL zip = getClass().getResource("/utf8-winzip-test.zip");
File archive = new File(new URI(zip.toString()));
ZipFile zf = null;
try {
zf = new ZipFile(archive, null, true);
assertCanRead(zf, ASCII_TXT);
assertCanRead(zf, EURO_FOR_DOLLAR_TXT);
assertCanRead(zf, OIL_BARREL_TXT);
} finally {
ZipFile.closeQuietly(zf);
}
}
private void assertCanRead(ZipFile zf, String fileName) throws IOException {
ZipArchiveEntry entry = zf.getEntry(fileName);
assertNotNull("Entry doesn't exist", entry);
InputStream is = zf.getInputStream(entry);
assertNotNull("InputStream is null", is);
try {
is.read();
} finally {
is.close();
}
}
public void testReadWinZipArchiveForStream() throws IOException,
URISyntaxException {
URL zip = getClass().getResource("/utf8-winzip-test.zip");
FileInputStream archive =
new FileInputStream(new File(new URI(zip.toString())));
ZipArchiveInputStream zi = null;
try {
zi = new ZipArchiveInputStream(archive, null, true);
assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
assertEquals(ASCII_TXT, zi.getNextEntry().getName());
} finally {
if (zi != null) {
zi.close();
}
}
}
public void testZipFileReadsUnicodeFields() throws IOException {
File file = File.createTempFile("unicode-test", ".zip");
file.deleteOnExit();
ZipArchiveInputStream zi = null;
try {
createTestFile(file, CharsetNames.US_ASCII, false, true);
FileInputStream archive = new FileInputStream(file);
zi = new ZipArchiveInputStream(archive, CharsetNames.US_ASCII, true);
assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
assertEquals(ASCII_TXT, zi.getNextEntry().getName());
} finally {
if (zi != null) {
zi.close();
}
tryHardToDelete(file);
}
}
public void testZipArchiveInputStreamReadsUnicodeFields()
throws IOException {
File file = File.createTempFile("unicode-test", ".zip");
file.deleteOnExit();
ZipFile zf = null;
try {
createTestFile(file, CharsetNames.US_ASCII, false, true);
zf = new ZipFile(file, CharsetNames.US_ASCII, true);
assertNotNull(zf.getEntry(ASCII_TXT));
assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
assertNotNull(zf.getEntry(OIL_BARREL_TXT));
} finally {
ZipFile.closeQuietly(zf);
tryHardToDelete(file);
}
}
public void testRawNameReadFromZipFile()
throws IOException, URISyntaxException {
URL zip = getClass().getResource("/utf8-7zip-test.zip");
File archive = new File(new URI(zip.toString()));
ZipFile zf = null;
try {
zf = new ZipFile(archive, CP437, false);
assertRawNameOfAcsiiTxt(zf.getEntry(ASCII_TXT));
} finally {
ZipFile.closeQuietly(zf);
}
}
public void testRawNameReadFromStream()
throws IOException, URISyntaxException {
URL zip = getClass().getResource("/utf8-7zip-test.zip");
FileInputStream archive =
new FileInputStream(new File(new URI(zip.toString())));
ZipArchiveInputStream zi = null;
try {
zi = new ZipArchiveInputStream(archive, CP437, false);
assertRawNameOfAcsiiTxt((ZipArchiveEntry) zi.getNextEntry());
} finally {
if (zi != null) {
zi.close();
}
}
}
private static void testFileRoundtrip(String encoding, boolean withEFS,
boolean withExplicitUnicodeExtra)
throws IOException {
File file = File.createTempFile(encoding + "-test", ".zip");
file.deleteOnExit();
try {
createTestFile(file, encoding, withEFS, withExplicitUnicodeExtra);
testFile(file, encoding);
} finally {
tryHardToDelete(file);
}
}
private static void createTestFile(File file, String encoding,
boolean withEFS,
boolean withExplicitUnicodeExtra)
throws UnsupportedEncodingException, IOException {
ZipEncoding zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
ZipArchiveOutputStream zos = null;
try {
zos = new ZipArchiveOutputStream(file);
zos.setEncoding(encoding);
zos.setUseLanguageEncodingFlag(withEFS);
zos.setCreateUnicodeExtraFields(withExplicitUnicodeExtra ?
ZipArchiveOutputStream
.UnicodeExtraFieldPolicy.NEVER
: ZipArchiveOutputStream
.UnicodeExtraFieldPolicy.ALWAYS);
ZipArchiveEntry ze = new ZipArchiveEntry(OIL_BARREL_TXT);
if (withExplicitUnicodeExtra
&& !zipEncoding.canEncode(ze.getName())) {
ByteBuffer en = zipEncoding.encode(ze.getName());
ze.addExtraField(new UnicodePathExtraField(ze.getName(),
en.array(),
en.arrayOffset(),
en.limit()));
}
zos.putArchiveEntry(ze);
zos.write("Hello, world!".getBytes(CharsetNames.US_ASCII));
zos.closeArchiveEntry();
ze = new ZipArchiveEntry(EURO_FOR_DOLLAR_TXT);
if (withExplicitUnicodeExtra
&& !zipEncoding.canEncode(ze.getName())) {
ByteBuffer en = zipEncoding.encode(ze.getName());
ze.addExtraField(new UnicodePathExtraField(ze.getName(),
en.array(),
en.arrayOffset(),
en.limit()));
}
zos.putArchiveEntry(ze);
zos.write("Give me your money!".getBytes(CharsetNames.US_ASCII));
zos.closeArchiveEntry();
ze = new ZipArchiveEntry(ASCII_TXT);
if (withExplicitUnicodeExtra
&& !zipEncoding.canEncode(ze.getName())) {
ByteBuffer en = zipEncoding.encode(ze.getName());
ze.addExtraField(new UnicodePathExtraField(ze.getName(),
en.array(),
en.arrayOffset(),
en.limit()));
}
zos.putArchiveEntry(ze);
zos.write("ascii".getBytes(CharsetNames.US_ASCII));
zos.closeArchiveEntry();
zos.finish();
} finally {
if (zos != null) {
try {
zos.close();
} catch (IOException e) { /* swallow */ }
}
}
}
private static void testFile(File file, String encoding)
throws IOException {
ZipFile zf = null;
try {
zf = new ZipFile(file, encoding, false);
Enumeration<ZipArchiveEntry> e = zf.getEntries();
while (e.hasMoreElements()) {
ZipArchiveEntry ze = e.nextElement();
if (ze.getName().endsWith("sser.txt")) {
assertUnicodeName(ze, OIL_BARREL_TXT, encoding);
} else if (ze.getName().endsWith("_for_Dollar.txt")) {
assertUnicodeName(ze, EURO_FOR_DOLLAR_TXT, encoding);
} else if (!ze.getName().equals(ASCII_TXT)) {
throw new AssertionError("Urecognized ZIP entry with name ["
+ ze.getName() + "] found.");
}
}
} finally {
ZipFile.closeQuietly(zf);
}
}
private static UnicodePathExtraField findUniCodePath(ZipArchiveEntry ze) {
return (UnicodePathExtraField)
ze.getExtraField(UnicodePathExtraField.UPATH_ID);
}
private static void assertUnicodeName(ZipArchiveEntry ze,
String expectedName,
String encoding)
throws IOException {
if (!expectedName.equals(ze.getName())) {
UnicodePathExtraField ucpf = findUniCodePath(ze);
assertNotNull(ucpf);
ZipEncoding enc = ZipEncodingHelper.getZipEncoding(encoding);
ByteBuffer ne = enc.encode(ze.getName());
CRC32 crc = new CRC32();
crc.update(ne.array(),ne.arrayOffset(),ne.limit());
assertEquals(crc.getValue(), ucpf.getNameCRC32());
assertEquals(expectedName, new String(ucpf.getUnicodeName(),
CharsetNames.UTF_8));
}
}
public void testUtf8Interoperability() throws IOException {
File file1 = super.getFile("utf8-7zip-test.zip");
File file2 = super.getFile("utf8-winzip-test.zip");
testFile(file1,CP437);
testFile(file2,CP437);
}
private static void assertRawNameOfAcsiiTxt(ZipArchiveEntry ze) {
byte[] b = ze.getRawName();
assertNotNull(b);
final int len = ASCII_TXT.length();
assertEquals(len, b.length);
for (int i = 0; i < len; i++) {
assertEquals("Byte " + i, (byte) ASCII_TXT.charAt(i), b[i]);
}
assertNotSame(b, ze.getRawName());
}
}