COMPRESS-212 use specified encoding when reading GNU long names in TarArchiveInputStream
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/compress/trunk@1428942 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 029a40f..a80488f 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -103,6 +103,10 @@
ChangeSetPerformer has a new perform overload that uses a
ZipFile instance as input.
</action>
+ <action type="fix" date="2013-01-04" issue="COMPRESS-212">
+ TarArchiveInputStream ignored the encoding for GNU long name
+ entries.
+ </action>
</release>
<release version="1.4.1" date="2012-05-23"
description="Release 1.4.1">
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
index 4221844..b54656b 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
@@ -247,11 +247,11 @@
if (currEntry.isGNULongNameEntry()) {
// read in the name
- StringBuffer longName = new StringBuffer();
+ ByteArrayOutputStream longName = new ByteArrayOutputStream();
byte[] buf = new byte[SMALL_BUFFER_SIZE];
int length = 0;
while ((length = read(buf)) >= 0) {
- longName.append(new String(buf, 0, length)); // TODO default charset?
+ longName.write(buf, 0, length);
}
getNextEntry();
if (currEntry == null) {
@@ -259,12 +259,19 @@
// Malformed tar file - long entry name not followed by entry
return null;
}
- // remove trailing null terminator
- if (longName.length() > 0
- && longName.charAt(longName.length() - 1) == 0) {
- longName.deleteCharAt(longName.length() - 1);
+ byte[] longNameData = longName.toByteArray();
+ // remove trailing null terminator(s)
+ length = longNameData.length;
+ while (length > 0 && longNameData[length - 1] == 0) {
+ --length;
}
- currEntry.setName(longName.toString());
+ if (length != longNameData.length) {
+ byte[] l = new byte[length];
+ System.arraycopy(longNameData, 0, l, 0, length);
+ longNameData = l;
+ }
+
+ currEntry.setName(encoding.decode(longNameData));
}
if (currEntry.isPaxHeader()){ // Process Pax headers
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
index d76360f..d3450ed 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
@@ -23,6 +23,7 @@
import static org.junit.Assert.fail;
import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@@ -137,6 +138,31 @@
}
}
+ /**
+  * Round-trips a single entry whose name contains a non-ASCII character
+  * and is longer than 100 characters through a TarArchiveOutputStream
+  * and a TarArchiveInputStream that were both created with UTF-16.
+  *
+  * <p>The writer is put into LONGFILE_GNU mode, so the name is presumably
+  * stored as a GNU long-name entry; the test passes only if the reader
+  * decodes that name with the encoding it was given.</p>
+  *
+  * @throws Exception if writing or reading the in-memory archive fails
+  */
+ @Test
+ public void shouldUseSpecifiedEncodingWhenReadingGNULongNames()
+     throws Exception {
+     String encoding = CharsetNames.UTF_16;
+     String name = "1234567890123456789012345678901234567890123456789"
+         + "01234567890123456789012345678901234567890123456789"
+         + "01234567890\u00e4";
+
+     // Write the archive into memory.
+     ByteArrayOutputStream bos = new ByteArrayOutputStream();
+     TarArchiveOutputStream tos =
+         new TarArchiveOutputStream(bos, encoding);
+     try {
+         tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
+         TarArchiveEntry written = new TarArchiveEntry(name);
+         written.setSize(1);
+         tos.putArchiveEntry(written);
+         // exactly one payload byte, matching setSize(1) above
+         tos.write(30);
+         tos.closeArchiveEntry();
+     } finally {
+         // close() must run before toByteArray() so the archive is complete
+         tos.close();
+     }
+
+     // Read it back with the same encoding and compare the names.
+     TarArchiveInputStream tis = new TarArchiveInputStream(
+         new ByteArrayInputStream(bos.toByteArray()), encoding);
+     try {
+         TarArchiveEntry read = tis.getNextTarEntry();
+         assertEquals(name, read.getName());
+     } finally {
+         // the original never closed the input stream
+         tis.close();
+     }
+ }
+
private TarArchiveInputStream getTestStream(String name) {
return new TarArchiveInputStream(
TarArchiveInputStreamTest.class.getResourceAsStream(name));