COMPRESS-212 use specified encoding when reading GNU long names in TarArchiveInputStream

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/compress/trunk@1428942 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 029a40f..a80488f 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -103,6 +103,10 @@
         ChangeSetPerformer has a new perform overload that uses a
         ZipFile instance as input.
       </action>
+      <action type="fix" date="2013-01-04" issue="COMPRESS-212">
+        TarArchiveInputStream ignored the encoding for GNU long name
+        entries.
+      </action>
     </release>
     <release version="1.4.1" date="2012-05-23"
              description="Release 1.4.1">
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
index 4221844..b54656b 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
@@ -247,11 +247,11 @@
 
         if (currEntry.isGNULongNameEntry()) {
             // read in the name
-            StringBuffer longName = new StringBuffer();
+            ByteArrayOutputStream longName = new ByteArrayOutputStream();
             byte[] buf = new byte[SMALL_BUFFER_SIZE];
             int length = 0;
             while ((length = read(buf)) >= 0) {
-                longName.append(new String(buf, 0, length)); // TODO default charset?
+                longName.write(buf, 0, length);
             }
             getNextEntry();
             if (currEntry == null) {
@@ -259,12 +259,19 @@
                 // Malformed tar file - long entry name not followed by entry
                 return null;
             }
-            // remove trailing null terminator
-            if (longName.length() > 0
-                && longName.charAt(longName.length() - 1) == 0) {
-                longName.deleteCharAt(longName.length() - 1);
+            byte[] longNameData = longName.toByteArray();
+            // remove trailing null terminator(s)
+            length = longNameData.length;
+            while (length > 0 && longNameData[length - 1] == 0) {
+                --length;
             }
-            currEntry.setName(longName.toString());
+            if (length != longNameData.length) {
+                byte[] l = new byte[length];
+                System.arraycopy(longNameData, 0, l, 0, length);
+                longNameData = l;
+            }
+            
+            currEntry.setName(encoding.decode(longNameData));
         }
 
         if (currEntry.isPaxHeader()){ // Process Pax headers
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
index d76360f..d3450ed 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
@@ -23,6 +23,7 @@
 import static org.junit.Assert.fail;
 
 import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
@@ -137,6 +138,31 @@
         }
     }
 
+    @Test
+    public void shouldUseSpecifiedEncodingWhenReadingGNULongNames()
+        throws Exception {
+        ByteArrayOutputStream bos = new ByteArrayOutputStream(); // archive is built in memory
+        String encoding = CharsetNames.UTF_16; // same charset is passed to both writer and reader
+        String name = "1234567890123456789012345678901234567890123456789"
+            + "01234567890123456789012345678901234567890123456789"
+            + "01234567890\u00e4"; // 111 characters, the last one non-ASCII (U+00E4)
+        TarArchiveOutputStream tos =
+            new TarArchiveOutputStream(bos, encoding);
+        tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU); // select the GNU long-file mode
+        TarArchiveEntry t = new TarArchiveEntry(name);
+        t.setSize(1); // matches the single byte written below
+        tos.putArchiveEntry(t);
+        tos.write(30);
+        tos.closeArchiveEntry();
+        tos.close();
+        byte[] data = bos.toByteArray();
+        ByteArrayInputStream bis = new ByteArrayInputStream(data); // read back exactly what was written
+        TarArchiveInputStream tis =
+            new TarArchiveInputStream(bis, encoding);
+        t = tis.getNextTarEntry();
+        assertEquals(name, t.getName()); // the name must round-trip unchanged
+    }
+
     private TarArchiveInputStream getTestStream(String name) {
         return new TarArchiveInputStream(
                 TarArchiveInputStreamTest.class.getResourceAsStream(name));