optionally use PAX headers when writing non-ASCII file names. COMPRESS-183
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/compress/trunk@1304709 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 5dcf8ca..86ef461 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -46,6 +46,17 @@
<body>
<release version="1.4" date="unreleased"
description="Release 1.4">
+ <action issue="COMPRESS-183" type="fix" date="2012-03-24">
+ The tar package now allows the encoding of file names to be
+ specified and can optionally use PAX extension headers to
+ write non-ASCII file names.
+ The stream classes now write (or expect to read) archives that
+ use the platform's native encoding for file names. Apache
+ Commons Compress 1.3 used to strip everything but the lower
+ eight bits of each character, which effectively only worked for
+ ASCII and ISO-8859-1 file names.
+ This new default behavior is a breaking change.
+ </action>
<action issue="COMPRESS-184" type="fix" date="2012-03-23">
TarArchiveInputStream failed to parse PAX headers that
contained non-ASCII characters.
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java
index 9031f24..2c98d14 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java
@@ -81,6 +81,10 @@
private final ZipEncoding encoding;
+ /**
+  * Whether a PAX "path" header is added for entries whose name
+  * cannot be encoded as ASCII; disabled by default.
+  */
+ private boolean addPaxHeadersForNonAsciiNames = false;
+ /** Encoding used to check whether an entry name can be encoded as ASCII. */
+ private static final ZipEncoding ASCII =
+ ZipEncodingHelper.getZipEncoding("ASCII");
+
/**
* Constructor for TarInputStream.
* @param os the output stream to use
@@ -172,6 +176,13 @@
this.bigNumberMode = bigNumberMode;
}
+ /**
+  * Controls whether entries whose names cannot be encoded as ASCII
+  * get a PAX extension header that carries the name.
+  *
+  * @param b {@code true} to add a PAX header for non-ASCII names
+  * @since Apache Commons Compress 1.4
+  */
+ public void setAddPaxHeadersForNonAsciiNames(boolean b) {
+     this.addPaxHeadersForNonAsciiNames = b;
+ }
@Deprecated
@Override
@@ -254,11 +265,14 @@
}
TarArchiveEntry entry = (TarArchiveEntry) archiveEntry;
Map<String, String> paxHeaders = new HashMap<String, String>();
- final byte[] nameBytes = encoding.encode(entry.getName()).array();
+ final String entryName = entry.getName();
+ final byte[] nameBytes = encoding.encode(entryName).array();
+ boolean paxHeaderContainsPath = false;
if (nameBytes.length >= TarConstants.NAMELEN) {
if (longFileMode == LONGFILE_POSIX) {
- paxHeaders.put("path", entry.getName());
+ paxHeaders.put("path", entryName);
+ paxHeaderContainsPath = true;
} else if (longFileMode == LONGFILE_GNU) {
// create a TarEntry for the LongLink, the contents
// of which are the entry's name
@@ -271,7 +285,7 @@
write(0); // NUL terminator
closeArchiveEntry();
} else if (longFileMode != LONGFILE_TRUNCATE) {
- throw new RuntimeException("file name '" + entry.getName()
+ throw new RuntimeException("file name '" + entryName
+ "' is too long ( > "
+ TarConstants.NAMELEN + " bytes)");
}
@@ -283,8 +297,13 @@
failForBigNumbers(entry);
}
+ if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsPath
+ && !ASCII.canEncode(entryName)) {
+ paxHeaders.put("path", entryName);
+ }
+
if (paxHeaders.size() > 0) {
- writePaxHeaders(entry.getName(), paxHeaders);
+ writePaxHeaders(entryName, paxHeaders);
}
entry.writeEntryHeader(recordBuf, encoding,
@@ -298,7 +317,7 @@
} else {
currSize = entry.getSize();
}
- currName = entry.getName();
+ currName = entryName;
haveUnclosedEntry = true;
}
@@ -426,7 +445,7 @@
*/
void writePaxHeaders(String entryName,
Map<String, String> headers) throws IOException {
- String name = "./PaxHeaders.X/" + entryName;
+ String name = "./PaxHeaders.X/" + stripTo7Bits(entryName);
if (name.length() >= TarConstants.NAMELEN) {
name = name.substring(0, TarConstants.NAMELEN - 1);
}
@@ -461,6 +480,18 @@
closeArchiveEntry();
}
+ /**
+  * Reduces every character of the given name to its lower seven bits.
+  *
+  * <p>Characters whose lower seven bits are all zero are dropped, so
+  * the result may be shorter than the input.</p>
+  *
+  * @param name the name to strip
+  * @return the name with each remaining character masked to seven bits
+  */
+ private String stripTo7Bits(String name) {
+     final int length = name.length();
+     // the buffer never leaves this method, so the unsynchronized
+     // StringBuilder is sufficient
+     StringBuilder result = new StringBuilder(length);
+     for (int i = 0; i < length; i++) {
+         char stripped = (char) (name.charAt(i) & 0x7F);
+         if (stripped != 0) { // a NUL would be read as the trailing NUL
+             result.append(stripped);
+         }
+     }
+     return result.toString();
+ }
+
/**
* Write an EOF (end of archive) record to the tar archive.
* An EOF record consists of a record of all zeros.
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java
index deeb5c3..92e80c3 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java
@@ -274,4 +274,27 @@
}
}
+ /**
+  * Writes an entry with a non-ASCII name while PAX headers for
+  * non-ASCII names are enabled, checks the bytes of the PAX "path"
+  * record and verifies the name can be read back.
+  */
+ public void testWriteNonAsciiPathNamePaxHeader() throws Exception {
+     String n = "\u00e4";
+     TarArchiveEntry t = new TarArchiveEntry(n);
+     t.setSize(10 * 1024);
+     ByteArrayOutputStream bos = new ByteArrayOutputStream();
+     TarArchiveOutputStream tos = new TarArchiveOutputStream(bos);
+     tos.setAddPaxHeadersForNonAsciiNames(true);
+     tos.putArchiveEntry(t);
+     tos.write(new byte[10 * 1024]);
+     tos.closeArchiveEntry();
+     tos.close();
+     byte[] data = bos.toByteArray();
+     // "\u00e4" occupies two bytes in UTF-8, which makes the whole
+     // record "11 path=<name>\n" exactly 11 bytes long
+     assertEquals("11 path=" + n + "\n",
+         new String(data, 512, 11, "UTF-8"));
+     // the archive is only read back from memory; nothing is written
+     // to the file system so the test stays portable
+     TarArchiveInputStream tin =
+         new TarArchiveInputStream(new ByteArrayInputStream(data));
+     try {
+         TarArchiveEntry e = tin.getNextTarEntry();
+         assertEquals(n, e.getName());
+     } finally {
+         tin.close();
+     }
+ }
+
}
\ No newline at end of file