COMPRESS-113:  TarArchiveEntry.parseTarHeader() includes the trailing space/NUL when parsing the octal size

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/compress/trunk@950489 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/RELEASE-NOTES.txt b/RELEASE-NOTES.txt
index ef40f9f..50f8cf5 100644
--- a/RELEASE-NOTES.txt
+++ b/RELEASE-NOTES.txt
@@ -7,6 +7,11 @@
 Changes in this version include:
 
 New features:
+o COMPRESS-108:  Command-line interface to list archive contents.
+       Usage: java -jar commons-compress-n.m.jar archive-name [zip|tar|etc] 
+o COMPRESS-109:  Tar implementation does not support Pax headers
+       Added support for reading pax headers.
+       Note: does not support global pax headers 
 o COMPRESS-103:  ZipArchiveInputStream can optionally extract data that used
         the STORED compression method and a data descriptor.
         Doing so in a stream is not safe in general, so you have to
@@ -36,6 +41,7 @@
 o COMPRESS-78:  Add a BZip2Utils class modelled after GZipUtils Thanks to Jukka Zitting. 
 
 Fixed Bugs:
+o COMPRESS-113:  TarArchiveEntry.parseTarHeader() includes the trailing space/NUL when parsing the octal size 
 o COMPRESS-118:  TarUtils.parseName does not properly handle characters outside the range 0-127 
 o COMPRESS-107:  ArchiveStreamFactory does not recognise tar files created by Ant 
 o COMPRESS-110:  Support "ustar" prefix field, which is used when file paths are longer
@@ -77,12 +83,7 @@
 o COMPRESS-81:  TarOutputStream can leave garbage at the end of the archive 
 
 Changes:
-o COMPRESS-108:  Command-line interface to list archive contents.
-       Usage: java -jar commons-compress-n.m.jar archive-name [zip|tar|etc] 
 o COMPRESS-112:  ArArchiveInputStream does not handle GNU extended filename records (//) 
-o COMPRESS-109:  Tar implementation does not support Pax headers
-       Added support for reading pax headers.
-       Note: does not support global pax headers 
 o COMPRESS-105:  Document that the name of an ZipArchiveEntry determines whether
        an entry is considered a directory or not.
        If you don't use the constructor with the File argument the entry's
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index f07a803..932e603 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -45,7 +45,10 @@
   </properties>
   <body>
     <release version="1.1" date="as in SVN" description="Release 1.1">
-      <action issue="COMPRESS-108" type="update" date="2010-05-23">
+      <action issue="COMPRESS-113" type="fix" date="2010-06-02">
+       TarArchiveEntry.parseTarHeader() includes the trailing space/NUL when parsing the octal size
+      </action> 
+      <action issue="COMPRESS-108" type="add" date="2010-05-23">
        Command-line interface to list archive contents.
        Usage: java -jar commons-compress-n.m.jar archive-name [zip|tar|etc]
       </action>
@@ -55,7 +58,7 @@
       <action issue="COMPRESS-112" type="update" date="2010-05-13">
        ArArchiveInputStream does not handle GNU extended filename records (//)
       </action>
-      <action issue="COMPRESS-109" type="update" date="2010-05-10">
+      <action issue="COMPRESS-109" type="add" date="2010-05-10">
        Tar implementation does not support Pax headers
        Added support for reading pax headers.
        Note: does not support global pax headers
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
index 8aba2e2..27b2a00 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
@@ -35,51 +35,86 @@
     /**
      * Parse an octal string from a buffer.
      * Leading spaces are ignored.
-     * Parsing stops when a NUL is found, or a trailing space,
-     * or the buffer length is reached.
+     * The buffer must contain a trailing space or NUL,
+     * and may contain an additional trailing space or NUL.
      *
-     * Behaviour with non-octal input is currently undefined.
-     * 
+     * The input buffer is allowed to contain all NULs,
+     * in which case the method returns 0L
+     * (this allows for missing fields).
+     *
      * @param buffer The buffer from which to parse.
      * @param offset The offset into the buffer from which to parse.
-     * @param length The maximum number of bytes to parse.
+     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
      * @return The long value of the octal string.
+     * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected.
      */
-    public static long parseOctal(byte[] buffer, final int offset, final int length) {
+    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
         long    result = 0;
-        boolean stillPadding = true;
         int     end = offset + length;
+        int     start = offset;
 
-        for (int i = offset; i < end; ++i) {
-            final byte currentByte = buffer[i];
-            if (currentByte == 0) { // Found trailing null
+        if (length < 2){
+            throw new IllegalArgumentException("Length "+length+" must be at least 2");
+        }
+
+        boolean allNUL = true;
+        for (int i = start; i < end; i++){
+            if (buffer[i] != 0){
+                allNUL = false;
                 break;
             }
+        }
+        if (allNUL) {
+            return 0L;
+        }
 
-            // Ignore leading spaces ('0' can be ignored anyway)
-            if (currentByte == (byte) ' ' || currentByte == '0') {
-                if (stillPadding) {
-                    continue;
-                }
-
-                if (currentByte == (byte) ' ') { // Found trailing space
-                    break;
-                }
+        // Skip leading spaces
+        while (start < end){
+            if (buffer[start] == ' '){
+                start++;
+            } else {
+                break;
             }
+        }
 
-            stillPadding = false;
+        // Must have trailing NUL or space
+        byte trailer;
+        trailer = buffer[end-1];
+        if (trailer == 0 || trailer == ' '){
+            end--;
+        } else {
+            throw new IllegalArgumentException(
+                    exceptionMessage(buffer, offset, length, end-1, trailer));
+        }
+        // May have additional NUL or space
+        trailer = buffer[end-1];
+        if (trailer == 0 || trailer == ' '){
+            end--;
+        }
+
+        for ( ;start < end; start++) {
+            final byte currentByte = buffer[start];
             // CheckStyle:MagicNumber OFF
             if (currentByte < '0' || currentByte > '7'){
                 throw new IllegalArgumentException(
-                        "Invalid octal digit at position "+i+" in '"+new String(buffer, offset, length)+"'");
+                        exceptionMessage(buffer, offset, length, start, currentByte));
             }
-            result = (result << 3) + (currentByte - '0');// TODO needs to reject invalid bytes
+            result = (result << 3) + (currentByte - '0'); // convert from ASCII
             // CheckStyle:MagicNumber ON
         }
 
         return result;
     }
 
+    // Helper method to generate the exception message
+    private static String exceptionMessage(byte[] buffer, final int offset,
+            final int length, int current, final byte currentByte) {
+        String string = new String(buffer, offset, length);
+        string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed
+        final String s = "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length;
+        return s;
+    }
+
     /**
      * Parse an entry name from a buffer.
      * Parsing stops when a NUL is found
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
index f7558b1..d541341 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
@@ -50,20 +50,66 @@
         buffer[buffer.length-1]=0;
         value = TarUtils.parseOctal(buffer,0, buffer.length);
         assertEquals(MAX_OCTAL, value);
+        buffer=new byte[]{0,0};
+        value = TarUtils.parseOctal(buffer,0, buffer.length);
+        assertEquals(0, value);        
+        buffer=new byte[]{0,' '};
+        value = TarUtils.parseOctal(buffer,0, buffer.length);
+        assertEquals(0, value);        
+    }
+
+    public void testParseOctalInvalid() throws Exception{
+        byte [] buffer;
+        buffer=new byte[0]; // empty byte array
+        try {
+            TarUtils.parseOctal(buffer,0, buffer.length);
+            fail("Expected IllegalArgumentException - should be at least 2 bytes long");
+        } catch (IllegalArgumentException expected) {
+        }
+        buffer=new byte[]{0}; // 1-byte array
+        try {
+            TarUtils.parseOctal(buffer,0, buffer.length);
+            fail("Expected IllegalArgumentException - should be at least 2 bytes long");
+        } catch (IllegalArgumentException expected) {
+        }
+        buffer=new byte[]{0,0,' '}; // not all NULs
+        try {
+            TarUtils.parseOctal(buffer,0, buffer.length);
+            fail("Expected IllegalArgumentException - not all NULs");
+        } catch (IllegalArgumentException expected) {
+        }
+        buffer=new byte[]{' ',0,0,0}; // not all NULs
+        try {
+            TarUtils.parseOctal(buffer,0, buffer.length);
+            fail("Expected IllegalArgumentException - not all NULs");
+        } catch (IllegalArgumentException expected) {
+        }
         buffer = "abcdef ".getBytes("UTF-8"); // Invalid input
         try {
             TarUtils.parseOctal(buffer,0, buffer.length);
             fail("Expected IllegalArgumentException");
         } catch (IllegalArgumentException expected) {
         }
-        buffer = "77777777777".getBytes("UTF-8"); // Invalid input
+        buffer = "77777777777".getBytes("UTF-8"); // Invalid input - no trailer
         try {
             TarUtils.parseOctal(buffer,0, buffer.length);
-            fail("Expected IllegalArgumentException");
+            fail("Expected IllegalArgumentException - no trailer");
+        } catch (IllegalArgumentException expected) {
+        }
+        buffer = " 0 07 ".getBytes("UTF-8"); // Invalid - embedded space
+        try {
+            TarUtils.parseOctal(buffer,0, buffer.length);
+            fail("Expected IllegalArgumentException - embedded space");
+        } catch (IllegalArgumentException expected) {
+        }
+        buffer = " 0\00007 ".getBytes("UTF-8"); // Invalid - embedded NUL
+        try {
+            TarUtils.parseOctal(buffer,0, buffer.length);
+            fail("Expected IllegalArgumentException - embedded NUL");
         } catch (IllegalArgumentException expected) {
         }
     }
-    
+
     private void checkRoundTripOctal(final long value) {
         byte [] buffer = new byte[12];
         long parseValue;