Merge pull request #895 from facebook/fileSize_unknown

Distinguish 0-size from size-unavailable
diff --git a/programs/bench.c b/programs/bench.c
index ec99c61..efec189 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -551,6 +551,11 @@
             fileSizes[n] = 0;
             continue;
         }
+        if (fileSize == UTIL_FILESIZE_UNKNOWN) {
+            DISPLAYLEVEL(2, "Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]);
+            fileSizes[n] = 0;
+            continue;
+        }
         f = fopen(fileNamesTable[n], "rb");
         if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
         DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
@@ -581,11 +586,14 @@
 
     /* Load dictionary */
     if (dictFileName != NULL) {
-        U64 dictFileSize = UTIL_getFileSize(dictFileName);
-        if (dictFileSize > 64 MB) EXM_THROW(10, "dictionary file %s too large", dictFileName);
+        U64 const dictFileSize = UTIL_getFileSize(dictFileName);
+        if (dictFileSize > 64 MB)
+            EXM_THROW(10, "dictionary file %s too large", dictFileName);
         dictBufferSize = (size_t)dictFileSize;
         dictBuffer = malloc(dictBufferSize);
-        if (dictBuffer==NULL) EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize);
+        if (dictBuffer==NULL)
+            EXM_THROW(11, "not enough memory for dictionary (%u bytes)",
+                            (U32)dictBufferSize);
         BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1);
     }
 
diff --git a/programs/dibio.c b/programs/dibio.c
index 2cb2a42..dea3ec4 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -117,7 +117,7 @@
     for (fileIndex=0; fileIndex<nbFiles; fileIndex++) {
         const char* const fileName = fileNamesTable[fileIndex];
         unsigned long long const fs64 = UTIL_getFileSize(fileName);
-        unsigned long long remainingToLoad = fs64;
+        unsigned long long remainingToLoad = (fs64 == UTIL_FILESIZE_UNKNOWN) ? 0 : fs64;
         U32 const nbChunks = targetChunkSize ? (U32)((fs64 + (targetChunkSize-1)) / targetChunkSize) : 1;
         U64 const chunkSize = targetChunkSize ? MIN(targetChunkSize, fs64) : fs64;
         size_t const maxChunkSize = (size_t)MIN(chunkSize, SAMPLESIZE_MAX);
@@ -245,8 +245,9 @@
     memset(&fs, 0, sizeof(fs));
     for (n=0; n<nbFiles; n++) {
         U64 const fileSize = UTIL_getFileSize(fileNamesTable[n]);
-        U32 const nbSamples = (U32)(chunkSize ? (fileSize + (chunkSize-1)) / chunkSize : 1);
-        U64 const chunkToLoad = chunkSize ? MIN(chunkSize, fileSize) : fileSize;
+        U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? 0 : fileSize;
+        U32 const nbSamples = (U32)(chunkSize ? (srcSize + (chunkSize-1)) / chunkSize : 1);
+        U64 const chunkToLoad = chunkSize ? MIN(chunkSize, srcSize) : srcSize;
         size_t const cappedChunkSize = (size_t)MIN(chunkToLoad, SAMPLESIZE_MAX);
         fs.totalSizeToLoad += cappedChunkSize * nbSamples;
         fs.oneSampleTooLarge |= (chunkSize > 2*SAMPLESIZE_MAX);
diff --git a/programs/fileio.c b/programs/fileio.c
index 03e6538..4ddb8f8 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -25,7 +25,7 @@
 *  Includes
 ***************************************/
 #include "platform.h"   /* Large Files support, SET_BINARY_MODE */
-#include "util.h"       /* UTIL_getFileSize */
+#include "util.h"       /* UTIL_getFileSize, UTIL_isRegularFile */
 #include <stdio.h>      /* fprintf, fopen, fread, _fileno, stdin, stdout */
 #include <stdlib.h>     /* malloc, free */
 #include <string.h>     /* strcmp, strlen */
@@ -564,7 +564,7 @@
                 strm.avail_out = (uInt)ress->dstBufferSize;
             }
         }
-        if (!srcFileSize)
+        if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
             DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
                             (U32)(inFileSize>>20),
                             (double)outFileSize/inFileSize*100)
@@ -651,7 +651,7 @@
                 strm.next_out = (BYTE*)ress->dstBuffer;
                 strm.avail_out = ress->dstBufferSize;
         }   }
-        if (!srcFileSize)
+        if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
             DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
                             (U32)(inFileSize>>20),
                             (double)outFileSize/inFileSize*100)
@@ -697,18 +697,17 @@
     prefs.frameInfo.blockSizeID = LZ4F_max4MB;
     prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)g_checksumFlag;
 #if LZ4_VERSION_NUMBER >= 10600
-    prefs.frameInfo.contentSize = srcFileSize;
+    prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
 #endif
 
-    {
-        size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max4MB);
+    {   size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max4MB);
         size_t readSize;
         size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs);
         if (LZ4F_isError(headerSize))
             EXM_THROW(33, "File header generation failed : %s",
                             LZ4F_getErrorName(headerSize));
-        { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
-          if (sizeCheck!=headerSize) EXM_THROW(34, "Write error : cannot write header"); }
+        if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize)
+            EXM_THROW(34, "Write error : cannot write header");
         outFileSize += headerSize;
 
         /* Read first block */
@@ -725,7 +724,7 @@
                 EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
                             srcFileName, LZ4F_getErrorName(outSize));
             outFileSize += outSize;
-            if (!srcFileSize)
+            if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
                 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
                                 (U32)(inFileSize>>20),
                                 (double)outFileSize/inFileSize*100)
@@ -816,12 +815,12 @@
 
     /* init */
 #ifdef ZSTD_NEWAPI
-    if (fileSize!=0)  /* when src is stdin, fileSize==0, but is effectively unknown */
+    if (fileSize!=UTIL_FILESIZE_UNKNOWN)  /* size is unknown when src is not a regular file (e.g. stdin) : nothing to pledge then */
         ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize);
 #elif defined(ZSTD_MULTITHREAD)
-    CHECK( ZSTDMT_resetCStream(ress.cctx, fileSize ? fileSize : ZSTD_CONTENTSIZE_UNKNOWN) );
+    CHECK( ZSTDMT_resetCStream(ress.cctx, (fileSize==UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize) );
 #else
-    CHECK( ZSTD_resetCStream(ress.cctx, fileSize ? fileSize : ZSTD_CONTENTSIZE_UNKNOWN) );
+    CHECK( ZSTD_resetCStream(ress.cctx, (fileSize==UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize) );
 #endif
 
     /* Main compression loop */
@@ -851,18 +850,18 @@
                 compressedfilesize += outBuff.pos;
         }   }
         if (g_nbThreads > 1) {
-            if (!fileSize)
+            if (fileSize == UTIL_FILESIZE_UNKNOWN)
                 DISPLAYUPDATE(2, "\rRead : %u MB", (U32)(readsize>>20))
             else
                 DISPLAYUPDATE(2, "\rRead : %u / %u MB",
                                     (U32)(readsize>>20), (U32)(fileSize>>20));
         } else {
-            if (!fileSize)
+            if (fileSize == UTIL_FILESIZE_UNKNOWN)
                 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
                                 (U32)(readsize>>20),
                                 (double)compressedfilesize/readsize*100)
             else
-            DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
+                DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
                                 (U32)(readsize>>20), (U32)(fileSize>>20),
                                 (double)compressedfilesize/readsize*100);
         }
@@ -985,7 +984,8 @@
                          const char* dictFileName, int compressionLevel, ZSTD_compressionParameters* comprParams)
 {
     clock_t const start = clock();
-    U64 const srcSize = UTIL_getFileSize(srcFileName);
+    U64 const fileSize = UTIL_getFileSize(srcFileName);
+    U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize;
 
     cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams);
     int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
@@ -1007,7 +1007,9 @@
     size_t dfnSize = FNSPACE;
     char*  dstFileName = (char*)malloc(FNSPACE);
     size_t const suffixSize = suffix ? strlen(suffix) : 0;
-    U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : UTIL_getFileSize(inFileNamesTable[0]) ;
+    U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]);
+    U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize;
+    U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ;
     cRess_t ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams);
 
     /* init */
@@ -1799,7 +1801,7 @@
  *           2 for file not compressed with zstd
  *           3 for cases in which file could not be opened.
  */
-static int getFileInfo(fileInfo_t* info, const char* inFileName){
+static int getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName){
     int detectError = 0;
     FILE* const srcFile = FIO_openSrcFile(inFileName);
     if (srcFile == NULL) {
@@ -1815,7 +1817,8 @@
         if (numBytesRead < ZSTD_frameHeaderSize_min) {
             if ( feof(srcFile)
               && (numBytesRead == 0)
-              && (info->compressedSize > 0) ) {
+              && (info->compressedSize > 0)
+              && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) {
                 break;
             }
             else if (feof(srcFile)) {
@@ -1928,6 +1931,17 @@
     return detectError;
 }
 
+static int getFileInfo(fileInfo_t* info, const char* srcFileName)
+{
+    int const isAFile = UTIL_isRegularFile(srcFileName);
+    if (!isAFile) {
+        DISPLAY("Error : %s is not a file", srcFileName);
+        return 3;
+    }
+    return getFileInfo_fileConfirmed(info, srcFileName);
+}
+
+
 static void displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel){
     unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB);
     const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB";
diff --git a/programs/util.h b/programs/util.h
index c8be5f5..c5e4365 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -313,33 +313,40 @@
 }
 
 
+#define UTIL_FILESIZE_UNKNOWN  ((U64)(-1))
 UTIL_STATIC U64 UTIL_getFileSize(const char* infilename)
 {
-    int r;
+    if (!UTIL_isRegularFile(infilename)) return UTIL_FILESIZE_UNKNOWN;
+    {   int r;
 #if defined(_MSC_VER)
-    struct __stat64 statbuf;
-    r = _stat64(infilename, &statbuf);
-    if (r || !(statbuf.st_mode & S_IFREG)) return 0;   /* No good... */
+        struct __stat64 statbuf;
+        r = _stat64(infilename, &statbuf);
+        if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN;
 #elif defined(__MINGW32__) && defined (__MSVCRT__)
-    struct _stati64 statbuf;
-    r = _stati64(infilename, &statbuf);
-    if (r || !(statbuf.st_mode & S_IFREG)) return 0;   /* No good... */
+        struct _stati64 statbuf;
+        r = _stati64(infilename, &statbuf);
+        if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN;
 #else
-    struct stat statbuf;
-    r = stat(infilename, &statbuf);
-    if (r || !S_ISREG(statbuf.st_mode)) return 0;   /* No good... */
+        struct stat statbuf;
+        r = stat(infilename, &statbuf);
+        if (r || !S_ISREG(statbuf.st_mode)) return UTIL_FILESIZE_UNKNOWN;
 #endif
-    return (U64)statbuf.st_size;
+        return (U64)statbuf.st_size;
+    }
 }
 
 
 UTIL_STATIC U64 UTIL_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles)
 {
     U64 total = 0;
+    int error = 0;
     unsigned n;
-    for (n=0; n<nbFiles; n++)
-        total += UTIL_getFileSize(fileNamesTable[n]);
-    return total;
+    for (n=0; n<nbFiles; n++) {
+        U64 const size = UTIL_getFileSize(fileNamesTable[n]);
+        error |= (size == UTIL_FILESIZE_UNKNOWN);
+        total += size;
+    }
+    return error ? UTIL_FILESIZE_UNKNOWN : total;
 }
 
 
diff --git a/tests/fullbench.c b/tests/fullbench.c
index 3f497b0..2ec3ce5 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -484,8 +484,8 @@
     /* Loop for each file */
     int fileIdx;
     for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
-        const char* inFileName = fileNamesTable[fileIdx];
-        FILE* inFile = fopen( inFileName, "rb" );
+        const char* const inFileName = fileNamesTable[fileIdx];
+        FILE* const inFile = fopen( inFileName, "rb" );
         U64   inFileSize;
         size_t benchedSize;
         void* origBuff;
@@ -495,6 +495,11 @@
 
         /* Memory allocation & restrictions */
         inFileSize = UTIL_getFileSize(inFileName);
+        if (inFileSize == UTIL_FILESIZE_UNKNOWN) {
+            DISPLAY( "Cannot measure size of %s\n", inFileName);
+            fclose(inFile);
+            return 11;
+        }
         benchedSize = BMK_findMaxMem(inFileSize*3) / 3;
         if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
         if (benchedSize < inFileSize)
diff --git a/tests/paramgrill.c b/tests/paramgrill.c
index 317ec46..a387a44 100644
--- a/tests/paramgrill.c
+++ b/tests/paramgrill.c
@@ -687,6 +687,11 @@
             DISPLAY( "Pb opening %s\n", inFileName);
             return 11;
         }
+        if (inFileSize == UTIL_FILESIZE_UNKNOWN) {
+            DISPLAY("Pb evaluatin size of %s \n", inFileName);
+            fclose(inFile);
+            return 11;
+        }
 
         /* Memory allocation */
         benchedSize = BMK_findMaxMem(inFileSize*3) / 3;
@@ -740,6 +745,11 @@
 
     /* Init */
     if (inFile==NULL) { DISPLAY( "Pb opening %s\n", inFileName); return 11; }
+    if (inFileSize == UTIL_FILESIZE_UNKNOWN) {
+        DISPLAY("Pb evaluatin size of %s \n", inFileName);
+        fclose(inFile);
+        return 11;
+    }
 
     /* Memory allocation & restrictions */
     if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
diff --git a/tests/playTests.sh b/tests/playTests.sh
index f008d89..299c2d8 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -621,7 +621,7 @@
 $ZSTD tmp5
 $ZSTD -l tmp5.zst
 ! $ZSTD -l tmp5*
-$ZSTD -lv tmp5.zst
+$ZSTD -lv tmp5.zst | grep "Decompressed Size: 0.00 KB (0 B)"  # check that 0 size is present in header
 ! $ZSTD -lv tmp5*
 
 $ECHO "\n===>  zstd --list/-l test with no content size field "
diff --git a/zlibWrapper/examples/zwrapbench.c b/zlibWrapper/examples/zwrapbench.c
index 25a23f8..9226932 100644
--- a/zlibWrapper/examples/zwrapbench.c
+++ b/zlibWrapper/examples/zwrapbench.c
@@ -684,6 +684,11 @@
             fileSizes[n] = 0;
             continue;
         }
+        if (fileSize == UTIL_FILESIZE_UNKNOWN) {
+            DISPLAYLEVEL(2, "Cannot determine size of %s ...    \n", fileNamesTable[n]);
+            fileSizes[n] = 0;
+            continue;
+        }
         f = fopen(fileNamesTable[n], "rb");
         if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
         DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
@@ -714,11 +719,13 @@
 
     /* Load dictionary */
     if (dictFileName != NULL) {
-        U64 dictFileSize = UTIL_getFileSize(dictFileName);
-        if (dictFileSize > 64 MB) EXM_THROW(10, "dictionary file %s too large", dictFileName);
+        U64 const dictFileSize = UTIL_getFileSize(dictFileName);
+        if (dictFileSize > 64 MB)
+            EXM_THROW(10, "dictionary file %s too large", dictFileName);
         dictBufferSize = (size_t)dictFileSize;
         dictBuffer = malloc(dictBufferSize);
-        if (dictBuffer==NULL) EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize);
+        if (dictBuffer==NULL)
+            EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize);
         BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1);
     }