Merge pull request #895 from facebook/fileSize_unknown
Distinguish 0-size from size-unavailable
diff --git a/programs/bench.c b/programs/bench.c
index ec99c61..efec189 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -551,6 +551,11 @@
fileSizes[n] = 0;
continue;
}
+ if (fileSize == UTIL_FILESIZE_UNKNOWN) {
+ DISPLAYLEVEL(2, "Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]);
+ fileSizes[n] = 0;
+ continue;
+ }
f = fopen(fileNamesTable[n], "rb");
if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
DISPLAYUPDATE(2, "Loading %s... \r", fileNamesTable[n]);
@@ -581,11 +586,14 @@
/* Load dictionary */
if (dictFileName != NULL) {
- U64 dictFileSize = UTIL_getFileSize(dictFileName);
- if (dictFileSize > 64 MB) EXM_THROW(10, "dictionary file %s too large", dictFileName);
+ U64 const dictFileSize = UTIL_getFileSize(dictFileName);
+ if (dictFileSize > 64 MB)
+ EXM_THROW(10, "dictionary file %s too large", dictFileName);
dictBufferSize = (size_t)dictFileSize;
dictBuffer = malloc(dictBufferSize);
- if (dictBuffer==NULL) EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize);
+ if (dictBuffer==NULL)
+ EXM_THROW(11, "not enough memory for dictionary (%u bytes)",
+ (U32)dictBufferSize);
BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1);
}
diff --git a/programs/dibio.c b/programs/dibio.c
index 2cb2a42..dea3ec4 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -117,7 +117,7 @@
for (fileIndex=0; fileIndex<nbFiles; fileIndex++) {
const char* const fileName = fileNamesTable[fileIndex];
unsigned long long const fs64 = UTIL_getFileSize(fileName);
- unsigned long long remainingToLoad = fs64;
+ unsigned long long remainingToLoad = (fs64 == UTIL_FILESIZE_UNKNOWN) ? 0 : fs64;
U32 const nbChunks = targetChunkSize ? (U32)((fs64 + (targetChunkSize-1)) / targetChunkSize) : 1;
U64 const chunkSize = targetChunkSize ? MIN(targetChunkSize, fs64) : fs64;
size_t const maxChunkSize = (size_t)MIN(chunkSize, SAMPLESIZE_MAX);
@@ -245,8 +245,9 @@
memset(&fs, 0, sizeof(fs));
for (n=0; n<nbFiles; n++) {
U64 const fileSize = UTIL_getFileSize(fileNamesTable[n]);
- U32 const nbSamples = (U32)(chunkSize ? (fileSize + (chunkSize-1)) / chunkSize : 1);
- U64 const chunkToLoad = chunkSize ? MIN(chunkSize, fileSize) : fileSize;
+ U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? 0 : fileSize;
+ U32 const nbSamples = (U32)(chunkSize ? (srcSize + (chunkSize-1)) / chunkSize : 1);
+ U64 const chunkToLoad = chunkSize ? MIN(chunkSize, srcSize) : srcSize;
size_t const cappedChunkSize = (size_t)MIN(chunkToLoad, SAMPLESIZE_MAX);
fs.totalSizeToLoad += cappedChunkSize * nbSamples;
fs.oneSampleTooLarge |= (chunkSize > 2*SAMPLESIZE_MAX);
diff --git a/programs/fileio.c b/programs/fileio.c
index 03e6538..4ddb8f8 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -25,7 +25,7 @@
* Includes
***************************************/
#include "platform.h" /* Large Files support, SET_BINARY_MODE */
-#include "util.h" /* UTIL_getFileSize */
+#include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile */
#include <stdio.h> /* fprintf, fopen, fread, _fileno, stdin, stdout */
#include <stdlib.h> /* malloc, free */
#include <string.h> /* strcmp, strlen */
@@ -564,7 +564,7 @@
strm.avail_out = (uInt)ress->dstBufferSize;
}
}
- if (!srcFileSize)
+ if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
(U32)(inFileSize>>20),
(double)outFileSize/inFileSize*100)
@@ -651,7 +651,7 @@
strm.next_out = (BYTE*)ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
} }
- if (!srcFileSize)
+ if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
(U32)(inFileSize>>20),
(double)outFileSize/inFileSize*100)
@@ -697,18 +697,17 @@
prefs.frameInfo.blockSizeID = LZ4F_max4MB;
prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)g_checksumFlag;
#if LZ4_VERSION_NUMBER >= 10600
- prefs.frameInfo.contentSize = srcFileSize;
+ prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
#endif
- {
- size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max4MB);
+ { size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max4MB);
size_t readSize;
size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs);
if (LZ4F_isError(headerSize))
EXM_THROW(33, "File header generation failed : %s",
LZ4F_getErrorName(headerSize));
- { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
- if (sizeCheck!=headerSize) EXM_THROW(34, "Write error : cannot write header"); }
+ if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize)
+ EXM_THROW(34, "Write error : cannot write header");
outFileSize += headerSize;
/* Read first block */
@@ -725,7 +724,7 @@
EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
srcFileName, LZ4F_getErrorName(outSize));
outFileSize += outSize;
- if (!srcFileSize)
+ if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
(U32)(inFileSize>>20),
(double)outFileSize/inFileSize*100)
@@ -816,12 +815,12 @@
/* init */
#ifdef ZSTD_NEWAPI
- if (fileSize!=0) /* when src is stdin, fileSize==0, but is effectively unknown */
+ if (fileSize!=UTIL_FILESIZE_UNKNOWN) /* pledge a source size only when it is known; 0 is a valid size, no longer a stand-in for "unknown" */
ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize);
#elif defined(ZSTD_MULTITHREAD)
- CHECK( ZSTDMT_resetCStream(ress.cctx, fileSize ? fileSize : ZSTD_CONTENTSIZE_UNKNOWN) );
+ CHECK( ZSTDMT_resetCStream(ress.cctx, (fileSize==UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize) );
#else
- CHECK( ZSTD_resetCStream(ress.cctx, fileSize ? fileSize : ZSTD_CONTENTSIZE_UNKNOWN) );
+ CHECK( ZSTD_resetCStream(ress.cctx, (fileSize==UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize) );
#endif
/* Main compression loop */
@@ -851,18 +850,18 @@
compressedfilesize += outBuff.pos;
} }
if (g_nbThreads > 1) {
- if (!fileSize)
+ if (fileSize == UTIL_FILESIZE_UNKNOWN)
DISPLAYUPDATE(2, "\rRead : %u MB", (U32)(readsize>>20))
else
DISPLAYUPDATE(2, "\rRead : %u / %u MB",
(U32)(readsize>>20), (U32)(fileSize>>20));
} else {
- if (!fileSize)
+ if (fileSize == UTIL_FILESIZE_UNKNOWN)
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
(U32)(readsize>>20),
(double)compressedfilesize/readsize*100)
else
- DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
+ DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
(U32)(readsize>>20), (U32)(fileSize>>20),
(double)compressedfilesize/readsize*100);
}
@@ -985,7 +984,8 @@
const char* dictFileName, int compressionLevel, ZSTD_compressionParameters* comprParams)
{
clock_t const start = clock();
- U64 const srcSize = UTIL_getFileSize(srcFileName);
+ U64 const fileSize = UTIL_getFileSize(srcFileName);
+ U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize;
cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams);
int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
@@ -1007,7 +1007,9 @@
size_t dfnSize = FNSPACE;
char* dstFileName = (char*)malloc(FNSPACE);
size_t const suffixSize = suffix ? strlen(suffix) : 0;
- U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : UTIL_getFileSize(inFileNamesTable[0]) ;
+ U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]);
+ U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize;
+ U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ;
cRess_t ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams);
/* init */
@@ -1799,7 +1801,7 @@
* 2 for file not compressed with zstd
* 3 for cases in which file could not be opened.
*/
-static int getFileInfo(fileInfo_t* info, const char* inFileName){
+static int getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName){
int detectError = 0;
FILE* const srcFile = FIO_openSrcFile(inFileName);
if (srcFile == NULL) {
@@ -1815,7 +1817,8 @@
if (numBytesRead < ZSTD_frameHeaderSize_min) {
if ( feof(srcFile)
&& (numBytesRead == 0)
- && (info->compressedSize > 0) ) {
+ && (info->compressedSize > 0)
+ && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) {
break;
}
else if (feof(srcFile)) {
@@ -1928,6 +1931,17 @@
return detectError;
}
+static int getFileInfo(fileInfo_t* info, const char* srcFileName) /* checks srcFileName before delegating to getFileInfo_fileConfirmed() */
+{
+ int const isAFile = UTIL_isRegularFile(srcFileName);
+ if (!isAFile) { /* UTIL_isRegularFile() said no: report it and skip the inspection entirely */
+ DISPLAY("Error : %s is not a file", srcFileName);
+ return 3; /* same code the header comment above lists for "file could not be opened" */
+ }
+ return getFileInfo_fileConfirmed(info, srcFileName); /* result code comes straight from the inner routine */
+}
+
+
static void displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel){
unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB);
const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB";
diff --git a/programs/util.h b/programs/util.h
index c8be5f5..c5e4365 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -313,33 +313,40 @@
}
+#define UTIL_FILESIZE_UNKNOWN ((U64)(-1)) /* sentinel: size could not be determined; distinct from a genuine size of 0 */
UTIL_STATIC U64 UTIL_getFileSize(const char* infilename) /* returns st_size of infilename, or UTIL_FILESIZE_UNKNOWN when it cannot be obtained */
{
- int r;
+ if (!UTIL_isRegularFile(infilename)) return UTIL_FILESIZE_UNKNOWN; /* early exit: previously such inputs were reported as size 0 */
+ { int r; /* nested scope so the declarations may follow the early return above */
#if defined(_MSC_VER)
- struct __stat64 statbuf;
- r = _stat64(infilename, &statbuf);
- if (r || !(statbuf.st_mode & S_IFREG)) return 0; /* No good... */
+ struct __stat64 statbuf;
+ r = _stat64(infilename, &statbuf);
+ if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN; /* stat call failed, or mode is not a regular file */
#elif defined(__MINGW32__) && defined (__MSVCRT__)
- struct _stati64 statbuf;
- r = _stati64(infilename, &statbuf);
- if (r || !(statbuf.st_mode & S_IFREG)) return 0; /* No good... */
+ struct _stati64 statbuf;
+ r = _stati64(infilename, &statbuf);
+ if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN; /* stat call failed, or mode is not a regular file */
#else
- struct stat statbuf;
- r = stat(infilename, &statbuf);
- if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */
+ struct stat statbuf;
+ r = stat(infilename, &statbuf);
+ if (r || !S_ISREG(statbuf.st_mode)) return UTIL_FILESIZE_UNKNOWN; /* stat call failed, or mode is not a regular file */
#endif
- return (U64)statbuf.st_size;
+ return (U64)statbuf.st_size; /* may legitimately be 0 */
+ }
}
UTIL_STATIC U64 UTIL_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles) /* sums UTIL_getFileSize() over the table; UTIL_FILESIZE_UNKNOWN if any entry is unknown */
{
U64 total = 0;
+ int error = 0; /* becomes non-zero once any file reports an unknown size */
unsigned n;
- for (n=0; n<nbFiles; n++)
- total += UTIL_getFileSize(fileNamesTable[n]);
- return total;
+ for (n=0; n<nbFiles; n++) {
+ U64 const size = UTIL_getFileSize(fileNamesTable[n]);
+ error |= (size == UTIL_FILESIZE_UNKNOWN);
+ total += size; /* may add the sentinel itself; harmless because total is discarded when error is set */
+ }
+ return error ? UTIL_FILESIZE_UNKNOWN : total; /* a single unknown size makes the whole total unknown */
}
diff --git a/tests/fullbench.c b/tests/fullbench.c
index 3f497b0..2ec3ce5 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -484,8 +484,8 @@
/* Loop for each file */
int fileIdx;
for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
- const char* inFileName = fileNamesTable[fileIdx];
- FILE* inFile = fopen( inFileName, "rb" );
+ const char* const inFileName = fileNamesTable[fileIdx];
+ FILE* const inFile = fopen( inFileName, "rb" );
U64 inFileSize;
size_t benchedSize;
void* origBuff;
@@ -495,6 +495,11 @@
/* Memory allocation & restrictions */
inFileSize = UTIL_getFileSize(inFileName);
+ if (inFileSize == UTIL_FILESIZE_UNKNOWN) {
+ DISPLAY( "Cannot measure size of %s\n", inFileName);
+ fclose(inFile);
+ return 11;
+ }
benchedSize = BMK_findMaxMem(inFileSize*3) / 3;
if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
if (benchedSize < inFileSize)
diff --git a/tests/paramgrill.c b/tests/paramgrill.c
index 317ec46..a387a44 100644
--- a/tests/paramgrill.c
+++ b/tests/paramgrill.c
@@ -687,6 +687,11 @@
DISPLAY( "Pb opening %s\n", inFileName);
return 11;
}
+ if (inFileSize == UTIL_FILESIZE_UNKNOWN) {
+ DISPLAY("Pb evaluatin size of %s \n", inFileName);
+ fclose(inFile);
+ return 11;
+ }
/* Memory allocation */
benchedSize = BMK_findMaxMem(inFileSize*3) / 3;
@@ -740,6 +745,11 @@
/* Init */
if (inFile==NULL) { DISPLAY( "Pb opening %s\n", inFileName); return 11; }
+ if (inFileSize == UTIL_FILESIZE_UNKNOWN) {
+ DISPLAY("Pb evaluatin size of %s \n", inFileName);
+ fclose(inFile);
+ return 11;
+ }
/* Memory allocation & restrictions */
if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
diff --git a/tests/playTests.sh b/tests/playTests.sh
index f008d89..299c2d8 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -621,7 +621,7 @@
$ZSTD tmp5
$ZSTD -l tmp5.zst
! $ZSTD -l tmp5*
-$ZSTD -lv tmp5.zst
+$ZSTD -lv tmp5.zst | grep "Decompressed Size: 0.00 KB (0 B)" # check that 0 size is present in header
! $ZSTD -lv tmp5*
$ECHO "\n===> zstd --list/-l test with no content size field "
diff --git a/zlibWrapper/examples/zwrapbench.c b/zlibWrapper/examples/zwrapbench.c
index 25a23f8..9226932 100644
--- a/zlibWrapper/examples/zwrapbench.c
+++ b/zlibWrapper/examples/zwrapbench.c
@@ -684,6 +684,11 @@
fileSizes[n] = 0;
continue;
}
+ if (fileSize == UTIL_FILESIZE_UNKNOWN) {
+ DISPLAYLEVEL(2, "Cannot determine size of %s ... \n", fileNamesTable[n]);
+ fileSizes[n] = 0;
+ continue;
+ }
f = fopen(fileNamesTable[n], "rb");
if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
DISPLAYUPDATE(2, "Loading %s... \r", fileNamesTable[n]);
@@ -714,11 +719,13 @@
/* Load dictionary */
if (dictFileName != NULL) {
- U64 dictFileSize = UTIL_getFileSize(dictFileName);
- if (dictFileSize > 64 MB) EXM_THROW(10, "dictionary file %s too large", dictFileName);
+ U64 const dictFileSize = UTIL_getFileSize(dictFileName);
+ if (dictFileSize > 64 MB)
+ EXM_THROW(10, "dictionary file %s too large", dictFileName);
dictBufferSize = (size_t)dictFileSize;
dictBuffer = malloc(dictBufferSize);
- if (dictBuffer==NULL) EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize);
+ if (dictBuffer==NULL)
+ EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize);
BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1);
}