Add support for storing compressed logs

If log_store_compressed is set, fio won't decompress for storing
them in a file. Instead, you can use --inflate-log=logfile.fz to
output the contents to stdout.

Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/HOWTO b/HOWTO
index e94d09f..fbc455d 100644
--- a/HOWTO
+++ b/HOWTO
@@ -1350,6 +1350,12 @@
 		in the specified log file. This feature depends on the
 		availability of zlib.
 
+log_store_compressed=bool	If set, and log_compression is also set,
+		fio will store the log files in a compressed format. They
+		can be decompressed with fio, using the --inflate-log
+		command line parameter. The files will be stored with a
+		.fz suffix.
+
 lockmem=int	Pin down the specified amount of memory with mlock(2). Can
 		potentially be used instead of removing memory or booting
 		with less memory to simulate a smaller amount of memory.
diff --git a/README b/README
index 1f72876..457b83d 100644
--- a/README
+++ b/README
@@ -176,6 +176,7 @@
 	--idle-prof=option	Report cpu idleness on a system or percpu basis
 				(option=system,percpu) or run unit work
 				calibration only (option=calibrate).
+	--inflate-log=log	Inflate and output compressed log
 
 
 Any parameters following the options will be assumed to be job files,
diff --git a/cconv.c b/cconv.c
index aeec04b..d4fb158 100644
--- a/cconv.c
+++ b/cconv.c
@@ -153,6 +153,7 @@
 	o->log_avg_msec = le32_to_cpu(top->log_avg_msec);
 	o->log_offset = le32_to_cpu(top->log_offset);
 	o->log_gz = le32_to_cpu(top->log_gz);
+	o->log_gz_store = le32_to_cpu(top->log_gz_store);
 	o->norandommap = le32_to_cpu(top->norandommap);
 	o->softrandommap = le32_to_cpu(top->softrandommap);
 	o->bs_unaligned = le32_to_cpu(top->bs_unaligned);
@@ -325,6 +326,7 @@
 	top->log_avg_msec = cpu_to_le32(o->log_avg_msec);
 	top->log_offset = cpu_to_le32(o->log_offset);
 	top->log_gz = cpu_to_le32(o->log_gz);
+	top->log_gz_store = cpu_to_le32(o->log_gz_store);
 	top->norandommap = cpu_to_le32(o->norandommap);
 	top->softrandommap = cpu_to_le32(o->softrandommap);
 	top->bs_unaligned = cpu_to_le32(o->bs_unaligned);
diff --git a/client.c b/client.c
index e70a27d..1f52734 100644
--- a/client.c
+++ b/client.c
@@ -1237,10 +1237,10 @@
 		struct io_sample *s;
 
 		s = __get_sample(samples, ret->log_offset, i);
-		s->time	= le64_to_cpu(s->time);
-		s->val	= le64_to_cpu(s->val);
-		s->ddir	= le32_to_cpu(s->ddir);
-		s->bs	= le32_to_cpu(s->bs);
+		s->time		= le64_to_cpu(s->time);
+		s->val		= le64_to_cpu(s->val);
+		s->__ddir	= le32_to_cpu(s->__ddir);
+		s->bs		= le32_to_cpu(s->bs);
 
 		if (ret->log_offset) {
 			struct io_sample_offset *so = (void *) s;
diff --git a/configure b/configure
index 5fd0c50..9c64d14 100755
--- a/configure
+++ b/configure
@@ -420,7 +420,7 @@
 # zlib probe
 zlib="no"
 cat > $TMPC <<EOF
-#include <zlib.h>
+#include <zlib2.h>
 int main(void)
 {
   z_stream stream;
diff --git a/fio.1 b/fio.1
index 1a04e6c..c58e817 100644
--- a/fio.1
+++ b/fio.1
@@ -1225,6 +1225,12 @@
 normally at the end of a run, by decompressing the chunks and storing them
 in the specified log file. This feature depends on the availability of zlib.
 .TP
+.BI log_store_compressed \fR=\fPbool
+If set, and \fBlog\fR_compression is also set, fio will store the log files in
+a compressed format. They can be decompressed with fio, using the
+\fB\-\-inflate-log\fR command line parameter. The files will be stored with a
+\fB\.fz\fR suffix.
+.TP
 .BI disable_lat \fR=\fPbool
 Disable measurements of total latency numbers. Useful only for cutting
 back the number of calls to \fBgettimeofday\fR\|(2), as that does impact performance at
diff --git a/init.c b/init.c
index 678d598..91cf70d 100644
--- a/init.c
+++ b/init.c
@@ -170,6 +170,13 @@
 		.has_arg	= required_argument,
 		.val		= 'x' | FIO_CLIENT_FLAG,
 	},
+#ifdef CONFIG_ZLIB
+	{
+		.name		= (char *) "inflate-log",
+		.has_arg	= required_argument,
+		.val		= 'X' | FIO_CLIENT_FLAG,
+	},
+#endif
 	{
 		.name		= (char *) "alloc-size",
 		.has_arg	= required_argument,
@@ -1151,16 +1158,23 @@
 			.log_type = IO_LOG_TYPE_LAT,
 			.log_offset = o->log_offset,
 			.log_gz = o->log_gz,
+			.log_gz_store = o->log_gz_store,
 		};
+		const char *suf;
 
-		snprintf(logname, sizeof(logname), "%s_lat.log",
-				o->lat_log_file);
+		if (p.log_gz_store)
+			suf = "log.fz";
+		else
+			suf = "log";
+
+		snprintf(logname, sizeof(logname), "%s_lat.%s",
+				o->lat_log_file, suf);
 		setup_log(&td->lat_log, &p, logname);
-		snprintf(logname, sizeof(logname), "%s_slat.log",
-				o->lat_log_file);
+		snprintf(logname, sizeof(logname), "%s_slat.%s",
+				o->lat_log_file, suf);
 		setup_log(&td->slat_log, &p, logname);
-		snprintf(logname, sizeof(logname), "%s_clat.log",
-				o->lat_log_file);
+		snprintf(logname, sizeof(logname), "%s_clat.%s",
+				o->lat_log_file, suf);
 		setup_log(&td->clat_log, &p, logname);
 	}
 	if (o->bw_log_file) {
@@ -1170,10 +1184,17 @@
 			.log_type = IO_LOG_TYPE_BW,
 			.log_offset = o->log_offset,
 			.log_gz = o->log_gz,
+			.log_gz_store = o->log_gz_store,
 		};
+		const char *suf;
 
-		snprintf(logname, sizeof(logname), "%s_bw.log",
-				o->bw_log_file);
+		if (p.log_gz_store)
+			suf = "log.fz";
+		else
+			suf = "log";
+
+		snprintf(logname, sizeof(logname), "%s_bw.%s",
+				o->bw_log_file, suf);
 		setup_log(&td->bw_log, &p, logname);
 	}
 	if (o->iops_log_file) {
@@ -1183,10 +1204,17 @@
 			.log_type = IO_LOG_TYPE_IOPS,
 			.log_offset = o->log_offset,
 			.log_gz = o->log_gz,
+			.log_gz_store = o->log_gz_store,
 		};
+		const char *suf;
 
-		snprintf(logname, sizeof(logname), "%s_iops.log",
-				o->iops_log_file);
+		if (p.log_gz_store)
+			suf = "log.fz";
+		else
+			suf = "log";
+
+		snprintf(logname, sizeof(logname), "%s_iops.%s",
+				o->iops_log_file, suf);
 		setup_log(&td->iops_log, &p, logname);
 	}
 
@@ -1579,6 +1607,9 @@
 	printf("  --idle-prof=option\tReport cpu idleness on a system or percpu basis\n"
 		"\t\t\t(option=system,percpu) or run unit work\n"
 		"\t\t\tcalibration only (option=calibrate)\n");
+#ifdef CONFIG_ZLIB
+	printf("  --inflate-log=log\tInflate and output compressed log\n");
+#endif
 	printf("\nFio was written by Jens Axboe <jens.axboe@oracle.com>");
 	printf("\n                   Jens Axboe <jaxboe@fusionio.com>");
 	printf("\n                   Jens Axboe <axboe@fb.com>\n");
@@ -1910,6 +1941,13 @@
 			nr_job_sections++;
 			break;
 			}
+#ifdef CONFIG_ZLIB
+		case 'X':
+			exit_val = iolog_file_inflate(optarg);
+			did_arg++;
+			do_exit++;
+			break;
+#endif
 		case 'p':
 			did_arg = 1;
 			if (exec_profile)
diff --git a/iolog.c b/iolog.c
index 7c05d32..0d07b0e 100644
--- a/iolog.c
+++ b/iolog.c
@@ -6,6 +6,9 @@
 #include <stdlib.h>
 #include <libgen.h>
 #include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
 #ifdef CONFIG_ZLIB
 #include <zlib.h>
 #endif
@@ -26,6 +29,7 @@
 	void *buf;
 	size_t len;
 	unsigned int seq;
+	int nofree;
 };
 
 #define GZ_CHUNK	131072
@@ -39,13 +43,16 @@
 	c->buf = malloc(GZ_CHUNK);
 	c->len = 0;
 	c->seq = seq;
+	c->nofree = 0;
 	return c;
 }
 
 static void free_chunk(struct iolog_compress *ic)
 {
-	free(ic->buf);
-	free(ic);
+	if (!ic->nofree) {
+		free(ic->buf);
+		free(ic);
+	}
 }
 
 #endif
@@ -586,11 +593,15 @@
 	l->log_type = p->log_type;
 	l->log_offset = p->log_offset;
 	l->log_gz = p->log_gz;
+	l->log_gz_store = p->log_gz_store;
 	l->log = malloc(l->max_samples * log_entry_sz(l));
 	l->avg_msec = p->avg_msec;
 	l->filename = strdup(filename);
 	l->td = p->td;
 
+	if (l->log_offset)
+		l->log_ddir_mask = 0x80000000;
+
 	INIT_FLIST_HEAD(&l->chunk_list);
 
 	if (l->log_gz && !p->td)
@@ -636,40 +647,57 @@
 	free(log);
 }
 
-static void flush_samples(FILE *f, void *samples, uint64_t nr_samples,
-			  int log_offset)
+static void flush_samples(FILE *f, void *samples, uint64_t sample_size)
 {
-	uint64_t i;
+	struct io_sample *s;
+	int log_offset;
+	uint64_t i, nr_samples;
+
+	if (!sample_size)
+		return;
+
+	s = __get_sample(samples, 0, 0);
+	if (s->__ddir & 0x80000000)
+		log_offset = 1;
+	else
+		log_offset = 0;
+
+	nr_samples = sample_size / __log_entry_sz(log_offset);
 
 	for (i = 0; i < nr_samples; i++) {
-		struct io_sample *s = __get_sample(samples, log_offset, i);
+		s = __get_sample(samples, log_offset, i);
 
 		if (!log_offset) {
 			fprintf(f, "%lu, %lu, %u, %u\n",
 					(unsigned long) s->time,
 					(unsigned long) s->val,
-					s->ddir, s->bs);
+					io_sample_ddir(s), s->bs);
 		} else {
 			struct io_sample_offset *so = (void *) s;
 
 			fprintf(f, "%lu, %lu, %u, %u, %llu\n",
 					(unsigned long) s->time,
 					(unsigned long) s->val,
-					s->ddir, s->bs,
+					io_sample_ddir(s), s->bs,
 					(unsigned long long) so->offset);
 		}
 	}
 }
 
 #ifdef CONFIG_ZLIB
-static int z_stream_init(z_stream *stream)
+static int z_stream_init(z_stream *stream, int gz_hdr)
 {
+	int wbits = 15;
+
 	stream->zalloc = Z_NULL;
 	stream->zfree = Z_NULL;
 	stream->opaque = Z_NULL;
 	stream->next_in = Z_NULL;
 
-	if (inflateInit(stream) != Z_OK)
+	if (gz_hdr)
+		wbits += 32;
+
+	if (inflateInit2(stream, wbits) != Z_OK)
 		return 1;
 
 	return 0;
@@ -683,31 +711,29 @@
 	size_t chunk_sz;
 };
 
-static void finish_chunk(z_stream *stream, int log_offset, FILE *f,
+static void finish_chunk(z_stream *stream, FILE *f,
 			 struct flush_chunk_iter *iter)
 {
-	uint64_t nr_samples;
 	int ret;
 
 	ret = inflateEnd(stream);
 	if (ret != Z_OK)
 		log_err("fio: failed to end log inflation (%d)\n", ret);
 
-	nr_samples = iter->buf_used / __log_entry_sz(log_offset);
-	flush_samples(f, iter->buf, nr_samples, log_offset);
+	flush_samples(f, iter->buf, iter->buf_used);
 	free(iter->buf);
 	iter->buf = NULL;
 	iter->buf_size = iter->buf_used = 0;
 }
 
-static int flush_chunk(struct iolog_compress *ic, int log_offset, FILE *f,
+static int flush_chunk(struct iolog_compress *ic, int gz_hdr, FILE *f,
 		       z_stream *stream, struct flush_chunk_iter *iter)
 {
 	if (ic->seq != iter->seq) {
 		if (iter->seq)
-			finish_chunk(stream, log_offset, f, iter);
+			finish_chunk(stream, f, iter);
 
-		z_stream_init(stream);
+		z_stream_init(stream, gz_hdr);
 		iter->seq = ic->seq;
 	}
 
@@ -720,9 +746,10 @@
 	}
 
 	while (stream->avail_in) {
+		size_t this_out = iter->buf_size - iter->buf_used;
 		int err;
 
-		stream->avail_out = iter->buf_size - iter->buf_used;
+		stream->avail_out = this_out;
 		stream->next_out = iter->buf + iter->buf_used;
 
 		err = inflate(stream, Z_NO_FLUSH);
@@ -731,7 +758,16 @@
 			break;
 		}
 
-		iter->buf_used += iter->buf_size - iter->buf_used - stream->avail_out;
+		iter->buf_used += this_out - stream->avail_out;
+
+		if (!stream->avail_out) {
+			iter->buf_size += iter->chunk_sz;
+			iter->buf = realloc(iter->buf, iter->buf_size);
+			continue;
+		}
+
+		if (err == Z_STREAM_END)
+			break;
 	}
 
 	free_chunk(ic);
@@ -750,15 +786,69 @@
 		node = log->chunk_list.next;
 		ic = flist_entry(node, struct iolog_compress, list);
 		flist_del(&ic->list);
-		flush_chunk(ic, log->log_offset, f, &stream, &iter);
+
+		if (log->log_gz_store) {
+			fwrite(ic->buf, ic->len, 1, f);
+			free_chunk(ic);
+		} else
+			flush_chunk(ic, log->log_gz_store, f, &stream, &iter);
 	}
 
 	if (iter.seq) {
-		finish_chunk(&stream, log->log_offset, f, &iter);
+		finish_chunk(&stream, f, &iter);
 		free(iter.buf);
 	}
 }
 
+int iolog_file_inflate(const char *file)
+{
+	struct flush_chunk_iter iter = { .chunk_sz = 64 * 1024 * 1024, };
+	struct iolog_compress ic;
+	z_stream stream;
+	struct stat sb;
+	size_t ret;
+	FILE *f;
+
+	if (stat(file, &sb) < 0) {
+		perror("stat");
+		return 1;
+	}
+
+	f = fopen(file, "r");
+	if (!f) {
+		perror("fopen");
+		return 1;
+	}
+
+	ic.buf = malloc(sb.st_size);
+	ic.len = sb.st_size;
+	ic.nofree = 1;
+	ic.seq = 1;
+
+	ret = fread(ic.buf, ic.len, 1, f);
+	if (ret < 0) {
+		perror("fread");
+		fclose(f);
+		return 1;
+	} else if (ret != 1) {
+		log_err("fio: short read on reading log\n");
+		fclose(f);
+		return 1;
+	}
+
+	fclose(f);
+
+	flush_chunk(&ic,  1, stdout, &stream, &iter);
+
+	if (iter.seq) {
+		finish_chunk(&stream, stdout, &iter);
+		free(iter.buf);
+	}
+
+	free(ic.buf);
+	return 0;
+}
+
 #else
 
 static void flush_gz_chunks(struct io_log *log, FILE *f)
@@ -782,7 +872,7 @@
 
 	flush_gz_chunks(log, f);
 
-	flush_samples(f, log->log, log->nr_samples, log->log_offset);
+	flush_samples(f, log->log, log->nr_samples * log_entry_sz(log));
 
 	fclose(f);
 	clear_file_buffer(buf);
@@ -826,7 +916,7 @@
 	unsigned int seq;
 	z_stream stream;
 	size_t total = 0;
-	int ret;
+	int ret, wbits;
 
 	INIT_FLIST_HEAD(&list);
 
@@ -836,12 +926,26 @@
 	stream.zfree = Z_NULL;
 	stream.opaque = Z_NULL;
 
-	if (deflateInit(&stream, Z_DEFAULT_COMPRESSION) != Z_OK) {
+	/*
+	 * Store gz header if storing to a file
+	 */
+#if 0
+	wbits = 15;
+	if (data->log->log_gz_store)
+		wbits += 16;
+
+	ret = deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+				31, 8, Z_DEFAULT_STRATEGY);
+#else
+	ret = deflateInit(&stream, Z_DEFAULT_COMPRESSION);
+#endif
+	if (ret != Z_OK) {
 		log_err("fio: failed to init gz stream\n");
 		return 0;
 	}
 
 	seq = ++data->log->chunk_seq;
+
 	stream.next_in = (void *) data->samples;
 	stream.avail_in = data->nr_samples * log_entry_sz(data->log);
 
@@ -900,7 +1004,7 @@
 
 int iolog_flush(struct io_log *log, int wait)
 {
-	struct thread_data *td = log->td;
+	struct tp_data *tdat = log->td->tp_data;
 	struct iolog_flush_data *data;
 	size_t sample_size;
 
@@ -929,7 +1033,7 @@
 	} else
 		data->work.wait = 0;
 
-	tp_queue_work(td->tp_data, &data->work);
+	tp_queue_work(tdat, &data->work);
 
 	if (wait) {
 		pthread_mutex_lock(&data->work.lock);
diff --git a/iolog.h b/iolog.h
index 66c596e..d268fe2 100644
--- a/iolog.h
+++ b/iolog.h
@@ -24,7 +24,7 @@
 struct io_sample {
 	uint64_t time;
 	uint64_t val;
-	uint32_t ddir;
+	uint32_t __ddir;
 	uint32_t bs;
 };
 
@@ -52,6 +52,8 @@
 	uint64_t max_samples;
 	void *log;
 
+	unsigned int log_ddir_mask;
+
 	char *filename;
 
 	struct thread_data *td;
@@ -74,6 +76,11 @@
 	unsigned int log_gz;
 
 	/*
+	 * Don't deflate for storing, just store the compressed bits
+	 */
+	unsigned int log_gz_store;
+
+	/*
 	 * Windowed average, for logging single entries average over some
 	 * period of time.
 	 */
@@ -86,6 +93,15 @@
 	struct flist_head chunk_list;
 };
 
+#define io_sample_ddir(io)	((io)->__ddir & ~0x80000000U)
+
+static inline void io_sample_set_ddir(struct io_log *log,
+				      struct io_sample *io,
+				      enum fio_ddir ddir)
+{
+	io->__ddir = ddir | log->log_ddir_mask;
+}
+
 static inline size_t __log_entry_sz(int log_offset)
 {
 	if (log_offset)
@@ -164,6 +180,10 @@
 extern void prune_io_piece_log(struct thread_data *);
 extern void write_iolog_close(struct thread_data *);
 
+#ifdef CONFIG_ZLIB
+extern int iolog_file_inflate(const char *);
+#endif
+
 /*
  * Logging
  */
@@ -173,6 +193,7 @@
 	int log_type;
 	int log_offset;
 	int log_gz;
+	int log_gz_store;
 	int log_compress;
 };
 
diff --git a/options.c b/options.c
index 75c9bf5..3a3321f 100644
--- a/options.c
+++ b/options.c
@@ -3116,6 +3116,15 @@
 		.category = FIO_OPT_C_LOG,
 		.group	= FIO_OPT_G_INVALID,
 	},
+	{
+		.name	= "log_store_compressed",
+		.lname	= "Log store compressed",
+		.type	= FIO_OPT_BOOL,
+		.off1	= td_var_offset(log_gz_store),
+		.help	= "Store logs in a compressed format",
+		.category = FIO_OPT_C_LOG,
+		.group	= FIO_OPT_G_INVALID,
+	},
 #endif
 	{
 		.name	= "bwavgtime",
diff --git a/server.c b/server.c
index cd00cc6..e2a6f73 100644
--- a/server.c
+++ b/server.c
@@ -1225,10 +1225,10 @@
 	for (i = 0; i < log->nr_samples; i++) {
 		struct io_sample *s = get_sample(log, i);
 
-		s->time	= cpu_to_le64(s->time);
-		s->val	= cpu_to_le64(s->val);
-		s->ddir	= cpu_to_le32(s->ddir);
-		s->bs	= cpu_to_le32(s->bs);
+		s->time		= cpu_to_le64(s->time);
+		s->val		= cpu_to_le64(s->val);
+		s->__ddir	= cpu_to_le32(s->__ddir);
+		s->bs		= cpu_to_le32(s->bs);
 
 		if (log->log_offset) {
 			struct io_sample_offset *so = (void *) s;
diff --git a/stat.c b/stat.c
index ccf08a6..979c810 100644
--- a/stat.c
+++ b/stat.c
@@ -1603,7 +1603,7 @@
 
 	s->val = val;
 	s->time = t;
-	s->ddir = ddir;
+	io_sample_set_ddir(iolog, s, ddir);
 	s->bs = bs;
 
 	if (iolog->log_offset) {
diff --git a/thread_options.h b/thread_options.h
index deb3d67..e545a8f 100644
--- a/thread_options.h
+++ b/thread_options.h
@@ -110,6 +110,7 @@
 	unsigned int log_avg_msec;
 	unsigned int log_offset;
 	unsigned int log_gz;
+	unsigned int log_gz_store;
 	unsigned int norandommap;
 	unsigned int softrandommap;
 	unsigned int bs_unaligned;
@@ -339,6 +340,7 @@
 	uint32_t log_avg_msec;
 	uint32_t log_offset;
 	uint32_t log_gz;
+	uint32_t log_gz_store;
 	uint32_t norandommap;
 	uint32_t softrandommap;
 	uint32_t bs_unaligned;