Show IOPS as well as bandwidth (bw) numbers

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/fio.h b/fio.h
index 9d190b1..e9f34e8 100644
--- a/fio.h
+++ b/fio.h
@@ -291,7 +291,7 @@
 	 */
 	unsigned int io_u_map[FIO_IO_U_MAP_NR];
 	unsigned int io_u_lat[FIO_IO_U_LAT_NR];
-	unsigned long total_io_u;
+	unsigned long total_io_u[2];
 
 	unsigned long long io_bytes[2];
 	unsigned long runtime[2];
@@ -700,6 +700,7 @@
 extern void io_u_log_error(struct thread_data *, struct io_u *);
 extern void io_u_init_timeout(void);
 extern void io_u_set_timeout(struct thread_data *);
+extern void io_u_mark_depth(struct thread_data *, struct io_u *);
 
 /*
  * io engine entry points
diff --git a/io_u.c b/io_u.c
index c9a344f..69f2f19 100644
--- a/io_u.c
+++ b/io_u.c
@@ -265,10 +265,13 @@
 	return 0;
 }
 
-static void io_u_mark_depth(struct thread_data *td)
+void io_u_mark_depth(struct thread_data *td, struct io_u *io_u)
 {
 	int index = 0;
 
+	if (io_u->ddir == DDIR_SYNC)
+		return;
+
 	switch (td->cur_depth) {
 	default:
 		index++;
@@ -287,7 +290,7 @@
 	}
 
 	td->ts.io_u_map[index]++;
-	td->ts.total_io_u++;
+	td->ts.total_io_u[io_u->ddir]++;
 }
 
 static void io_u_mark_latency(struct thread_data *td, unsigned long msec)
@@ -410,7 +413,6 @@
 		list_del(&io_u->list);
 		list_add(&io_u->list, &td->io_u_busylist);
 		td->cur_depth++;
-		io_u_mark_depth(td);
 	}
 
 	return io_u;
diff --git a/ioengines.c b/ioengines.c
index 9de7ca1..88e91cc 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -202,6 +202,8 @@
 	if (io_u->ddir != DDIR_SYNC)
 		td->io_issues[io_u->ddir]++;
 
+	io_u_mark_depth(td, io_u);
+
 	ret = td->io_ops->queue(td, io_u);
 
 	if (ret == FIO_Q_QUEUED) {
diff --git a/stat.c b/stat.c
index 7b4b9d5..8d391a3 100644
--- a/stat.c
+++ b/stat.c
@@ -16,17 +16,18 @@
 /*
  * Cheesy number->string conversion, complete with carry rounding error.
  */
-static char *num2str(unsigned long num, int maxlen, int base)
+static char *num2str(unsigned long num, int maxlen, int base, int pow2)
 {
-	/*
-	 * could be passed in for 10^3 base, but every caller expects
-	 * 2^10 base right now.
-	 */
-	const unsigned int thousand = 1024;
-	char postfix[] = { 'K', 'M', 'G', 'P', 'E' };
+	char postfix[] = { ' ', 'K', 'M', 'G', 'P', 'E' };
+	unsigned int thousand;
 	char *buf;
 	int i;
 
+	if (pow2)
+		thousand = 1024;
+	else
+		thousand = 1000;
+
 	buf = malloc(128);
 
 	for (i = 0; base > 1; i++)
@@ -37,8 +38,10 @@
 
 		len = sprintf(buf, "%'lu", num);
 		if (len <= maxlen) {
-			buf[len] = postfix[i];
-			buf[len + 1] = '\0';
+			if (i >= 1) {
+				buf[len] = postfix[i];
+				buf[len + 1] = '\0';
+			}
 			return buf;
 		}
 
@@ -367,10 +370,10 @@
 		if (!rs->max_run[i])
 			continue;
 
-		p1 = num2str(rs->io_kb[i], 6, 1);
-		p2 = num2str(rs->agg[i], 6, 1);
-		p3 = num2str(rs->min_bw[i], 6, 1);
-		p4 = num2str(rs->max_bw[i], 6, 1);
+		p1 = num2str(rs->io_kb[i], 6, 1000, 1);
+		p2 = num2str(rs->agg[i], 6, 1000, 1);
+		p3 = num2str(rs->min_bw[i], 6, 1000, 1);
+		p4 = num2str(rs->max_bw[i], 6, 1000, 1);
 
 		fprintf(f_out, "%s: io=%siB, aggrb=%siB/s, minb=%siB/s, maxb=%siB/s, mint=%llumsec, maxt=%llumsec\n", ddir_str[i], p1, p2, p3, p4, rs->min_run[i], rs->max_run[i]);
 
@@ -412,6 +415,9 @@
 	}
 }
 
+#define ts_total_io_u(ts)	\
+	((ts)->total_io_u[0] + (ts)->total_io_u[1])
+
 static void stat_calc_dist(struct thread_stat *ts, double *io_u_dist)
 {
 	int i;
@@ -420,7 +426,7 @@
 	 * Do depth distribution calculations
 	 */
 	for (i = 0; i < FIO_IO_U_MAP_NR; i++) {
-		io_u_dist[i] = (double) ts->io_u_map[i] / (double) ts->total_io_u;
+		io_u_dist[i] = (double) ts->io_u_map[i] / (double) ts_total_io_u(ts);
 		io_u_dist[i] *= 100.0;
 	}
 }
@@ -433,7 +439,7 @@
 	 * Do latency distribution calculations
 	 */
 	for (i = 0; i < FIO_IO_U_LAT_NR; i++) {
-		io_u_lat[i] = (double) ts->io_u_lat[i] / (double) ts->total_io_u;
+		io_u_lat[i] = (double) ts->io_u_lat[i] / (double) ts_total_io_u(ts);
 		io_u_lat[i] *= 100.0;
 	}
 }
@@ -443,21 +449,24 @@
 {
 	const char *ddir_str[] = { "read ", "write" };
 	unsigned long min, max;
-	unsigned long long bw;
+	unsigned long long bw, iops;
 	double mean, dev;
-	char *io_p, *bw_p;
+	char *io_p, *bw_p, *iops_p;
 
 	if (!ts->runtime[ddir])
 		return;
 
 	bw = ts->io_bytes[ddir] / ts->runtime[ddir];
-	io_p = num2str(ts->io_bytes[ddir] >> 10, 6, 1);
-	bw_p = num2str(bw, 6, 1);
+	iops = (1000 * ts->total_io_u[ddir]) / ts->runtime[ddir];
+	io_p = num2str(ts->io_bytes[ddir] >> 10, 6, 1000, 1);
+	bw_p = num2str(bw, 6, 1000, 1);
+	iops_p = num2str(iops, 6, 1, 0);
 
-	fprintf(f_out, "  %s: io=%siB, bw=%siB/s, runt=%6lumsec\n", ddir_str[ddir], io_p, bw_p, ts->runtime[ddir]);
+	fprintf(f_out, "  %s: io=%siB, bw=%siB/s, iops=%s, runt=%6lumsec\n", ddir_str[ddir], io_p, bw_p, iops_p, ts->runtime[ddir]);
 
 	free(io_p);
 	free(bw_p);
+	free(iops_p);
 
 	if (calc_lat(&ts->slat_stat[ddir], &min, &max, &mean, &dev))
 		fprintf(f_out, "    slat (msec): min=%5lu, max=%5lu, avg=%5.02f, stdev=%5.02f\n", min, max, mean, dev);
@@ -711,7 +720,8 @@
 		for (k = 0; k < FIO_IO_U_LAT_NR; k++)
 			ts->io_u_lat[k] += td->ts.io_u_lat[k];
 
-		ts->total_io_u += td->ts.total_io_u;
+		for (k = 0; k <= DDIR_WRITE; k++)
+			ts->total_io_u[k] += td->ts.total_io_u[k];
 
 		ts->total_run_time += td->ts.total_run_time;