Adaptive io latency distrobution "histogram"

Log ranges all the way down to microseconds, and display the most
appropriate values for those ranges.

This makes the latency usable for sub-milisecond devices as well.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/fio.h b/fio.h
index 2104e3a..813e55b 100644
--- a/fio.h
+++ b/fio.h
@@ -295,7 +295,8 @@
  * How many depth levels to log
  */
 #define FIO_IO_U_MAP_NR	8
-#define FIO_IO_U_LAT_NR 12
+#define FIO_IO_U_LAT_U_NR 12
+#define FIO_IO_U_LAT_M_NR 10
 
 struct thread_stat {
 	char *name;
@@ -333,7 +334,8 @@
 	 * IO depth and latency stats
 	 */
 	unsigned int io_u_map[FIO_IO_U_MAP_NR];
-	unsigned int io_u_lat[FIO_IO_U_LAT_NR];
+	unsigned int io_u_lat_u[FIO_IO_U_LAT_U_NR];
+	unsigned int io_u_lat_m[FIO_IO_U_LAT_M_NR];
 	unsigned long total_io_u[2];
 	unsigned long short_io_u[2];
 
diff --git a/io_u.c b/io_u.c
index 9aab97f..e0614bd 100644
--- a/io_u.c
+++ b/io_u.c
@@ -387,38 +387,98 @@
 	td->ts.total_io_u[io_u->ddir]++;
 }
 
-static void io_u_mark_latency(struct thread_data *td, unsigned long msec)
+static void io_u_mark_lat_usec(struct thread_data *td, unsigned long usec)
+{
+	int index = 0;
+
+	assert(usec < 1000);
+
+	switch (usec) {
+	case 750 ... 999:
+		index = 9;
+		break;
+	case 500 ... 749:
+		index = 8;
+		break;
+	case 250 ... 499:
+		index = 7;
+		break;
+	case 100 ... 249:
+		index = 6;
+		break;
+	case 50 ... 99:
+		index = 5;
+		break;
+	case 20 ... 49:
+		index = 4;
+		break;
+	case 10 ... 19:
+		index = 3;
+		break;
+	case 4 ... 9:
+		index = 2;
+		break;
+	case 2 ... 3:
+		index = 1;
+	case 0 ... 1:
+		break;
+	}
+
+	assert(index < FIO_IO_U_LAT_U_NR);
+	td->ts.io_u_lat_u[index]++;
+}
+
+static void io_u_mark_lat_msec(struct thread_data *td, unsigned long msec)
 {
 	int index = 0;
 
 	switch (msec) {
 	default:
-		index++;
+		index = 11;
+		break;
 	case 1000 ... 1999:
-		index++;
+		index = 10;
+		break;
 	case 750 ... 999:
-		index++;
+		index = 9;
+		break;
 	case 500 ... 749:
-		index++;
+		index = 8;
+		break;
 	case 250 ... 499:
-		index++;
+		index = 7;
+		break;
 	case 100 ... 249:
-		index++;
+		index = 6;
+		break;
 	case 50 ... 99:
-		index++;
+		index = 5;
+		break;
 	case 20 ... 49:
-		index++;
+		index = 4;
+		break;
 	case 10 ... 19:
-		index++;
+		index = 3;
+		break;
 	case 4 ... 9:
-		index++;
+		index = 2;
+		break;
 	case 2 ... 3:
-		index++;
+		index = 1;
 	case 0 ... 1:
 		break;
 	}
 
-	td->ts.io_u_lat[index]++;
+	assert(index < FIO_IO_U_LAT_M_NR);
+	td->ts.io_u_lat_m[index]++;
+}
+
+static void io_u_mark_latency(struct thread_data *td, unsigned long usec)
+{
+	if (usec < 1000)
+		io_u_mark_lat_usec(td, usec);
+	else
+		io_u_mark_lat_msec(td, usec / 1000);
 }
 
 /*
@@ -682,7 +742,7 @@
 
 		add_clat_sample(td, idx, usec);
 		add_bw_sample(td, idx, &icd->time);
-		io_u_mark_latency(td, usec / 1000);
+		io_u_mark_latency(td, usec);
 
 		if (td_write(td) && idx == DDIR_WRITE &&
 		    td->o.verify != VERIFY_NONE)
diff --git a/stat.c b/stat.c
index 34c8fce..28d85c1 100644
--- a/stat.c
+++ b/stat.c
@@ -131,21 +131,32 @@
 	}
 }
 
-static void stat_calc_lat(struct thread_stat *ts, double *io_u_lat)
+static void stat_calc_lat(struct thread_stat *ts, double *dst,
+			  unsigned int *src, int nr)
 {
 	int i;
 
 	/*
 	 * Do latency distribution calculations
 	 */
-	for (i = 0; i < FIO_IO_U_LAT_NR; i++) {
-		io_u_lat[i] = (double) ts->io_u_lat[i] / (double) ts_total_io_u(ts);
-		io_u_lat[i] *= 100.0;
-		if (io_u_lat[i] < 0.01 && ts->io_u_lat[i])
-			io_u_lat[i] = 0.01;
+	for (i = 0; i < nr; i++) {
+		dst[i] = (double) src[i] / (double) ts_total_io_u(ts);
+		dst[i] *= 100.0;
+		if (dst[i] < 0.01 && src[i])
+			dst[i] = 0.01;
 	}
 }
 
+static void stat_calc_lat_u(struct thread_stat *ts, double *io_u_lat)
+{
+	stat_calc_lat(ts, io_u_lat, ts->io_u_lat_u, FIO_IO_U_LAT_U_NR);
+}
+
+static void stat_calc_lat_m(struct thread_stat *ts, double *io_u_lat)
+{
+	stat_calc_lat(ts, io_u_lat, ts->io_u_lat_m, FIO_IO_U_LAT_M_NR);
+}
+
 static int usec_to_msec(unsigned long *min, unsigned long *max, double *mean,
 			double *dev)
 {
@@ -208,13 +219,61 @@
 	}
 }
 
+static void show_lat(double *io_u_lat, int nr, const char **ranges,
+		     const char *msg)
+{
+	int new_line = 1, i, line = 0;
+
+	for (i = 0; i < nr; i++) {
+		if (io_u_lat[i] <= 0.0)
+			continue;
+		if (new_line) {
+			log_info("     lat (%s): ", msg);
+			new_line = 0;
+			line = 0;
+		}
+		if (line)
+			log_info(", ");
+		log_info("%s%3.2f%%", ranges[i], io_u_lat[i]);
+		line++;
+		if (line == 5)
+			new_line = 1;
+	}
+
+}
+
+static void show_lat_u(double *io_u_lat_u)
+{
+	const char *ranges[] = { "2=", "4=", "10=", "20=", "50=", "100=",
+				 "250=", "500=", "750=", "1000=", };
+
+	show_lat(io_u_lat_u, FIO_IO_U_LAT_U_NR, ranges, "usec");
+}
+
+static void show_lat_m(double *io_u_lat_m)
+{
+	const char *ranges[] = { "2=", "4=", "10=", "20=", "50=", "100=",
+				 "250=", "500=", "750=", "1000=", "2000=",
+				 ">=2000=", };
+
+	show_lat(io_u_lat_m, FIO_IO_U_LAT_M_NR, ranges, "msec");
+}
+
+static void show_latencies(double *io_u_lat_u, double *io_u_lat_m)
+{
+	show_lat_u(io_u_lat_u);
+	show_lat_m(io_u_lat_m);
+	log_info("\n");
+}
+
 static void show_thread_status(struct thread_stat *ts,
 			       struct group_run_stats *rs)
 {
 	double usr_cpu, sys_cpu;
 	unsigned long runtime;
 	double io_u_dist[FIO_IO_U_MAP_NR];
-	double io_u_lat[FIO_IO_U_LAT_NR];
+	double io_u_lat_u[FIO_IO_U_LAT_U_NR];
+	double io_u_lat_m[FIO_IO_U_LAT_M_NR];
 
 	if (!(ts->io_bytes[0] + ts->io_bytes[1]))
 		return;
@@ -246,14 +305,13 @@
 	log_info("  cpu          : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu\n", usr_cpu, sys_cpu, ts->ctx);
 
 	stat_calc_dist(ts, io_u_dist);
-	stat_calc_lat(ts, io_u_lat);
+	stat_calc_lat_u(ts, io_u_lat_u);
+	stat_calc_lat_m(ts, io_u_lat_m);
 
 	log_info("  IO depths    : 1=%3.1f%%, 2=%3.1f%%, 4=%3.1f%%, 8=%3.1f%%, 16=%3.1f%%, 32=%3.1f%%, >=64=%3.1f%%\n", io_u_dist[0], io_u_dist[1], io_u_dist[2], io_u_dist[3], io_u_dist[4], io_u_dist[5], io_u_dist[6]);
 	log_info("     issued r/w: total=%lu/%lu, short=%lu/%lu\n", ts->total_io_u[0], ts->total_io_u[1], ts->short_io_u[0], ts->short_io_u[1]);
 
-	log_info("     lat (msec): 2=%3.2f%%, 4=%3.2f%%, 10=%3.2f%%, 20=%3.2f%%, 50=%3.2f%%\n", io_u_lat[0], io_u_lat[1], io_u_lat[2], io_u_lat[3], io_u_lat[4]);
-	log_info("     lat (msec): 100=%3.2f%%, 250=%3.2f%%, 500=%3.2f%%, 750=%3.2f%%\n", io_u_lat[5], io_u_lat[6], io_u_lat[7], io_u_lat[8]);
-	log_info("     lat (msec): 1000=%3.2f%%, 2000=%3.2f%%, >=2000=%3.2f%%\n", io_u_lat[9], io_u_lat[10], io_u_lat[11]);
+	show_latencies(io_u_lat_u, io_u_lat_m);
 }
 
 static void show_ddir_status_terse(struct thread_stat *ts,
@@ -293,8 +351,10 @@
 				     struct group_run_stats *rs)
 {
 	double io_u_dist[FIO_IO_U_MAP_NR];
-	double io_u_lat[FIO_IO_U_LAT_NR];
+	double io_u_lat_u[FIO_IO_U_LAT_U_NR];
+	double io_u_lat_m[FIO_IO_U_LAT_M_NR];
 	double usr_cpu, sys_cpu;
+	int i;
 
 	log_info("%s;%d;%d", ts->name, ts->groupid, ts->error);
 
@@ -314,12 +374,16 @@
 	log_info(";%f%%;%f%%;%lu", usr_cpu, sys_cpu, ts->ctx);
 
 	stat_calc_dist(ts, io_u_dist);
-	stat_calc_lat(ts, io_u_lat);
+	stat_calc_lat_u(ts, io_u_lat_u);
+	stat_calc_lat_m(ts, io_u_lat_m);
 
 	log_info(";%3.1f%%;%3.1f%%;%3.1f%%;%3.1f%%;%3.1f%%;%3.1f%%;%3.1f%%", io_u_dist[0], io_u_dist[1], io_u_dist[2], io_u_dist[3], io_u_dist[4], io_u_dist[5], io_u_dist[6]);
 
-	log_info(";%3.2f%%;%3.2f%%;%3.2f%%;%3.2f%%;%3.2f%%;%3.2f%%\n", io_u_lat[0], io_u_lat[1], io_u_lat[2], io_u_lat[3], io_u_lat[4], io_u_lat[5]);
-	log_info(";%3.2f%%;%3.2f%%;%3.2f%%;%3.2f%%;%3.2f%%", io_u_lat[6], io_u_lat[7], io_u_lat[8], io_u_lat[9], io_u_lat[10]);
+	for (i = 0; i < FIO_IO_U_LAT_U_NR; i++)
+		log_info(";%3.2f%%", io_u_lat_u[i]);
+	for (i = 0; i < FIO_IO_U_LAT_M_NR; i++)
+		log_info(";%3.2f%%", io_u_lat_m[i]);
+	log_info("\n");
 
 	if (ts->description)
 		log_info(";%s", ts->description);
@@ -454,8 +518,11 @@
 
 		for (k = 0; k < FIO_IO_U_MAP_NR; k++)
 			ts->io_u_map[k] += td->ts.io_u_map[k];
-		for (k = 0; k < FIO_IO_U_LAT_NR; k++)
-			ts->io_u_lat[k] += td->ts.io_u_lat[k];
+		for (k = 0; k < FIO_IO_U_LAT_U_NR; k++)
+			ts->io_u_lat_u[k] += td->ts.io_u_lat_u[k];
+		for (k = 0; k < FIO_IO_U_LAT_M_NR; k++)
+			ts->io_u_lat_m[k] += td->ts.io_u_lat_m[k];
+
 
 		for (k = 0; k <= DDIR_WRITE; k++) {
 			ts->total_io_u[k] += td->ts.total_io_u[k];