[PATCH] IO completion time histograms

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/HOWTO b/HOWTO
index e5b09fd..f61b4c1 100644
--- a/HOWTO
+++ b/HOWTO
@@ -556,6 +556,8 @@
     bw (KiB/s) : min=    0, max= 1196, per=51.00%, avg=664.02, stdev=681.68
   cpu        : usr=1.49%, sys=0.25%, ctx=7969
   IO depths    : 1=0.1%, 2=0.3%, 4=0.5%, 8=99.0%, 16=0.0%, 32=0.0%, >32=0.0%
+     lat (msec): 2=1.6%, 4=0.0%, 8=3.2%, 16=12.8%, 32=38.4%, 64=24.8%, 128=15.2%
+     lat (msec): 256=4.0%, 512=0.0%, 1024=0.0%, >=2048=0.0%
 
 The client number is printed, along with the group id and error of that
 thread. Below is the io statistics, here for writes. In the order listed,
@@ -585,6 +587,12 @@
 		16= entries includes depths up to that value but higher
 		than the previous entry. In other words, it covers the
 		range from 16 to 31.
+IO latencies=	The distribution of IO completion latencies. This is the
+		time from when IO leaves fio and when it gets completed.
+		The numbers follow the same pattern as the IO depths,
+		meaning that 2=1.6% means that 1.6% of the IO completed
+		within 2 msecs, 16=12.8% means that 12.8% of the IO
+		took more than 8 msecs, but less than (or equal to) 16 msecs.
 
 After each client has been listed, the group statistics are printed. They
 will look like this:
diff --git a/fio.h b/fio.h
index 5fad446..8415dae 100644
--- a/fio.h
+++ b/fio.h
@@ -220,6 +220,7 @@
  * How many depth levels to log
  */
 #define FIO_IO_U_MAP_NR	8
+#define FIO_IO_U_LAT_NR 12
 
 /*
  * This describes a single thread/process executing a fio job.
@@ -320,6 +321,7 @@
 	 */
 	unsigned int cur_depth;
 	unsigned int io_u_map[FIO_IO_U_MAP_NR];
+	unsigned int io_u_lat[FIO_IO_U_LAT_NR];
 	unsigned long total_io_u;
 	struct list_head io_u_freelist;
 	struct list_head io_u_busylist;
diff --git a/io_u.c b/io_u.c
index c56ccc6..132d897 100644
--- a/io_u.c
+++ b/io_u.c
@@ -263,6 +263,40 @@
 	td->total_io_u++;
 }
 
+static void io_u_mark_latency(struct thread_data *td, unsigned long msec)
+{
+	int index = 0;
+
+	switch (msec) {
+	default:
+		index++;
+	case 1024 ... 2047:
+		index++;
+	case 512 ... 1023:
+		index++;
+	case 256 ... 511:
+		index++;
+	case 128 ... 255:
+		index++;
+	case 64 ... 127:
+		index++;
+	case 32 ... 63:
+		index++;
+	case 16 ... 31:
+		index++;
+	case 8 ... 15:
+		index++;
+	case 4 ... 7:
+		index++;
+	case 2 ... 3:
+		index++;
+	case 0 ... 1:
+		break;
+	}
+
+	td->io_u_lat[index]++;
+}
+
 struct io_u *__get_io_u(struct thread_data *td)
 {
 	struct io_u *io_u = NULL;
@@ -367,6 +401,7 @@
 
 		add_clat_sample(td, idx, msec);
 		add_bw_sample(td, idx, &icd->time);
+		io_u_mark_latency(td, msec);
 
 		if ((td_rw(td) || td_write(td)) && idx == DDIR_WRITE)
 			log_io_piece(td, io_u);
diff --git a/stat.c b/stat.c
index f9c6985..819a7c6 100644
--- a/stat.c
+++ b/stat.c
@@ -439,6 +439,7 @@
 	double usr_cpu, sys_cpu;
 	unsigned long runtime;
 	double io_u_dist[FIO_IO_U_MAP_NR];
+	double io_u_lat[FIO_IO_U_LAT_NR];
 	int i;
 
 	if (!(td->io_bytes[0] + td->io_bytes[1]))
@@ -476,6 +477,17 @@
 
 	fprintf(f_out, "  IO depths    : 1=%3.1f%%, 2=%3.1f%%, 4=%3.1f%%, 8=%3.1f%%, 16=%3.1f%%, 32=%3.1f%%, >=64=%3.1f%%\n", io_u_dist[0], io_u_dist[1], io_u_dist[2], io_u_dist[3], io_u_dist[4], io_u_dist[5], io_u_dist[6]);
 
+	/*
+	 * Do latency distribution calculations
+	 */
+	for (i = 0; i < FIO_IO_U_LAT_NR; i++) {
+		io_u_lat[i] = (double) td->io_u_lat[i] / (double) td->total_io_u;
+		io_u_lat[i] *= 100.0;
+	}
+
+	fprintf(f_out, "     lat (msec): 2=%3.1f%%, 4=%3.1f%%, 8=%3.1f%%, 16=%3.1f%%, 32=%3.1f%%, 64=%3.1f%%, 128=%3.1f%%\n", io_u_lat[0], io_u_lat[1], io_u_lat[2], io_u_lat[3], io_u_lat[4], io_u_lat[5], io_u_lat[6]);
+	fprintf(f_out, "     lat (msec): 256=%3.1f%%, 512=%3.1f%%, 1024=%3.1f%%, >=2048=%3.1f%%\n", io_u_lat[7], io_u_lat[8], io_u_lat[9], io_u_lat[10]);
+
 	if (td->description)
 		fprintf(f_out, "%s\n", td->description);
 }