Add options to have fio latency profile a device

This adds three new options:

- latency_target. This defines a specific latency target, in usec.
- latency_window. This defines the period over which fio samples.
- latency_percentile. This defines the percentage of IOs that must
  meet the criteria specified by latency_target/latency_window.

With these options set, fio will run the described workload and
vary the queue depth between 1 and iodepth= to find the best
performing spot that meets the criteria specified by the three
options.

A sample job file is also added to demonstrate how to use this.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/stat.c b/stat.c
index ac5ff16..5d50ae8 100644
--- a/stat.c
+++ b/stat.c
@@ -497,7 +497,6 @@
 	show_lat_m(io_u_lat_m);
 }
 
-
 void show_thread_status_normal(struct thread_stat *ts, struct group_run_stats *rs)
 {
 	double usr_cpu, sys_cpu;
@@ -587,6 +586,13 @@
 					ts->first_error,
 					strerror(ts->first_error));
 	}
+	if (ts->latency_depth) {
+		log_info("     latency   : target=%llu, window=%llu, percentile=%.2f%%, depth=%u\n",
+					(unsigned long long)ts->latency_target,
+					(unsigned long long)ts->latency_window,
+					ts->latency_percentile.u.f,
+					ts->latency_depth);
+	}
 }
 
 static void show_ddir_status_terse(struct thread_stat *ts,
@@ -975,6 +981,13 @@
 		json_object_add_value_int(root, "first_error", ts->first_error);
 	}
 
+	if (ts->latency_depth) {
+		json_object_add_value_int(root, "latency_depth", ts->latency_depth);
+		json_object_add_value_int(root, "latency_target", ts->latency_target);
+		json_object_add_value_float(root, "latency_percentile", ts->latency_percentile.u.f);
+		json_object_add_value_int(root, "latency_window", ts->latency_window);
+	}
+
 	/* Additional output if description is set */
 	if (strlen(ts->description))
 		json_object_add_value_string(root, "desc", ts->description);
@@ -1265,6 +1278,11 @@
 			}
 		}
 
+		ts->latency_depth = td->latency_qd;
+		ts->latency_target = td->o.latency_target;
+		ts->latency_percentile = td->o.latency_percentile;
+		ts->latency_window = td->o.latency_window;
+
 		sum_thread_stats(ts, &td->ts, idx);
 	}