Improve accuracy of rate= option
I noticed that the rate= option is not terribly precise in some cases.
It gets worse the higher the selected rate. For example:
$ fio -name=load -size=100g -ioengine=null -runtime=10 -rate=30m
One would expect that to transfer 300MB (307200KB) at close to 30MB/s
(30720KB/s). However it transfers 315024KB at 31499KB/s. Further
experimentation shows that even higher rates can show bigger
discrepancies. At the extreme end...
$ fio -name=load -size=100g -ioengine=null -runtime=10 -rate=500m
One would expect this to transfer 5000MB at a rate of 500MB/s
(512000KB/s). However it transfers close to double that (9536.8MB) at a
rate of over 953MB/s. At a rate of 1GB/s and higher, the rate
limiting is effectively ignored.
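The inaccuracy comes from the precomputed per-byte delay,
1000000000 / bytes_per_sec nanoseconds, being truncated to an integer:
at 500MB/s it is 1ns instead of ~1.9ns, and at 1GB/s and above it is 0.
This patch stores the rate in bytes per second instead and derives the
expected elapsed time from it, which improves the accuracy of the rate=
option across the whole range of rates, at the cost of being very
slightly more computationally expensive.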
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/fio.h b/fio.h
index be684ca..a5405e3 100644
--- a/fio.h
+++ b/fio.h
@@ -357,7 +357,7 @@
/*
* Rate state
*/
- unsigned long rate_nsec_cycle[2];
+ unsigned long long rate_bps[2];
long rate_pending_usleep[2];
unsigned long rate_bytes[2];
unsigned long rate_blocks[2];
diff --git a/init.c b/init.c
index 9fafadf..01e4371 100644
--- a/init.c
+++ b/init.c
@@ -327,21 +327,19 @@
static int __setup_rate(struct thread_data *td, enum fio_ddir ddir)
{
unsigned int bs = td->o.min_bs[ddir];
- unsigned long long bytes_per_sec;
assert(ddir_rw(ddir));
if (td->o.rate[ddir])
- bytes_per_sec = td->o.rate[ddir];
+ td->rate_bps[ddir] = td->o.rate[ddir];
else
- bytes_per_sec = td->o.rate_iops[ddir] * bs;
+ td->rate_bps[ddir] = td->o.rate_iops[ddir] * bs;
- if (!bytes_per_sec) {
+ if (!td->rate_bps[ddir]) {
log_err("rate lower than supported\n");
return -1;
}
- td->rate_nsec_cycle[ddir] = 1000000000ULL / bytes_per_sec;
td->rate_pending_usleep[ddir] = 0;
return 0;
}
diff --git a/io_u.c b/io_u.c
index fc3ee49..0ff66f9 100644
--- a/io_u.c
+++ b/io_u.c
@@ -1296,6 +1296,16 @@
add_iops_sample(td, idx, &icd->time);
}
+static long long usec_for_io(struct thread_data *td, enum fio_ddir ddir)
+{
+ unsigned long long secs, remainder, bps, bytes;
+ bytes = td->this_io_bytes[ddir];
+ bps = td->rate_bps[ddir];
+ secs = bytes / bps;
+ remainder = bytes % bps;
+ return remainder * 1000000 / bps + secs * 1000000;
+}
+
static void io_completed(struct thread_data *td, struct io_u *io_u,
struct io_completion_data *icd)
{
@@ -1354,14 +1364,12 @@
if (__should_check_rate(td, idx)) {
td->rate_pending_usleep[idx] =
- ((td->this_io_bytes[idx] *
- td->rate_nsec_cycle[idx]) / 1000 -
+ (usec_for_io(td, idx) -
utime_since_now(&td->start));
}
- if (__should_check_rate(td, idx ^ 1))
+ if (__should_check_rate(td, odx))
td->rate_pending_usleep[odx] =
- ((td->this_io_bytes[odx] *
- td->rate_nsec_cycle[odx]) / 1000 -
+ (usec_for_io(td, odx) -
utime_since_now(&td->start));
}