Rework file random map

Fio slows down at the end of a random IO run, when the random
map is used and it gets fuller. This causes slowdowns in
IOPS. This is largely due to the file random map being an
array of bits, and with random access to single bits of the
array at the time, locality is awful. The effect is observable
throughout a run, though, where it gradually gets slower and
slower. It just becomes more apparent at near the end of the
run, where the last 10% are fairly bad. This is even with
doing a bunch of tricks to reduce that cost.

Implement an N-level bitmap, where layer N uses a single bit
to represent 32/64-bits at layer N-1. The number of layers
depends on the number of blocks.

This has slightly higher overhead initially in theory, in
practice it performs about the same. As a bonus, the throughput
remains constant during the run, and even becomes faster
near the end.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/io_u.c b/io_u.c
index d81fefd..d681606 100644
--- a/io_u.c
+++ b/io_u.c
@@ -10,6 +10,7 @@
 #include "verify.h"
 #include "trim.h"
 #include "lib/rand.h"
+#include "lib/bitmap.h"
 
 struct io_completion_data {
 	int nr;				/* input */
@@ -20,17 +21,12 @@
 };
 
 /*
- * The ->file_map[] contains a map of blocks we have or have not done io
+ * The ->io_bitmap contains a map of blocks we have or have not done io
  * to yet. Used to make sure we cover the entire range in a fair fashion.
  */
 static int random_map_free(struct fio_file *f, const unsigned long long block)
 {
-	unsigned int idx = RAND_MAP_IDX(f, block);
-	unsigned int bit = RAND_MAP_BIT(f, block);
-
-	dprint(FD_RANDOM, "free: b=%llu, idx=%u, bit=%u\n", block, idx, bit);
-
-	return (f->file_map[idx] & (1UL << bit)) == 0;
+	return !bitmap_isset(f->io_bitmap, block);
 }
 
 /*
@@ -41,61 +37,15 @@
 	unsigned int min_bs = td->o.rw_min_bs;
 	struct fio_file *f = io_u->file;
 	unsigned long long block;
-	unsigned int blocks, nr_blocks;
-	int busy_check;
+	unsigned int nr_blocks;
 
 	block = (io_u->offset - f->file_offset) / (unsigned long long) min_bs;
 	nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
-	blocks = 0;
-	busy_check = !(io_u->flags & IO_U_F_BUSY_OK);
 
-	while (nr_blocks) {
-		unsigned int idx, bit;
-		unsigned long mask, this_blocks;
+	nr_blocks = bitmap_set_nr(f->io_bitmap, block, nr_blocks);
 
-		/*
-		 * If we have a mixed random workload, we may
-		 * encounter blocks we already did IO to.
-		 */
-		if (!busy_check) {
-			blocks = nr_blocks;
-			break;
-		}
-		if ((td->o.ddir_seq_nr == 1) && !random_map_free(f, block))
-			break;
-
-		idx = RAND_MAP_IDX(f, block);
-		bit = RAND_MAP_BIT(f, block);
-
-		fio_assert(td, idx < f->num_maps);
-
-		this_blocks = nr_blocks;
-		if (this_blocks + bit > BLOCKS_PER_MAP)
-			this_blocks = BLOCKS_PER_MAP - bit;
-
-		do {
-			if (this_blocks == BLOCKS_PER_MAP)
-				mask = -1UL;
-			else
-				mask = ((1UL << this_blocks) - 1) << bit;
-	
-			if (!(f->file_map[idx] & mask))
-				break;
-
-			this_blocks--;
-		} while (this_blocks);
-
-		if (!this_blocks)
-			break;
-
-		f->file_map[idx] |= mask;
-		nr_blocks -= this_blocks;
-		blocks += this_blocks;
-		block += this_blocks;
-	}
-
-	if ((blocks * min_bs) < io_u->buflen)
-		io_u->buflen = blocks * min_bs;
+	if ((nr_blocks * min_bs) < io_u->buflen)
+		io_u->buflen = nr_blocks * min_bs;
 }
 
 static unsigned long long last_block(struct thread_data *td, struct fio_file *f,
@@ -123,113 +73,46 @@
 	return max_blocks;
 }
 
-/*
- * Return the next free block in the map.
- */
-static int get_next_free_block(struct thread_data *td, struct fio_file *f,
-			       enum fio_ddir ddir, unsigned long long *b)
-{
-	unsigned long long block, min_bs = td->o.rw_min_bs, lastb;
-	int i;
-
-	lastb = last_block(td, f, ddir);
-	if (!lastb)
-		return 1;
-
-	i = f->last_free_lookup;
-	block = i * BLOCKS_PER_MAP;
-	while (block * min_bs < f->real_file_size &&
-		block * min_bs < f->io_size) {
-		if (f->file_map[i] != -1UL) {
-			block += ffz(f->file_map[i]);
-			if (block > lastb)
-				break;
-			f->last_free_lookup = i;
-			*b = block;
-			return 0;
-		}
-
-		block += BLOCKS_PER_MAP;
-		i++;
-	}
-
-	dprint(FD_IO, "failed finding a free block\n");
-	return 1;
-}
-
 static int __get_next_rand_offset(struct thread_data *td, struct fio_file *f,
 				  enum fio_ddir ddir, unsigned long long *b)
 {
 	unsigned long long rmax, r, lastb;
-	int loops = 5;
 
 	lastb = last_block(td, f, ddir);
 	if (!lastb)
 		return 1;
 
-	if (f->failed_rands >= 200)
-		goto ffz;
-
 	rmax = td->o.use_os_rand ? OS_RAND_MAX : FRAND_MAX;
-	do {
-		if (td->o.use_os_rand)
-			r = os_random_long(&td->random_state);
-		else
-			r = __rand(&td->__random_state);
 
-		*b = (lastb - 1) * (r / ((unsigned long long) rmax + 1.0));
+	if (td->o.use_os_rand) {
+		rmax = OS_RAND_MAX;
+		r = os_random_long(&td->random_state);
+	} else {
+		rmax = FRAND_MAX;
+		r = __rand(&td->__random_state);
+	}
 
-		dprint(FD_RANDOM, "off rand %llu\n", r);
+	*b = (lastb - 1) * (r / ((unsigned long long) rmax + 1.0));
 
-
-		/*
-		 * if we are not maintaining a random map, we are done.
-		 */
-		if (!file_randommap(td, f))
-			goto ret_good;
-
-		/*
-		 * calculate map offset and check if it's free
-		 */
-		if (random_map_free(f, *b))
-			goto ret_good;
-
-		dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n",
-									*b);
-	} while (--loops);
-
-	if (!f->failed_rands++)
-		f->last_free_lookup = 0;
+	dprint(FD_RANDOM, "off rand %llu\n", r);
 
 	/*
-	 * we get here, if we didn't suceed in looking up a block. generate
-	 * a random start offset into the filemap, and find the first free
-	 * block from there.
+	 * if we are not maintaining a random map, we are done.
 	 */
-	loops = 10;
-	do {
-		f->last_free_lookup = (f->num_maps - 1) *
-					(r / ((unsigned long long) rmax + 1.0));
-		if (!get_next_free_block(td, f, ddir, b))
-			goto ret;
-
-		if (td->o.use_os_rand)
-			r = os_random_long(&td->random_state);
-		else
-			r = __rand(&td->__random_state);
-	} while (--loops);
+	if (!file_randommap(td, f))
+		goto ret;
 
 	/*
-	 * that didn't work either, try exhaustive search from the start
+	 * calculate map offset and check if it's free
 	 */
-	f->last_free_lookup = 0;
-ffz:
-	if (!get_next_free_block(td, f, ddir, b))
-		return 0;
-	f->last_free_lookup = 0;
-	return get_next_free_block(td, f, ddir, b);
-ret_good:
-	f->failed_rands = 0;
+	if (random_map_free(f, *b))
+		goto ret;
+
+	dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n", *b);
+
+	*b = bitmap_next_free(f->io_bitmap, *b);
+	if (*b == (uint64_t) -1ULL)
+		return 1;
 ret:
 	return 0;
 }
@@ -347,7 +230,8 @@
 		else if (b != -1ULL)
 			io_u->offset = b * td->o.ba[ddir];
 		else {
-			log_err("fio: bug in offset generation\n");
+			log_err("fio: bug in offset generation: offset=%llu, b=%llu\n",
+								offset, b);
 			ret = 1;
 		}
 	}