Add 'filesize' option

Allows the user to define the range of file sizes generated.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/HOWTO b/HOWTO
index e54b37b..a2070d6 100644
--- a/HOWTO
+++ b/HOWTO
@@ -237,6 +237,11 @@
 		size if larger than the current file size. If this parameter
 		is not given and the file exists, the file size will be used.
 
+filesize=siint	Individual file sizes. May be a range, in which case fio
+		will select sizes for files at random within the given range
+		and limited to 'size' in total (if that is given). If filesize
+		is not given, each created file is the same size.
+
 bs=siint	The block size used for the io units. Defaults to 4k. Values
 		can be given for both read and writes. If a single siint is
 		given, it will apply to both. If a second siint is specified
diff --git a/filesetup.c b/filesetup.c
index f43efe0..053bda9 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -97,11 +97,33 @@
 	return 1;
 }
 
+/*
+ * Pick a file size in [file_size_low, min(file_size_high, total_size)].
+ * Draws are then rounded down to a rw_min_bs multiple; 0 = total_size too small.
+ */
+static unsigned long long set_rand_file_size(struct thread_data *td,
+					     unsigned long long total_size)
+{
+	unsigned long long upper = total_size, ret;
+	long r;
+
+	if (upper > td->file_size_high)
+		upper = td->file_size_high;
+	else if (upper < td->file_size_low || !upper)
+		return 0;
+	if (upper <= td->file_size_low)	/* no range left to draw from */
+		return upper;
+	r = os_random_long(&td->file_size_state);	/* scales the span, not upper */
+	ret = td->file_size_low + (unsigned long long) ((double) (upper - td->file_size_low) * (r / (RAND_MAX + 1.0)));
+	ret -= (ret % td->rw_min_bs);
+	return ret > upper ? upper : ret;
+}
+
 static int create_files(struct thread_data *td)
 {
 	struct fio_file *f;
 	int err, need_create, can_extend;
-	unsigned long long total_file_size;
+	unsigned long long total_file_size, local_file_size;
 	unsigned int i, new_files;
 
 	new_files = 0;
@@ -132,6 +154,10 @@
 		return 0;
 
 	need_create = 0;
+	local_file_size = total_file_size;
+	if (!local_file_size)
+		local_file_size = -1;
+
 	for_each_file(td, f, i) {
 		int file_there;
 
@@ -140,7 +166,23 @@
 		if (f->flags & FIO_FILE_EXISTS)
 			continue;
 
-		f->file_size = total_file_size / new_files;
+		if (!td->file_size_low)
+			f->file_size = total_file_size / new_files;
+		else {
+			/*
+			 * If we don't have enough space left for a file
+			 * of the minimum size, bail.
+			 */
+			if (local_file_size < td->file_size_low) {
+				log_info("fio: limited to %d files\n", i);
+				new_files -= (td->nr_files - i);
+				td->nr_files = i;
+				break;
+			}
+
+			f->file_size = set_rand_file_size(td, local_file_size);
+			local_file_size -= f->file_size;
+		}
 
 		file_there = !file_ok(td, f);
 
@@ -162,9 +204,8 @@
 	}
 
 	temp_stall_ts = 1;
-	fprintf(f_out, "%s: Laying out IO file(s) (%u x %LuMiB == %LuMiB)\n",
+	fprintf(f_out, "%s: Laying out IO file(s) (%u files / %LuMiB)\n",
 				td->name, new_files,
-				(total_file_size >> 20) / new_files,
 				total_file_size >> 20);
 
 	err = 0;
diff --git a/fio.c b/fio.c
index e6bd18a..5f06940 100644
--- a/fio.c
+++ b/fio.c
@@ -726,9 +726,6 @@
 		goto err_sem;
 	}
 
-	if (init_random_state(td))
-		goto err_sem;
-
 	if (td->ioscheduler && switch_ioscheduler(td))
 		goto err_sem;
 
diff --git a/fio.h b/fio.h
index 7210422..3f0af65 100644
--- a/fio.h
+++ b/fio.h
@@ -492,6 +492,13 @@
 	unsigned int file_service_nr;
 	unsigned int file_service_left;
 	struct fio_file *file_service_file;
+
+	/*
+	 * For generating file sizes
+	 */
+	os_random_state_t file_size_state;
+	unsigned long long file_size_low;
+	unsigned long long file_size_high;
 };
 
 /*
@@ -649,7 +656,6 @@
  * Init functions
  */
 extern int __must_check parse_options(int, char **);
-extern int __must_check init_random_state(struct thread_data *);
 
 /*
  * File setup/shutdown
@@ -740,6 +746,8 @@
 		fprintf(stderr, ##args);	\
 	} while (0)
 
+#define log_info(args...)	fprintf(f_out, ##args)	/* printf-style message written to f_out */
+
 FILE *get_f_out(void);
 FILE *get_f_err(void);
 
diff --git a/init.c b/init.c
index cadc0d6..e794b37 100644
--- a/init.c
+++ b/init.c
@@ -183,7 +183,14 @@
 		.name	= "size",
 		.type	= FIO_OPT_STR_VAL,
 		.off1	= td_var_offset(total_file_size),
-		.help	= "Size of device or file",
+		.help	= "Total size of device or files",
+	},
+	{
+		.name	= "filesize",
+		.type	= FIO_OPT_STR_VAL,
+		.off1	= td_var_offset(file_size_low),
+		.off2	= td_var_offset(file_size_high),
+		.help	= "Size of individual files",
 	},
 	{
 		.name	= "bs",
@@ -801,6 +808,9 @@
 
 	td->rw_min_bs = min(td->min_bs[DDIR_READ], td->min_bs[DDIR_WRITE]);
 
+	if (!td->file_size_high)
+		td->file_size_high = td->file_size_low;
+
 	if (td_read(td) && !td_rw(td))
 		td->verify = 0;
 
@@ -894,6 +904,65 @@
 }
 
 /*
+ * Seed td's random states from /dev/urandom and, when td_random(td) holds,
+ * allocate zeroed per-file maps unless norandommap. Returns 0, or 1 on error.
+ */
+static int init_random_state(struct thread_data *td)
+{
+	unsigned long seeds[6];	/* one per os_random_seed() call below */
+	int fd, num_maps, blocks;
+	struct fio_file *f;
+	unsigned int i;
+
+	fd = open("/dev/urandom", O_RDONLY);
+	if (fd == -1) {
+		td_verror(td, errno, "open");
+		return 1;
+	}
+
+	if (read(fd, seeds, sizeof(seeds)) < (int) sizeof(seeds)) {	/* a short read is treated as failure */
+		td_verror(td, EIO, "read");
+		close(fd);
+		return 1;
+	}
+
+	close(fd);
+
+	os_random_seed(seeds[0], &td->bsrange_state);
+	os_random_seed(seeds[1], &td->verify_state);
+	os_random_seed(seeds[2], &td->rwmix_state);
+
+	if (td->file_service_type == FIO_FSERVICE_RANDOM)
+		os_random_seed(seeds[3], &td->next_file_state);
+
+	os_random_seed(seeds[5], &td->file_size_state);	/* state used when generating file sizes */
+
+	if (!td_random(td))	/* skips the map allocation and random_state seeding below */
+		return 0;
+
+	if (td->rand_repeatable)
+		seeds[4] = FIO_RANDSEED * td->thread_number;	/* fixed seed replaces the urandom one */
+
+	if (!td->norandommap) {
+		for_each_file(td, f, i) {	/* NOTE(review): reads f->real_file_size; confirm it is set by the time add_job() calls this */
+			blocks = (f->real_file_size + td->rw_min_bs - 1) / td->rw_min_bs;	/* rounded up */
+			num_maps = (blocks + BLOCKS_PER_MAP-1)/ BLOCKS_PER_MAP;	/* rounded up */
+			f->file_map = malloc(num_maps * sizeof(long));
+			if (!f->file_map) {
+				log_err("fio: failed allocating random map. If running a large number of jobs, try the 'norandommap' option\n");
+				return 1;	/* NOTE(review): maps allocated for earlier files are not freed here */
+			}
+			f->num_maps = num_maps;
+			memset(f->file_map, 0, num_maps * sizeof(long));
+		}
+	}
+
+	os_random_seed(seeds[4], &td->random_state);
+	return 0;
+}
+
+
+/*
  * Adds a job to the list of things todo. Sanitizes the various options
  * to make sure we don't have conflicts, and initializes various
  * members of td.
@@ -966,6 +1035,9 @@
 	td->groupid = groupid;
 	prev_group_jobs++;
 
+	if (init_random_state(td))
+		goto err;
+
 	if (setup_rate(td))
 		goto err;
 
@@ -1039,65 +1111,6 @@
 	return -1;
 }
 
-/*
- * Initialize the various random states we need (random io, block size ranges,
- * read/write mix, etc).
- */
-int init_random_state(struct thread_data *td)
-{
-	unsigned long seeds[5];
-	int fd, num_maps, blocks;
-	struct fio_file *f;
-	unsigned int i;
-
-	if (td->io_ops->flags & FIO_DISKLESSIO)
-		return 0;
-
-	fd = open("/dev/urandom", O_RDONLY);
-	if (fd == -1) {
-		td_verror(td, errno, "open");
-		return 1;
-	}
-
-	if (read(fd, seeds, sizeof(seeds)) < (int) sizeof(seeds)) {
-		td_verror(td, EIO, "read");
-		close(fd);
-		return 1;
-	}
-
-	close(fd);
-
-	os_random_seed(seeds[0], &td->bsrange_state);
-	os_random_seed(seeds[1], &td->verify_state);
-	os_random_seed(seeds[2], &td->rwmix_state);
-
-	if (td->file_service_type == FIO_FSERVICE_RANDOM)
-		os_random_seed(seeds[3], &td->next_file_state);
-
-	if (!td_random(td))
-		return 0;
-
-	if (td->rand_repeatable)
-		seeds[4] = FIO_RANDSEED * td->thread_number;
-
-	if (!td->norandommap) {
-		for_each_file(td, f, i) {
-			blocks = (f->real_file_size + td->rw_min_bs - 1) / td->rw_min_bs;
-			num_maps = (blocks + BLOCKS_PER_MAP-1)/ BLOCKS_PER_MAP;
-			f->file_map = malloc(num_maps * sizeof(long));
-			if (!f->file_map) {
-				log_err("fio: failed allocating random map. If running a large number of jobs, try the 'norandommap' option\n");
-				return 1;
-			}
-			f->num_maps = num_maps;
-			memset(f->file_map, 0, num_maps * sizeof(long));
-		}
-	}
-
-	os_random_seed(seeds[4], &td->random_state);
-	return 0;
-}
-
 static void fill_cpu_mask(os_cpu_mask_t cpumask, int cpu)
 {
 #ifdef FIO_HAVE_CPU_AFFINITY