Add t/genzipf to play with distribution settings

Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/Makefile b/Makefile
index 94856e0..fc3a795 100644
--- a/Makefile
+++ b/Makefile
@@ -76,8 +76,17 @@
 T_IEEE_OBJS += ieee754.o
 T_IEEE_PROGS = t/ieee754
 
+T_ZIPF_OBJS = t/genzipf.o
+T_ZIPF_OBJS += t/log.o lib/ieee754.o lib/rand.o lib/zipf.o
+T_ZIPF_PROGS = t/genzipf
+
 T_OBJS = $(T_SMALLOC_OBJS)
 T_OBJS += $(T_IEEE_OBJS)
+T_OBJS += $(T_ZIPF_OBJS)
+
+T_PROGS = $(T_SMALLOC_PROGS)
+T_PROGS += $(T_IEEE_PROGS)
+T_PROGS += $(T_ZIPF_PROGS)
 
 ifneq ($(findstring $(MAKEFLAGS),s),s)
 ifndef V
@@ -119,6 +128,9 @@
 t/ieee754: $(T_IEEE_OBJS)
 	$(QUIET_CC)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(T_IEEE_OBJS) $(LIBS) $(LDFLAGS)
 
+t/genzipf: $(T_ZIPF_OBJS)
+	$(QUIET_CC)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(T_ZIPF_OBJS) $(LIBS) $(LDFLAGS)
+
 fio: $(OBJS)
 	$(QUIET_CC)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(OBJS) $(LIBS) $(LDFLAGS)
 
diff --git a/t/genzipf.c b/t/genzipf.c
new file mode 100644
index 0000000..cfd4f66
--- /dev/null
+++ b/t/genzipf.c
@@ -0,0 +1,110 @@
+/*
+ * Generate/analyze pareto/zipf distributions to better understand
+ * what an access pattern would look like.
+ *
+ * For instance, the following would generate a zipf distribution
+ * with theta 1.2, using 100,000 values and split the reporting into
+ * 20 buckets:
+ *
+ *	t/genzipf zipf 1.2 100000 20
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include "../lib/zipf.h"
+
+/* qsort() comparator: ascending order, without truncating the difference to int */
+static int val_cmp(const void *p1, const void *p2)
+{
+	const unsigned long *v1 = p1, *v2 = p2;
+
+	return (*v1 > *v2) - (*v1 < *v2);
+}
+
+/*
+ * Draw 'values' samples from the chosen distribution and print the share
+ * of samples per output bucket. Returns 0 on success, 1 on any error.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned long nranges, output_nranges, *vals;
+	unsigned long i, j, cur_vals, max_val, interval, step;
+	double *output;
+	struct zipf_state zs;
+	int use_zipf;
+	double val;
+
+	if (argc < 4) {
+		printf("%s: {zipf,pareto} val values [output ranges]\n", argv[0]);
+		return 1;
+	}
+
+	if (!strcmp(argv[1], "zipf"))
+		use_zipf = 1;
+	else if (!strcmp(argv[1], "pareto"))
+		use_zipf = 0;
+	else {
+		printf("Bad distribution type <%s>\n", argv[1]);
+		return 1;
+	}
+
+	val = atof(argv[2]);
+	nranges = strtoul(argv[3], NULL, 10);
+	output_nranges = argc == 5 ? strtoul(argv[4], NULL, 10) : nranges;
+
+	/* Both counts are used as divisors below, so zero must be rejected */
+	if (!nranges || !output_nranges) {
+		printf("Number of values and output ranges must be non-zero\n");
+		return 1;
+	}
+
+	printf("Generating %s distribution with %f input and %lu ranges\n", use_zipf ? "zipf" : "pareto", val, nranges);
+
+	if (use_zipf)
+		zipf_init(&zs, nranges, val);
+	else
+		pareto_init(&zs, nranges, val);
+
+	vals = calloc(nranges, sizeof(*vals));
+	output = calloc(output_nranges, sizeof(*output));
+	if (!vals || !output) {
+		printf("Failed to allocate memory\n");
+		free(output);
+		free(vals);
+		return 1;
+	}
+
+	for (i = 0, max_val = 0; i < nranges; i++) {
+		vals[i] = use_zipf ? zipf_next(&zs) : pareto_next(&zs);
+		if (vals[i] > max_val)
+			max_val = vals[i];
+	}
+
+	qsort(vals, nranges, sizeof(*vals), val_cmp);
+
+	/* Bucket width, rounded up so output_nranges buckets cover max_val */
+	interval = step = (max_val + output_nranges - 1) / output_nranges;
+
+	for (i = j = 0, cur_vals = 0; i < nranges; i++) {
+		/* Close every bucket ending below this value, empty ones too */
+		while (vals[i] > interval) {
+			output[j++] = (double) cur_vals / (double) nranges * 100.0;
+			cur_vals = 0;
+			interval += step;
+		}
+		cur_vals++;
+	}
+
+	/* The last bucket is still open when the samples run out */
+	output[j++] = (double) cur_vals / (double) nranges * 100.0;
+
+	for (i = 0; i < j; i++)
+		printf("%.2f%%\n", output[i]);
+
+	free(output);
+	free(vals);
+	return 0;
+}
diff --git a/t/log.c b/t/log.c
index 7f1de27..ac02303 100644
--- a/t/log.c
+++ b/t/log.c
@@ -13,3 +13,16 @@
 
 	return fwrite(buffer, len, 1, stderr);
 }
+
+/* printf-style logging to stdout; output longer than the buffer is truncated. */
+int log_info(const char *format, ...)
+{
+	char buffer[1024];
+	va_list args;
+	int len;
+	va_start(args, format);
+	len = vsnprintf(buffer, sizeof(buffer), format, args);
+	va_end(args);
+	len = len < (int) sizeof(buffer) ? len : (int) sizeof(buffer) - 1;
+	return len > 0 ? fwrite(buffer, len, 1, stdout) : 0;
+}