perf bench: Allow passing an iteration count to "bench mem mem{cpy,set}"
"perf stat ... perf bench mem mem..." is pretty meaningless when using
small block sizes, as the overhead of invoking each test run basically
hides the actual test result in the noise. Repeating the invocation of
the function under test a number of times (via the new -i/--iterations
option, which defaults to 1) allows the results to become meaningful.
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/4F16D767020000780006D738@nat28.tlf.novell.com
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
index 9c0c6f0..59d4933 100644
--- a/tools/perf/bench/mem-memset.c
+++ b/tools/perf/bench/mem-memset.c
@@ -24,6 +24,7 @@
static const char *length_str = "1MB";
static const char *routine = "default";
+static int iterations = 1;
static bool use_clock;
static int clock_fd;
static bool only_prefault;
@@ -35,6 +36,8 @@
"available unit: B, MB, GB (upper and lower)"),
OPT_STRING('r', "routine", &routine, "default",
"Specify routine to copy"),
+ OPT_INTEGER('i', "iterations", &iterations,
+ "repeat memset() invocation this number of times"),
OPT_BOOLEAN('c', "clock", &use_clock,
"Use CPU clock for measuring"),
OPT_BOOLEAN('o', "only-prefault", &only_prefault,
@@ -117,6 +120,7 @@
{
u64 clock_start = 0ULL, clock_end = 0ULL;
void *dst = NULL;
+ int i;
alloc_mem(&dst, len);
@@ -124,7 +128,8 @@
fn(dst, -1, len);
clock_start = get_clock();
- fn(dst, 0, len);
+ for (i = 0; i < iterations; ++i)
+ fn(dst, i, len);
clock_end = get_clock();
free(dst);
@@ -135,6 +140,7 @@
{
struct timeval tv_start, tv_end, tv_diff;
void *dst = NULL;
+ int i;
alloc_mem(&dst, len);
@@ -142,7 +148,8 @@
fn(dst, -1, len);
BUG_ON(gettimeofday(&tv_start, NULL));
- fn(dst, 0, len);
+ for (i = 0; i < iterations; ++i)
+ fn(dst, i, len);
BUG_ON(gettimeofday(&tv_end, NULL));
timersub(&tv_end, &tv_start, &tv_diff);