perf bench: Add memcpy() benchmark
'perf bench mem memcpy' is a benchmark suite for measuring memcpy()
performance.
Example on a Intel(R) Core(TM)2 Duo CPU E6850 @ 3.00GHz:
| % perf bench mem memcpy -l 1GB
| # Running mem/memcpy benchmark...
| # Copying 1MB Bytes from 0xb7d98008 to 0xb7e99008 ...
|
| 726.216412 MB/Sec
Signed-off-by: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1258471212-30281-1-git-send-email-mitake@dcl.info.waseda.ac.jp>
[ v2: updated changelog, clarified history of builtin-bench.c ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 3f0666a..53e663a 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -432,6 +432,7 @@
# Benchmark modules
BUILTIN_OBJS += bench/sched-messaging.o
BUILTIN_OBJS += bench/sched-pipe.o
+BUILTIN_OBJS += bench/mem-memcpy.o
BUILTIN_OBJS += builtin-help.o
BUILTIN_OBJS += builtin-sched.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 9fbd8d7..f7781c6 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -3,6 +3,7 @@
extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
new file mode 100644
index 0000000..d4f4f98
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy.c
@@ -0,0 +1,186 @@
+/*
+ * mem-memcpy.c
+ *
+ * memcpy: Simple memory copy in various ways
+ *
+ * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ */
+#include <ctype.h>
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/string.h"
+#include "../util/header.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024
+
+static const char *length_str = "1MB";
+static const char *routine = "default";
+static int use_clock = 0;
+
+static const struct option options[] = {
+ OPT_STRING('l', "length", &length_str, "1MB",
+ "Specify length of memory to copy. "
+ "available unit: B, MB, GB (upper and lower)"),
+ OPT_STRING('r', "routine", &routine, "default",
+ "Specify routine to copy"),
+ OPT_BOOLEAN('c', "clock", &use_clock,
+ "Use CPU clock for measuring"),
+ OPT_END()
+};
+
+struct routine {
+ const char *name;
+ const char *desc;
+ void * (*fn)(void *dst, const void *src, size_t len);
+};
+
+struct routine routines[] = {
+ { "default",
+ "Default memcpy() provided by glibc",
+ memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
+static const char * const bench_mem_memcpy_usage[] = {
+ "perf bench mem memcpy <options>",
+ NULL
+};
+
+static int clock_fd;
+
+static struct perf_event_attr clock_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES
+};
+
+static void init_clock(void)
+{
+ clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+ BUG_ON(clock_fd < 0);
+}
+
+static u64 get_clock(void)
+{
+ int ret;
+ u64 clk;
+
+ ret = read(clock_fd, &clk, sizeof(u64));
+ BUG_ON(ret != sizeof(u64));
+
+ return clk;
+}
+
+static double timeval2double(struct timeval *ts)
+{
+ return (double)ts->tv_sec +
+ (double)ts->tv_usec / (double)1000000;
+}
+
+int bench_mem_memcpy(int argc, const char **argv,
+ const char *prefix __used)
+{
+ int i;
+ void *dst, *src;
+ size_t length;
+ double bps = 0.0;
+ struct timeval tv_start, tv_end, tv_diff;
+ u64 clock_start, clock_end, clock_diff;
+
+ clock_start = clock_end = clock_diff = 0ULL;
+ argc = parse_options(argc, argv, options,
+ bench_mem_memcpy_usage, 0);
+
+ tv_diff.tv_sec = 0;
+ tv_diff.tv_usec = 0;
+ length = (size_t)perf_atoll((char *)length_str);
+ if ((long long int)length <= 0) {
+ fprintf(stderr, "Invalid length:%s\n", length_str);
+ return 1;
+ }
+
+ for (i = 0; routines[i].name; i++) {
+ if (!strcmp(routines[i].name, routine))
+ break;
+ }
+ if (!routines[i].name) {
+ printf("Unknown routine:%s\n", routine);
+ printf("Available routines...\n");
+ for (i = 0; routines[i].name; i++) {
+ printf("\t%s ... %s\n",
+ routines[i].name, routines[i].desc);
+ }
+ return 1;
+ }
+
+ dst = calloc(length, sizeof(char));
+ assert(dst);
+ src = calloc(length, sizeof(char));
+ assert(src);
+
+ if (bench_format == BENCH_FORMAT_DEFAULT) {
+ printf("# Copying %s Bytes from %p to %p ...\n\n",
+ length_str, src, dst);
+ }
+
+ if (use_clock) {
+ init_clock();
+ clock_start = get_clock();
+ } else
+ BUG_ON(gettimeofday(&tv_start, NULL));
+
+ routines[i].fn(dst, src, length);
+
+ if (use_clock) {
+ clock_end = get_clock();
+ clock_diff = clock_end - clock_start;
+ } else {
+ BUG_ON(gettimeofday(&tv_end, NULL));
+ timersub(&tv_end, &tv_start, &tv_diff);
+ bps = (double)((double)length / timeval2double(&tv_diff));
+ }
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ if (use_clock) {
+ printf(" %14lf Clock/Byte\n",
+ (double)clock_diff / (double)length);
+ } else {
+ if (bps < K)
+ printf(" %14lf B/Sec\n", bps);
+ else if (bps < K * K)
+ printf(" %14lfd KB/Sec\n", bps / 1024);
+ else if (bps < K * K * K)
+ printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
+ else {
+ printf(" %14lf GB/Sec\n",
+ bps / 1024 / 1024 / 1024);
+ }
+ }
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ if (use_clock) {
+ printf("%14lf\n",
+ (double)clock_diff / (double)length);
+ } else
+ printf("%lf\n", bps);
+ break;
+ default:
+ /* reaching here is something disaster */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 90c39ba..e043eb8 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -12,6 +12,7 @@
*
* Available subsystem list:
* sched ... scheduler and IPC mechanism
+ * mem ... memory access performance
*
*/
@@ -43,6 +44,15 @@
NULL }
};
+static struct bench_suite mem_suites[] = {
+ { "memcpy",
+ "Simple memory copy in various ways",
+ bench_mem_memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
struct bench_subsys {
const char *name;
const char *summary;
@@ -53,9 +63,12 @@
{ "sched",
"scheduler and IPC mechanism",
sched_suites },
+ { "mem",
+ "memory access performance",
+ mem_suites },
{ NULL,
NULL,
- NULL }
+ NULL }
};
static void dump_suites(int subsys_index)