blob: 588679167c888146d083bf38f1f0238027bef58e [file] [log] [blame]
/*
 * perf stat: /usr/bin/time -alike performance counter statistics utility
 *
 * It summarizes the counter events of all tasks (and child tasks),
 * covering all CPUs that the command (or workload) executes on.
 * It only counts the per-task events of the workload started,
 * independent of how many other tasks run on those CPUs.
 *
 * Sample output:
 *
 *   $ perf stat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
 *
 *    Performance counter stats for 'ls':
 *
 *           163516953 instructions
 *                2295 cache-misses
 *             2855182 branch-misses
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
31
Peter Zijlstra1a482f32009-05-23 18:28:58 +020032#include "perf.h"
Ingo Molnar16f762a2009-05-27 09:10:38 +020033#include "builtin.h"
Ingo Molnar148be2c2009-04-27 08:02:14 +020034#include "util/util.h"
Ingo Molnar52425192009-05-26 09:17:18 +020035#include "util/parse-options.h"
36#include "util/parse-events.h"
Ingo Molnarddcacfa2009-04-20 15:37:32 +020037
Ingo Molnarddcacfa2009-04-20 15:37:32 +020038#include <sys/prctl.h>
Peter Zijlstra16c8a102009-05-05 17:50:27 +020039
Ingo Molnarddcacfa2009-04-20 15:37:32 +020040static int system_wide = 0;
Ingo Molnar52425192009-05-26 09:17:18 +020041static int inherit = 1;
Ingo Molnarddcacfa2009-04-20 15:37:32 +020042
Ingo Molnar52425192009-05-26 09:17:18 +020043static __u64 default_event_id[MAX_COUNTERS] = {
Ingo Molnarddcacfa2009-04-20 15:37:32 +020044 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
45 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
46 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
47 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
48
49 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
50 EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
51 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
52 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
53};
Ingo Molnar52425192009-05-26 09:17:18 +020054
Ingo Molnarddcacfa2009-04-20 15:37:32 +020055static int default_interval = 100000;
56static int event_count[MAX_COUNTERS];
57static int fd[MAX_NR_CPUS][MAX_COUNTERS];
58
Ingo Molnar52425192009-05-26 09:17:18 +020059static int target_pid = -1;
Ingo Molnarddcacfa2009-04-20 15:37:32 +020060static int nr_cpus = 0;
Ingo Molnarddcacfa2009-04-20 15:37:32 +020061static unsigned int page_size;
62
Ingo Molnar66cf7822009-04-30 13:53:33 +020063static int scale = 1;
Ingo Molnarddcacfa2009-04-20 15:37:32 +020064
65static const unsigned int default_count[] = {
66 1000000,
67 1000000,
68 10000,
69 10000,
70 1000000,
71 10000,
72};
73
Ingo Molnar2996f5d2009-05-29 09:10:54 +020074static __u64 event_res[MAX_COUNTERS][3];
75static __u64 event_scaled[MAX_COUNTERS];
76
Ingo Molnarbe1ac0d2009-05-29 09:10:54 +020077static __u64 runtime_nsecs;
Ingo Molnard7c29312009-05-30 12:38:51 +020078static __u64 walltime_nsecs;
Ingo Molnarbe1ac0d2009-05-29 09:10:54 +020079
Ingo Molnarddcacfa2009-04-20 15:37:32 +020080static void create_perfstat_counter(int counter)
81{
82 struct perf_counter_hw_event hw_event;
83
84 memset(&hw_event, 0, sizeof(hw_event));
85 hw_event.config = event_id[counter];
86 hw_event.record_type = 0;
Ingo Molnar52425192009-05-26 09:17:18 +020087 hw_event.nmi = 1;
Peter Zijlstra16c8a102009-05-05 17:50:27 +020088 hw_event.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL;
89 hw_event.exclude_user = event_mask[counter] & EVENT_MASK_USER;
90
Ingo Molnarddcacfa2009-04-20 15:37:32 +020091 if (scale)
92 hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
93 PERF_FORMAT_TOTAL_TIME_RUNNING;
94
95 if (system_wide) {
96 int cpu;
97 for (cpu = 0; cpu < nr_cpus; cpu ++) {
98 fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
99 if (fd[cpu][counter] < 0) {
100 printf("perfstat error: syscall returned with %d (%s)\n",
101 fd[cpu][counter], strerror(errno));
102 exit(-1);
103 }
104 }
105 } else {
Ingo Molnar52425192009-05-26 09:17:18 +0200106 hw_event.inherit = inherit;
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200107 hw_event.disabled = 1;
108
109 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
110 if (fd[0][counter] < 0) {
111 printf("perfstat error: syscall returned with %d (%s)\n",
112 fd[0][counter], strerror(errno));
113 exit(-1);
114 }
115 }
116}
117
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200118/*
119 * Does the counter have nsecs as a unit?
120 */
121static inline int nsec_counter(int counter)
122{
123 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK))
124 return 1;
125 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK))
126 return 1;
127
128 return 0;
129}
130
131/*
Ingo Molnar2996f5d2009-05-29 09:10:54 +0200132 * Read out the results of a single counter:
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200133 */
Ingo Molnar2996f5d2009-05-29 09:10:54 +0200134static void read_counter(int counter)
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200135{
Ingo Molnar2996f5d2009-05-29 09:10:54 +0200136 __u64 *count, single_count[3];
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200137 ssize_t res;
138 int cpu, nv;
139 int scaled;
140
Ingo Molnar2996f5d2009-05-29 09:10:54 +0200141 count = event_res[counter];
142
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200143 count[0] = count[1] = count[2] = 0;
Ingo Molnar2996f5d2009-05-29 09:10:54 +0200144
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200145 nv = scale ? 3 : 1;
146 for (cpu = 0; cpu < nr_cpus; cpu ++) {
147 res = read(fd[cpu][counter], single_count, nv * sizeof(__u64));
148 assert(res == nv * sizeof(__u64));
149
150 count[0] += single_count[0];
151 if (scale) {
152 count[1] += single_count[1];
153 count[2] += single_count[2];
154 }
155 }
156
157 scaled = 0;
158 if (scale) {
159 if (count[2] == 0) {
Ingo Molnar2996f5d2009-05-29 09:10:54 +0200160 event_scaled[counter] = -1;
161 count[0] = 0;
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200162 return;
163 }
Ingo Molnar2996f5d2009-05-29 09:10:54 +0200164
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200165 if (count[2] < count[1]) {
Ingo Molnar2996f5d2009-05-29 09:10:54 +0200166 event_scaled[counter] = 1;
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200167 count[0] = (unsigned long long)
168 ((double)count[0] * count[1] / count[2] + 0.5);
169 }
170 }
Ingo Molnarbe1ac0d2009-05-29 09:10:54 +0200171 /*
172 * Save the full runtime - to allow normalization during printout:
173 */
174 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK))
175 runtime_nsecs = count[0];
Ingo Molnar2996f5d2009-05-29 09:10:54 +0200176}
177
178/*
179 * Print out the results of a single counter:
180 */
181static void print_counter(int counter)
182{
183 __u64 *count;
184 int scaled;
185
186 count = event_res[counter];
187 scaled = event_scaled[counter];
188
189 if (scaled == -1) {
190 fprintf(stderr, " %14s %-20s\n",
191 "<not counted>", event_name(counter));
192 return;
193 }
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200194
195 if (nsec_counter(counter)) {
196 double msecs = (double)count[0] / 1000000;
197
Ingo Molnard7c29312009-05-30 12:38:51 +0200198 fprintf(stderr, " %14.6f %-20s",
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200199 msecs, event_name(counter));
Ingo Molnard7c29312009-05-30 12:38:51 +0200200 if (event_id[counter] ==
201 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
202
203 fprintf(stderr, " # %11.3f CPU utilization factor",
204 (double)count[0] / (double)walltime_nsecs);
205 }
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200206 } else {
Ingo Molnarbe1ac0d2009-05-29 09:10:54 +0200207 fprintf(stderr, " %14Ld %-20s",
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200208 count[0], event_name(counter));
Ingo Molnarbe1ac0d2009-05-29 09:10:54 +0200209 if (runtime_nsecs)
Ingo Molnard7c29312009-05-30 12:38:51 +0200210 fprintf(stderr, " # %11.3f M/sec",
Ingo Molnarbe1ac0d2009-05-29 09:10:54 +0200211 (double)count[0]/runtime_nsecs*1000.0);
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200212 }
213 if (scaled)
214 fprintf(stderr, " (scaled from %.2f%%)",
215 (double) count[2] / count[1] * 100);
216 fprintf(stderr, "\n");
217}
218
Ingo Molnar16f762a2009-05-27 09:10:38 +0200219static int do_perfstat(int argc, const char **argv)
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200220{
221 unsigned long long t0, t1;
222 int counter;
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200223 int status;
224 int pid;
225
226 if (!system_wide)
227 nr_cpus = 1;
228
229 for (counter = 0; counter < nr_counters; counter++)
230 create_perfstat_counter(counter);
231
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200232 /*
233 * Enable counters and exec the command:
234 */
235 t0 = rdclock();
236 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
237
238 if ((pid = fork()) < 0)
239 perror("failed to fork");
240 if (!pid) {
Ingo Molnar52425192009-05-26 09:17:18 +0200241 if (execvp(argv[0], (char **)argv)) {
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200242 perror(argv[0]);
243 exit(-1);
244 }
245 }
246 while (wait(&status) >= 0)
247 ;
248 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
249 t1 = rdclock();
250
Ingo Molnard7c29312009-05-30 12:38:51 +0200251 walltime_nsecs = t1 - t0;
252
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200253 fflush(stdout);
254
255 fprintf(stderr, "\n");
256 fprintf(stderr, " Performance counter stats for \'%s\':\n",
257 argv[0]);
258 fprintf(stderr, "\n");
259
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200260 for (counter = 0; counter < nr_counters; counter++)
Ingo Molnar2996f5d2009-05-29 09:10:54 +0200261 read_counter(counter);
262
263 for (counter = 0; counter < nr_counters; counter++)
Ingo Molnarc04f5e52009-05-29 09:10:54 +0200264 print_counter(counter);
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200265
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200266
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200267 fprintf(stderr, "\n");
268 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
269 (double)(t1-t0)/1e6);
270 fprintf(stderr, "\n");
271
272 return 0;
273}
274
/*
 * Signal handler that deliberately does nothing: installing it lets a
 * signal be delivered without terminating this process.
 */
static void skip_signal(int signo)
{
	(void)signo;
}
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200278
Ingo Molnar52425192009-05-26 09:17:18 +0200279static const char * const stat_usage[] = {
280 "perf stat [<options>] <command>",
281 NULL
282};
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200283
Ingo Molnar52425192009-05-26 09:17:18 +0200284static char events_help_msg[EVENTS_HELP_MAX];
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200285
Ingo Molnar52425192009-05-26 09:17:18 +0200286static const struct option options[] = {
287 OPT_CALLBACK('e', "event", NULL, "event",
288 events_help_msg, parse_events),
289 OPT_INTEGER('c', "count", &default_interval,
290 "event period to sample"),
291 OPT_BOOLEAN('i', "inherit", &inherit,
292 "child tasks inherit counters"),
293 OPT_INTEGER('p', "pid", &target_pid,
294 "stat events on existing pid"),
295 OPT_BOOLEAN('a', "all-cpus", &system_wide,
296 "system-wide collection from all CPUs"),
297 OPT_BOOLEAN('l', "scale", &scale,
298 "scale/normalize counters"),
299 OPT_END()
300};
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200301
Ingo Molnar52425192009-05-26 09:17:18 +0200302int cmd_stat(int argc, const char **argv, const char *prefix)
303{
304 int counter;
305
306 page_size = sysconf(_SC_PAGE_SIZE);
307
308 create_events_help(events_help_msg);
309 memcpy(event_id, default_event_id, sizeof(default_event_id));
310
311 argc = parse_options(argc, argv, options, stat_usage, 0);
312 if (!argc)
313 usage_with_options(stat_usage, options);
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200314
315 if (!nr_counters) {
316 nr_counters = 8;
317 }
318
319 for (counter = 0; counter < nr_counters; counter++) {
320 if (event_count[counter])
321 continue;
322
323 event_count[counter] = default_interval;
324 }
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200325 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
326 assert(nr_cpus <= MAX_NR_CPUS);
327 assert(nr_cpus >= 0);
328
Ingo Molnar58d7e992009-05-15 11:03:23 +0200329 /*
330 * We dont want to block the signals - that would cause
331 * child tasks to inherit that and Ctrl-C would not work.
332 * What we want is for Ctrl-C to work in the exec()-ed
333 * task, but being ignored by perf stat itself:
334 */
335 signal(SIGINT, skip_signal);
336 signal(SIGALRM, skip_signal);
337 signal(SIGABRT, skip_signal);
338
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200339 return do_perfstat(argc, argv);
340}