blob: 59f1d87f41e938592edc15fb21293e2e07913765 [file] [log] [blame]
Peter Zijlstrade9ac072009-04-08 15:01:31 +02001
2
Thomas Gleixner6eda5832009-05-01 18:29:57 +02003#include "util/util.h"
4
Peter Zijlstrade9ac072009-04-08 15:01:31 +02005#include <sys/types.h>
6#include <sys/stat.h>
7#include <sys/time.h>
8#include <unistd.h>
9#include <stdint.h>
10#include <stdlib.h>
11#include <string.h>
12#include <limits.h>
13#include <getopt.h>
14#include <assert.h>
15#include <fcntl.h>
16#include <stdio.h>
17#include <errno.h>
18#include <ctype.h>
19#include <time.h>
20#include <sched.h>
21#include <pthread.h>
22
23#include <sys/syscall.h>
24#include <sys/ioctl.h>
25#include <sys/poll.h>
26#include <sys/prctl.h>
27#include <sys/wait.h>
28#include <sys/uio.h>
29#include <sys/mman.h>
30
31#include <linux/unistd.h>
32#include <linux/types.h>
33
34#include "../../include/linux/perf_counter.h"
35
Thomas Gleixner6eda5832009-05-01 18:29:57 +020036#include "perf.h"
Peter Zijlstrade9ac072009-04-08 15:01:31 +020037
38static int nr_counters = 0;
39static __u64 event_id[MAX_COUNTERS] = { };
40static int default_interval = 100000;
41static int event_count[MAX_COUNTERS];
42static int fd[MAX_NR_CPUS][MAX_COUNTERS];
43static int nr_cpus = 0;
44static unsigned int page_size;
45static unsigned int mmap_pages = 16;
46static int output;
47static char *output_name = "output.perf";
48static int group = 0;
49static unsigned int realtime_prio = 0;
50
/*
 * Table of six default sample periods.
 * NOTE(review): nothing in the visible part of this file reads it; the
 * meaning of each slot is presumably "one entry per hardware event id" -
 * confirm against the other users before relying on that.
 */
const unsigned int default_count[] = {
	1000000, 1000000,
	  10000,   10000,
	1000000,   10000,
};
59
Peter Zijlstrade9ac072009-04-08 15:01:31 +020060struct event_symbol {
61 __u64 event;
62 char *symbol;
63};
64
65static struct event_symbol event_symbols[] = {
66 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
67 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
68 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
69 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
70 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
71 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
72 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
73 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
74 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
75
76 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
77 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
78 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
79 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
80 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
81 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
82 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
83 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
84 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
85 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
86};
87
88/*
89 * Each event can have multiple symbolic names.
90 * Symbolic names are (almost) exactly matched.
91 */
92static __u64 match_event_symbols(char *str)
93{
94 __u64 config, id;
95 int type;
96 unsigned int i;
97
98 if (sscanf(str, "r%llx", &config) == 1)
99 return config | PERF_COUNTER_RAW_MASK;
100
101 if (sscanf(str, "%d:%llu", &type, &id) == 2)
102 return EID(type, id);
103
104 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
105 if (!strncmp(str, event_symbols[i].symbol,
106 strlen(event_symbols[i].symbol)))
107 return event_symbols[i].event;
108 }
109
110 return ~0ULL;
111}
112
113static int parse_events(char *str)
114{
115 __u64 config;
116
117again:
118 if (nr_counters == MAX_COUNTERS)
119 return -1;
120
121 config = match_event_symbols(str);
122 if (config == ~0ULL)
123 return -1;
124
125 event_id[nr_counters] = config;
126 nr_counters++;
127
128 str = strstr(str, ",");
129 if (str) {
130 str++;
131 goto again;
132 }
133
134 return 0;
135}
136
/*
 * Extract the bit-field @name from the counter config word @config using
 * the PERF_COUNTER_<name>_MASK / PERF_COUNTER_<name>_SHIFT pair.
 *
 * @config is parenthesized so that compound arguments such as (a | b) are
 * masked as a whole; '&' binds tighter than '|', so without the parentheses
 * only the last operand would be masked.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
144
145static void display_events_help(void)
146{
147 unsigned int i;
148 __u64 e;
149
150 printf(
151 " -e EVENT --event=EVENT # symbolic-name abbreviations");
152
153 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
154 int type, id;
155
156 e = event_symbols[i].event;
157 type = PERF_COUNTER_TYPE(e);
158 id = PERF_COUNTER_ID(e);
159
160 printf("\n %d:%d: %-20s",
161 type, id, event_symbols[i].symbol);
162 }
163
164 printf("\n"
165 " rNNN: raw PMU events (eventsel+umask)\n\n");
166}
167
168static void display_help(void)
169{
170 printf(
171 "Usage: perf-record [<options>]\n"
172 "perf-record Options (up to %d event types can be specified at once):\n\n",
173 MAX_COUNTERS);
174
175 display_events_help();
176
177 printf(
178 " -c CNT --count=CNT # event period to sample\n"
179 " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
180 " -o file --output=<file> # output file\n"
181 " -r prio --realtime=<prio> # use RT prio\n"
182 );
183
184 exit(0);
185}
186
187static void process_options(int argc, char *argv[])
188{
189 int error = 0, counter;
190
191 for (;;) {
192 int option_index = 0;
193 /** Options for getopt */
194 static struct option long_options[] = {
195 {"count", required_argument, NULL, 'c'},
196 {"event", required_argument, NULL, 'e'},
197 {"mmap_pages", required_argument, NULL, 'm'},
198 {"output", required_argument, NULL, 'o'},
199 {"realtime", required_argument, NULL, 'r'},
200 {NULL, 0, NULL, 0 }
201 };
202 int c = getopt_long(argc, argv, "+:c:e:m:o:r:",
203 long_options, &option_index);
204 if (c == -1)
205 break;
206
207 switch (c) {
208 case 'c': default_interval = atoi(optarg); break;
209 case 'e': error = parse_events(optarg); break;
210 case 'm': mmap_pages = atoi(optarg); break;
211 case 'o': output_name = strdup(optarg); break;
212 case 'r': realtime_prio = atoi(optarg); break;
213 default: error = 1; break;
214 }
215 }
216 if (error)
217 display_help();
218
219 if (!nr_counters) {
220 nr_counters = 1;
221 event_id[0] = 0;
222 }
223
224 for (counter = 0; counter < nr_counters; counter++) {
225 if (event_count[counter])
226 continue;
227
228 event_count[counter] = default_interval;
229 }
230}
231
/*
 * Reader-side bookkeeping for one memory-mapped counter buffer.
 */
struct mmap_data {
	int		counter;	/* index of the counter this mapping belongs to */
	void		*base;		/* start of the mapping; sample data begins one page in */
	unsigned int	mask;		/* size of the data area in bytes, minus one */
	unsigned int	prev;		/* head position up to which data was already consumed */
};
238
239static unsigned int mmap_read_head(struct mmap_data *md)
240{
241 struct perf_counter_mmap_page *pc = md->base;
242 int head;
243
244 head = pc->data_head;
245 rmb();
246
247 return head;
248}
249
250static long events;
251static struct timeval last_read, this_read;
252
253static void mmap_read(struct mmap_data *md)
254{
255 unsigned int head = mmap_read_head(md);
256 unsigned int old = md->prev;
257 unsigned char *data = md->base + page_size;
258 unsigned long size;
259 void *buf;
260 int diff;
261
262 gettimeofday(&this_read, NULL);
263
264 /*
265 * If we're further behind than half the buffer, there's a chance
266 * the writer will bite our tail and screw up the events under us.
267 *
268 * If we somehow ended up ahead of the head, we got messed up.
269 *
270 * In either case, truncate and restart at head.
271 */
272 diff = head - old;
273 if (diff > md->mask / 2 || diff < 0) {
274 struct timeval iv;
275 unsigned long msecs;
276
277 timersub(&this_read, &last_read, &iv);
278 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
279
280 fprintf(stderr, "WARNING: failed to keep up with mmap data."
281 " Last read %lu msecs ago.\n", msecs);
282
283 /*
284 * head points to a known good entry, start there.
285 */
286 old = head;
287 }
288
289 last_read = this_read;
290
291 if (old != head)
292 events++;
293
294 size = head - old;
295
296 if ((old & md->mask) + size != (head & md->mask)) {
297 buf = &data[old & md->mask];
298 size = md->mask + 1 - (old & md->mask);
299 old += size;
300 while (size) {
301 int ret = write(output, buf, size);
302 if (ret < 0) {
303 perror("failed to write");
304 exit(-1);
305 }
306 size -= ret;
307 buf += ret;
308 }
309 }
310
311 buf = &data[old & md->mask];
312 size = head - old;
313 old += size;
314 while (size) {
315 int ret = write(output, buf, size);
316 if (ret < 0) {
317 perror("failed to write");
318 exit(-1);
319 }
320 size -= ret;
321 buf += ret;
322 }
323
324 md->prev = old;
325}
326
/* Set to 1 by the SIGCHLD handler; polled by the sampling loop. */
static volatile int done = 0;

/*
 * Signal handler: flags the sampling loop to stop once SIGCHLD has been
 * delivered. Any other signal number is ignored.
 */
static void sigchld_handler(int sig)
{
	if (sig != SIGCHLD)
		return;

	done = 1;
}
334
Ingo Molnare33e0a42009-04-20 15:58:01 +0200335int cmd_record(int argc, const char **argv)
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200336{
337 struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
338 struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
339 struct perf_counter_hw_event hw_event;
340 int i, counter, group_fd, nr_poll = 0;
341 pid_t pid;
342 int ret;
343
344 page_size = sysconf(_SC_PAGE_SIZE);
345
346 process_options(argc, argv);
347
348 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
349 assert(nr_cpus <= MAX_NR_CPUS);
350 assert(nr_cpus >= 0);
351
352 output = open(output_name, O_CREAT|O_RDWR, S_IRWXU);
353 if (output < 0) {
354 perror("failed to create output file");
355 exit(-1);
356 }
357
358 argc -= optind;
359 argv += optind;
360
361 for (i = 0; i < nr_cpus; i++) {
362 group_fd = -1;
363 for (counter = 0; counter < nr_counters; counter++) {
364
365 memset(&hw_event, 0, sizeof(hw_event));
366 hw_event.config = event_id[counter];
367 hw_event.irq_period = event_count[counter];
368 hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID;
369 hw_event.nmi = 1;
370 hw_event.mmap = 1;
371 hw_event.comm = 1;
372
373 fd[i][counter] = sys_perf_counter_open(&hw_event, -1, i, group_fd, 0);
374 if (fd[i][counter] < 0) {
375 int err = errno;
376 printf("kerneltop error: syscall returned with %d (%s)\n",
377 fd[i][counter], strerror(err));
378 if (err == EPERM)
379 printf("Are you root?\n");
380 exit(-1);
381 }
382 assert(fd[i][counter] >= 0);
383 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
384
385 /*
386 * First counter acts as the group leader:
387 */
388 if (group && group_fd == -1)
389 group_fd = fd[i][counter];
390
391 event_array[nr_poll].fd = fd[i][counter];
392 event_array[nr_poll].events = POLLIN;
393 nr_poll++;
394
395 mmap_array[i][counter].counter = counter;
396 mmap_array[i][counter].prev = 0;
397 mmap_array[i][counter].mask = mmap_pages*page_size - 1;
398 mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
399 PROT_READ, MAP_SHARED, fd[i][counter], 0);
400 if (mmap_array[i][counter].base == MAP_FAILED) {
401 printf("kerneltop error: failed to mmap with %d (%s)\n",
402 errno, strerror(errno));
403 exit(-1);
404 }
405 }
406 }
407
408 signal(SIGCHLD, sigchld_handler);
409
410 pid = fork();
411 if (pid < 0)
412 perror("failed to fork");
413
414 if (!pid) {
415 if (execvp(argv[0], argv)) {
416 perror(argv[0]);
417 exit(-1);
418 }
419 }
420
421 if (realtime_prio) {
422 struct sched_param param;
423
424 param.sched_priority = realtime_prio;
425 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
426 printf("Could not set realtime priority.\n");
427 exit(-1);
428 }
429 }
430
431 /*
432 * TODO: store the current /proc/$/maps information somewhere
433 */
434
435 while (!done) {
436 int hits = events;
437
438 for (i = 0; i < nr_cpus; i++) {
439 for (counter = 0; counter < nr_counters; counter++)
440 mmap_read(&mmap_array[i][counter]);
441 }
442
443 if (hits == events)
444 ret = poll(event_array, nr_poll, 100);
445 }
446
447 return 0;
448}