blob: 6fa6ed664950e69dca06f89ec21a41a279c12404 [file] [log] [blame]
Peter Zijlstrade9ac072009-04-08 15:01:31 +02001
2
Peter Zijlstra1a482f32009-05-23 18:28:58 +02003#include "perf.h"
Thomas Gleixner6eda5832009-05-01 18:29:57 +02004#include "util/util.h"
Ingo Molnar0e9b20b2009-05-26 09:17:18 +02005#include "util/parse-options.h"
Ingo Molnar8ad8db32009-05-26 11:10:09 +02006#include "util/parse-events.h"
Ingo Molnar0e9b20b2009-05-26 09:17:18 +02007#include "util/exec_cmd.h"
Thomas Gleixner6eda5832009-05-01 18:29:57 +02008
Peter Zijlstrade9ac072009-04-08 15:01:31 +02009#include <sched.h>
Peter Zijlstrade9ac072009-04-08 15:01:31 +020010
/*
 * Round x up to the next multiple of a, where a must be a power of two.
 *
 * __ALIGN_MASK() does the actual work on a precomputed (a - 1) mask;
 * ALIGN() builds that mask in the type of x so the arithmetic is not
 * silently widened or narrowed.  __typeof__ is used instead of typeof
 * because the latter is not a keyword in strict ISO C modes.
 */
#define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
#define ALIGN(x, a)		__ALIGN_MASK(x, (__typeof__(x))(a)-1)
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -030013
Peter Zijlstrade9ac072009-04-08 15:01:31 +020014static int default_interval = 100000;
15static int event_count[MAX_COUNTERS];
Ingo Molnar8ad8db32009-05-26 11:10:09 +020016
Peter Zijlstrade9ac072009-04-08 15:01:31 +020017static int fd[MAX_NR_CPUS][MAX_COUNTERS];
18static int nr_cpus = 0;
19static unsigned int page_size;
20static unsigned int mmap_pages = 16;
21static int output;
Ingo Molnar0e9b20b2009-05-26 09:17:18 +020022static const char *output_name = "output.perf";
Peter Zijlstrade9ac072009-04-08 15:01:31 +020023static int group = 0;
Peter Zijlstra16c8a102009-05-05 17:50:27 +020024static unsigned int realtime_prio = 0;
25static int system_wide = 0;
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -030026static pid_t target_pid = -1;
Peter Zijlstra16c8a102009-05-05 17:50:27 +020027static int inherit = 1;
28static int nmi = 1;
Peter Zijlstrade9ac072009-04-08 15:01:31 +020029
/*
 * Table of default sample counts, one entry per index.
 * NOTE(review): nothing in this file reads the table; the meaning of each
 * index is defined by whoever consumes it.
 */
const unsigned int default_count[] = {
	[0] = 1000000,
	[1] = 1000000,
	[2] =   10000,
	[3] =   10000,
	[4] = 1000000,
	[5] =   10000,
};
38
/* Reader-side state for one counter's memory-mapped buffer. */
struct mmap_data {
	int		counter;	/* index of the counter this mapping belongs to */
	void		*base;		/* start of the mapping; data begins one page in */
	unsigned int	mask;		/* data area size in bytes minus one */
	unsigned int	prev;		/* head position up to which data was consumed */
};
45
46static unsigned int mmap_read_head(struct mmap_data *md)
47{
48 struct perf_counter_mmap_page *pc = md->base;
49 int head;
50
51 head = pc->data_head;
52 rmb();
53
54 return head;
55}
56
57static long events;
58static struct timeval last_read, this_read;
59
60static void mmap_read(struct mmap_data *md)
61{
62 unsigned int head = mmap_read_head(md);
63 unsigned int old = md->prev;
64 unsigned char *data = md->base + page_size;
65 unsigned long size;
66 void *buf;
67 int diff;
68
69 gettimeofday(&this_read, NULL);
70
71 /*
72 * If we're further behind than half the buffer, there's a chance
73 * the writer will bite our tail and screw up the events under us.
74 *
75 * If we somehow ended up ahead of the head, we got messed up.
76 *
77 * In either case, truncate and restart at head.
78 */
79 diff = head - old;
80 if (diff > md->mask / 2 || diff < 0) {
81 struct timeval iv;
82 unsigned long msecs;
83
84 timersub(&this_read, &last_read, &iv);
85 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
86
87 fprintf(stderr, "WARNING: failed to keep up with mmap data."
88 " Last read %lu msecs ago.\n", msecs);
89
90 /*
91 * head points to a known good entry, start there.
92 */
93 old = head;
94 }
95
96 last_read = this_read;
97
98 if (old != head)
99 events++;
100
101 size = head - old;
102
103 if ((old & md->mask) + size != (head & md->mask)) {
104 buf = &data[old & md->mask];
105 size = md->mask + 1 - (old & md->mask);
106 old += size;
107 while (size) {
108 int ret = write(output, buf, size);
109 if (ret < 0) {
110 perror("failed to write");
111 exit(-1);
112 }
113 size -= ret;
114 buf += ret;
115 }
116 }
117
118 buf = &data[old & md->mask];
119 size = head - old;
120 old += size;
121 while (size) {
122 int ret = write(output, buf, size);
123 if (ret < 0) {
124 perror("failed to write");
125 exit(-1);
126 }
127 size -= ret;
128 buf += ret;
129 }
130
131 md->prev = old;
132}
133
/*
 * Set to 1 by sig_handler(); polled by the main recording loop.
 * volatile sig_atomic_t is the type the C standard guarantees can be
 * written safely from an asynchronous signal handler.
 */
static volatile sig_atomic_t done;

/* Signal handler: only records that a signal arrived. */
static void sig_handler(int sig)
{
	(void)sig;	/* the same action is taken for every signal */
	done = 1;
}
140
Peter Zijlstra16c8a102009-05-05 17:50:27 +0200141static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
142static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
143
144static int nr_poll;
145static int nr_cpu;
146
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -0300147struct mmap_event {
148 struct perf_event_header header;
149 __u32 pid, tid;
150 __u64 start;
151 __u64 len;
152 __u64 pgoff;
153 char filename[PATH_MAX];
154};
155struct comm_event {
156 struct perf_event_header header;
157 __u32 pid,tid;
158 char comm[16];
159};
160
161static pid_t pid_synthesize_comm_event(pid_t pid)
162{
163 char filename[PATH_MAX];
164 char bf[BUFSIZ];
165 struct comm_event comm_ev;
166 size_t size;
167 int fd;
168
169 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
170
171 fd = open(filename, O_RDONLY);
172 if (fd < 0) {
173 fprintf(stderr, "couldn't open %s\n", filename);
174 exit(EXIT_FAILURE);
175 }
176 if (read(fd, bf, sizeof(bf)) < 0) {
177 fprintf(stderr, "couldn't read %s\n", filename);
178 exit(EXIT_FAILURE);
179 }
180 close(fd);
181
182 pid_t spid, ppid;
183 char state;
184 char comm[18];
185
186 memset(&comm_ev, 0, sizeof(comm_ev));
187 int nr = sscanf(bf, "%d %s %c %d %d ",
188 &spid, comm, &state, &ppid, &comm_ev.pid);
189 if (nr != 5) {
190 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
191 filename);
192 exit(EXIT_FAILURE);
193 }
194 comm_ev.header.type = PERF_EVENT_COMM;
195 comm_ev.tid = pid;
196 size = strlen(comm);
197 comm[--size] = '\0'; /* Remove the ')' at the end */
198 --size; /* Remove the '(' at the begin */
199 memcpy(comm_ev.comm, comm + 1, size);
200 size = ALIGN(size, sizeof(uint64_t));
201 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
202 int ret = write(output, &comm_ev, comm_ev.header.size);
203 if (ret < 0) {
204 perror("failed to write");
205 exit(-1);
206 }
207 return comm_ev.pid;
208}
209
210static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
211{
212 char filename[PATH_MAX];
213 FILE *fp;
214
215 snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
216
217 fp = fopen(filename, "r");
218 if (fp == NULL) {
219 fprintf(stderr, "couldn't open %s\n", filename);
220 exit(EXIT_FAILURE);
221 }
222 while (1) {
223 char bf[BUFSIZ];
224 unsigned char vm_read, vm_write, vm_exec, vm_mayshare;
225 struct mmap_event mmap_ev = {
226 .header.type = PERF_EVENT_MMAP,
227 };
228 unsigned long ino;
229 int major, minor;
230 size_t size;
231 if (fgets(bf, sizeof(bf), fp) == NULL)
232 break;
233
234 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
235 sscanf(bf, "%llx-%llx %c%c%c%c %llx %x:%x %lu",
236 &mmap_ev.start, &mmap_ev.len,
237 &vm_read, &vm_write, &vm_exec, &vm_mayshare,
238 &mmap_ev.pgoff, &major, &minor, &ino);
239 if (vm_exec == 'x') {
240 char *execname = strrchr(bf, ' ');
241
242 if (execname == NULL || execname[1] != '/')
243 continue;
244
245 execname += 1;
246 size = strlen(execname);
247 execname[size - 1] = '\0'; /* Remove \n */
248 memcpy(mmap_ev.filename, execname, size);
249 size = ALIGN(size, sizeof(uint64_t));
250 mmap_ev.len -= mmap_ev.start;
251 mmap_ev.header.size = (sizeof(mmap_ev) -
252 (sizeof(mmap_ev.filename) - size));
253 mmap_ev.pid = pgid;
254 mmap_ev.tid = pid;
255
256 if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
257 perror("failed to write");
258 exit(-1);
259 }
260 }
261 }
262
263 fclose(fp);
264}
265
266static void open_counters(int cpu, pid_t pid)
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200267{
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200268 struct perf_counter_hw_event hw_event;
Peter Zijlstra16c8a102009-05-05 17:50:27 +0200269 int counter, group_fd;
270 int track = 1;
Peter Zijlstra16c8a102009-05-05 17:50:27 +0200271
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -0300272 if (pid > 0) {
273 pid_t pgid = pid_synthesize_comm_event(pid);
274 pid_synthesize_mmap_events(pid, pgid);
275 }
Peter Zijlstra16c8a102009-05-05 17:50:27 +0200276
277 group_fd = -1;
278 for (counter = 0; counter < nr_counters; counter++) {
279
280 memset(&hw_event, 0, sizeof(hw_event));
281 hw_event.config = event_id[counter];
282 hw_event.irq_period = event_count[counter];
283 hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID;
284 hw_event.nmi = nmi;
285 hw_event.mmap = track;
286 hw_event.comm = track;
287 hw_event.inherit = (cpu < 0) && inherit;
288
289 track = 0; // only the first counter needs these
290
291 fd[nr_cpu][counter] =
292 sys_perf_counter_open(&hw_event, pid, cpu, group_fd, 0);
293
294 if (fd[nr_cpu][counter] < 0) {
295 int err = errno;
296 printf("kerneltop error: syscall returned with %d (%s)\n",
297 fd[nr_cpu][counter], strerror(err));
298 if (err == EPERM)
299 printf("Are you root?\n");
300 exit(-1);
301 }
302 assert(fd[nr_cpu][counter] >= 0);
303 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
304
305 /*
306 * First counter acts as the group leader:
307 */
308 if (group && group_fd == -1)
309 group_fd = fd[nr_cpu][counter];
310
311 event_array[nr_poll].fd = fd[nr_cpu][counter];
312 event_array[nr_poll].events = POLLIN;
313 nr_poll++;
314
315 mmap_array[nr_cpu][counter].counter = counter;
316 mmap_array[nr_cpu][counter].prev = 0;
317 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
318 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
319 PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
320 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
321 printf("kerneltop error: failed to mmap with %d (%s)\n",
322 errno, strerror(errno));
323 exit(-1);
324 }
325 }
326 nr_cpu++;
327}
328
Ingo Molnar0e9b20b2009-05-26 09:17:18 +0200329static int __cmd_record(int argc, const char **argv)
Peter Zijlstra16c8a102009-05-05 17:50:27 +0200330{
331 int i, counter;
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200332 pid_t pid;
333 int ret;
334
335 page_size = sysconf(_SC_PAGE_SIZE);
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200336 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
337 assert(nr_cpus <= MAX_NR_CPUS);
338 assert(nr_cpus >= 0);
339
340 output = open(output_name, O_CREAT|O_RDWR, S_IRWXU);
341 if (output < 0) {
342 perror("failed to create output file");
343 exit(-1);
344 }
345
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -0300346 if (!system_wide) {
347 open_counters(-1, target_pid != -1 ? target_pid : 0);
348 } else for (i = 0; i < nr_cpus; i++)
349 open_counters(i, target_pid);
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200350
Peter Zijlstra16c8a102009-05-05 17:50:27 +0200351 signal(SIGCHLD, sig_handler);
352 signal(SIGINT, sig_handler);
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200353
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -0300354 if (target_pid == -1) {
355 pid = fork();
356 if (pid < 0)
357 perror("failed to fork");
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200358
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -0300359 if (!pid) {
Ingo Molnar0e9b20b2009-05-26 09:17:18 +0200360 if (execvp(argv[0], (char **)argv)) {
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -0300361 perror(argv[0]);
362 exit(-1);
363 }
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200364 }
365 }
366
367 if (realtime_prio) {
368 struct sched_param param;
369
370 param.sched_priority = realtime_prio;
371 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
372 printf("Could not set realtime priority.\n");
373 exit(-1);
374 }
375 }
376
377 /*
378 * TODO: store the current /proc/$/maps information somewhere
379 */
380
381 while (!done) {
382 int hits = events;
383
Peter Zijlstra16c8a102009-05-05 17:50:27 +0200384 for (i = 0; i < nr_cpu; i++) {
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200385 for (counter = 0; counter < nr_counters; counter++)
386 mmap_read(&mmap_array[i][counter]);
387 }
388
389 if (hits == events)
390 ret = poll(event_array, nr_poll, 100);
391 }
392
393 return 0;
394}
Ingo Molnar0e9b20b2009-05-26 09:17:18 +0200395
/* NULL-terminated usage lines handed to the option parser. */
static const char * const record_usage[] = {
	[0] = "perf record [<options>] <command>",
	[1] = NULL,
};
400
Ingo Molnar8ad8db32009-05-26 11:10:09 +0200401static char events_help_msg[EVENTS_HELP_MAX];
402
Ingo Molnar0e9b20b2009-05-26 09:17:18 +0200403const struct option options[] = {
404 OPT_CALLBACK('e', "event", NULL, "event",
Ingo Molnar8ad8db32009-05-26 11:10:09 +0200405 events_help_msg, parse_events),
Ingo Molnar0e9b20b2009-05-26 09:17:18 +0200406 OPT_INTEGER('c', "count", &default_interval,
407 "event period to sample"),
408 OPT_INTEGER('m', "mmap-pages", &mmap_pages,
409 "number of mmap data pages"),
410 OPT_STRING('o', "output", &output_name, "file",
411 "output file name"),
412 OPT_BOOLEAN('i', "inherit", &inherit,
413 "child tasks inherit counters"),
414 OPT_INTEGER('p', "pid", &target_pid,
415 "record events on existing pid"),
416 OPT_INTEGER('r', "realtime", &realtime_prio,
417 "collect data with this RT SCHED_FIFO priority"),
418 OPT_BOOLEAN('a', "all-cpus", &system_wide,
419 "system-wide collection from all CPUs"),
420 OPT_END()
421};
422
423int cmd_record(int argc, const char **argv, const char *prefix)
424{
425 int counter;
426
Ingo Molnar8ad8db32009-05-26 11:10:09 +0200427 create_events_help(events_help_msg);
Ingo Molnar0e9b20b2009-05-26 09:17:18 +0200428
429 argc = parse_options(argc, argv, options, record_usage, 0);
430 if (!argc)
431 usage_with_options(record_usage, options);
432
433 if (!nr_counters) {
434 nr_counters = 1;
435 event_id[0] = 0;
436 }
437
438 for (counter = 0; counter < nr_counters; counter++) {
439 if (event_count[counter])
440 continue;
441
442 event_count[counter] = default_interval;
443 }
444
445 return __cmd_record(argc, argv);
446}