blob: 6ec4f51d536b7afc96d1e9f8afa447701440ff43 [file] [log] [blame]
Ingo Molnar0a02ad92009-09-11 12:12:54 +02001#include "builtin.h"
2
3#include "util/util.h"
4#include "util/cache.h"
5#include "util/symbol.h"
6#include "util/thread.h"
7#include "util/header.h"
8
9#include "util/parse-options.h"
10
11#include "perf.h"
12#include "util/debug.h"
13
14#include "util/trace-event.h"
Ingo Molnarec156762009-09-11 12:12:54 +020015#include <sys/types.h>
Ingo Molnar0a02ad92009-09-11 12:12:54 +020016
Ingo Molnarec156762009-09-11 12:12:54 +020017static char const *input_name = "perf.data";
18static int input;
19static unsigned long page_size;
20static unsigned long mmap_window = 32;
Ingo Molnar0a02ad92009-09-11 12:12:54 +020021
Ingo Molnarec156762009-09-11 12:12:54 +020022static unsigned long total_comm = 0;
Ingo Molnar0a02ad92009-09-11 12:12:54 +020023
Ingo Molnarec156762009-09-11 12:12:54 +020024static struct rb_root threads;
25static struct thread *last_match;
Ingo Molnar0a02ad92009-09-11 12:12:54 +020026
Ingo Molnarec156762009-09-11 12:12:54 +020027static struct perf_header *header;
28static u64 sample_type;
Ingo Molnar0a02ad92009-09-11 12:12:54 +020029
30
Ingo Molnarec156762009-09-11 12:12:54 +020031/*
32 * Scheduler benchmarks
33 */
34#include <sys/resource.h>
35#include <sys/types.h>
36#include <sys/stat.h>
37#include <sys/time.h>
38#include <sys/prctl.h>
39
40#include <linux/unistd.h>
41
42#include <semaphore.h>
43#include <pthread.h>
44#include <signal.h>
45#include <values.h>
46#include <string.h>
47#include <unistd.h>
48#include <stdlib.h>
49#include <assert.h>
50#include <fcntl.h>
51#include <time.h>
52#include <math.h>
53
54#include <stdio.h>
55
#define PR_SET_NAME		15	/* Set process name */

/* Abort loudly on an impossible condition. */
#define BUG_ON(x)		assert(!(x))

/* Flip to 1 to enable the dprintk() tracing below. */
#define DEBUG			0

/* Nanosecond timestamp type used throughout the benchmark code. */
typedef unsigned long long nsec_t;

/* Unbuffered printf, so progress output is visible immediately. */
#define printk(x...)		do { printf(x); fflush(stdout); } while (0)

/* Timestamp of the previous __dprintk(), for inter-message deltas. */
nsec_t prev_printk;

/* Debug printf prefixed with absolute time and delta since last message. */
#define __dprintk(x,y...) do { \
	nsec_t __now = get_nsecs(), __delta = __now - prev_printk; \
	\
	prev_printk = __now; \
	\
	printf("%.3f [%Ld] [%.3f]: " x, (double)__now/1e6, __now, (double)__delta/1e6, y);\
} while (0)

#if !DEBUG
# define dprintk(x...)		do { } while (0)
#else
# define dprintk(x...)		__dprintk(x)
#endif

/* Line markers: __DP() always prints, DP()/D() only when DEBUG is set. */
#define __DP()			__dprintk("parent: line %d\n", __LINE__)
#define DP()			dprintk("parent: line %d\n", __LINE__)
#define D()			dprintk("task %ld: line %d\n", this_task->nr, __LINE__)

/* Measured cost of the measurement primitives themselves: */
static nsec_t			run_measurement_overhead;
static nsec_t			sleep_measurement_overhead;
89
90static nsec_t get_nsecs(void)
91{
92 struct timespec ts;
93
94 clock_gettime(CLOCK_MONOTONIC, &ts);
95
96 return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
97}
98
99static void burn_nsecs(nsec_t nsecs)
100{
101 nsec_t T0 = get_nsecs(), T1;
102
103 do {
104 T1 = get_nsecs();
105 } while (T1 + run_measurement_overhead < T0 + nsecs);
106}
107
108static void sleep_nsecs(nsec_t nsecs)
109{
110 struct timespec ts;
111
112 ts.tv_nsec = nsecs % 999999999;
113 ts.tv_sec = nsecs / 999999999;
114
115 nanosleep(&ts, NULL);
116}
117
118static void calibrate_run_measurement_overhead(void)
119{
120 nsec_t T0, T1, delta, min_delta = 1000000000ULL;
121 int i;
122
123 for (i = 0; i < 10; i++) {
124 T0 = get_nsecs();
125 burn_nsecs(0);
126 T1 = get_nsecs();
127 delta = T1-T0;
128 min_delta = min(min_delta, delta);
129 }
130 run_measurement_overhead = min_delta;
131
132 printk("run measurement overhead: %Ld nsecs\n", min_delta);
133}
134
135static void calibrate_sleep_measurement_overhead(void)
136{
137 nsec_t T0, T1, delta, min_delta = 1000000000ULL;
138 int i;
139
140 for (i = 0; i < 10; i++) {
141 T0 = get_nsecs();
142 sleep_nsecs(10000);
143 T1 = get_nsecs();
144 delta = T1-T0;
145 min_delta = min(min_delta, delta);
146 }
147 min_delta -= 10000;
148 sleep_measurement_overhead = min_delta;
149
150 printk("sleep measurement overhead: %Ld nsecs\n", min_delta);
151}
152
153#define COMM_LEN 20
154#define SYM_LEN 129
155
156#define MAX_PID 65536
157
158static unsigned long nr_tasks;
159
160struct sched_event;
161
162struct task_desc {
163 unsigned long nr;
164 unsigned long pid;
165 char comm[COMM_LEN];
166
167 unsigned long nr_events;
168 unsigned long curr_event;
169 struct sched_event **events;
170
171 pthread_t thread;
172 sem_t sleep_sem;
173
174 sem_t ready_for_work;
175 sem_t work_done_sem;
176
177 nsec_t cpu_usage;
178};
179
180enum sched_event_type {
181 SCHED_EVENT_RUN,
182 SCHED_EVENT_SLEEP,
183 SCHED_EVENT_WAKEUP,
184};
185
186struct sched_event {
187 enum sched_event_type type;
188 nsec_t timestamp;
189 nsec_t duration;
190 unsigned long nr;
191 int specific_wait;
192 sem_t *wait_sem;
193 struct task_desc *wakee;
194};
195
196static struct task_desc *pid_to_task[MAX_PID];
197
198static struct task_desc **tasks;
199
200static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
201static nsec_t start_time;
202
203static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;
204
205static unsigned long nr_run_events;
206static unsigned long nr_sleep_events;
207static unsigned long nr_wakeup_events;
208
209static unsigned long nr_sleep_corrections;
210static unsigned long nr_run_events_optimized;
211
212static struct sched_event *
213get_new_event(struct task_desc *task, nsec_t timestamp)
214{
215 struct sched_event *event = calloc(1, sizeof(*event));
216 unsigned long idx = task->nr_events;
217 size_t size;
218
219 event->timestamp = timestamp;
220 event->nr = idx;
221
222 task->nr_events++;
223 size = sizeof(struct sched_event *) * task->nr_events;
224 task->events = realloc(task->events, size);
225 BUG_ON(!task->events);
226
227 task->events[idx] = event;
228
229 return event;
230}
231
232static struct sched_event *last_event(struct task_desc *task)
233{
234 if (!task->nr_events)
235 return NULL;
236
237 return task->events[task->nr_events - 1];
238}
239
240static void
Ingo Molnarfbf94822009-09-11 12:12:54 +0200241add_sched_event_run(struct task_desc *task, nsec_t timestamp, u64 duration)
Ingo Molnarec156762009-09-11 12:12:54 +0200242{
243 struct sched_event *event, *curr_event = last_event(task);
244
245 /*
Ingo Molnarfbf94822009-09-11 12:12:54 +0200246 * optimize an existing RUN event by merging this one
247 * to it:
248 */
Ingo Molnarec156762009-09-11 12:12:54 +0200249 if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
250 nr_run_events_optimized++;
251 curr_event->duration += duration;
252 return;
253 }
254
255 event = get_new_event(task, timestamp);
256
257 event->type = SCHED_EVENT_RUN;
258 event->duration = duration;
259
260 nr_run_events++;
261}
262
263static unsigned long targetless_wakeups;
264static unsigned long multitarget_wakeups;
265
266static void
267add_sched_event_wakeup(struct task_desc *task, nsec_t timestamp,
268 struct task_desc *wakee)
269{
270 struct sched_event *event, *wakee_event;
271
272 event = get_new_event(task, timestamp);
273 event->type = SCHED_EVENT_WAKEUP;
274 event->wakee = wakee;
275
276 wakee_event = last_event(wakee);
277 if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
278 targetless_wakeups++;
279 return;
280 }
281 if (wakee_event->wait_sem) {
282 multitarget_wakeups++;
283 return;
284 }
285
286 wakee_event->wait_sem = calloc(1, sizeof(*wakee_event->wait_sem));
287 sem_init(wakee_event->wait_sem, 0, 0);
288 wakee_event->specific_wait = 1;
289 event->wait_sem = wakee_event->wait_sem;
290
291 nr_wakeup_events++;
292}
293
294static void
295add_sched_event_sleep(struct task_desc *task, nsec_t timestamp,
296 unsigned long uninterruptible __used)
297{
298 struct sched_event *event = get_new_event(task, timestamp);
299
300 event->type = SCHED_EVENT_SLEEP;
301
302 nr_sleep_events++;
303}
304
305static struct task_desc *register_pid(unsigned long pid, const char *comm)
306{
307 struct task_desc *task;
308
309 BUG_ON(pid >= MAX_PID);
310
311 task = pid_to_task[pid];
312
313 if (task)
314 return task;
315
316 task = calloc(1, sizeof(*task));
317 task->pid = pid;
318 task->nr = nr_tasks;
319 strcpy(task->comm, comm);
320 /*
321 * every task starts in sleeping state - this gets ignored
322 * if there's no wakeup pointing to this sleep state:
323 */
324 add_sched_event_sleep(task, 0, 0);
325
326 pid_to_task[pid] = task;
327 nr_tasks++;
328 tasks = realloc(tasks, nr_tasks*sizeof(struct task_task *));
329 BUG_ON(!tasks);
330 tasks[task->nr] = task;
331
332 printk("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm);
333
334 return task;
335}
336
337
338static int first_trace_line = 1;
339
340static nsec_t first_timestamp;
341static nsec_t prev_timestamp;
342
343void parse_line(char *line);
344
345void parse_line(char *line)
346{
347 unsigned long param1 = 0, param2 = 0;
348 char comm[COMM_LEN], comm2[COMM_LEN];
349 unsigned long pid, pid2, timestamp0;
350 struct task_desc *task, *task2;
351 char func_str[SYM_LEN];
352 nsec_t timestamp;
353 int ret;
354
355 //" <idle> 0 0D.s3 0us+: try_to_wake_up <events/0 9> (1 0)"
356 ret = sscanf(line, "%20s %5ld %*s %ldus%*c:"
357 " %128s <%20s %ld> (%ld %ld)\n",
358 comm, &pid, &timestamp0,
359 func_str, comm2, &pid2, &param1, &param2);
360 dprintk("ret: %d\n", ret);
361 if (ret != 8)
362 return;
363
364 timestamp = timestamp0 * 1000LL;
365
366 if (first_trace_line) {
367 first_trace_line = 0;
368 first_timestamp = timestamp;
369 }
370
371 timestamp -= first_timestamp;
372 BUG_ON(timestamp < prev_timestamp);
373 prev_timestamp = timestamp;
374
375 dprintk("parsed: %s - %ld %Ld: %s - <%s %ld> (%ld %ld)\n",
376 comm,
377 pid,
Ingo Molnarfbf94822009-09-11 12:12:54 +0200378 timestamp,
Ingo Molnarec156762009-09-11 12:12:54 +0200379 func_str,
380 comm2,
381 pid2,
382 param1,
383 param2);
384
385 task = register_pid(pid, comm);
386 task2 = register_pid(pid2, comm2);
387
388 if (!strcmp(func_str, "update_curr")) {
389 dprintk("%Ld: task %ld runs for %ld nsecs\n",
390 timestamp, task->nr, param1);
391 add_sched_event_run(task, timestamp, param1);
392 } else if (!strcmp(func_str, "try_to_wake_up")) {
393 dprintk("%Ld: task %ld wakes up task %ld\n",
394 timestamp, task->nr, task2->nr);
395 add_sched_event_wakeup(task, timestamp, task2);
396 } else if (!strcmp(func_str, "deactivate_task")) {
397 dprintk("%Ld: task %ld goes to sleep (uninterruptible: %ld)\n",
398 timestamp, task->nr, param1);
399 add_sched_event_sleep(task, timestamp, param1);
400 }
401}
402
403static void print_task_traces(void)
404{
405 struct task_desc *task;
406 unsigned long i;
407
408 for (i = 0; i < nr_tasks; i++) {
409 task = tasks[i];
410 printk("task %6ld (%20s:%10ld), nr_events: %ld\n",
411 task->nr, task->comm, task->pid, task->nr_events);
412 }
413}
414
415static void add_cross_task_wakeups(void)
416{
417 struct task_desc *task1, *task2;
418 unsigned long i, j;
419
420 for (i = 0; i < nr_tasks; i++) {
421 task1 = tasks[i];
422 j = i + 1;
423 if (j == nr_tasks)
424 j = 0;
425 task2 = tasks[j];
426 add_sched_event_wakeup(task1, 0, task2);
427 }
428}
429
430static void
Ingo Molnarfbf94822009-09-11 12:12:54 +0200431process_sched_event(struct task_desc *this_task __used, struct sched_event *event)
Ingo Molnarec156762009-09-11 12:12:54 +0200432{
433 int ret = 0;
434 nsec_t now;
435 long long delta;
436
437 now = get_nsecs();
438 delta = start_time + event->timestamp - now;
439
440 dprintk("task %ld, event #%ld, %Ld, delta: %.3f (%Ld)\n",
441 this_task->nr, event->nr, event->timestamp,
442 (double)delta/1e6, delta);
443
444 if (0 && delta > 0) {
445 dprintk("%.3f: task %ld FIX %.3f\n",
446 (double)event->timestamp/1e6,
447 this_task->nr,
448 (double)delta/1e6);
449 sleep_nsecs(start_time + event->timestamp - now);
450 nr_sleep_corrections++;
451 }
452
453 switch (event->type) {
454 case SCHED_EVENT_RUN:
455 dprintk("%.3f: task %ld RUN for %.3f\n",
456 (double)event->timestamp/1e6,
457 this_task->nr,
458 (double)event->duration/1e6);
459 burn_nsecs(event->duration);
460 break;
461 case SCHED_EVENT_SLEEP:
462 dprintk("%.3f: task %ld %s SLEEP\n",
463 (double)event->timestamp/1e6,
464 this_task->nr, event->wait_sem ? "" : "SKIP");
465 if (event->wait_sem)
466 ret = sem_wait(event->wait_sem);
467 BUG_ON(ret);
468 break;
469 case SCHED_EVENT_WAKEUP:
470 dprintk("%.3f: task %ld WAKEUP => task %ld\n",
471 (double)event->timestamp/1e6,
472 this_task->nr,
473 event->wakee->nr);
474 if (event->wait_sem)
475 ret = sem_post(event->wait_sem);
476 BUG_ON(ret);
477 break;
478 default:
479 BUG_ON(1);
480 }
481}
482
483static nsec_t get_cpu_usage_nsec_parent(void)
484{
485 struct rusage ru;
486 nsec_t sum;
487 int err;
488
489 err = getrusage(RUSAGE_SELF, &ru);
490 BUG_ON(err);
491
492 sum = ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3;
493 sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3;
494
495 return sum;
496}
497
498static nsec_t get_cpu_usage_nsec_self(void)
499{
500 char filename [] = "/proc/1234567890/sched";
501 unsigned long msecs, nsecs;
502 char *line = NULL;
503 nsec_t total = 0;
504 size_t len = 0;
505 ssize_t chars;
506 FILE *file;
507 int ret;
508
509 sprintf(filename, "/proc/%d/sched", getpid());
510 file = fopen(filename, "r");
511 BUG_ON(!file);
512
513 while ((chars = getline(&line, &len, file)) != -1) {
514 dprintk("got line with length %zu :\n", chars);
515 dprintk("%s", line);
516 ret = sscanf(line, "se.sum_exec_runtime : %ld.%06ld\n",
517 &msecs, &nsecs);
518 if (ret == 2) {
519 total = msecs*1e6 + nsecs;
520 dprintk("total: (%ld.%06ld) %Ld\n",
521 msecs, nsecs, total);
522 break;
523 }
524 }
525 if (line)
526 free(line);
527 fclose(file);
528
529 return total;
530}
531
532static void *thread_func(void *ctx)
533{
534 struct task_desc *this_task = ctx;
535 nsec_t cpu_usage_0, cpu_usage_1;
536 unsigned long i, ret;
537 char comm2[22];
538
539 dprintk("task %ld started up.\n", this_task->nr);
540 sprintf(comm2, ":%s", this_task->comm);
541 prctl(PR_SET_NAME, comm2);
542
543again:
544 ret = sem_post(&this_task->ready_for_work);
545 BUG_ON(ret);
546 D();
547 ret = pthread_mutex_lock(&start_work_mutex);
548 BUG_ON(ret);
549 ret = pthread_mutex_unlock(&start_work_mutex);
550 BUG_ON(ret);
551 D();
552
553 cpu_usage_0 = get_cpu_usage_nsec_self();
554
555 for (i = 0; i < this_task->nr_events; i++) {
556 this_task->curr_event = i;
557 process_sched_event(this_task, this_task->events[i]);
558 }
559
560 cpu_usage_1 = get_cpu_usage_nsec_self();
561 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
562
563 dprintk("task %ld cpu usage: %0.3f msecs\n",
564 this_task->nr, (double)this_task->cpu_usage / 1e6);
565
566 D();
567 ret = sem_post(&this_task->work_done_sem);
568 BUG_ON(ret);
569 D();
570
571 ret = pthread_mutex_lock(&work_done_wait_mutex);
572 BUG_ON(ret);
573 ret = pthread_mutex_unlock(&work_done_wait_mutex);
574 BUG_ON(ret);
575 D();
576
577 goto again;
578}
579
580static void create_tasks(void)
581{
582 struct task_desc *task;
583 pthread_attr_t attr;
584 unsigned long i;
585 int err;
586
587 err = pthread_attr_init(&attr);
588 BUG_ON(err);
589 err = pthread_attr_setstacksize(&attr, (size_t)(16*1024));
590 BUG_ON(err);
591 err = pthread_mutex_lock(&start_work_mutex);
592 BUG_ON(err);
593 err = pthread_mutex_lock(&work_done_wait_mutex);
594 BUG_ON(err);
595 for (i = 0; i < nr_tasks; i++) {
596 task = tasks[i];
597 sem_init(&task->sleep_sem, 0, 0);
598 sem_init(&task->ready_for_work, 0, 0);
599 sem_init(&task->work_done_sem, 0, 0);
600 task->curr_event = 0;
601 err = pthread_create(&task->thread, &attr, thread_func, task);
602 BUG_ON(err);
603 }
604}
605
606static nsec_t cpu_usage;
607static nsec_t runavg_cpu_usage;
608static nsec_t parent_cpu_usage;
609static nsec_t runavg_parent_cpu_usage;
610
611static void wait_for_tasks(void)
612{
613 nsec_t cpu_usage_0, cpu_usage_1;
614 struct task_desc *task;
615 unsigned long i, ret;
616
617 DP();
618 start_time = get_nsecs();
619 DP();
620 cpu_usage = 0;
621 pthread_mutex_unlock(&work_done_wait_mutex);
622
623 for (i = 0; i < nr_tasks; i++) {
624 task = tasks[i];
625 ret = sem_wait(&task->ready_for_work);
626 BUG_ON(ret);
627 sem_init(&task->ready_for_work, 0, 0);
628 }
629 ret = pthread_mutex_lock(&work_done_wait_mutex);
630 BUG_ON(ret);
631
632 cpu_usage_0 = get_cpu_usage_nsec_parent();
633
634 pthread_mutex_unlock(&start_work_mutex);
635
636#if 0
637 for (i = 0; i < nr_tasks; i++) {
638 unsigned long missed;
639
640 task = tasks[i];
641 while (task->curr_event + 1 < task->nr_events) {
642 dprintk("parent waiting for %ld (%ld != %ld)\n",
643 i, task->curr_event, task->nr_events);
644 sleep_nsecs(100000000);
645 }
646 missed = task->nr_events - 1 - task->curr_event;
647 if (missed)
648 printk("task %ld missed events: %ld\n", i, missed);
649 ret = sem_post(&task->sleep_sem);
650 BUG_ON(ret);
651 }
652#endif
653 DP();
654 for (i = 0; i < nr_tasks; i++) {
655 task = tasks[i];
656 ret = sem_wait(&task->work_done_sem);
657 BUG_ON(ret);
658 sem_init(&task->work_done_sem, 0, 0);
659 cpu_usage += task->cpu_usage;
660 task->cpu_usage = 0;
661 }
662
663 cpu_usage_1 = get_cpu_usage_nsec_parent();
664 if (!runavg_cpu_usage)
665 runavg_cpu_usage = cpu_usage;
666 runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10;
667
668 parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
669 if (!runavg_parent_cpu_usage)
670 runavg_parent_cpu_usage = parent_cpu_usage;
671 runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 +
672 parent_cpu_usage)/10;
673
674 ret = pthread_mutex_lock(&start_work_mutex);
675 BUG_ON(ret);
676
677 for (i = 0; i < nr_tasks; i++) {
678 task = tasks[i];
679 sem_init(&task->sleep_sem, 0, 0);
680 task->curr_event = 0;
681 }
682}
683
684static int __cmd_sched(void);
685
686static void parse_trace(void)
687{
688 __cmd_sched();
689
690 printk("nr_run_events: %ld\n", nr_run_events);
691 printk("nr_sleep_events: %ld\n", nr_sleep_events);
692 printk("nr_wakeup_events: %ld\n", nr_wakeup_events);
693
694 if (targetless_wakeups)
695 printk("target-less wakeups: %ld\n", targetless_wakeups);
696 if (multitarget_wakeups)
697 printk("multi-target wakeups: %ld\n", multitarget_wakeups);
698 if (nr_run_events_optimized)
699 printk("run events optimized: %ld\n",
700 nr_run_events_optimized);
701}
702
703static unsigned long nr_runs;
704static nsec_t sum_runtime;
705static nsec_t sum_fluct;
706static nsec_t run_avg;
707
708static void run_one_test(void)
709{
710 nsec_t T0, T1, delta, avg_delta, fluct, std_dev;
711
712 T0 = get_nsecs();
713 wait_for_tasks();
714 T1 = get_nsecs();
715
716 delta = T1 - T0;
717 sum_runtime += delta;
718 nr_runs++;
719
720 avg_delta = sum_runtime / nr_runs;
721 if (delta < avg_delta)
722 fluct = avg_delta - delta;
723 else
724 fluct = delta - avg_delta;
725 sum_fluct += fluct;
726 std_dev = sum_fluct / nr_runs / sqrt(nr_runs);
727 if (!run_avg)
728 run_avg = delta;
729 run_avg = (run_avg*9 + delta)/10;
730
731 printk("#%-3ld: %0.3f, ",
732 nr_runs, (double)delta/1000000.0);
733
734#if 0
735 printk("%0.2f +- %0.2f, ",
736 (double)avg_delta/1e6, (double)std_dev/1e6);
737#endif
738 printk("ravg: %0.2f, ",
739 (double)run_avg/1e6);
740
741 printk("cpu: %0.2f / %0.2f",
742 (double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6);
743
744#if 0
745 /*
Ingo Molnarfbf94822009-09-11 12:12:54 +0200746 * rusage statistics done by the parent, these are less
747 * accurate than the sum_exec_runtime based statistics:
748 */
Ingo Molnarec156762009-09-11 12:12:54 +0200749 printk(" [%0.2f / %0.2f]",
750 (double)parent_cpu_usage/1e6,
751 (double)runavg_parent_cpu_usage/1e6);
752#endif
753
754 printk("\n");
755
756 if (nr_sleep_corrections)
757 printk(" (%ld sleep corrections)\n", nr_sleep_corrections);
758 nr_sleep_corrections = 0;
759}
760
761static void test_calibrations(void)
762{
763 nsec_t T0, T1;
764
765 T0 = get_nsecs();
766 burn_nsecs(1e6);
767 T1 = get_nsecs();
768
769 printk("the run test took %Ld nsecs\n", T1-T0);
770
771 T0 = get_nsecs();
772 sleep_nsecs(1e6);
773 T1 = get_nsecs();
774
775 printk("the sleep test took %Ld nsecs\n", T1-T0);
776}
777
Ingo Molnar0a02ad92009-09-11 12:12:54 +0200778static int
779process_comm_event(event_t *event, unsigned long offset, unsigned long head)
780{
781 struct thread *thread;
782
783 thread = threads__findnew(event->comm.pid, &threads, &last_match);
784
785 dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
786 (void *)(offset + head),
787 (void *)(long)(event->header.size),
788 event->comm.comm, event->comm.pid);
789
790 if (thread == NULL ||
791 thread__set_comm(thread, event->comm.comm)) {
792 dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n");
793 return -1;
794 }
795 total_comm++;
796
797 return 0;
798}
799
Ingo Molnarfbf94822009-09-11 12:12:54 +0200800struct trace_wakeup_event {
801 u32 size;
802
803 u16 common_type;
804 u8 common_flags;
805 u8 common_preempt_count;
806 u32 common_pid;
807 u32 common_tgid;
808
809 char comm[16];
810 u32 pid;
811
812 u32 prio;
813 u32 success;
814 u32 cpu;
815};
816
817static void
818process_sched_wakeup_event(struct trace_wakeup_event *wakeup_event, struct event *event,
Ingo Molnarec156762009-09-11 12:12:54 +0200819 int cpu __used, u64 timestamp __used, struct thread *thread __used)
820{
Ingo Molnarfbf94822009-09-11 12:12:54 +0200821 struct task_desc *waker, *wakee;
822
Ingo Molnarec156762009-09-11 12:12:54 +0200823 printf("sched_wakeup event %p\n", event);
Ingo Molnarfbf94822009-09-11 12:12:54 +0200824
825 printf(" ... pid %d woke up %s/%d\n",
826 wakeup_event->common_pid,
827 wakeup_event->comm,
828 wakeup_event->pid);
829
830 waker = register_pid(wakeup_event->common_pid, "<unknown>");
831 wakee = register_pid(wakeup_event->pid, wakeup_event->comm);
832
833 add_sched_event_wakeup(waker, timestamp, wakee);
Ingo Molnarec156762009-09-11 12:12:54 +0200834}
835
Ingo Molnarfbf94822009-09-11 12:12:54 +0200836struct trace_switch_event {
837 u32 size;
838
839 u16 common_type;
840 u8 common_flags;
841 u8 common_preempt_count;
842 u32 common_pid;
843 u32 common_tgid;
844
845 char prev_comm[16];
846 u32 prev_pid;
847 u32 prev_prio;
848 u64 prev_state;
849 char next_comm[16];
850 u32 next_pid;
851 u32 next_prio;
852};
853
854#define MAX_CPUS 4096
855
856unsigned long cpu_last_switched[MAX_CPUS];
857
858static void
859process_sched_switch_event(struct trace_switch_event *switch_event, struct event *event,
Ingo Molnarec156762009-09-11 12:12:54 +0200860 int cpu __used, u64 timestamp __used, struct thread *thread __used)
861{
Ingo Molnarfbf94822009-09-11 12:12:54 +0200862 struct task_desc *prev, *next;
863 u64 timestamp0;
864 s64 delta;
865
Ingo Molnarec156762009-09-11 12:12:54 +0200866 printf("sched_switch event %p\n", event);
Ingo Molnarfbf94822009-09-11 12:12:54 +0200867 if (cpu >= MAX_CPUS || cpu < 0)
868 return;
869
870 timestamp0 = cpu_last_switched[cpu];
871 if (timestamp0)
872 delta = timestamp - timestamp0;
873 else
874 delta = 0;
875
876 if (delta < 0)
877 die("hm, delta: %Ld < 0 ?\n", delta);
878
879 printf(" ... switch from %s/%d to %s/%d [ran %Ld nsecs]\n",
880 switch_event->prev_comm, switch_event->prev_pid,
881 switch_event->next_comm, switch_event->next_pid,
882 delta);
883
884 prev = register_pid(switch_event->prev_pid, switch_event->prev_comm);
885 next = register_pid(switch_event->next_pid, switch_event->next_comm);
886
887 cpu_last_switched[cpu] = timestamp;
888
889 add_sched_event_run(prev, timestamp, delta);
890}
891
892struct trace_fork_event {
893 u32 size;
894
895 u16 common_type;
896 u8 common_flags;
897 u8 common_preempt_count;
898 u32 common_pid;
899 u32 common_tgid;
900
901 char parent_comm[16];
902 u32 parent_pid;
903 char child_comm[16];
904 u32 child_pid;
905};
906
907static void
908process_sched_fork_event(struct trace_fork_event *fork_event, struct event *event,
909 int cpu __used, u64 timestamp __used, struct thread *thread __used)
910{
911 printf("sched_fork event %p\n", event);
912 printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid);
913 printf("... child: %s/%d\n", fork_event->child_comm, fork_event->child_pid);
914 register_pid(fork_event->parent_pid, fork_event->parent_comm);
915 register_pid(fork_event->child_pid, fork_event->child_comm);
916}
917
918static void process_sched_exit_event(struct event *event,
919 int cpu __used, u64 timestamp __used, struct thread *thread __used)
920{
921 printf("sched_exit event %p\n", event);
Ingo Molnarec156762009-09-11 12:12:54 +0200922}
923
924static void
925process_raw_event(event_t *raw_event, void *more_data,
926 int cpu, u64 timestamp, struct thread *thread)
927{
928 struct {
929 u32 size;
930 char data[0];
931 } *raw = more_data;
932 struct event *event;
933 int type;
934
935 type = trace_parse_common_type(raw->data);
936 event = trace_find_event(type);
937
938 /*
939 * FIXME: better resolve from pid from the struct trace_entry
940 * field, although it should be the same than this perf
941 * event pid
942 */
943 printf("id %d, type: %d, event: %s\n",
944 raw_event->header.type, type, event->name);
945
946 if (!strcmp(event->name, "sched_switch"))
Ingo Molnarfbf94822009-09-11 12:12:54 +0200947 process_sched_switch_event(more_data, event, cpu, timestamp, thread);
Ingo Molnarec156762009-09-11 12:12:54 +0200948 if (!strcmp(event->name, "sched_wakeup"))
Ingo Molnarfbf94822009-09-11 12:12:54 +0200949 process_sched_wakeup_event(more_data, event, cpu, timestamp, thread);
950 if (!strcmp(event->name, "sched_wakeup_new"))
951 process_sched_wakeup_event(more_data, event, cpu, timestamp, thread);
952 if (!strcmp(event->name, "sched_process_fork"))
953 process_sched_fork_event(more_data, event, cpu, timestamp, thread);
954 if (!strcmp(event->name, "sched_process_exit"))
955 process_sched_exit_event(event, cpu, timestamp, thread);
Ingo Molnarec156762009-09-11 12:12:54 +0200956}
957
Ingo Molnar0a02ad92009-09-11 12:12:54 +0200958static int
959process_sample_event(event_t *event, unsigned long offset, unsigned long head)
960{
961 char level;
962 int show = 0;
963 struct dso *dso = NULL;
964 struct thread *thread;
965 u64 ip = event->ip.ip;
966 u64 timestamp = -1;
967 u32 cpu = -1;
968 u64 period = 1;
969 void *more_data = event->ip.__more_data;
970 int cpumode;
971
972 thread = threads__findnew(event->ip.pid, &threads, &last_match);
973
974 if (sample_type & PERF_SAMPLE_TIME) {
975 timestamp = *(u64 *)more_data;
976 more_data += sizeof(u64);
977 }
978
979 if (sample_type & PERF_SAMPLE_CPU) {
980 cpu = *(u32 *)more_data;
981 more_data += sizeof(u32);
982 more_data += sizeof(u32); /* reserved */
983 }
984
985 if (sample_type & PERF_SAMPLE_PERIOD) {
986 period = *(u64 *)more_data;
987 more_data += sizeof(u64);
988 }
989
990 dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
991 (void *)(offset + head),
992 (void *)(long)(event->header.size),
993 event->header.misc,
994 event->ip.pid, event->ip.tid,
995 (void *)(long)ip,
996 (long long)period);
997
998 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
999
1000 if (thread == NULL) {
1001 eprintf("problem processing %d event, skipping it.\n",
1002 event->header.type);
1003 return -1;
1004 }
1005
1006 cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK;
1007
1008 if (cpumode == PERF_EVENT_MISC_KERNEL) {
1009 show = SHOW_KERNEL;
1010 level = 'k';
1011
1012 dso = kernel_dso;
1013
1014 dump_printf(" ...... dso: %s\n", dso->name);
1015
1016 } else if (cpumode == PERF_EVENT_MISC_USER) {
1017
1018 show = SHOW_USER;
1019 level = '.';
1020
1021 } else {
1022 show = SHOW_HV;
1023 level = 'H';
1024
1025 dso = hypervisor_dso;
1026
1027 dump_printf(" ...... dso: [hypervisor]\n");
1028 }
1029
Ingo Molnarec156762009-09-11 12:12:54 +02001030 if (sample_type & PERF_SAMPLE_RAW)
1031 process_raw_event(event, more_data, cpu, timestamp, thread);
Ingo Molnar0a02ad92009-09-11 12:12:54 +02001032
1033 return 0;
1034}
1035
1036static int
1037process_event(event_t *event, unsigned long offset, unsigned long head)
1038{
1039 trace_event(event);
1040
1041 switch (event->header.type) {
1042 case PERF_EVENT_MMAP ... PERF_EVENT_LOST:
1043 return 0;
1044
1045 case PERF_EVENT_COMM:
1046 return process_comm_event(event, offset, head);
1047
1048 case PERF_EVENT_EXIT ... PERF_EVENT_READ:
1049 return 0;
1050
1051 case PERF_EVENT_SAMPLE:
1052 return process_sample_event(event, offset, head);
1053
1054 case PERF_EVENT_MAX:
1055 default:
1056 return -1;
1057 }
1058
1059 return 0;
1060}
1061
1062static int __cmd_sched(void)
1063{
1064 int ret, rc = EXIT_FAILURE;
1065 unsigned long offset = 0;
1066 unsigned long head = 0;
1067 struct stat perf_stat;
1068 event_t *event;
1069 uint32_t size;
1070 char *buf;
1071
1072 trace_report();
1073 register_idle_thread(&threads, &last_match);
1074
1075 input = open(input_name, O_RDONLY);
1076 if (input < 0) {
1077 perror("failed to open file");
1078 exit(-1);
1079 }
1080
1081 ret = fstat(input, &perf_stat);
1082 if (ret < 0) {
1083 perror("failed to stat file");
1084 exit(-1);
1085 }
1086
1087 if (!perf_stat.st_size) {
1088 fprintf(stderr, "zero-sized file, nothing to do!\n");
1089 exit(0);
1090 }
1091 header = perf_header__read(input);
1092 head = header->data_offset;
1093 sample_type = perf_header__sample_type(header);
1094
1095 if (!(sample_type & PERF_SAMPLE_RAW))
1096 die("No trace sample to read. Did you call perf record "
1097 "without -R?");
1098
1099 if (load_kernel() < 0) {
1100 perror("failed to load kernel symbols");
1101 return EXIT_FAILURE;
1102 }
1103
1104remap:
1105 buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
1106 MAP_SHARED, input, offset);
1107 if (buf == MAP_FAILED) {
1108 perror("failed to mmap file");
1109 exit(-1);
1110 }
1111
1112more:
1113 event = (event_t *)(buf + head);
1114
1115 size = event->header.size;
1116 if (!size)
1117 size = 8;
1118
1119 if (head + event->header.size >= page_size * mmap_window) {
1120 unsigned long shift = page_size * (head / page_size);
1121 int res;
1122
1123 res = munmap(buf, page_size * mmap_window);
1124 assert(res == 0);
1125
1126 offset += shift;
1127 head -= shift;
1128 goto remap;
1129 }
1130
1131 size = event->header.size;
1132
1133
1134 if (!size || process_event(event, offset, head) < 0) {
1135
1136 /*
1137 * assume we lost track of the stream, check alignment, and
1138 * increment a single u64 in the hope to catch on again 'soon'.
1139 */
1140
1141 if (unlikely(head & 7))
1142 head &= ~7ULL;
1143
1144 size = 8;
1145 }
1146
1147 head += size;
1148
1149 if (offset + head < (unsigned long)perf_stat.st_size)
1150 goto more;
1151
1152 rc = EXIT_SUCCESS;
1153 close(input);
1154
1155 return rc;
1156}
1157
1158static const char * const annotate_usage[] = {
1159 "perf trace [<options>] <command>",
1160 NULL
1161};
1162
1163static const struct option options[] = {
1164 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1165 "dump raw trace in ASCII"),
1166 OPT_BOOLEAN('v', "verbose", &verbose,
1167 "be more verbose (show symbol address, etc)"),
1168 OPT_END()
1169};
1170
1171int cmd_sched(int argc, const char **argv, const char *prefix __used)
1172{
Ingo Molnarfbf94822009-09-11 12:12:54 +02001173 long nr_iterations = 10, i;
Ingo Molnarec156762009-09-11 12:12:54 +02001174
Ingo Molnar0a02ad92009-09-11 12:12:54 +02001175 symbol__init();
1176 page_size = getpagesize();
1177
1178 argc = parse_options(argc, argv, options, annotate_usage, 0);
1179 if (argc) {
1180 /*
1181 * Special case: if there's an argument left then assume tha
1182 * it's a symbol filter:
1183 */
1184 if (argc > 1)
1185 usage_with_options(annotate_usage, options);
1186 }
1187
Ingo Molnarfbf94822009-09-11 12:12:54 +02001188// setup_pager();
Ingo Molnar0a02ad92009-09-11 12:12:54 +02001189
Ingo Molnarec156762009-09-11 12:12:54 +02001190 calibrate_run_measurement_overhead();
1191 calibrate_sleep_measurement_overhead();
1192
1193 test_calibrations();
1194
1195 parse_trace();
1196 print_task_traces();
1197 add_cross_task_wakeups();
1198
1199 create_tasks();
1200 printk("------------------------------------------------------------\n");
1201 for (i = 0; i < nr_iterations; i++)
1202 run_one_test();
1203
1204 return 0;
Ingo Molnar0a02ad92009-09-11 12:12:54 +02001205}