/**
 * @file op_pmu.c
 * Setup and handling of IA64 Performance Monitoring Unit (PMU)
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Bob Montgomery
 * @author Will Cohen
 * @author John Levon
 * @author Philippe Elie
 */


#include "oprofile.h"
#include "op_util.h"
#include <asm/perfmon.h>
#include "op_ia64_model.h"

/* number of counters physically present */
static uint op_nr_counters = 4;

/* performance counters come in pairs: pmcN and pmdN.  The pmc register
 * acts as the event selector; the pmd register is the counter itself. */
#define perf_reg(c) ((c)+4)

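/* A worked example of the mapping (illustrative comment only): generic
 * counter 0 is programmed through pmc4 and read through pmd4, counter 3
 * through pmc7/pmd7, i.e. perf_reg(0) == 4 ... perf_reg(3) == 7. */
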
#define IA64_1_PMD_MASK_VAL ((1UL << 32) - 1)
#define IA64_2_PMD_MASK_VAL ((1UL << 47) - 1)

/* The appropriate value is selected in pmu_init() */
unsigned long pmd_mask = IA64_2_PMD_MASK_VAL;

#define pmd_overflowed(r,c) ((r) & (1UL << perf_reg(c)))
#define set_pmd_neg(v,c) do { \
	ia64_set_pmd(perf_reg(c), -(ulong)(v) & pmd_mask); \
	ia64_srlz_d(); } while (0)
#define set_pmd(v,c) do { \
	ia64_set_pmd(perf_reg(c), (v) & pmd_mask); \
	ia64_srlz_d(); } while (0)
#define set_pmc(v,c) do { ia64_set_pmc(perf_reg(c), (v)); ia64_srlz_d(); } while (0)
#define get_pmd(c) ia64_get_pmd(perf_reg(c))
#define get_pmc(c) ia64_get_pmc(perf_reg(c))
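
/* Why set_pmd_neg writes the negated count (illustrative comment, using the
 * Itanium 2 mask as an assumption): for a requested count of 100000,
 * -(ulong)100000 & ((1UL << 47) - 1) == 0x7ffffffe7960.  Counting up from
 * that value, the pmd wraps past bit 46 after exactly 100000 monitored
 * events, which sets the counter's overflow bit in pmc0 and raises the
 * PMU interrupt. */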

/* ---------------- IRQ handler ------------------ */

/* The args match those of pfm_overflow_handler in perfmon.c.
 * The task_struct is currently filled in with the perfmon "owner" of
 * the PMU.  This might change; I'm not sure it makes sense in perfmon
 * either with system-wide profiling.
 * pmc0 is a bit mask of overflowed counters (bits 4-7).
 * Unlike pfm_overflow_handler, this routine returns nothing; the caller
 * clears pmc0 afterwards to resume interrupts.
 */
static inline void
op_do_pmu_interrupt(u64 pmc0, struct pt_regs *regs)
{
	uint cpu = op_cpu_id();
	int ctr;

	for (ctr = 0 ; ctr < op_nr_counters ; ++ctr) {
		if (pmd_overflowed(pmc0, ctr)) {
			op_do_profile(cpu, regs->cr_iip, 1, ctr);
			set_pmd_neg(oprof_data[cpu].ctr_count[ctr], ctr);
		}
	}
}
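
/* Illustrative comment: if generic counters 0 and 2 overflowed, pmc0 would
 * read as 0x51 (overflow bits 4 and 6, plus the freeze bit, bit 0), and the
 * loop above would sample and restart exactly those two counters. */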


static void
op_raw_pmu_interrupt(int irq, void *arg, struct pt_regs *regs)
{
	u64 pmc0;

	pmc0 = ia64_get_pmc(0);

	if ((pmc0 & ~0x1UL) != 0UL) {
		op_do_pmu_interrupt(pmc0, regs);
		ia64_set_pmc(0, 0);
		ia64_srlz_d();
	}
}


#define MY_OPROFILE_VECTOR (IA64_PERFMON_VECTOR - 2)
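
/* Assumption for illustration: on 2.4 ia64 kernels IA64_PERFMON_VECTOR is
 * typically 0xee, so oprofile would claim vector 0xec here, leaving
 * perfmon's own vector untouched. */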

static void
op_set_pmv(void *dummy)
{
	ia64_set_pmv(MY_OPROFILE_VECTOR);
	ia64_srlz_d();
}


static void
op_restore_pmv(void *dummy)
{
	ia64_set_pmv(IA64_PERFMON_VECTOR);
	ia64_srlz_d();
}


static int
install_handler(void)
{
	int err = 0;

	/* Try it legally - confusion about vec vs irq */
	err = request_irq(MY_OPROFILE_VECTOR, op_raw_pmu_interrupt,
			SA_INTERRUPT | SA_PERCPU_IRQ, "oprofile", NULL);

	if (err) {
		printk(KERN_ALERT "oprofile_IA64: request_irq fails, "
		       "returns %d\n", err);
		return err;
	}

	if ((smp_call_function(op_set_pmv, NULL, 0, 1))) {
		printk(KERN_ALERT "oprofile_IA64: unexpected failure "
		       "of smp_call_function(op_set_pmv)\n");
	}

	op_set_pmv(NULL);

	return err;
}


static int
restore_handler(void)
{
	int err = 0;

	if ((smp_call_function(op_restore_pmv, NULL, 0, 1))) {
		printk(KERN_ALERT "oprofile_IA64: unexpected failure "
		       "of smp_call_function(op_restore_pmv)\n");
	}

	op_restore_pmv(NULL);

	free_irq(MY_OPROFILE_VECTOR, NULL);
	return err;
}


/* ---------------- PMU setup ------------------ */

/* This is kind of artificial.  The proc interface might really want to
 * accept register values directly.  There are other features not exposed
 * by this limited interface; of course, exposing them might require all
 * sorts of validity checking. */
static void
pmc_fill_in(ulong *val, u8 kernel, u8 user, u8 event, u8 um)
{
	/* enable interrupt generation */
	*val |= (1<<5);

	/* setup as a privileged monitor */
	*val |= (1<<6);

	/* McKinley requires pmc4 to have bit 23 set (enable PMU).
	 * It is supposedly ignored in other pmc registers.
	 * Try assuming it's ignored in Itanium, too, and just
	 * set it for everyone.
	 */
	*val |= (1<<23);

	/* enable/disable chosen OS and USR counting */
	if (user)
		*val |= (1<<3);
	else
		*val &= ~(1<<3);

	if (kernel)
		*val |= (1<<0);
	else
		*val &= ~(1<<0);

	/* what are we counting ? */
	*val &= ~(0xff << 8);
	*val |= ((event & 0xff) << 8);
	*val &= ~(0xf << 16);
	*val |= ((um & 0xf) << 16);
}
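
/* Worked example (illustrative comment only): counting a hypothetical event
 * 0x12 with unit mask 0 in both kernel and user mode gives
 *
 *	ulong val = 0;
 *	pmc_fill_in(&val, 1, 1, 0x12, 0);
 *	// val == (1<<0) | (1<<3) | (1<<5) | (1<<6) | (0x12<<8) | (1<<23)
 *	//     == 0x801269
 */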


static void
pmu_setup(void *dummy)
{
	ulong pmc_val;
	int ii;

	/* setup each counter */
	for (ii = 0 ; ii < op_nr_counters ; ++ii) {
		if (sysctl.ctr[ii].enabled) {
			pmc_val = 0;

			set_pmd_neg(sysctl.ctr[ii].count, ii);
			pmc_fill_in(&pmc_val, sysctl.ctr[ii].kernel,
				sysctl.ctr[ii].user, sysctl.ctr[ii].event,
				sysctl.ctr[ii].unit_mask);

			set_pmc(pmc_val, ii);
		}
	}
}


void
disable_psr(void *dummy)
{
	struct pt_regs *regs;

	/* disable profiling for my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 0;
	/* shouldn't need to */
	ia64_psr(regs)->up = 0;

	/* disable profiling for my current state */
	__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");

#if defined(CONFIG_PERFMON) && defined(CONFIG_SMP)
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the 3rd flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	/* disable profiling for everyone else */
	local_cpu_data->pfm_syst_wide = 1;
	local_cpu_data->pfm_dcr_pp = 0;
#endif
#endif
	ia64_set_pmc(0, 0);
	ia64_srlz_d();
}
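
/* Illustrative comment on the pt_regs arithmetic above: on ia64 a task's
 * struct task_struct and its kernel stack share one memory area, with the
 * pt_regs saved at first entry into the kernel sitting at the top of that
 * area.  (struct pt_regs *)((unsigned long)task + IA64_STK_OFFSET) points
 * just past the area, so decrementing the pointer by one lands on that
 * saved pt_regs. */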


static int
pmu_setup_all(void)
{
	/* This would be a great place to reserve all cpus with
	 * some sort of call to perfmonctl (something like the
	 * CREATE_CONTEXT command).  The current interface to
	 * perfmonctl wants to be called from a different task id
	 * for each CPU to be set up (and doesn't allow calls from
	 * modules).
	 */

	/* disable profiling with the psr.pp bit */
	if ((smp_call_function(disable_psr, NULL, 0, 1)))
		return -EFAULT;

	disable_psr(NULL);

	/* now I've reserved the PMUs and they should be quiet */

	if ((smp_call_function(pmu_setup, NULL, 0, 1)))
		return -EFAULT;

	pmu_setup(NULL);
	return 0;
}


#ifndef CONFIG_SMP
/* from linux/arch/ia64/kernel/perfmon.c */
/*
 * Originally written by Ganesh Venkitachalam, IBM Corp.
 * Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com>
 *
 * Modifications by Stephane Eranian, Hewlett-Packard Co.
 * Modifications by David Mosberger-Tang, Hewlett-Packard Co.
 *
 * Copyright (C) 1999-2002 Hewlett Packard Co
 * Stephane Eranian <eranian@hpl.hp.com>
 * David Mosberger-Tang <davidm@hpl.hp.com>
 */

/*
 * On UP kernels, we do not need to constantly set the psr.pp bit
 * when a task is scheduled.  The psr.pp bit can only be changed in
 * the kernel because of a user request.  Given we are on a UP,
 * non-preemptive kernel, we know that no other task is running, so we
 * can simply update each task's psr.pp in its saved state.  There is thus
 * no impact on the context switch code compared to the SMP case.
 */
static void
op_tasklist_toggle_pp(unsigned int val)
{
	struct task_struct *p;
	struct pt_regs *regs;

	read_lock(&tasklist_lock);

	for_each_task(p) {
		regs = (struct pt_regs *)((unsigned long) p + IA64_STK_OFFSET);

		/*
		 * position on the pt_regs saved on the stack at 1st entry
		 * into the kernel
		 */
		regs--;

		/*
		 * update psr.pp
		 */
		ia64_psr(regs)->pp = val;
	}
	read_unlock(&tasklist_lock);
}
#endif


static void
pmu_start(void *info)
{
	struct pt_regs *regs;

	if (info && (*((uint *)info) != op_cpu_id()))
		return;

	/* printk(KERN_ALERT "oprofile_IA64: pmu_start on cpu %d\n",
	   op_cpu_id()); */

	/* The default control register pp value is copied into psr.pp
	 * on an interrupt.  This allows interrupt service routines to
	 * be monitored.
	 */
	ia64_set_dcr(ia64_get_dcr() | IA64_DCR_PP);

#ifdef CONFIG_PERFMON
#ifdef CONFIG_SMP
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the 3rd flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	local_cpu_data->pfm_syst_wide = 1;
	local_cpu_data->pfm_dcr_pp = 1;
#endif
#else
	op_tasklist_toggle_pp(1);
#endif
#endif
	/* set it in my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 1;

	/* set it in my current state */
	__asm__ __volatile__ ("ssm psr.pp;;"::: "memory");
	ia64_srlz_d();
}


static void
pmu_stop(void *info)
{
	struct pt_regs *regs;

	if (info && (*((uint *)info) != op_cpu_id()))
		return;

	/* stop in my current state */
	__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");

	/* disable the dcr pp */
	ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);

#ifdef CONFIG_PERFMON
#ifdef CONFIG_SMP
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the 3rd flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	local_cpu_data->pfm_syst_wide = 0;
	local_cpu_data->pfm_dcr_pp = 0;
#endif
#else
	op_tasklist_toggle_pp(0);
#endif
#endif

	/* disable in my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 0;
}


static void
pmu_select_start(uint cpu)
{
	if (cpu == op_cpu_id())
		pmu_start(NULL);
	else
		smp_call_function(pmu_start, &cpu, 0, 1);
}


static void
pmu_select_stop(uint cpu)
{
	if (cpu == op_cpu_id())
		pmu_stop(NULL);
	else
		smp_call_function(pmu_stop, &cpu, 0, 1);
}


static void
pmu_start_all(void)
{
	int cpu, i;

	for (cpu = 0; cpu < smp_num_cpus; cpu++) {
		struct _oprof_data * data = &oprof_data[cpu];

		for (i = 0 ; i < op_nr_counters ; ++i) {
			if (sysctl.ctr[i].enabled) {
				data->ctr_count[i] = sysctl.ctr[i].count;
			} else {
				data->ctr_count[i] = 0;
			}
		}
	}

	if (!install_handler()) {
		smp_call_function(pmu_start, NULL, 0, 1);
		pmu_start(NULL);
	}
	/* FIXME: need some way to fail here */
}


static void
pmu_stop_all(void)
{
	smp_call_function(pmu_stop, NULL, 0, 1);
	pmu_stop(NULL);
	restore_handler();
}


static int
pmu_check_params(void)
{
	int i;
	int enabled = 0;

	for (i = 0; i < op_nr_counters ; i++) {
		if (!sysctl.ctr[i].enabled)
			continue;

		enabled = 1;

		if (!sysctl.ctr[i].user && !sysctl.ctr[i].kernel) {
			printk(KERN_ERR "oprofile: neither kernel nor user "
			       "set for counter %d\n", i);
			return -EINVAL;
		}

		if (check_range(sysctl.ctr[i].count, 1, OP_MAX_PERF_COUNT,
				"ctr count value %d not in range (%d %ld)\n"))
			return -EINVAL;
	}

	if (!enabled) {
		printk(KERN_ERR "oprofile: no counters have been enabled.\n");
		return -EINVAL;
	}

	return 0;
}


static struct op_msrs cpu_msrs[NR_CPUS];


static void free_msr_group(struct op_msr_group * group)
{
	if (group->addrs)
		kfree(group->addrs);
	if (group->saved)
		kfree(group->saved);
	group->addrs = NULL;
	group->saved = NULL;
}


static void pmu_save_registers(void * dummy)
{
	uint i;
	uint const cpu = op_cpu_id();
	struct op_msr_group * counters = &cpu_msrs[cpu].counters;
	struct op_msr_group * controls = &cpu_msrs[cpu].controls;

	counters->addrs = NULL;
	counters->saved = NULL;
	controls->addrs = NULL;
	controls->saved = NULL;

	/* this runs via smp_call_function (i.e. in interrupt context) as
	 * well as directly, so the allocations must not sleep */
	counters->saved = kmalloc(
		op_nr_counters * sizeof(struct op_saved_msr), GFP_ATOMIC);
	if (!counters->saved)
		goto fault;

	controls->saved = kmalloc(
		op_nr_counters * sizeof(struct op_saved_msr), GFP_ATOMIC);
	if (!controls->saved)
		goto fault;

	for (i = 0; i < op_nr_counters; ++i) {
		controls->saved[i].low = get_pmc(i);
		counters->saved[i].low = get_pmd(i);
	}
	return;

fault:
	free_msr_group(counters);
	free_msr_group(controls);
}


static void pmu_restore_registers(void * dummy)
{
	uint i;
	uint const cpu = op_cpu_id();
	struct op_msr_group * counters = &cpu_msrs[cpu].counters;
	struct op_msr_group * controls = &cpu_msrs[cpu].controls;

	for (i = 0; i < op_nr_counters; ++i) {
		set_pmc(controls->saved[i].low, i);
		set_pmd(counters->saved[i].low, i);
	}

	free_msr_group(counters);
	free_msr_group(controls);
}


static int
pmu_init(void)
{
	int err = 0;

	/* figure out the processor type and configure the number of
	 * bits in the pmd and the number of counters */
	switch (get_cpu_type()) {
	case CPU_IA64_1:
		pmd_mask = IA64_1_PMD_MASK_VAL; break;
	case CPU_IA64_2:
	case CPU_IA64:
		pmd_mask = IA64_2_PMD_MASK_VAL; break;
	default:
		/* unknown cpu type: bail out before err can be
		 * clobbered by a successful smp_call_function below */
		err = -EIO;
		goto out;
	}

	op_nr_counters = 4;

	if ((err = smp_call_function(pmu_save_registers, NULL, 0, 1))) {
		goto out;
	}
	pmu_save_registers(NULL);

out:
	return err;
}


static void
pmu_deinit(void)
{
	smp_call_function(pmu_restore_registers, NULL, 0, 1);
	pmu_restore_registers(NULL);
}


static char *names[] = { "0", "1", "2", "3", };


static int
pmu_add_sysctls(ctl_table * next)
{
	ctl_table * start = next;
	ctl_table * tab;
	int i, j;

	for (i = 0; i < op_nr_counters; i++) {
		next->ctl_name = 1;
		next->procname = names[i];
		next->mode = 0700;

		if (!(tab = kmalloc(sizeof(ctl_table) * 7, GFP_KERNEL)))
			goto cleanup;

		next->child = tab;

		memset(tab, 0, sizeof(ctl_table) * 7);
		tab[0] = ((ctl_table) { 1, "enabled", &sysctl_parms.ctr[i].enabled, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[1] = ((ctl_table) { 1, "event", &sysctl_parms.ctr[i].event, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[2] = ((ctl_table) { 1, "count", &sysctl_parms.ctr[i].count, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[3] = ((ctl_table) { 1, "unit_mask", &sysctl_parms.ctr[i].unit_mask, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[4] = ((ctl_table) { 1, "kernel", &sysctl_parms.ctr[i].kernel, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[5] = ((ctl_table) { 1, "user", &sysctl_parms.ctr[i].user, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		next++;
	}

	return 0;

cleanup:
	next = start;
	for (j = 0; j < i; j++) {
		kfree(next->child);
		next++;
	}
	return -EFAULT;
}
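
/* For illustration (assuming the oprofile sysctl root registered elsewhere
 * in this module is dev.oprofile): the per-counter tables built above show
 * up as /proc/sys/dev/oprofile/<ctr>/{enabled,event,count,unit_mask,kernel,user}. */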


static void pmu_remove_sysctls(ctl_table * next)
{
	int ii;

	for (ii = 0; ii < op_nr_counters; ii++) {
		kfree(next->child);
		next++;
	}
}


struct op_int_operations op_nmi_ops = {
	init: pmu_init,
	deinit: pmu_deinit,
	add_sysctls: pmu_add_sysctls,
	remove_sysctls: pmu_remove_sysctls,
	check_params: pmu_check_params,
	setup: pmu_setup_all,
	start: pmu_start_all,
	stop: pmu_stop_all,
	start_cpu: pmu_select_start,
	stop_cpu: pmu_select_stop,
};


struct op_int_operations const * op_int_interface(void)
{
	return &op_nmi_ops;
}

/* Need this dummy so module/oprofile.c links */
struct op_int_operations op_rtc_ops = {
	init: NULL,
	deinit: NULL,
	add_sysctls: NULL,
	remove_sysctls: NULL,
	check_params: NULL,
	setup: NULL,
	start: NULL,
	stop: NULL,
	start_cpu: NULL,
	stop_cpu: NULL,
};