blob: 47925927b12f7cd7a7d293a021dc063d7462367c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/**
2 * @file op_model_p4.c
3 * P4 model-specific MSR operations
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author Graydon Hoare
9 */
10
11#include <linux/oprofile.h>
12#include <linux/smp.h>
13#include <asm/msr.h>
14#include <asm/ptrace.h>
15#include <asm/fixmap.h>
16#include <asm/apic.h>
Don Zickus3e4ff112006-06-26 13:57:01 +020017#include <asm/nmi.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018
19#include "op_x86_model.h"
20#include "op_counter.h"
21
/* number of rows in the p4_events[] binding table */
#define NUM_EVENTS		39

/* register counts used unless two siblings are detected */
#define NUM_COUNTERS_NON_HT	8
#define NUM_ESCRS_NON_HT	45
#define NUM_CCCRS_NON_HT	18
#define NUM_CONTROLS_NON_HT	(NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

/* register counts used when smp_num_siblings == 2 */
#define NUM_COUNTERS_HT2	4
#define NUM_ESCRS_HT2		23
#define NUM_CCCRS_HT2		9
#define NUM_CONTROLS_HT2	(NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
33
34static unsigned int num_counters = NUM_COUNTERS_NON_HT;
Don Zickuscb9c4482006-09-26 10:52:26 +020035static unsigned int num_controls = NUM_CONTROLS_NON_HT;
Linus Torvalds1da177e2005-04-16 15:20:36 -070036
/*
 * Pick the register counts matching the CPU.  This cannot be decided at
 * compile time because whether the chip is hyper-threaded is only known
 * once the kernel has booted.  Without CONFIG_SMP this is a no-op.
 */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	/* anything other than exactly two siblings keeps the non-HT layout */
	if (smp_num_siblings != 2)
		return;

	num_counters = NUM_COUNTERS_HT2;
	num_controls = NUM_CONTROLS_HT2;
#endif
}
49
/*
 * Step between consecutive MSR addresses when walking a range of ESCRs:
 * 2 when smp_num_siblings == 2, otherwise (and always without
 * CONFIG_SMP) 1.
 *
 * Note: the specifiers are ordered "static inline int"; placing
 * "inline" after the return type is an old-style declaration that
 * compilers warn about.
 */
static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}
58
59
60/* tables to simulate simplified hardware view of p4 registers */
/*
 * One hardware counter as seen by this module: the CTR_* bit that names
 * it, plus the MSR addresses of the counter and of its CCCR.
 */
struct p4_counter_binding {
	int virt_counter;	/* CTR_* bit mask */
	int counter_address;	/* MSR address of the counter */
	int cccr_address;	/* MSR address of the matching CCCR */
};
66
/*
 * One event: the selector values to program, and up to two
 * (counter, ESCR) pairs through which the event can be counted.
 */
struct p4_event_binding {
	/* value to put in CCCR */
	int escr_select;
	/* value to put in ESCR */
	int event_select;
	struct {
		/* for this counter (CTR_* bit)... */
		int virt_counter;
		/* ...use this ESCR (MSR address) */
		int escr_address;
	} bindings[2];
};
75
/*
 * Bit masks naming the eight counters, one bit per counter.
 * nb: these CTR_* defines are a duplicate of defines in
 * event/i386.p4*events.
 */
#define CTR_BPU_0	0x01
#define CTR_MS_0	0x02
#define CTR_FLAME_0	0x04
#define CTR_IQ_4	0x08
#define CTR_BPU_2	0x10
#define CTR_MS_2	0x20
#define CTR_FLAME_2	0x40
#define CTR_IQ_5	0x80
88
89static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
90 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
91 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
92 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
93 { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 },
94 { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 },
95 { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 },
96 { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
97 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
98};
99
/*
 * CCCRs that have no counter bound in p4_counters[].  The expansion is
 * parenthesized so the subtraction survives being used inside a larger
 * expression (e.g. "2 * NUM_UNUSED_CCCRS").
 */
#define NUM_UNUSED_CCCRS	(NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
101
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102/* p4 event codes in libop/op_event.h are indices into this table. */
103
104static struct p4_event_binding p4_events[NUM_EVENTS] = {
105
106 { /* BRANCH_RETIRED */
107 0x05, 0x06,
108 { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
109 {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
110 },
111
112 { /* MISPRED_BRANCH_RETIRED */
113 0x04, 0x03,
114 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
115 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
116 },
117
118 { /* TC_DELIVER_MODE */
119 0x01, 0x01,
120 { { CTR_MS_0, MSR_P4_TC_ESCR0},
121 { CTR_MS_2, MSR_P4_TC_ESCR1} }
122 },
123
124 { /* BPU_FETCH_REQUEST */
125 0x00, 0x03,
126 { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
127 { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
128 },
129
130 { /* ITLB_REFERENCE */
131 0x03, 0x18,
132 { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
133 { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
134 },
135
136 { /* MEMORY_CANCEL */
137 0x05, 0x02,
138 { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
139 { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
140 },
141
142 { /* MEMORY_COMPLETE */
143 0x02, 0x08,
144 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
145 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
146 },
147
148 { /* LOAD_PORT_REPLAY */
149 0x02, 0x04,
150 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
151 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
152 },
153
154 { /* STORE_PORT_REPLAY */
155 0x02, 0x05,
156 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
157 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
158 },
159
160 { /* MOB_LOAD_REPLAY */
161 0x02, 0x03,
162 { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
163 { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
164 },
165
166 { /* PAGE_WALK_TYPE */
167 0x04, 0x01,
168 { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
169 { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
170 },
171
172 { /* BSQ_CACHE_REFERENCE */
173 0x07, 0x0c,
174 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
175 { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
176 },
177
178 { /* IOQ_ALLOCATION */
179 0x06, 0x03,
180 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
181 { 0, 0 } }
182 },
183
184 { /* IOQ_ACTIVE_ENTRIES */
185 0x06, 0x1a,
186 { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
187 { 0, 0 } }
188 },
189
190 { /* FSB_DATA_ACTIVITY */
191 0x06, 0x17,
192 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
193 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
194 },
195
196 { /* BSQ_ALLOCATION */
197 0x07, 0x05,
198 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
199 { 0, 0 } }
200 },
201
202 { /* BSQ_ACTIVE_ENTRIES */
203 0x07, 0x06,
204 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
205 { 0, 0 } }
206 },
207
208 { /* X87_ASSIST */
209 0x05, 0x03,
210 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
211 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
212 },
213
214 { /* SSE_INPUT_ASSIST */
215 0x01, 0x34,
216 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
217 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
218 },
219
220 { /* PACKED_SP_UOP */
221 0x01, 0x08,
222 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
223 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
224 },
225
226 { /* PACKED_DP_UOP */
227 0x01, 0x0c,
228 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
229 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
230 },
231
232 { /* SCALAR_SP_UOP */
233 0x01, 0x0a,
234 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
235 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
236 },
237
238 { /* SCALAR_DP_UOP */
239 0x01, 0x0e,
240 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
241 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
242 },
243
244 { /* 64BIT_MMX_UOP */
245 0x01, 0x02,
246 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
247 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
248 },
249
250 { /* 128BIT_MMX_UOP */
251 0x01, 0x1a,
252 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
253 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
254 },
255
256 { /* X87_FP_UOP */
257 0x01, 0x04,
258 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
259 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
260 },
261
262 { /* X87_SIMD_MOVES_UOP */
263 0x01, 0x2e,
264 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
265 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
266 },
267
268 { /* MACHINE_CLEAR */
269 0x05, 0x02,
270 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
271 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
272 },
273
274 { /* GLOBAL_POWER_EVENTS */
275 0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
276 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
277 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
278 },
279
280 { /* TC_MS_XFER */
281 0x00, 0x05,
282 { { CTR_MS_0, MSR_P4_MS_ESCR0},
283 { CTR_MS_2, MSR_P4_MS_ESCR1} }
284 },
285
286 { /* UOP_QUEUE_WRITES */
287 0x00, 0x09,
288 { { CTR_MS_0, MSR_P4_MS_ESCR0},
289 { CTR_MS_2, MSR_P4_MS_ESCR1} }
290 },
291
292 { /* FRONT_END_EVENT */
293 0x05, 0x08,
294 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
295 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
296 },
297
298 { /* EXECUTION_EVENT */
299 0x05, 0x0c,
300 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
301 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
302 },
303
304 { /* REPLAY_EVENT */
305 0x05, 0x09,
306 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
307 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
308 },
309
310 { /* INSTR_RETIRED */
311 0x04, 0x02,
312 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
313 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
314 },
315
316 { /* UOPS_RETIRED */
317 0x04, 0x01,
318 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
319 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
320 },
321
322 { /* UOP_TYPE */
323 0x02, 0x02,
324 { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
325 { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
326 },
327
328 { /* RETIRED_MISPRED_BRANCH_TYPE */
329 0x02, 0x05,
330 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
331 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
332 },
333
334 { /* RETIRED_BRANCH_TYPE */
335 0x02, 0x04,
336 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
337 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
338 }
339};
340
341
/* non-zero when bit 7 of x is set; p4_setup_ctrs() applies this to the
   low word read from MSR_IA32_MISC_ENABLE */
#define MISC_PMC_ENABLED_P(x)	((x) & (1 << 7))
343
/*
 * ESCR helpers.  The ESCR_SET_* macros OR a field into the 'escr'
 * lvalue; ESCR_CLEAR masks it down to ESCR_RESERVED_BITS first.
 * The *_0 variants are used when the stagger is 0 and the *_1 variants
 * otherwise (see pmc_setup_one_p4_counter()).
 */
#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))

/*
 * Read/write the ESCR named by binding 'i' of event 'ev'.  'ev' is
 * parenthesized so that any pointer expression (not just a plain
 * identifier) can be passed.
 */
#define ESCR_READ(escr, high, ev, i) \
	do { rdmsr((ev)->bindings[(i)].escr_address, (escr), (high)); } while (0)
#define ESCR_WRITE(escr, high, ev, i) \
	do { wrmsr((ev)->bindings[(i)].escr_address, (escr), (high)); } while (0)
354
/*
 * CCCR helpers.  The CCCR_SET_* / CCCR_CLEAR* macros modify the 'cccr'
 * lvalue in place; CCCR_READ/CCCR_WRITE access the CCCR of entry 'i' of
 * p4_counters[].
 */
#define CCCR_RESERVED_BITS		0x38030FFF
#define CCCR_CLEAR(cccr)		((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr)	((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel)	((cccr) |= (((sel) & 0x07) << 13))
/* bit 26: PMI on overflow, used when the stagger is 0 */
#define CCCR_SET_PMI_OVF_0(cccr)	((cccr) |= 0x04000000)
/* bit 27: PMI on overflow, used when the stagger is non-zero */
#define CCCR_SET_PMI_OVF_1(cccr)	((cccr) |= 0x08000000)
/* bit 12: enable */
#define CCCR_SET_ENABLE(cccr)		((cccr) |= 0x00001000)
#define CCCR_SET_DISABLE(cccr)		((cccr) &= ~0x00001000)
#define CCCR_READ(low, high, i)						\
	do {								\
		rdmsr(p4_counters[(i)].cccr_address, (low), (high));	\
	} while (0)
#define CCCR_WRITE(low, high, i)					\
	do {								\
		wrmsr(p4_counters[(i)].cccr_address, (low), (high));	\
	} while (0)
/* bit 31: overflow flag */
#define CCCR_OVF_P(cccr)		((cccr) & 0x80000000U)
#define CCCR_CLEAR_OVF(cccr)		((cccr) &= (~0x80000000U))
367
/*
 * A control/counter slot counts as reserved when its recorded MSR
 * address is non-zero; both macros yield exactly 1 or 0.  'msrs' is
 * parenthesized so any pointer expression can be passed.
 */
#define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0)
#define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0)

/* read entry 'i' of p4_counters[] into the (l, h) pair */
#define CTR_READ(l, h, i) \
	do { rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
/* write the negated 32-bit value of 'l' as the low word and -1 as the
   high word */
#define CTR_WRITE(l, i) \
	do { wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
/* true once bit 31 of the low counter word is clear */
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
373
374
/*
 * Compute the "stagger" of the current CPU.  It is used throughout this
 * module as an extra array offset that selects the "even" or "odd" half
 * of every divided resource.
 *
 * Returns 1 when the current CPU is not the first CPU of its
 * cpu_sibling_map entry, 0 otherwise (and always 0 without CONFIG_SMP).
 */
static unsigned int get_stagger(void)
{
	unsigned int stagger = 0;

#ifdef CONFIG_SMP
	int this_cpu = smp_processor_id();

	if (this_cpu != first_cpu(cpu_sibling_map[this_cpu]))
		stagger = 1;
#endif
	return stagger;
}
386
387
/*
 * Translate a "virtual" counter number into an index for a real
 * hardware counter: pass the virtual number together with your stagger
 * setting, and the stagger selects which group of num_counters entries
 * the index falls into.
 */
#define VIRT_CTR(stagger, i)	(((stagger) * (num_counters)) + (i))
392
393static unsigned long reset_value[NUM_COUNTERS_NON_HT];
394
395
396static void p4_fill_in_addresses(struct op_msrs * const msrs)
397{
398 unsigned int i;
Don Zickuscb9c4482006-09-26 10:52:26 +0200399 unsigned int addr, cccraddr, stag;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400
401 setup_num_counters();
402 stag = get_stagger();
403
Don Zickuscb9c4482006-09-26 10:52:26 +0200404 /* initialize some registers */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405 for (i = 0; i < num_counters; ++i) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200406 msrs->counters[i].addr = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 }
Don Zickuscb9c4482006-09-26 10:52:26 +0200408 for (i = 0; i < num_controls; ++i) {
409 msrs->controls[i].addr = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410 }
411
Don Zickuscb9c4482006-09-26 10:52:26 +0200412 /* the counter & cccr registers we pay attention to */
413 for (i = 0; i < num_counters; ++i) {
414 addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
415 cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
416 if (reserve_perfctr_nmi(addr)){
417 msrs->counters[i].addr = addr;
418 msrs->controls[i].addr = cccraddr;
419 }
420 }
421
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422 /* 43 ESCR registers in three or four discontiguous group */
423 for (addr = MSR_P4_BSU_ESCR0 + stag;
424 addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200425 if (reserve_evntsel_nmi(addr))
426 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 }
428
429 /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
430 * to avoid special case in nmi_{save|restore}_registers() */
431 if (boot_cpu_data.x86_model >= 0x3) {
432 for (addr = MSR_P4_BSU_ESCR0 + stag;
433 addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200434 if (reserve_evntsel_nmi(addr))
435 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 }
437 } else {
438 for (addr = MSR_P4_IQ_ESCR0 + stag;
439 addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200440 if (reserve_evntsel_nmi(addr))
441 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 }
443 }
444
445 for (addr = MSR_P4_RAT_ESCR0 + stag;
446 addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200447 if (reserve_evntsel_nmi(addr))
448 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449 }
450
451 for (addr = MSR_P4_MS_ESCR0 + stag;
452 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200453 if (reserve_evntsel_nmi(addr))
454 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455 }
456
457 for (addr = MSR_P4_IX_ESCR0 + stag;
458 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200459 if (reserve_evntsel_nmi(addr))
460 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461 }
462
463 /* there are 2 remaining non-contiguously located ESCRs */
464
465 if (num_counters == NUM_COUNTERS_NON_HT) {
466 /* standard non-HT CPUs handle both remaining ESCRs*/
Don Zickuscb9c4482006-09-26 10:52:26 +0200467 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
468 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
469 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
470 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471
472 } else if (stag == 0) {
473 /* HT CPUs give the first remainder to the even thread, as
474 the 32nd control register */
Don Zickuscb9c4482006-09-26 10:52:26 +0200475 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
476 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700477
478 } else {
479 /* and two copies of the second to the odd thread,
480 for the 22st and 23nd control registers */
Don Zickuscb9c4482006-09-26 10:52:26 +0200481 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
482 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
483 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
484 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485 }
486}
487
488
489static void pmc_setup_one_p4_counter(unsigned int ctr)
490{
491 int i;
492 int const maxbind = 2;
493 unsigned int cccr = 0;
494 unsigned int escr = 0;
495 unsigned int high = 0;
496 unsigned int counter_bit;
497 struct p4_event_binding *ev = NULL;
498 unsigned int stag;
499
500 stag = get_stagger();
501
502 /* convert from counter *number* to counter *bit* */
503 counter_bit = 1 << VIRT_CTR(stag, ctr);
504
505 /* find our event binding structure. */
506 if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
507 printk(KERN_ERR
508 "oprofile: P4 event code 0x%lx out of range\n",
509 counter_config[ctr].event);
510 return;
511 }
512
513 ev = &(p4_events[counter_config[ctr].event - 1]);
514
515 for (i = 0; i < maxbind; i++) {
516 if (ev->bindings[i].virt_counter & counter_bit) {
517
518 /* modify ESCR */
519 ESCR_READ(escr, high, ev, i);
520 ESCR_CLEAR(escr);
521 if (stag == 0) {
522 ESCR_SET_USR_0(escr, counter_config[ctr].user);
523 ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
524 } else {
525 ESCR_SET_USR_1(escr, counter_config[ctr].user);
526 ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
527 }
528 ESCR_SET_EVENT_SELECT(escr, ev->event_select);
529 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
530 ESCR_WRITE(escr, high, ev, i);
531
532 /* modify CCCR */
533 CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
534 CCCR_CLEAR(cccr);
535 CCCR_SET_REQUIRED_BITS(cccr);
536 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
537 if (stag == 0) {
538 CCCR_SET_PMI_OVF_0(cccr);
539 } else {
540 CCCR_SET_PMI_OVF_1(cccr);
541 }
542 CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
543 return;
544 }
545 }
546
547 printk(KERN_ERR
548 "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
549 counter_config[ctr].event, stag, ctr);
550}
551
552
553static void p4_setup_ctrs(struct op_msrs const * const msrs)
554{
555 unsigned int i;
556 unsigned int low, high;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557 unsigned int stag;
558
559 stag = get_stagger();
560
561 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
562 if (! MISC_PMC_ENABLED_P(low)) {
563 printk(KERN_ERR "oprofile: P4 PMC not available\n");
564 return;
565 }
566
567 /* clear the cccrs we will use */
568 for (i = 0 ; i < num_counters ; i++) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200569 if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
570 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
572 CCCR_CLEAR(low);
573 CCCR_SET_REQUIRED_BITS(low);
574 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
575 }
576
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 /* clear all escrs (including those outside our concern) */
Don Zickuscb9c4482006-09-26 10:52:26 +0200578 for (i = num_counters; i < num_controls; i++) {
579 if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
580 continue;
581 wrmsr(msrs->controls[i].addr, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582 }
583
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 /* setup all counters */
585 for (i = 0 ; i < num_counters ; ++i) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200586 if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587 reset_value[i] = counter_config[i].count;
588 pmc_setup_one_p4_counter(i);
589 CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
590 } else {
591 reset_value[i] = 0;
592 }
593 }
594}
595
596
597static int p4_check_ctrs(struct pt_regs * const regs,
598 struct op_msrs const * const msrs)
599{
600 unsigned long ctr, low, high, stag, real;
601 int i;
602
603 stag = get_stagger();
604
605 for (i = 0; i < num_counters; ++i) {
606
607 if (!reset_value[i])
608 continue;
609
610 /*
611 * there is some eccentricity in the hardware which
612 * requires that we perform 2 extra corrections:
613 *
614 * - check both the CCCR:OVF flag for overflow and the
615 * counter high bit for un-flagged overflows.
616 *
617 * - write the counter back twice to ensure it gets
618 * updated properly.
619 *
620 * the former seems to be related to extra NMIs happening
621 * during the current NMI; the latter is reported as errata
622 * N15 in intel doc 249199-029, pentium 4 specification
623 * update, though their suggested work-around does not
624 * appear to solve the problem.
625 */
626
627 real = VIRT_CTR(stag, i);
628
629 CCCR_READ(low, high, real);
630 CTR_READ(ctr, high, real);
631 if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
632 oprofile_add_sample(regs, i);
633 CTR_WRITE(reset_value[i], real);
634 CCCR_CLEAR_OVF(low);
635 CCCR_WRITE(low, high, real);
636 CTR_WRITE(reset_value[i], real);
637 }
638 }
639
640 /* P4 quirk: you have to re-unmask the apic vector */
641 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
642
643 /* See op_model_ppro.c */
644 return 1;
645}
646
647
648static void p4_start(struct op_msrs const * const msrs)
649{
650 unsigned int low, high, stag;
651 int i;
652
653 stag = get_stagger();
654
655 for (i = 0; i < num_counters; ++i) {
656 if (!reset_value[i])
657 continue;
658 CCCR_READ(low, high, VIRT_CTR(stag, i));
659 CCCR_SET_ENABLE(low);
660 CCCR_WRITE(low, high, VIRT_CTR(stag, i));
661 }
662}
663
664
665static void p4_stop(struct op_msrs const * const msrs)
666{
667 unsigned int low, high, stag;
668 int i;
669
670 stag = get_stagger();
671
672 for (i = 0; i < num_counters; ++i) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200673 if (!reset_value[i])
674 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675 CCCR_READ(low, high, VIRT_CTR(stag, i));
676 CCCR_SET_DISABLE(low);
677 CCCR_WRITE(low, high, VIRT_CTR(stag, i));
678 }
679}
680
Don Zickuscb9c4482006-09-26 10:52:26 +0200681static void p4_shutdown(struct op_msrs const * const msrs)
682{
683 int i;
684
685 for (i = 0 ; i < num_counters ; ++i) {
686 if (CTR_IS_RESERVED(msrs,i))
687 release_perfctr_nmi(msrs->counters[i].addr);
688 }
689 /* some of the control registers are specially reserved in
690 * conjunction with the counter registers (hence the starting offset).
691 * This saves a few bits.
692 */
693 for (i = num_counters ; i < num_controls ; ++i) {
694 if (CTRL_IS_RESERVED(msrs,i))
695 release_evntsel_nmi(msrs->controls[i].addr);
696 }
697}
698
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699
#ifdef CONFIG_SMP
/* model description using the HT2 register counts; only built with
   CONFIG_SMP */
struct op_x86_model_spec const op_p4_ht2_spec = {
	.num_counters		= NUM_COUNTERS_HT2,
	.num_controls		= NUM_CONTROLS_HT2,
	.fill_in_addresses	= p4_fill_in_addresses,
	.setup_ctrs		= p4_setup_ctrs,
	.check_ctrs		= p4_check_ctrs,
	.start			= p4_start,
	.stop			= p4_stop,
	.shutdown		= p4_shutdown,
};
#endif
712
713struct op_x86_model_spec const op_p4_spec = {
714 .num_counters = NUM_COUNTERS_NON_HT,
715 .num_controls = NUM_CONTROLS_NON_HT,
716 .fill_in_addresses = &p4_fill_in_addresses,
717 .setup_ctrs = &p4_setup_ctrs,
718 .check_ctrs = &p4_check_ctrs,
719 .start = &p4_start,
Don Zickuscb9c4482006-09-26 10:52:26 +0200720 .stop = &p4_stop,
721 .shutdown = &p4_shutdown
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722};