blob: e6a160a4684a4a9d96914acc161122d6c7f8c208 [file] [log] [blame]
/**
 * @file op_model_p4.c
 * P4 model-specific MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Graydon Hoare
 */
10
11#include <linux/oprofile.h>
12#include <linux/smp.h>
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +020013#include <linux/ptrace.h>
14#include <linux/nmi.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070015#include <asm/msr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <asm/fixmap.h>
17#include <asm/apic.h>
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +020018
Linus Torvalds1da177e2005-04-16 15:20:36 -070019
20#include "op_x86_model.h"
21#include "op_counter.h"
22
/* number of entries in the p4_events[] table */
#define NUM_EVENTS		39

/* register counts used when the registers are not split between siblings */
#define NUM_COUNTERS_NON_HT	8
#define NUM_ESCRS_NON_HT	45
#define NUM_CCCRS_NON_HT	18
#define NUM_CONTROLS_NON_HT	(NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

/* register counts per thread when two siblings share the registers */
#define NUM_COUNTERS_HT2	4
#define NUM_ESCRS_HT2		23
#define NUM_CCCRS_HT2		9
#define NUM_CONTROLS_HT2	(NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

/* bit 31 of the low counter word, examined by p4_check_ctrs() */
#define OP_CTR_OVERFLOW		(1ULL << 31)

/* sizes in effect; start as non-HT, setup_num_counters() may switch to HT2 */
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
/*
 * The hyper-threadedness of a chip is only discovered at kernel boot
 * time, so the counter/control counts have to be selected dynamically:
 * switch to the HT2 sizes when exactly two siblings are reported.
 */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings != 2)
		return;

	num_counters = NUM_COUNTERS_HT2;
	num_controls = NUM_CONTROLS_HT2;
#endif
}
52
/*
 * Step used when walking a range of ESCR addresses: 2 when the kernel is
 * built with CONFIG_SMP and two siblings are reported (each thread then
 * takes every other register, offset by its stagger), 1 otherwise.
 */
static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}
61
62
/* tables to simulate simplified hardware view of p4 registers */

/* one hardware counter: its CTR_* bit and the MSRs that belong to it */
struct p4_counter_binding {
	int virt_counter;	/* CTR_* bit naming this counter */
	int counter_address;	/* MSR address of the counter itself */
	int cccr_address;	/* MSR address of the counter's CCCR */
};
69
/*
 * One event: the selector values to program, plus up to two
 * (counter, ESCR) pairs on which the event can be counted.
 */
struct p4_event_binding {
	int escr_select;		/* value to put in CCCR */
	int event_select;		/* value to put in ESCR */
	struct {
		int virt_counter;	/* for this counter (CTR_* bit)... */
		int escr_address;	/* ...use this ESCR */
	} bindings[2];
};
78
/*
 * One bit per counter.
 * nb: these CTR_* defines are a duplicate of defines in
 * event/i386.p4*events.
 */
#define CTR_BPU_0	(1 << 0)
#define CTR_MS_0	(1 << 1)
#define CTR_FLAME_0	(1 << 2)
#define CTR_IQ_4	(1 << 3)
#define CTR_BPU_2	(1 << 4)
#define CTR_MS_2	(1 << 5)
#define CTR_FLAME_2	(1 << 6)
#define CTR_IQ_5	(1 << 7)
91
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +020092static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -070093 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
94 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
95 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
96 { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 },
97 { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 },
98 { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 },
99 { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
100 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
101};
102
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200103#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105/* p4 event codes in libop/op_event.h are indices into this table. */
106
107static struct p4_event_binding p4_events[NUM_EVENTS] = {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200108
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109 { /* BRANCH_RETIRED */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200110 0x05, 0x06,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111 { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
112 {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
113 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200114
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115 { /* MISPRED_BRANCH_RETIRED */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200116 0x04, 0x03,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
118 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
119 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200120
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121 { /* TC_DELIVER_MODE */
122 0x01, 0x01,
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200123 { { CTR_MS_0, MSR_P4_TC_ESCR0},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124 { CTR_MS_2, MSR_P4_TC_ESCR1} }
125 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200126
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127 { /* BPU_FETCH_REQUEST */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200128 0x00, 0x03,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129 { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
130 { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
131 },
132
133 { /* ITLB_REFERENCE */
134 0x03, 0x18,
135 { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
136 { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
137 },
138
139 { /* MEMORY_CANCEL */
140 0x05, 0x02,
141 { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
142 { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
143 },
144
145 { /* MEMORY_COMPLETE */
146 0x02, 0x08,
147 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
148 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
149 },
150
151 { /* LOAD_PORT_REPLAY */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200152 0x02, 0x04,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
154 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
155 },
156
157 { /* STORE_PORT_REPLAY */
158 0x02, 0x05,
159 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
160 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
161 },
162
163 { /* MOB_LOAD_REPLAY */
164 0x02, 0x03,
165 { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
166 { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
167 },
168
169 { /* PAGE_WALK_TYPE */
170 0x04, 0x01,
171 { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
172 { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
173 },
174
175 { /* BSQ_CACHE_REFERENCE */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200176 0x07, 0x0c,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
178 { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
179 },
180
181 { /* IOQ_ALLOCATION */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200182 0x06, 0x03,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
184 { 0, 0 } }
185 },
186
187 { /* IOQ_ACTIVE_ENTRIES */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200188 0x06, 0x1a,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
190 { 0, 0 } }
191 },
192
193 { /* FSB_DATA_ACTIVITY */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200194 0x06, 0x17,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
196 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
197 },
198
199 { /* BSQ_ALLOCATION */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200200 0x07, 0x05,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
202 { 0, 0 } }
203 },
204
205 { /* BSQ_ACTIVE_ENTRIES */
206 0x07, 0x06,
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200207 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 { 0, 0 } }
209 },
210
211 { /* X87_ASSIST */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200212 0x05, 0x03,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
214 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
215 },
216
217 { /* SSE_INPUT_ASSIST */
218 0x01, 0x34,
219 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
220 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
221 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200222
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223 { /* PACKED_SP_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200224 0x01, 0x08,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
226 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
227 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200228
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229 { /* PACKED_DP_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200230 0x01, 0x0c,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
232 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
233 },
234
235 { /* SCALAR_SP_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200236 0x01, 0x0a,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
238 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
239 },
240
241 { /* SCALAR_DP_UOP */
242 0x01, 0x0e,
243 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
244 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
245 },
246
247 { /* 64BIT_MMX_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200248 0x01, 0x02,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
250 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
251 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200252
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 { /* 128BIT_MMX_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200254 0x01, 0x1a,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
256 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
257 },
258
259 { /* X87_FP_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200260 0x01, 0x04,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
262 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
263 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200264
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 { /* X87_SIMD_MOVES_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200266 0x01, 0x2e,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
268 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
269 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200270
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 { /* MACHINE_CLEAR */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200272 0x05, 0x02,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
274 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
275 },
276
277 { /* GLOBAL_POWER_EVENTS */
278 0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
279 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
280 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
281 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200282
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 { /* TC_MS_XFER */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200284 0x00, 0x05,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285 { { CTR_MS_0, MSR_P4_MS_ESCR0},
286 { CTR_MS_2, MSR_P4_MS_ESCR1} }
287 },
288
289 { /* UOP_QUEUE_WRITES */
290 0x00, 0x09,
291 { { CTR_MS_0, MSR_P4_MS_ESCR0},
292 { CTR_MS_2, MSR_P4_MS_ESCR1} }
293 },
294
295 { /* FRONT_END_EVENT */
296 0x05, 0x08,
297 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
298 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
299 },
300
301 { /* EXECUTION_EVENT */
302 0x05, 0x0c,
303 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
304 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
305 },
306
307 { /* REPLAY_EVENT */
308 0x05, 0x09,
309 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
310 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
311 },
312
313 { /* INSTR_RETIRED */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200314 0x04, 0x02,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
316 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
317 },
318
319 { /* UOPS_RETIRED */
320 0x04, 0x01,
321 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
322 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
323 },
324
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200325 { /* UOP_TYPE */
326 0x02, 0x02,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327 { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
328 { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
329 },
330
331 { /* RETIRED_MISPRED_BRANCH_TYPE */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200332 0x02, 0x05,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
334 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
335 },
336
337 { /* RETIRED_BRANCH_TYPE */
338 0x02, 0x04,
339 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
340 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
341 }
342};
343
344
/* non-zero when bit 7 of the given MSR_IA32_MISC_ENABLE low word is set */
#define MISC_PMC_ENABLED_P(x)	((x) & (1 << 7))

/*
 * ESCR helpers.  Each one modifies its 'escr' argument in place.
 * The _0 variants are used for stagger 0 and the _1 variants for
 * stagger 1 (see pmc_setup_one_p4_counter()).
 */
#define ESCR_RESERVED_BITS		0x80000003
/* keep only the bits covered by ESCR_RESERVED_BITS */
#define ESCR_CLEAR(escr)		((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr)	((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os)		((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr)	((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os)		((escr) |= (((os) & 1) << 1))
/* 6-bit event select at bit 25, 16-bit event mask at bit 9 */
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask)	((escr) |= (((mask) & 0xffff) << 9))

/* CCCR helpers.  Each one modifies its 'cccr' argument in place. */
#define CCCR_RESERVED_BITS		0x38030FFF
/* keep only the bits covered by CCCR_RESERVED_BITS */
#define CCCR_CLEAR(cccr)		((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr)	((cccr) |= 0x00030000)
/* 3-bit ESCR select at bit 13 */
#define CCCR_SET_ESCR_SELECT(cccr, sel)	((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr)	((cccr) |= (1<<26))
#define CCCR_SET_PMI_OVF_1(cccr)	((cccr) |= (1<<27))
/* bit 12 switches counting on and off */
#define CCCR_SET_ENABLE(cccr)		((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr)		((cccr) &= ~(1<<12))
/* bit 31 is the overflow flag */
#define CCCR_OVF_P(cccr)		((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr)		((cccr) &= (~(1U<<31)))
366
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367
/*
 * Assign a "stagger" to the current CPU.  It is used throughout this
 * module as an extra array offset that selects the "even" or "odd" part
 * of all the divided resources: 0 when the CPU is the first one in its
 * sibling map (or the kernel is built without CONFIG_SMP), 1 otherwise.
 */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
	int this_cpu = smp_processor_id();

	return this_cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
#else
	return 0;
#endif
}
379
380
381/* finally, mediate access to a real hardware counter
382 by passing a "virtual" counter numer to this macro,
383 along with your stagger setting. */
384#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
385
386static unsigned long reset_value[NUM_COUNTERS_NON_HT];
387
388
389static void p4_fill_in_addresses(struct op_msrs * const msrs)
390{
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200391 unsigned int i;
Don Zickuscb9c4482006-09-26 10:52:26 +0200392 unsigned int addr, cccraddr, stag;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393
394 setup_num_counters();
395 stag = get_stagger();
396
Don Zickuscb9c4482006-09-26 10:52:26 +0200397 /* the counter & cccr registers we pay attention to */
398 for (i = 0; i < num_counters; ++i) {
399 addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
400 cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200401 if (reserve_perfctr_nmi(addr)) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200402 msrs->counters[i].addr = addr;
403 msrs->controls[i].addr = cccraddr;
404 }
405 }
406
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 /* 43 ESCR registers in three or four discontiguous group */
408 for (addr = MSR_P4_BSU_ESCR0 + stag;
409 addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200410 if (reserve_evntsel_nmi(addr))
411 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 }
413
414 /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
415 * to avoid special case in nmi_{save|restore}_registers() */
416 if (boot_cpu_data.x86_model >= 0x3) {
417 for (addr = MSR_P4_BSU_ESCR0 + stag;
418 addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200419 if (reserve_evntsel_nmi(addr))
420 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 }
422 } else {
423 for (addr = MSR_P4_IQ_ESCR0 + stag;
424 addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200425 if (reserve_evntsel_nmi(addr))
426 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 }
428 }
429
430 for (addr = MSR_P4_RAT_ESCR0 + stag;
431 addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200432 if (reserve_evntsel_nmi(addr))
433 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434 }
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200435
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 for (addr = MSR_P4_MS_ESCR0 + stag;
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200437 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200438 if (reserve_evntsel_nmi(addr))
439 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440 }
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200441
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 for (addr = MSR_P4_IX_ESCR0 + stag;
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200443 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200444 if (reserve_evntsel_nmi(addr))
445 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 }
447
448 /* there are 2 remaining non-contiguously located ESCRs */
449
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200450 if (num_counters == NUM_COUNTERS_NON_HT) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 /* standard non-HT CPUs handle both remaining ESCRs*/
Don Zickuscb9c4482006-09-26 10:52:26 +0200452 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
453 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
454 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
455 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456
457 } else if (stag == 0) {
458 /* HT CPUs give the first remainder to the even thread, as
459 the 32nd control register */
Don Zickuscb9c4482006-09-26 10:52:26 +0200460 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
461 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462
463 } else {
464 /* and two copies of the second to the odd thread,
465 for the 22st and 23nd control registers */
Don Zickuscb9c4482006-09-26 10:52:26 +0200466 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
467 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
468 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
469 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470 }
471}
472
473
474static void pmc_setup_one_p4_counter(unsigned int ctr)
475{
476 int i;
477 int const maxbind = 2;
478 unsigned int cccr = 0;
479 unsigned int escr = 0;
480 unsigned int high = 0;
481 unsigned int counter_bit;
482 struct p4_event_binding *ev = NULL;
483 unsigned int stag;
484
485 stag = get_stagger();
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200486
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 /* convert from counter *number* to counter *bit* */
488 counter_bit = 1 << VIRT_CTR(stag, ctr);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200489
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 /* find our event binding structure. */
491 if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200492 printk(KERN_ERR
493 "oprofile: P4 event code 0x%lx out of range\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494 counter_config[ctr].event);
495 return;
496 }
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200497
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 ev = &(p4_events[counter_config[ctr].event - 1]);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200499
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500 for (i = 0; i < maxbind; i++) {
501 if (ev->bindings[i].virt_counter & counter_bit) {
502
503 /* modify ESCR */
Robert Richter1131a472009-05-25 20:23:23 +0200504 rdmsr(ev->bindings[i].escr_address, escr, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505 ESCR_CLEAR(escr);
506 if (stag == 0) {
507 ESCR_SET_USR_0(escr, counter_config[ctr].user);
508 ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
509 } else {
510 ESCR_SET_USR_1(escr, counter_config[ctr].user);
511 ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
512 }
513 ESCR_SET_EVENT_SELECT(escr, ev->event_select);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200514 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
Robert Richter1131a472009-05-25 20:23:23 +0200515 wrmsr(ev->bindings[i].escr_address, escr, high);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200516
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517 /* modify CCCR */
Robert Richter1131a472009-05-25 20:23:23 +0200518 rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
519 cccr, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520 CCCR_CLEAR(cccr);
521 CCCR_SET_REQUIRED_BITS(cccr);
522 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200523 if (stag == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524 CCCR_SET_PMI_OVF_0(cccr);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200525 else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526 CCCR_SET_PMI_OVF_1(cccr);
Robert Richter1131a472009-05-25 20:23:23 +0200527 wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
528 cccr, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529 return;
530 }
531 }
532
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200533 printk(KERN_ERR
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
535 counter_config[ctr].event, stag, ctr);
536}
537
538
Robert Richteref8828d2009-05-25 19:31:44 +0200539static void p4_setup_ctrs(struct op_x86_model_spec const *model,
540 struct op_msrs const * const msrs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541{
542 unsigned int i;
543 unsigned int low, high;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544 unsigned int stag;
545
546 stag = get_stagger();
547
548 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200549 if (!MISC_PMC_ENABLED_P(low)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550 printk(KERN_ERR "oprofile: P4 PMC not available\n");
551 return;
552 }
553
554 /* clear the cccrs we will use */
Robert Richter6e63ea42009-07-07 19:25:39 +0200555 for (i = 0; i < num_counters; i++) {
Robert Richter217d3cf2009-06-04 02:36:44 +0200556 if (unlikely(!msrs->controls[i].addr))
Don Zickuscb9c4482006-09-26 10:52:26 +0200557 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
559 CCCR_CLEAR(low);
560 CCCR_SET_REQUIRED_BITS(low);
561 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
562 }
563
Linus Torvalds1da177e2005-04-16 15:20:36 -0700564 /* clear all escrs (including those outside our concern) */
Don Zickuscb9c4482006-09-26 10:52:26 +0200565 for (i = num_counters; i < num_controls; i++) {
Robert Richter217d3cf2009-06-04 02:36:44 +0200566 if (unlikely(!msrs->controls[i].addr))
Don Zickuscb9c4482006-09-26 10:52:26 +0200567 continue;
568 wrmsr(msrs->controls[i].addr, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569 }
570
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571 /* setup all counters */
Robert Richter6e63ea42009-07-07 19:25:39 +0200572 for (i = 0; i < num_counters; ++i) {
Robert Richter217d3cf2009-06-04 02:36:44 +0200573 if (counter_config[i].enabled && msrs->controls[i].addr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574 reset_value[i] = counter_config[i].count;
575 pmc_setup_one_p4_counter(i);
Robert Richterbbc59862009-05-25 17:38:19 +0200576 wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
Robert Richter8045a4c2009-07-07 19:30:25 +0200577 -(u64)counter_config[i].count);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578 } else {
579 reset_value[i] = 0;
580 }
581 }
582}
583
584
585static int p4_check_ctrs(struct pt_regs * const regs,
586 struct op_msrs const * const msrs)
587{
588 unsigned long ctr, low, high, stag, real;
589 int i;
590
591 stag = get_stagger();
592
593 for (i = 0; i < num_counters; ++i) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200594
595 if (!reset_value[i])
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596 continue;
597
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200598 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599 * there is some eccentricity in the hardware which
600 * requires that we perform 2 extra corrections:
601 *
602 * - check both the CCCR:OVF flag for overflow and the
603 * counter high bit for un-flagged overflows.
604 *
605 * - write the counter back twice to ensure it gets
606 * updated properly.
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200607 *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 * the former seems to be related to extra NMIs happening
609 * during the current NMI; the latter is reported as errata
610 * N15 in intel doc 249199-029, pentium 4 specification
611 * update, though their suggested work-around does not
612 * appear to solve the problem.
613 */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200614
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615 real = VIRT_CTR(stag, i);
616
Robert Richter1131a472009-05-25 20:23:23 +0200617 rdmsr(p4_counters[real].cccr_address, low, high);
618 rdmsr(p4_counters[real].counter_address, ctr, high);
Robert Richter42399ad2009-05-25 17:59:06 +0200619 if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 oprofile_add_sample(regs, i);
Robert Richterbbc59862009-05-25 17:38:19 +0200621 wrmsrl(p4_counters[real].counter_address,
Robert Richter8045a4c2009-07-07 19:30:25 +0200622 -(u64)reset_value[i]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 CCCR_CLEAR_OVF(low);
Robert Richter1131a472009-05-25 20:23:23 +0200624 wrmsr(p4_counters[real].cccr_address, low, high);
Robert Richterbbc59862009-05-25 17:38:19 +0200625 wrmsrl(p4_counters[real].counter_address,
Robert Richter8045a4c2009-07-07 19:30:25 +0200626 -(u64)reset_value[i]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 }
628 }
629
630 /* P4 quirk: you have to re-unmask the apic vector */
631 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
632
633 /* See op_model_ppro.c */
634 return 1;
635}
636
637
638static void p4_start(struct op_msrs const * const msrs)
639{
640 unsigned int low, high, stag;
641 int i;
642
643 stag = get_stagger();
644
645 for (i = 0; i < num_counters; ++i) {
646 if (!reset_value[i])
647 continue;
Robert Richter1131a472009-05-25 20:23:23 +0200648 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649 CCCR_SET_ENABLE(low);
Robert Richter1131a472009-05-25 20:23:23 +0200650 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651 }
652}
653
654
655static void p4_stop(struct op_msrs const * const msrs)
656{
657 unsigned int low, high, stag;
658 int i;
659
660 stag = get_stagger();
661
662 for (i = 0; i < num_counters; ++i) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200663 if (!reset_value[i])
664 continue;
Robert Richter1131a472009-05-25 20:23:23 +0200665 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666 CCCR_SET_DISABLE(low);
Robert Richter1131a472009-05-25 20:23:23 +0200667 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 }
669}
670
Don Zickuscb9c4482006-09-26 10:52:26 +0200671static void p4_shutdown(struct op_msrs const * const msrs)
672{
673 int i;
674
Robert Richter6e63ea42009-07-07 19:25:39 +0200675 for (i = 0; i < num_counters; ++i) {
Robert Richter217d3cf2009-06-04 02:36:44 +0200676 if (msrs->counters[i].addr)
Don Zickuscb9c4482006-09-26 10:52:26 +0200677 release_perfctr_nmi(msrs->counters[i].addr);
678 }
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200679 /*
680 * some of the control registers are specially reserved in
Don Zickuscb9c4482006-09-26 10:52:26 +0200681 * conjunction with the counter registers (hence the starting offset).
682 * This saves a few bits.
683 */
Robert Richter6e63ea42009-07-07 19:25:39 +0200684 for (i = num_counters; i < num_controls; ++i) {
Robert Richter217d3cf2009-06-04 02:36:44 +0200685 if (msrs->controls[i].addr)
Don Zickuscb9c4482006-09-26 10:52:26 +0200686 release_evntsel_nmi(msrs->controls[i].addr);
687 }
688}
689
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690
#ifdef CONFIG_SMP
/* model description using the HT2 register counts; SMP builds only */
struct op_x86_model_spec op_p4_ht2_spec = {
	.num_counters		= NUM_COUNTERS_HT2,
	.num_controls		= NUM_CONTROLS_HT2,
	.fill_in_addresses	= p4_fill_in_addresses,
	.setup_ctrs		= p4_setup_ctrs,
	.check_ctrs		= p4_check_ctrs,
	.start			= p4_start,
	.stop			= p4_stop,
	.shutdown		= p4_shutdown,
};
#endif
703
Robert Richter259a83a2009-07-09 15:12:35 +0200704struct op_x86_model_spec op_p4_spec = {
Robert Richterc92960f2008-09-05 17:12:36 +0200705 .num_counters = NUM_COUNTERS_NON_HT,
706 .num_controls = NUM_CONTROLS_NON_HT,
707 .fill_in_addresses = &p4_fill_in_addresses,
708 .setup_ctrs = &p4_setup_ctrs,
709 .check_ctrs = &p4_check_ctrs,
710 .start = &p4_start,
711 .stop = &p4_stop,
712 .shutdown = &p4_shutdown
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713};