blob: 365d8a9c03d35668c755f112e4398d7769627da6 [file] [log] [blame]
/**
 * @file op_model_p4.c
 * P4 model-specific MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Graydon Hoare
 */
10
11#include <linux/oprofile.h>
12#include <linux/smp.h>
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +020013#include <linux/ptrace.h>
14#include <linux/nmi.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070015#include <asm/msr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <asm/fixmap.h>
17#include <asm/apic.h>
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +020018
Linus Torvalds1da177e2005-04-16 15:20:36 -070019
20#include "op_x86_model.h"
21#include "op_counter.h"
22
/* Number of entries in the p4_events[] table. */
#define NUM_EVENTS		39

/* Register counts used when the CPU is not hyper-threaded. */
#define NUM_COUNTERS_NON_HT	8
#define NUM_ESCRS_NON_HT	45
#define NUM_CCCRS_NON_HT	18
#define NUM_CONTROLS_NON_HT	(NUM_CCCRS_NON_HT + NUM_ESCRS_NON_HT)

/* Register counts available to each sibling of a two-way HT CPU. */
#define NUM_COUNTERS_HT2	4
#define NUM_ESCRS_HT2		23
#define NUM_CCCRS_HT2		9
#define NUM_CONTROLS_HT2	(NUM_CCCRS_HT2 + NUM_ESCRS_HT2)

/*
 * Working counts.  They start out with the non-HT values and are switched
 * to the HT2 values by setup_num_counters() when two siblings are seen.
 */
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;
Linus Torvalds1da177e2005-04-16 15:20:36 -070037
/*
 * Select the HT2 register counts when the CPU reports two siblings.
 * This must be decided at run time because whether the chip is
 * hyper-threaded is only discovered while the kernel boots.
 */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings != 2)
		return;

	num_controls = NUM_CONTROLS_HT2;
	num_counters = NUM_COUNTERS_HT2;
#endif
}
50
/*
 * Stride used when walking a range of ESCR MSR addresses.
 *
 * Returns 2 when the CPU reports two siblings (the caller then visits
 * every other register, starting from its stagger offset), otherwise 1.
 * Non-SMP builds always return 1.
 */
static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}
59
60
/*
 * Tables to simulate a simplified hardware view of the P4 registers.
 *
 * One p4_counter_binding ties a virtual counter to the MSR addresses of
 * its hardware counter and of the CCCR that controls it.
 */
struct p4_counter_binding {
	int virt_counter;	/* CTR_* bit identifying the counter */
	int counter_address;	/* MSR address of the counter itself */
	int cccr_address;	/* MSR address of the matching CCCR */
};
67
/*
 * Describes one event: the selector values to program and, for up to two
 * counters, which ESCR has to be used when the event is counted there.
 */
struct p4_event_binding {
	int escr_select;		/* value to put in CCCR */
	int event_select;		/* value to put in ESCR */
	struct {
		int virt_counter;	/* for this counter (CTR_* bit)... */
		int escr_address;	/* ...use this ESCR */
	} bindings[2];
};
76
/*
 * nb: these CTR_* defines are a duplicate of defines in
 * event/i386.p4*events.
 *
 * Each virtual counter gets its own bit; the bit position is the
 * counter's index in p4_counters[].
 */
#define CTR_BPU_0	0x01
#define CTR_MS_0	0x02
#define CTR_FLAME_0	0x04
#define CTR_IQ_4	0x08
#define CTR_BPU_2	0x10
#define CTR_MS_2	0x20
#define CTR_FLAME_2	0x40
#define CTR_IQ_5	0x80
89
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +020090static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -070091 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
92 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
93 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
94 { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 },
95 { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 },
96 { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 },
97 { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
98 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
99};
100
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200101#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103/* p4 event codes in libop/op_event.h are indices into this table. */
104
105static struct p4_event_binding p4_events[NUM_EVENTS] = {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200106
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107 { /* BRANCH_RETIRED */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200108 0x05, 0x06,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109 { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
110 {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
111 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200112
Linus Torvalds1da177e2005-04-16 15:20:36 -0700113 { /* MISPRED_BRANCH_RETIRED */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200114 0x04, 0x03,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
116 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
117 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200118
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119 { /* TC_DELIVER_MODE */
120 0x01, 0x01,
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200121 { { CTR_MS_0, MSR_P4_TC_ESCR0},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122 { CTR_MS_2, MSR_P4_TC_ESCR1} }
123 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200124
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125 { /* BPU_FETCH_REQUEST */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200126 0x00, 0x03,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127 { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
128 { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
129 },
130
131 { /* ITLB_REFERENCE */
132 0x03, 0x18,
133 { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
134 { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
135 },
136
137 { /* MEMORY_CANCEL */
138 0x05, 0x02,
139 { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
140 { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
141 },
142
143 { /* MEMORY_COMPLETE */
144 0x02, 0x08,
145 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
146 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
147 },
148
149 { /* LOAD_PORT_REPLAY */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200150 0x02, 0x04,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
152 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
153 },
154
155 { /* STORE_PORT_REPLAY */
156 0x02, 0x05,
157 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
158 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
159 },
160
161 { /* MOB_LOAD_REPLAY */
162 0x02, 0x03,
163 { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
164 { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
165 },
166
167 { /* PAGE_WALK_TYPE */
168 0x04, 0x01,
169 { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
170 { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
171 },
172
173 { /* BSQ_CACHE_REFERENCE */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200174 0x07, 0x0c,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
176 { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
177 },
178
179 { /* IOQ_ALLOCATION */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200180 0x06, 0x03,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
182 { 0, 0 } }
183 },
184
185 { /* IOQ_ACTIVE_ENTRIES */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200186 0x06, 0x1a,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
188 { 0, 0 } }
189 },
190
191 { /* FSB_DATA_ACTIVITY */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200192 0x06, 0x17,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
194 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
195 },
196
197 { /* BSQ_ALLOCATION */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200198 0x07, 0x05,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
200 { 0, 0 } }
201 },
202
203 { /* BSQ_ACTIVE_ENTRIES */
204 0x07, 0x06,
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200205 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 { 0, 0 } }
207 },
208
209 { /* X87_ASSIST */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200210 0x05, 0x03,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
212 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
213 },
214
215 { /* SSE_INPUT_ASSIST */
216 0x01, 0x34,
217 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
218 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
219 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200220
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221 { /* PACKED_SP_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200222 0x01, 0x08,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
224 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
225 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200226
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 { /* PACKED_DP_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200228 0x01, 0x0c,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
230 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
231 },
232
233 { /* SCALAR_SP_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200234 0x01, 0x0a,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
236 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
237 },
238
239 { /* SCALAR_DP_UOP */
240 0x01, 0x0e,
241 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
242 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
243 },
244
245 { /* 64BIT_MMX_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200246 0x01, 0x02,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
248 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
249 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200250
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251 { /* 128BIT_MMX_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200252 0x01, 0x1a,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
254 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
255 },
256
257 { /* X87_FP_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200258 0x01, 0x04,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
260 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
261 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200262
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 { /* X87_SIMD_MOVES_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200264 0x01, 0x2e,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
266 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
267 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200268
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 { /* MACHINE_CLEAR */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200270 0x05, 0x02,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
272 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
273 },
274
275 { /* GLOBAL_POWER_EVENTS */
276 0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
277 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
278 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
279 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200280
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 { /* TC_MS_XFER */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200282 0x00, 0x05,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 { { CTR_MS_0, MSR_P4_MS_ESCR0},
284 { CTR_MS_2, MSR_P4_MS_ESCR1} }
285 },
286
287 { /* UOP_QUEUE_WRITES */
288 0x00, 0x09,
289 { { CTR_MS_0, MSR_P4_MS_ESCR0},
290 { CTR_MS_2, MSR_P4_MS_ESCR1} }
291 },
292
293 { /* FRONT_END_EVENT */
294 0x05, 0x08,
295 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
296 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
297 },
298
299 { /* EXECUTION_EVENT */
300 0x05, 0x0c,
301 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
302 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
303 },
304
305 { /* REPLAY_EVENT */
306 0x05, 0x09,
307 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
308 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
309 },
310
311 { /* INSTR_RETIRED */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200312 0x04, 0x02,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
314 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
315 },
316
317 { /* UOPS_RETIRED */
318 0x04, 0x01,
319 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
320 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
321 },
322
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200323 { /* UOP_TYPE */
324 0x02, 0x02,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
326 { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
327 },
328
329 { /* RETIRED_MISPRED_BRANCH_TYPE */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200330 0x02, 0x05,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
332 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
333 },
334
335 { /* RETIRED_BRANCH_TYPE */
336 0x02, 0x04,
337 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
338 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
339 }
340};
341
342
/*
 * Non-zero when bit 7 is set in the low word read from
 * MSR_IA32_MISC_ENABLE; p4_setup_ctrs() treats a clear bit as
 * "P4 PMC not available".
 */
#define MISC_PMC_ENABLED_P(x)			((x) & (1 << 7))

/*
 * ESCR helpers.  ESCR_CLEAR() keeps only the reserved bits; the _0
 * variants are used for stagger 0 and the _1 variants for stagger 1.
 */
#define ESCR_RESERVED_BITS			0x80000003
#define ESCR_CLEAR(escr)			((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr)		((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os)			((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr)		((escr) |= ((usr) & 1))
#define ESCR_SET_OS_1(escr, os)			((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel)	((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask)		((escr) |= (((mask) & 0xffff) << 9))

/*
 * CCCR helpers.  CCCR_CLEAR() keeps only the reserved bits; the PMI_OVF_0
 * variant is used for stagger 0 and PMI_OVF_1 for stagger 1.
 */
#define CCCR_RESERVED_BITS			0x38030FFF
#define CCCR_CLEAR(cccr)			((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr)		((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel)		((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr)		((cccr) |= (1 << 26))
#define CCCR_SET_PMI_OVF_1(cccr)		((cccr) |= (1 << 27))
#define CCCR_SET_ENABLE(cccr)			((cccr) |= (1 << 12))
#define CCCR_SET_DISABLE(cccr)			((cccr) &= ~(1 << 12))
#define CCCR_OVF_P(cccr)			((cccr) & (1U << 31))
#define CCCR_CLEAR_OVF(cccr)			((cccr) &= (~(1U << 31)))

/* True when bit 31 of the counter's low word is clear. */
#define CTR_OVERFLOW_P(ctr)			(!((ctr) & 0x80000000))
366
367
/*
 * Compute the "stagger" of the current CPU: 0 when it is the first CPU in
 * its sibling map, 1 otherwise.  The rest of this file uses the value as
 * an extra array offset to pick the "even" or "odd" half of every
 * resource that is divided between siblings.  Non-SMP builds always
 * return 0.
 */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();

	return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
#else
	return 0;
#endif
}
379
380
381/* finally, mediate access to a real hardware counter
382 by passing a "virtual" counter numer to this macro,
383 along with your stagger setting. */
384#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
385
386static unsigned long reset_value[NUM_COUNTERS_NON_HT];
387
388
389static void p4_fill_in_addresses(struct op_msrs * const msrs)
390{
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200391 unsigned int i;
Don Zickuscb9c4482006-09-26 10:52:26 +0200392 unsigned int addr, cccraddr, stag;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393
394 setup_num_counters();
395 stag = get_stagger();
396
Don Zickuscb9c4482006-09-26 10:52:26 +0200397 /* initialize some registers */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200398 for (i = 0; i < num_counters; ++i)
Don Zickuscb9c4482006-09-26 10:52:26 +0200399 msrs->counters[i].addr = 0;
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200400 for (i = 0; i < num_controls; ++i)
Don Zickuscb9c4482006-09-26 10:52:26 +0200401 msrs->controls[i].addr = 0;
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200402
Don Zickuscb9c4482006-09-26 10:52:26 +0200403 /* the counter & cccr registers we pay attention to */
404 for (i = 0; i < num_counters; ++i) {
405 addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
406 cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200407 if (reserve_perfctr_nmi(addr)) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200408 msrs->counters[i].addr = addr;
409 msrs->controls[i].addr = cccraddr;
410 }
411 }
412
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413 /* 43 ESCR registers in three or four discontiguous group */
414 for (addr = MSR_P4_BSU_ESCR0 + stag;
415 addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200416 if (reserve_evntsel_nmi(addr))
417 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 }
419
420 /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
421 * to avoid special case in nmi_{save|restore}_registers() */
422 if (boot_cpu_data.x86_model >= 0x3) {
423 for (addr = MSR_P4_BSU_ESCR0 + stag;
424 addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200425 if (reserve_evntsel_nmi(addr))
426 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 }
428 } else {
429 for (addr = MSR_P4_IQ_ESCR0 + stag;
430 addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200431 if (reserve_evntsel_nmi(addr))
432 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 }
434 }
435
436 for (addr = MSR_P4_RAT_ESCR0 + stag;
437 addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200438 if (reserve_evntsel_nmi(addr))
439 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440 }
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200441
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 for (addr = MSR_P4_MS_ESCR0 + stag;
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200443 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200444 if (reserve_evntsel_nmi(addr))
445 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 }
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200447
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 for (addr = MSR_P4_IX_ESCR0 + stag;
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200449 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200450 if (reserve_evntsel_nmi(addr))
451 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 }
453
454 /* there are 2 remaining non-contiguously located ESCRs */
455
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200456 if (num_counters == NUM_COUNTERS_NON_HT) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457 /* standard non-HT CPUs handle both remaining ESCRs*/
Don Zickuscb9c4482006-09-26 10:52:26 +0200458 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
459 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
460 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
461 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462
463 } else if (stag == 0) {
464 /* HT CPUs give the first remainder to the even thread, as
465 the 32nd control register */
Don Zickuscb9c4482006-09-26 10:52:26 +0200466 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
467 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468
469 } else {
470 /* and two copies of the second to the odd thread,
471 for the 22st and 23nd control registers */
Don Zickuscb9c4482006-09-26 10:52:26 +0200472 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
473 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
474 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
475 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 }
477}
478
479
480static void pmc_setup_one_p4_counter(unsigned int ctr)
481{
482 int i;
483 int const maxbind = 2;
484 unsigned int cccr = 0;
485 unsigned int escr = 0;
486 unsigned int high = 0;
487 unsigned int counter_bit;
488 struct p4_event_binding *ev = NULL;
489 unsigned int stag;
490
491 stag = get_stagger();
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200492
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 /* convert from counter *number* to counter *bit* */
494 counter_bit = 1 << VIRT_CTR(stag, ctr);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200495
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496 /* find our event binding structure. */
497 if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200498 printk(KERN_ERR
499 "oprofile: P4 event code 0x%lx out of range\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500 counter_config[ctr].event);
501 return;
502 }
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200503
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504 ev = &(p4_events[counter_config[ctr].event - 1]);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200505
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 for (i = 0; i < maxbind; i++) {
507 if (ev->bindings[i].virt_counter & counter_bit) {
508
509 /* modify ESCR */
Robert Richter1131a472009-05-25 20:23:23 +0200510 rdmsr(ev->bindings[i].escr_address, escr, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511 ESCR_CLEAR(escr);
512 if (stag == 0) {
513 ESCR_SET_USR_0(escr, counter_config[ctr].user);
514 ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
515 } else {
516 ESCR_SET_USR_1(escr, counter_config[ctr].user);
517 ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
518 }
519 ESCR_SET_EVENT_SELECT(escr, ev->event_select);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200520 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
Robert Richter1131a472009-05-25 20:23:23 +0200521 wrmsr(ev->bindings[i].escr_address, escr, high);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200522
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523 /* modify CCCR */
Robert Richter1131a472009-05-25 20:23:23 +0200524 rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
525 cccr, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526 CCCR_CLEAR(cccr);
527 CCCR_SET_REQUIRED_BITS(cccr);
528 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200529 if (stag == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530 CCCR_SET_PMI_OVF_0(cccr);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200531 else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 CCCR_SET_PMI_OVF_1(cccr);
Robert Richter1131a472009-05-25 20:23:23 +0200533 wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
534 cccr, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535 return;
536 }
537 }
538
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200539 printk(KERN_ERR
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
541 counter_config[ctr].event, stag, ctr);
542}
543
544
545static void p4_setup_ctrs(struct op_msrs const * const msrs)
546{
547 unsigned int i;
548 unsigned int low, high;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 unsigned int stag;
550
551 stag = get_stagger();
552
553 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200554 if (!MISC_PMC_ENABLED_P(low)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 printk(KERN_ERR "oprofile: P4 PMC not available\n");
556 return;
557 }
558
559 /* clear the cccrs we will use */
560 for (i = 0 ; i < num_counters ; i++) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200561 if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
Don Zickuscb9c4482006-09-26 10:52:26 +0200562 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
564 CCCR_CLEAR(low);
565 CCCR_SET_REQUIRED_BITS(low);
566 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
567 }
568
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569 /* clear all escrs (including those outside our concern) */
Don Zickuscb9c4482006-09-26 10:52:26 +0200570 for (i = num_counters; i < num_controls; i++) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200571 if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
Don Zickuscb9c4482006-09-26 10:52:26 +0200572 continue;
573 wrmsr(msrs->controls[i].addr, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574 }
575
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 /* setup all counters */
577 for (i = 0 ; i < num_counters ; ++i) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200578 if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 reset_value[i] = counter_config[i].count;
580 pmc_setup_one_p4_counter(i);
Robert Richter1131a472009-05-25 20:23:23 +0200581 wrmsr(p4_counters[VIRT_CTR(stag, i)].counter_address,
582 -(u32)counter_config[i].count, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 } else {
584 reset_value[i] = 0;
585 }
586 }
587}
588
589
590static int p4_check_ctrs(struct pt_regs * const regs,
591 struct op_msrs const * const msrs)
592{
593 unsigned long ctr, low, high, stag, real;
594 int i;
595
596 stag = get_stagger();
597
598 for (i = 0; i < num_counters; ++i) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200599
600 if (!reset_value[i])
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 continue;
602
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200603 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 * there is some eccentricity in the hardware which
605 * requires that we perform 2 extra corrections:
606 *
607 * - check both the CCCR:OVF flag for overflow and the
608 * counter high bit for un-flagged overflows.
609 *
610 * - write the counter back twice to ensure it gets
611 * updated properly.
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200612 *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613 * the former seems to be related to extra NMIs happening
614 * during the current NMI; the latter is reported as errata
615 * N15 in intel doc 249199-029, pentium 4 specification
616 * update, though their suggested work-around does not
617 * appear to solve the problem.
618 */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200619
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 real = VIRT_CTR(stag, i);
621
Robert Richter1131a472009-05-25 20:23:23 +0200622 rdmsr(p4_counters[real].cccr_address, low, high);
623 rdmsr(p4_counters[real].counter_address, ctr, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624 if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
625 oprofile_add_sample(regs, i);
Robert Richter1131a472009-05-25 20:23:23 +0200626 wrmsr(p4_counters[real].counter_address,
627 -(u32)reset_value[i], -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628 CCCR_CLEAR_OVF(low);
Robert Richter1131a472009-05-25 20:23:23 +0200629 wrmsr(p4_counters[real].cccr_address, low, high);
630 wrmsr(p4_counters[real].counter_address,
631 -(u32)reset_value[i], -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 }
633 }
634
635 /* P4 quirk: you have to re-unmask the apic vector */
636 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
637
638 /* See op_model_ppro.c */
639 return 1;
640}
641
642
643static void p4_start(struct op_msrs const * const msrs)
644{
645 unsigned int low, high, stag;
646 int i;
647
648 stag = get_stagger();
649
650 for (i = 0; i < num_counters; ++i) {
651 if (!reset_value[i])
652 continue;
Robert Richter1131a472009-05-25 20:23:23 +0200653 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 CCCR_SET_ENABLE(low);
Robert Richter1131a472009-05-25 20:23:23 +0200655 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656 }
657}
658
659
660static void p4_stop(struct op_msrs const * const msrs)
661{
662 unsigned int low, high, stag;
663 int i;
664
665 stag = get_stagger();
666
667 for (i = 0; i < num_counters; ++i) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200668 if (!reset_value[i])
669 continue;
Robert Richter1131a472009-05-25 20:23:23 +0200670 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671 CCCR_SET_DISABLE(low);
Robert Richter1131a472009-05-25 20:23:23 +0200672 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 }
674}
675
Don Zickuscb9c4482006-09-26 10:52:26 +0200676static void p4_shutdown(struct op_msrs const * const msrs)
677{
678 int i;
679
680 for (i = 0 ; i < num_counters ; ++i) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200681 if (CTR_IS_RESERVED(msrs, i))
Don Zickuscb9c4482006-09-26 10:52:26 +0200682 release_perfctr_nmi(msrs->counters[i].addr);
683 }
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200684 /*
685 * some of the control registers are specially reserved in
Don Zickuscb9c4482006-09-26 10:52:26 +0200686 * conjunction with the counter registers (hence the starting offset).
687 * This saves a few bits.
688 */
689 for (i = num_counters ; i < num_controls ; ++i) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200690 if (CTRL_IS_RESERVED(msrs, i))
Don Zickuscb9c4482006-09-26 10:52:26 +0200691 release_evntsel_nmi(msrs->controls[i].addr);
692 }
693}
694
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695
#ifdef CONFIG_SMP
/* Model description using the HT2 register counts; SMP builds only. */
const struct op_x86_model_spec op_p4_ht2_spec = {
	.num_counters		= NUM_COUNTERS_HT2,
	.num_controls		= NUM_CONTROLS_HT2,
	.fill_in_addresses	= p4_fill_in_addresses,
	.setup_ctrs		= p4_setup_ctrs,
	.check_ctrs		= p4_check_ctrs,
	.start			= p4_start,
	.stop			= p4_stop,
	.shutdown		= p4_shutdown,
};
#endif
708
709struct op_x86_model_spec const op_p4_spec = {
Robert Richterc92960f2008-09-05 17:12:36 +0200710 .num_counters = NUM_COUNTERS_NON_HT,
711 .num_controls = NUM_CONTROLS_NON_HT,
712 .fill_in_addresses = &p4_fill_in_addresses,
713 .setup_ctrs = &p4_setup_ctrs,
714 .check_ctrs = &p4_check_ctrs,
715 .start = &p4_start,
716 .stop = &p4_stop,
717 .shutdown = &p4_shutdown
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718};