perf, x86: Fix Intel-nhm PMU programming errata workaround
Fix the Errata AAK100/AAP53/BD53 workaround, the officialy documented
workaround we implemented in:
11164cd: perf, x86: Add Nehelem PMU programming errata workaround
doesn't actually work fully and causes a stuck PMU state
under load and non-functioning perf profiling.
A functional workaround was found by trial & error.
Affects all Nehalem-class Intel PMUs.
Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1281073148.2125.63.camel@ymzhang.sh.intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@kernel.org> # .35.x
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 214ac86..d8d86d0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -491,33 +491,78 @@
* Intel Errata AAP53 (model 30)
* Intel Errata BD53 (model 44)
*
- * These chips need to be 'reset' when adding counters by programming
- * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
- * either in sequence on the same PMC or on different PMCs.
+ * The official story:
+ * These chips need to be 'reset' when adding counters by programming the
+ * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
+ * in sequence on the same PMC or on different PMCs.
+ *
+ * In practise it appears some of these events do in fact count, and
+ * we need to programm all 4 events.
*/
+static void intel_pmu_nhm_workaround(void)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ static const unsigned long nhm_magic[4] = {
+ 0x4300B5,
+ 0x4300D2,
+ 0x4300B1,
+ 0x4300B1
+ };
+ struct perf_event *event;
+ int i;
+
+ /*
+ * The Errata requires below steps:
+ * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
+ * 2) Configure 4 PERFEVTSELx with the magic events and clear
+ * the corresponding PMCx;
+ * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
+ * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
+ * 5) Clear 4 pairs of ERFEVTSELx and PMCx;
+ */
+
+ /*
+ * The real steps we choose are a little different from above.
+ * A) To reduce MSR operations, we don't run step 1) as they
+ * are already cleared before this function is called;
+ * B) Call x86_perf_event_update to save PMCx before configuring
+ * PERFEVTSELx with magic number;
+ * C) With step 5), we do clear only when the PERFEVTSELx is
+ * not used currently.
+ * D) Call x86_perf_event_set_period to restore PMCx;
+ */
+
+ /* We always operate 4 pairs of PERF Counters */
+ for (i = 0; i < 4; i++) {
+ event = cpuc->events[i];
+ if (event)
+ x86_perf_event_update(event);
+ }
+
+ for (i = 0; i < 4; i++) {
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+ wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+ }
+
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+
+ for (i = 0; i < 4; i++) {
+ event = cpuc->events[i];
+
+ if (event) {
+ x86_perf_event_set_period(event);
+ __x86_pmu_enable_event(&event->hw,
+ ARCH_PERFMON_EVENTSEL_ENABLE);
+ } else
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
+ }
+}
+
static void intel_pmu_nhm_enable_all(int added)
{
- if (added) {
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- int i;
-
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
-
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
-
- for (i = 0; i < 3; i++) {
- struct perf_event *event = cpuc->events[i];
-
- if (!event)
- continue;
-
- __x86_pmu_enable_event(&event->hw,
- ARCH_PERFMON_EVENTSEL_ENABLE);
- }
- }
+ if (added)
+ intel_pmu_nhm_workaround();
intel_pmu_enable_all(added);
}