perf/x86/amd/ibs: Fix pmu::stop() nesting

Patch 5a50f5291701 ("perf/x86/ibs: Fix race with IBS_STARTING state")
closed a big hole while opening another, smaller hole.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: 5a50f5291701 ("perf/x86/ibs: Fix race with IBS_STARTING state")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 3ea25c3..feb90f6 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -28,10 +28,46 @@
 #define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
 #define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
 
+
+/*
+ * IBS states:
+ *
+ * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken
+ * and any further add()s must fail.
+ *
+ * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
+ * complicated by the fact that the IBS hardware can send late NMIs (ie. after
+ * we've cleared the EN bit).
+ *
+ * In order to consume these late NMIs we have the STOPPED state, any NMI that
+ * happens after we've cleared the EN state will clear this bit and report the
+ * NMI handled (this is fundamentally racy in the face or multiple NMI sources,
+ * someone else can consume our BIT and our NMI will go unhandled).
+ *
+ * And since we cannot set/clear this separate bit together with the EN bit,
+ * there are races; if we cleared STARTED early, an NMI could land in
+ * between clearing STARTED and clearing the EN bit (in fact multiple NMIs
+ * could happen if the period is small enough), and consume our STOPPED bit
+ * and trigger streams of unhandled NMIs.
+ *
+ * If, however, we clear STARTED late, an NMI can hit between clearing the
+ * EN bit and clearing STARTED, still see STARTED set and process the event.
+ * If this event will have the VALID bit clear, we bail properly, but this
+ * is not a given. With VALID set we can end up calling pmu::stop() again
+ * (the throttle logic) and trigger the WARNs in there.
+ *
+ * So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
+ * nesting, and clear STARTED late, so that we have a well defined state over
+ * the clearing of the EN bit.
+ *
+ * XXX: we could probably be using !atomic bitops for all this.
+ */
+
 enum ibs_states {
 	IBS_ENABLED	= 0,
 	IBS_STARTED	= 1,
 	IBS_STOPPING	= 2,
+	IBS_STOPPED	= 3,
 
 	IBS_MAX_STATES,
 };
@@ -377,11 +413,10 @@
 
 	perf_ibs_set_period(perf_ibs, hwc, &period);
 	/*
-	 * Set STARTED before enabling the hardware, such that
-	 * a subsequent NMI must observe it. Then clear STOPPING
-	 * such that we don't consume NMIs by accident.
+	 * Set STARTED before enabling the hardware, such that a subsequent NMI
+	 * must observe it.
 	 */
-	set_bit(IBS_STARTED, pcpu->state);
+	set_bit(IBS_STARTED,    pcpu->state);
 	clear_bit(IBS_STOPPING, pcpu->state);
 	perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
 
@@ -396,6 +431,9 @@
 	u64 config;
 	int stopping;
 
+	if (test_and_set_bit(IBS_STOPPING, pcpu->state))
+		return;
+
 	stopping = test_bit(IBS_STARTED, pcpu->state);
 
 	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
@@ -405,12 +443,12 @@
 
 	if (stopping) {
 		/*
-		 * Set STOPPING before disabling the hardware, such that it
+		 * Set STOPPED before disabling the hardware, such that it
 		 * must be visible to NMIs the moment we clear the EN bit,
 		 * at which point we can generate an !VALID sample which
 		 * we need to consume.
 		 */
-		set_bit(IBS_STOPPING, pcpu->state);
+		set_bit(IBS_STOPPED, pcpu->state);
 		perf_ibs_disable_event(perf_ibs, hwc, config);
 		/*
 		 * Clear STARTED after disabling the hardware; if it were
@@ -556,7 +594,7 @@
 		 * with samples that even have the valid bit cleared.
 		 * Mark all this NMIs as handled.
 		 */
-		if (test_and_clear_bit(IBS_STOPPING, pcpu->state))
+		if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
 			return 1;
 
 		return 0;