EDAC, sb_edac: Remove double buffering of error records

In the bad old days the functions from x86_mce_decoder_chain could be called in machine check context. So we used to carefully copy them and defer processing until later.

But in f29a7aff4bd60 ("x86/mce: Avoid potential deadlock due to printk() in MCE context") we switched the logging code to save the record in a genpool, and call the functions that registered to be notified later from a work queue.

So drop all the double buffering and do all the work we want to do as soon as sbridge_mce_check_error() is called.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Cc: Aristeu Rozanski <arozansk@redhat.com>
Cc: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: patrickg@supermicro.com
Link: http://lkml.kernel.org/r/100025611cd780d9bca72792b2b2146760da53e0.1460756761.git.tony.luck@intel.com
Signed-off-by: Borislav Petkov <bp@suse.de>

commit: ad08c4e97485694fee5ebb181983514facedbb19 [log] [tgz]
author: Tony Luck <tony.luck@intel.com> Fri Apr 15 14:50:32 2016 -0700
committer: Borislav Petkov <bp@suse.de> Sat Apr 23 14:02:02 2016 +0200
tree: 4c4d834458440c9371c5c01bbc342299aebb7a2b
parent: ab67b6c22d8506b060a66ed0ce1a3e14e3b075e4 [diff] [blame]
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 93f0d41..3421674 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c

@@ -363,16 +363,6 @@
 	/* Memory type detection */
 	bool			is_mirrored, is_lockstep, is_close_pg;
 
-	/* Fifo double buffers */
-	struct mce		mce_entry[MCE_LOG_LEN];
-	struct mce		mce_outentry[MCE_LOG_LEN];
-
-	/* Fifo in/out counters */
-	unsigned		mce_in, mce_out;
-
-	/* Count indicator to show errors not got */
-	unsigned		mce_overrun;
-
 	/* Memory description */
 	u64			tolm, tohm;
 	struct knl_pvt knl;
@@ -3075,63 +3065,8 @@
 }
 
 /*
- *	sbridge_check_error	Retrieve and process errors reported by the
- *				hardware. Called by the Core module.
- */
-static void sbridge_check_error(struct mem_ctl_info *mci)
-{
-	struct sbridge_pvt *pvt = mci->pvt_info;
-	int i;
-	unsigned count = 0;
-	struct mce *m;
-
-	/*
-	 * MCE first step: Copy all mce errors into a temporary buffer
-	 * We use a double buffering here, to reduce the risk of
-	 * loosing an error.
-	 */
-	smp_rmb();
-	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
-		% MCE_LOG_LEN;
-	if (!count)
-		return;
-
-	m = pvt->mce_outentry;
-	if (pvt->mce_in + count > MCE_LOG_LEN) {
-		unsigned l = MCE_LOG_LEN - pvt->mce_in;
-
-		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
-		smp_wmb();
-		pvt->mce_in = 0;
-		count -= l;
-		m += l;
-	}
-	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
-	smp_wmb();
-	pvt->mce_in += count;
-
-	smp_rmb();
-	if (pvt->mce_overrun) {
-		sbridge_printk(KERN_ERR, "Lost %d memory errors\n",
-			      pvt->mce_overrun);
-		smp_wmb();
-		pvt->mce_overrun = 0;
-	}
-
-	/*
-	 * MCE second step: parse errors and display
-	 */
-	for (i = 0; i < count; i++)
-		sbridge_mce_output_error(mci, &pvt->mce_outentry[i]);
-}
-
-/*
- * sbridge_mce_check_error	Replicates mcelog routine to get errors
- *				This routine simply queues mcelog errors, and
- *				return. The error itself should be handled later
- *				by sbridge_check_error.
- * WARNING: As this routine should be called at NMI time, extra care should
- * be taken to avoid deadlocks, and to be as fast as possible.
+ * Check that logging is enabled and that this is the right type
+ * of error for us to handle.
  */
 static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 				   void *data)
@@ -3176,21 +3111,7 @@
 			  "%u APIC %x\n", mce->cpuvendor, mce->cpuid,
 			  mce->time, mce->socketid, mce->apicid);
 
-	smp_rmb();
-	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
-		smp_wmb();
-		pvt->mce_overrun++;
-		return NOTIFY_DONE;
-	}
-
-	/* Copy memory error at the ringbuffer */
-	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
-	smp_wmb();
-	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
-
-	/* Handle fatal errors immediately */
-	if (mce->mcgstatus & 1)
-		sbridge_check_error(mci);
+	sbridge_mce_output_error(mci, mce);
 
 	/* Advice mcelog that the error were handled */
 	return NOTIFY_STOP;
@@ -3276,9 +3197,6 @@
 	mci->dev_name = pci_name(pdev);
 	mci->ctl_page_to_phys = NULL;
 
-	/* Set the function pointer to an actual operation function */
-	mci->edac_check = sbridge_check_error;
-
 	pvt->info.type = type;
 	switch (type) {
 	case IVY_BRIDGE:
commit	ad08c4e97485694fee5ebb181983514facedbb19	[log] [tgz]
author	Tony Luck <tony.luck@intel.com>	Fri Apr 15 14:50:32 2016 -0700
committer	Borislav Petkov <bp@suse.de>	Sat Apr 23 14:02:02 2016 +0200
tree	4c4d834458440c9371c5c01bbc342299aebb7a2b
parent	ab67b6c22d8506b060a66ed0ce1a3e14e3b075e4 [diff] [blame]