IB/ipath: Workaround problem of errormask register being overwritten

On some system hardware, we are seeing moderately common cases of the
chip errormask register being overwritten due to a chip bug in iba6120
that is triggered by a vendor-specific PCIe broadcast message.  This
patch merely checks periodically, and corrects it if needed (the
overwrite can cause us to not get error and hardware error
interrupts).  Also, make dd->ipath_errormask the one, true canonical
source for kr_errormask, and remove references to ipath_ignorederrs as
it is currently unused.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: John Gregor <john.gregor@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 71e6c9d..9dd0bac 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -851,13 +851,14 @@
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
 			 dd->ipath_hwerrmask);
 
-	dd->ipath_maskederrs = dd->ipath_ignorederrs;
 	/* clear all */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
 	/* enable errors that are masked, at least this first time. */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
 			 ~dd->ipath_maskederrs);
-	/* clear any interrups up to this point (ints still not enabled) */
+	dd->ipath_errormask = ipath_read_kreg64(dd,
+		dd->ipath_kregs->kr_errormask);
+	/* clear any interrupts up to this point (ints still not enabled) */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
 
 	/*
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 0c075cf..b29fe7e 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -517,10 +517,7 @@
 
 	supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
 
-	/*
-	 * don't report errors that are masked (includes those always
-	 * ignored)
-	 */
+	/* don't report errors that are masked */
 	errs &= ~dd->ipath_maskederrs;
 
 	/* do these first, they are most important */
@@ -566,19 +563,19 @@
 		 * ones on this particular interrupt, which also isn't great
 		 */
 		dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
+		dd->ipath_errormask &= ~dd->ipath_maskederrs;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-				 ~dd->ipath_maskederrs);
+			dd->ipath_errormask);
 		s_iserr = ipath_decode_err(msg, sizeof msg,
-				 (dd->ipath_maskederrs & ~dd->
-				  ipath_ignorederrs));
+			dd->ipath_maskederrs);
 
-		if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
+		if (dd->ipath_maskederrs &
 			~(INFINIPATH_E_RRCVEGRFULL |
 			INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
 			ipath_dev_err(dd, "Temporarily disabling "
 			    "error(s) %llx reporting; too frequent (%s)\n",
-				(unsigned long long) (dd->ipath_maskederrs &
-				~dd->ipath_ignorederrs), msg);
+				(unsigned long long)dd->ipath_maskederrs,
+				msg);
 		else {
 			/*
 			 * rcvegrfull and rcvhdrqfull are "normal",
@@ -793,6 +790,9 @@
 	/* disable error interrupts, to avoid confusion */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
 
+	/* also disable interrupts; errormask is sometimes overwriten */
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
+
 	/*
 	 * clear all sends, because they have may been
 	 * completed by usercode while in freeze mode, and
@@ -817,7 +817,7 @@
 	for (i = 0; i < dd->ipath_pioavregs; i++) {
 		/* deal with 6110 chip bug */
 		im = i > 3 ? ((i&1) ? i-1 : i+1) : i;
-		val = ipath_read_kreg64(dd, 0x1000+(im*sizeof(u64)));
+		val = ipath_read_kreg64(dd, (0x1000/sizeof(u64))+im);
 		dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i]
 			= le64_to_cpu(val);
 	}
@@ -832,7 +832,8 @@
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
 		E_SPKT_ERRS_IGNORE);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-		~dd->ipath_maskederrs);
+		dd->ipath_errormask);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
 }
 
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index ef77329..7a7966f 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -261,18 +261,10 @@
 	 * limiting of hwerror reporting
 	 */
 	ipath_err_t ipath_lasthwerror;
-	/*
-	 * errors masked because they occur too fast, also includes errors
-	 * that are always ignored (ipath_ignorederrs)
-	 */
+	/* errors masked because they occur too fast */
 	ipath_err_t ipath_maskederrs;
 	/* time in jiffies at which to re-enable maskederrs */
 	unsigned long ipath_unmasktime;
-	/*
-	 * errors always ignored (masked), at least for a given
-	 * chip/device, because they are wrong or not useful
-	 */
-	ipath_err_t ipath_ignorederrs;
 	/* count of egrfull errors, combined for all ports */
 	u64 ipath_last_tidfull;
 	/* for ipath_qcheck() */
@@ -436,6 +428,7 @@
 	u64 ipath_lastibcstat;
 	/* hwerrmask shadow */
 	ipath_err_t ipath_hwerrmask;
+	ipath_err_t ipath_errormask; /* errormask shadow */
 	/* interrupt config reg shadow */
 	u64 ipath_intconfig;
 	/* kr_sendpiobufbase value */
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 73ed17d..bae4f56 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -196,6 +196,45 @@
 	}
 }
 
+static void ipath_chk_errormask(struct ipath_devdata *dd)
+{
+	static u32 fixed;
+	u32 ctrl;
+	unsigned long errormask;
+	unsigned long hwerrs;
+
+	if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
+		return;
+
+	errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
+
+	if (errormask == dd->ipath_errormask)
+		return;
+	fixed++;
+
+	hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
+	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
+
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+		dd->ipath_errormask);
+
+	if ((hwerrs & dd->ipath_hwerrmask) ||
+		(ctrl & INFINIPATH_C_FREEZEMODE)) {
+		/* force re-interrupt of pending events, just in case */
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
+		dev_info(&dd->pcidev->dev,
+			"errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
+			fixed, errormask, (unsigned long)dd->ipath_errormask,
+			ctrl, hwerrs);
+	} else
+		ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
+			fixed, errormask,
+			(unsigned long)dd->ipath_errormask);
+}
+
+
 /**
  * ipath_get_faststats - get word counters from chip before they overflow
  * @opaque - contains a pointer to the infinipath device ipath_devdata
@@ -251,14 +290,13 @@
 		dd->ipath_lasterror = 0;
 	if (dd->ipath_lasthwerror)
 		dd->ipath_lasthwerror = 0;
-	if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
+	if (dd->ipath_maskederrs
 	    && time_after(jiffies, dd->ipath_unmasktime)) {
 		char ebuf[256];
 		int iserr;
 		iserr = ipath_decode_err(ebuf, sizeof ebuf,
-				 (dd->ipath_maskederrs & ~dd->
-				  ipath_ignorederrs));
-		if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
+			dd->ipath_maskederrs);
+		if (dd->ipath_maskederrs &
 				~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
 				INFINIPATH_E_PKTERRS ))
 			ipath_dev_err(dd, "Re-enabling masked errors "
@@ -278,9 +316,12 @@
 				ipath_cdbg(ERRPKT, "Re-enabling packet"
 						" problem interrupt (%s)\n", ebuf);
 		}
-		dd->ipath_maskederrs = dd->ipath_ignorederrs;
+
+		/* re-enable masked errors */
+		dd->ipath_errormask |= dd->ipath_maskederrs;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-				 ~dd->ipath_maskederrs);
+			dd->ipath_errormask);
+		dd->ipath_maskederrs = 0;
 	}
 
 	/* limit qfull messages to ~one per minute per port */
@@ -294,6 +335,7 @@
 		}
 	}
 
+	ipath_chk_errormask(dd);
 done:
 	mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
 }