[PATCH] IB/ipath: reduce overhead on receive interrupts

Also count the number of interrupts where that works (fastrcvint).  On any
interrupt where the port0 head and tail registers are not equal, just call the
ipath_kreceive code without reading the interrupt status, thus saving the
approximately 0.25usec processor stall waiting for the read to return.  If any
other interrupt bits are set, or head==tail, take the normal path, but that
has been reordered to handle read ahead of pioavail.  Also no longer call
ipath_kreceive() from ipath_qcheck(), because that just seems to make things
worse, and isn't really buying us anything, these days.

Also no longer loop in ipath_kreceive(); better to not hold things off too
long (I saw many cases where we would loop 4-8 times, and handle thousands (up
to 3500) in a single call).

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Cc: "Michael S. Tsirkin" <mst@mellanox.co.il>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 8a4d732..e8c8a25 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -539,10 +539,10 @@
 				continue;
 			if (hd == (tl + 1) ||
 			    (!hd && tl == dd->ipath_hdrqlast)) {
-				dd->ipath_lastrcvhdrqtails[i] = tl;
-				pd->port_hdrqfull++;
 				if (i == 0)
 					chkerrpkts = 1;
+				dd->ipath_lastrcvhdrqtails[i] = tl;
+				pd->port_hdrqfull++;
 			}
 		}
 	}
@@ -724,7 +724,12 @@
 			 dd->ipath_sendctrl);
 }
 
-static void handle_rcv(struct ipath_devdata *dd, u32 istat)
+/*
+ * Handle receive interrupts for user ports; this means a user
+ * process was waiting for a packet to arrive, and didn't want
+ * to poll
+ */
+static void handle_urcv(struct ipath_devdata *dd, u32 istat)
 {
 	u64 portr;
 	int i;
@@ -734,22 +739,17 @@
 		 infinipath_i_rcvavail_mask)
 		| ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
 		   infinipath_i_rcvurg_mask);
-	for (i = 0; i < dd->ipath_cfgports; i++) {
+	for (i = 1; i < dd->ipath_cfgports; i++) {
 		struct ipath_portdata *pd = dd->ipath_pd[i];
-		if (portr & (1 << i) && pd &&
-		    pd->port_cnt) {
-			if (i == 0)
-				ipath_kreceive(dd);
-			else if (test_bit(IPATH_PORT_WAITING_RCV,
-					  &pd->port_flag)) {
-				int rcbit;
-				clear_bit(IPATH_PORT_WAITING_RCV,
-					  &pd->port_flag);
-				rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT;
-				clear_bit(1UL << rcbit, &dd->ipath_rcvctrl);
-				wake_up_interruptible(&pd->port_wait);
-				rcvdint = 1;
-			}
+		if (portr & (1 << i) && pd && pd->port_cnt &&
+			test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
+			int rcbit;
+			clear_bit(IPATH_PORT_WAITING_RCV,
+				  &pd->port_flag);
+			rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT;
+			clear_bit(1UL << rcbit, &dd->ipath_rcvctrl);
+			wake_up_interruptible(&pd->port_wait);
+			rcvdint = 1;
 		}
 	}
 	if (rcvdint) {
@@ -767,14 +767,17 @@
 	struct ipath_devdata *dd = data;
 	u32 istat;
 	ipath_err_t estat = 0;
-	static unsigned unexpected = 0;
 	irqreturn_t ret;
+	u32 p0bits;
+	static unsigned unexpected = 0;
+	static const u32 port0rbits = (1U<<INFINIPATH_I_RCVAVAIL_SHIFT) |
+		 (1U<<INFINIPATH_I_RCVURG_SHIFT);
 
-	if(!(dd->ipath_flags & IPATH_PRESENT)) {
-		/* this is mostly so we don't try to touch the chip while
-		 * it is being reset */
+	ipath_stats.sps_ints++;
+
+	if (!(dd->ipath_flags & IPATH_PRESENT)) {
 		/*
-		 * This return value is perhaps odd, but we do not want the
+		 * This return value is not great, but we do not want the
 		 * interrupt core code to remove our interrupt handler
 		 * because we don't appear to be handling an interrupt
 		 * during a chip reset.
@@ -782,6 +785,50 @@
 		return IRQ_HANDLED;
 	}
 
+	/*
+	 * this needs to be flags&initted, not statusp, so we keep
+	 * taking interrupts even after link goes down, etc.
+	 * Also, we *must* clear the interrupt at some point, or we won't
+	 * take it again, which can be real bad for errors, etc...
+	 */
+
+	if (!(dd->ipath_flags & IPATH_INITTED)) {
+		ipath_bad_intr(dd, &unexpected);
+		ret = IRQ_NONE;
+		goto bail;
+	}
+
+	/*
+	 * We try to avoid reading the interrupt status register, since
+	 * that's a PIO read, and stalls the processor for up to about
+	 * ~0.25 usec. The idea is that if we processed a port0 packet,
+	 * we blindly clear the  port 0 receive interrupt bits, and nothing
+	 * else, then return.  If other interrupts are pending, the chip
+	 * will re-interrupt us as soon as we write the intclear register.
+	 * We then won't process any more kernel packets (if not the 2nd
+	 * time, then the 3rd or 4th) and we'll then handle the other
+	 * interrupts.   We clear the interrupts first so that we don't
+	 * lose intr for later packets that arrive while we are processing.
+	 */
+	if (dd->ipath_port0head !=
+		(u32)le64_to_cpu(*dd->ipath_hdrqtailptr)) {
+		u32 oldhead = dd->ipath_port0head;
+		if (dd->ipath_flags & IPATH_GPIO_INTR) {
+			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
+					 (u64) (1 << 2));
+			p0bits = port0rbits | INFINIPATH_I_GPIO;
+		}
+		else
+			p0bits = port0rbits;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, p0bits);
+		ipath_kreceive(dd);
+		if (oldhead != dd->ipath_port0head) {
+			ipath_stats.sps_fastrcvint++;
+			goto done;
+		}
+		istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus);
+	}
+
 	istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus);
 	if (unlikely(!istat)) {
 		ipath_stats.sps_nullintr++;
@@ -795,31 +842,17 @@
 		goto bail;
 	}
 
-	ipath_stats.sps_ints++;
-
-	/*
-	 * this needs to be flags&initted, not statusp, so we keep
-	 * taking interrupts even after link goes down, etc.
-	 * Also, we *must* clear the interrupt at some point, or we won't
-	 * take it again, which can be real bad for errors, etc...
-	 */
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		ipath_bad_intr(dd, &unexpected);
-		ret = IRQ_NONE;
-		goto bail;
-	}
 	if (unexpected)
 		unexpected = 0;
 
-	ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);
-
-	if (istat & ~infinipath_i_bitsextant)
+	if (unlikely(istat & ~infinipath_i_bitsextant))
 		ipath_dev_err(dd,
 			      "interrupt with unknown interrupts %x set\n",
 			      istat & (u32) ~ infinipath_i_bitsextant);
+	else
+		ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);
 
-	if (istat & INFINIPATH_I_ERROR) {
+	if (unlikely(istat & INFINIPATH_I_ERROR)) {
 		ipath_stats.sps_errints++;
 		estat = ipath_read_kreg64(dd,
 					  dd->ipath_kregs->kr_errorstatus);
@@ -837,7 +870,14 @@
 			handle_errors(dd, estat);
 	}
 
+	p0bits = port0rbits;
 	if (istat & INFINIPATH_I_GPIO) {
+		/*
+		 * Packets are available in the port 0 rcv queue.
+		 * Eventually this needs to be generalized to check
+		 * IPATH_GPIO_INTR, and the specific GPIO bit, if
+		 * GPIO interrupts are used for anything else.
+		 */
 		if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) {
 			u32 gpiostatus;
 			gpiostatus = ipath_read_kreg32(
@@ -851,14 +891,7 @@
 			/* Clear GPIO status bit 2 */
 			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
 					 (u64) (1 << 2));
-
-			/*
-			 * Packets are available in the port 0 rcv queue.
-			 * Eventually this needs to be generalized to check
-			 * IPATH_GPIO_INTR, and the specific GPIO bit, if
-			 * GPIO interrupts are used for anything else.
-			 */
-			ipath_kreceive(dd);
+			p0bits |= INFINIPATH_I_GPIO;
 		}
 	}
 
@@ -871,6 +904,25 @@
 	 */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
 
+	/*
+	 * we check for both transition from empty to non-empty, and urgent
+	 * packets (those with the interrupt bit set in the header), and
+	 * if enabled, the GPIO bit 2 interrupt used for port0 on some
+	 * HT-400 boards.
+	 * Do this before checking for pio buffers available, since
+	 * receives can overflow; piobuf waiters can afford a few
+	 * extra cycles, since they were waiting anyway.
+	 */
+	if (istat & p0bits) {
+		ipath_kreceive(dd);
+		istat &= ~port0rbits;
+	}
+	if (istat & ((infinipath_i_rcvavail_mask <<
+		      INFINIPATH_I_RCVAVAIL_SHIFT)
+		     | (infinipath_i_rcvurg_mask <<
+			INFINIPATH_I_RCVURG_SHIFT)))
+		handle_urcv(dd, istat);
+
 	if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
 		clear_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
@@ -882,17 +934,7 @@
 		handle_layer_pioavail(dd);
 	}
 
-	/*
-	 * we check for both transition from empty to non-empty, and urgent
-	 * packets (those with the interrupt bit set in the header)
-	 */
-
-	if (istat & ((infinipath_i_rcvavail_mask <<
-		      INFINIPATH_I_RCVAVAIL_SHIFT)
-		     | (infinipath_i_rcvurg_mask <<
-			INFINIPATH_I_RCVURG_SHIFT)))
-		handle_rcv(dd, istat);
-
+done:
 	ret = IRQ_HANDLED;
 
 bail: