staging/rdma/hfi1: Thread the receive interrupt.
When under heavy load, the receive interrupt handler can run too long with IRQs
disabled. Add a mixed-mode threading scheme. Initially process packets in the
handler for quick responses (latency). If there are too many packets to
process, move to a thread to continue (bandwidth).
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c
index e47420f..e489819 100644
--- a/drivers/staging/rdma/hfi1/chip.c
+++ b/drivers/staging/rdma/hfi1/chip.c
@@ -4424,7 +4424,7 @@
rcd = dd->rcd[source];
if (rcd) {
if (source < dd->first_user_ctxt)
- rcd->do_interrupt(rcd);
+ rcd->do_interrupt(rcd, 0);
else
handle_user_interrupt(rcd);
return; /* OK */
@@ -4590,23 +4590,106 @@
}
/*
- * NOTE: this routine expects to be on its own MSI-X interrupt. If
- * multiple receive contexts share the same MSI-X interrupt, then this
- * routine must check for who received it.
+ * Clear the receive interrupt, forcing the write and making sure
+ * we have data from the chip, pushing everything in front of it
+ * back to the host.
+ */
+static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg);
+
+ mmiowb(); /* make sure everything before is written */
+ write_csr(dd, addr, rcd->imask);
+ /* force the above write on the chip and get a value back */
+ (void)read_csr(dd, addr);
+}
+
+/* force the receive interrupt */
+static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
+{
+ write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
+}
+
+/* return non-zero if a packet is present */
+static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
+{
+ if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
+ return (rcd->seq_cnt ==
+ rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
+
+ /* else DMA_RTAIL is set: present if head != get_rcvhdrtail() */
+ return (rcd->head != get_rcvhdrtail(rcd));
+}
+
+/*
+ * Receive packet IRQ handler. This routine expects to be on its own IRQ.
+ * This routine will try to handle packets immediately (latency), but if
+ * it finds too many, it will invoke the thread handler (bandwidth). The
+ * chip receive interrupt is *not* cleared down until this or the thread (if
+ * invoked) is finished. The intent is to avoid extra interrupts while we
+ * are processing packets anyway.
*/
static irqreturn_t receive_context_interrupt(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
struct hfi1_devdata *dd = rcd->dd;
+ int disposition;
+ int present;
trace_hfi1_receive_interrupt(dd, rcd->ctxt);
this_cpu_inc(*dd->int_counter);
- /* clear the interrupt */
- write_csr(rcd->dd, CCE_INT_CLEAR + (8*rcd->ireg), rcd->imask);
+ /* receive interrupt remains blocked while processing packets */
+ disposition = rcd->do_interrupt(rcd, 0);
- /* handle the interrupt */
- rcd->do_interrupt(rcd);
+ /*
+ * Too many packets were seen while processing packets in this
+ * IRQ handler. Invoke the handler thread. The receive interrupt
+ * remains blocked.
+ */
+ if (disposition == RCV_PKT_LIMIT)
+ return IRQ_WAKE_THREAD;
+
+ /*
+ * The packet processor detected no more packets. Clear the receive
+ * interrupt and recheck for a packet that may have arrived
+ * after the previous check and interrupt clear. If a packet arrived,
+ * force another interrupt.
+ */
+ clear_recv_intr(rcd);
+ present = check_packet_present(rcd);
+ if (present)
+ force_recv_intr(rcd);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Receive packet thread handler. This expects to be invoked with the
+ * receive interrupt still blocked.
+ */
+static irqreturn_t receive_context_thread(int irq, void *data)
+{
+ struct hfi1_ctxtdata *rcd = data;
+ int present;
+
+ /* receive interrupt is still blocked from the IRQ handler */
+ (void)rcd->do_interrupt(rcd, 1);
+
+ /*
+ * The packet processor will only return if it detected no more
+ * packets. Hold IRQs here so we can safely clear the interrupt and
+ * recheck for a packet that may have arrived after the previous
+ * check and the interrupt clear. If a packet arrived, force another
+ * interrupt.
+ */
+ local_irq_disable();
+ clear_recv_intr(rcd);
+ present = check_packet_present(rcd);
+ if (present)
+ force_recv_intr(rcd);
+ local_irq_enable();
return IRQ_HANDLED;
}
@@ -8858,6 +8941,7 @@
struct hfi1_msix_entry *me = &dd->msix_entries[i];
const char *err_info;
irq_handler_t handler;
+ irq_handler_t thread = NULL;
void *arg;
int idx;
struct hfi1_ctxtdata *rcd = NULL;
@@ -8894,6 +8978,7 @@
rcd->imask = ((u64)1) <<
((IS_RCVAVAIL_START+idx) % 64);
handler = receive_context_interrupt;
+ thread = receive_context_thread;
arg = rcd;
snprintf(me->name, sizeof(me->name),
DRIVER_NAME"_%d kctxt%d", dd->unit, idx);
@@ -8912,7 +8997,8 @@
/* make sure the name is terminated */
me->name[sizeof(me->name)-1] = 0;
- ret = request_irq(me->msix.vector, handler, 0, me->name, arg);
+ ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
+ me->name, arg);
if (ret) {
dd_dev_err(dd,
"unable to allocate %s interrupt, vector %d, index %d, err %d\n",
diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c
index ee4a01f..ce69141 100644
--- a/drivers/staging/rdma/hfi1/driver.c
+++ b/drivers/staging/rdma/hfi1/driver.c
@@ -427,8 +427,7 @@
packet->rcd = rcd;
packet->updegr = 0;
packet->etail = -1;
- packet->rhf_addr = (__le32 *) rcd->rcvhdrq + rcd->head +
- rcd->dd->rhf_offset;
+ packet->rhf_addr = get_rhf_addr(rcd);
packet->rhf = rhf_to_cpu(packet->rhf_addr);
packet->rhqoff = rcd->head;
packet->numpkt = 0;
@@ -619,10 +618,7 @@
}
#endif /* CONFIG_PRESCAN_RXQ */
-#define RCV_PKT_OK 0x0
-#define RCV_PKT_MAX 0x1
-
-static inline int process_rcv_packet(struct hfi1_packet *packet)
+static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
{
int ret = RCV_PKT_OK;
@@ -664,9 +660,13 @@
if (packet->rhqoff >= packet->maxcnt)
packet->rhqoff = 0;
- if (packet->numpkt == MAX_PKT_RECV) {
- ret = RCV_PKT_MAX;
- this_cpu_inc(*packet->rcd->dd->rcv_limit);
+ if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) {
+ if (thread) {
+ cond_resched();
+ } else {
+ ret = RCV_PKT_LIMIT;
+ this_cpu_inc(*packet->rcd->dd->rcv_limit);
+ }
}
packet->rhf_addr = (__le32 *) packet->rcd->rcvhdrq + packet->rhqoff +
@@ -743,57 +743,63 @@
/*
* Handle receive interrupts when using the no dma rtail option.
*/
-void handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd)
+int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{
u32 seq;
- int last = 0;
+ int last = RCV_PKT_OK;
struct hfi1_packet packet;
init_packet(rcd, &packet);
seq = rhf_rcv_seq(packet.rhf);
- if (seq != rcd->seq_cnt)
+ if (seq != rcd->seq_cnt) {
+ last = RCV_PKT_DONE;
goto bail;
+ }
prescan_rxq(&packet);
- while (!last) {
- last = process_rcv_packet(&packet);
+ while (last == RCV_PKT_OK) {
+ last = process_rcv_packet(&packet, thread);
seq = rhf_rcv_seq(packet.rhf);
if (++rcd->seq_cnt > 13)
rcd->seq_cnt = 1;
if (seq != rcd->seq_cnt)
- last = 1;
+ last = RCV_PKT_DONE;
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
bail:
finish_packet(&packet);
+ return last;
}
-void handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd)
+int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{
u32 hdrqtail;
- int last = 0;
+ int last = RCV_PKT_OK;
struct hfi1_packet packet;
init_packet(rcd, &packet);
hdrqtail = get_rcvhdrtail(rcd);
- if (packet.rhqoff == hdrqtail)
+ if (packet.rhqoff == hdrqtail) {
+ last = RCV_PKT_DONE;
goto bail;
+ }
smp_rmb(); /* prevent speculative reads of dma'ed hdrq */
prescan_rxq(&packet);
- while (!last) {
- last = process_rcv_packet(&packet);
+ while (last == RCV_PKT_OK) {
+ last = process_rcv_packet(&packet, thread);
+ hdrqtail = get_rcvhdrtail(rcd);
if (packet.rhqoff == hdrqtail)
- last = 1;
+ last = RCV_PKT_DONE;
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
bail:
finish_packet(&packet);
-
+ return last;
}
static inline void set_all_nodma_rtail(struct hfi1_devdata *dd)
@@ -821,12 +827,11 @@
* Called from interrupt handler for errors or receive interrupt.
* This is the slow path interrupt handler.
*/
-void handle_receive_interrupt(struct hfi1_ctxtdata *rcd)
+int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
{
-
struct hfi1_devdata *dd = rcd->dd;
u32 hdrqtail;
- int last = 0, needset = 1;
+ int last = RCV_PKT_OK, needset = 1;
struct hfi1_packet packet;
init_packet(rcd, &packet);
@@ -834,19 +839,23 @@
if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) {
u32 seq = rhf_rcv_seq(packet.rhf);
- if (seq != rcd->seq_cnt)
+ if (seq != rcd->seq_cnt) {
+ last = RCV_PKT_DONE;
goto bail;
+ }
hdrqtail = 0;
} else {
hdrqtail = get_rcvhdrtail(rcd);
- if (packet.rhqoff == hdrqtail)
+ if (packet.rhqoff == hdrqtail) {
+ last = RCV_PKT_DONE;
goto bail;
+ }
smp_rmb(); /* prevent speculative reads of dma'ed hdrq */
}
prescan_rxq(&packet);
- while (!last) {
+ while (last == RCV_PKT_OK) {
if (unlikely(dd->do_drop && atomic_xchg(&dd->drop_packet,
DROP_PACKET_OFF) == DROP_PACKET_ON)) {
@@ -860,7 +869,7 @@
packet.rhf = rhf_to_cpu(packet.rhf_addr);
} else {
- last = process_rcv_packet(&packet);
+ last = process_rcv_packet(&packet, thread);
}
if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) {
@@ -869,7 +878,7 @@
if (++rcd->seq_cnt > 13)
rcd->seq_cnt = 1;
if (seq != rcd->seq_cnt)
- last = 1;
+ last = RCV_PKT_DONE;
if (needset) {
dd_dev_info(dd,
"Switching to NO_DMA_RTAIL\n");
@@ -878,7 +887,7 @@
}
} else {
if (packet.rhqoff == hdrqtail)
- last = 1;
+ last = RCV_PKT_DONE;
if (needset) {
dd_dev_info(dd,
"Switching to DMA_RTAIL\n");
@@ -898,6 +907,7 @@
* if no packets were processed.
*/
finish_packet(&packet);
+ return last;
}
/*
diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h
index a35213e..190f7a2 100644
--- a/drivers/staging/rdma/hfi1/hfi.h
+++ b/drivers/staging/rdma/hfi1/hfi.h
@@ -313,7 +313,7 @@
* be valid. Worst case is we process an extra interrupt and up to 64
* packets with the wrong interrupt handler.
*/
- void (*do_interrupt)(struct hfi1_ctxtdata *rcd);
+ int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
};
/*
@@ -1130,9 +1130,21 @@
struct hfi1_devdata *, u8, u8);
void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *);
-void handle_receive_interrupt(struct hfi1_ctxtdata *);
-void handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd);
-void handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd);
+int handle_receive_interrupt(struct hfi1_ctxtdata *, int);
+int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
+int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
+
+/* receive packet handler dispositions */
+#define RCV_PKT_OK 0x0 /* keep going */
+#define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */
+#define RCV_PKT_DONE 0x2 /* stop, no more packets detected */
+
+/* calculate the current RHF address */
+static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd)
+{
+ return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->dd->rhf_offset;
+}
+
int hfi1_reset_device(int);
/* return the driver's idea of the logical OPA port state */
diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c
index f372b6d..2a1da21 100644
--- a/drivers/staging/rdma/hfi1/sdma.c
+++ b/drivers/staging/rdma/hfi1/sdma.c
@@ -2096,9 +2096,9 @@
tx->sn = sde->tail_sn++;
trace_hfi1_sdma_in_sn(sde, tx->sn);
#endif
- spin_lock_irqsave(&sde->flushlist_lock, flags);
+ spin_lock(&sde->flushlist_lock);
list_add_tail(&tx->list, &sde->flushlist);
- spin_unlock_irqrestore(&sde->flushlist_lock, flags);
+ spin_unlock(&sde->flushlist_lock);
if (wait) {
wait->tx_count++;
wait->count += tx->num_desc;