i40e: Add a SW workaround for lost interrupts

This patch adds a workaround for cases where we might have
interrupts that got lost but WB happened.
If that happens without this patch we will see a tx_timeout.
To work around it, this patch goes ahead and reschedules NAPI
in that situation, if NAPI is not already scheduled.
We also add a counter in ethtool to keep track of when
we detect a case of tx_lost_interrupt.

Note: napi_reschedule() can be safely called from process/service_task
context and is done in other drivers as well without an issue.

Change-ID: I00f98f1ce3774524d9421227652bef20fcbd0d20
Signed-off-by: Anjali Singhai Jain <anjali.singhai@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 161603b..aa1c7ca 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -819,6 +819,7 @@
 	struct i40e_eth_stats *oes;
 	struct i40e_eth_stats *es;     /* device's eth stats */
 	u32 tx_restart, tx_busy;
+	u64 tx_lost_interrupt;
 	struct i40e_ring *p;
 	u32 rx_page, rx_buf;
 	u64 bytes, packets;
@@ -844,6 +845,7 @@
 	rx_b = rx_p = 0;
 	tx_b = tx_p = 0;
 	tx_restart = tx_busy = tx_linearize = tx_force_wb = 0;
+	tx_lost_interrupt = 0;
 	rx_page = 0;
 	rx_buf = 0;
 	rcu_read_lock();
@@ -862,6 +864,7 @@
 		tx_busy += p->tx_stats.tx_busy;
 		tx_linearize += p->tx_stats.tx_linearize;
 		tx_force_wb += p->tx_stats.tx_force_wb;
+		tx_lost_interrupt += p->tx_stats.tx_lost_interrupt;
 
 		/* Rx queue is part of the same block as Tx queue */
 		p = &p[1];
@@ -880,6 +883,7 @@
 	vsi->tx_busy = tx_busy;
 	vsi->tx_linearize = tx_linearize;
 	vsi->tx_force_wb = tx_force_wb;
+	vsi->tx_lost_interrupt = tx_lost_interrupt;
 	vsi->rx_page_failed = rx_page;
 	vsi->rx_buf_failed = rx_buf;
 
@@ -4349,7 +4353,7 @@
 {
 	struct i40e_ring *tx_ring = NULL;
 	struct i40e_pf	*pf;
-	u32 head, val, tx_pending;
+	u32 head, val, tx_pending_hw;
 	int i;
 
 	pf = vsi->back;
@@ -4375,16 +4379,9 @@
 	else
 		val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
 
-	/* Bail out if interrupts are disabled because napi_poll
-	 * execution in-progress or will get scheduled soon.
-	 * napi_poll cleans TX and RX queues and updates 'next_to_clean'.
-	 */
-	if (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))
-		return;
-
 	head = i40e_get_head(tx_ring);
 
-	tx_pending = i40e_get_tx_pending(tx_ring);
+	tx_pending_hw = i40e_get_tx_pending(tx_ring, false);
 
 	/* HW is done executing descriptors, updated HEAD write back,
 	 * but SW hasn't processed those descriptors. If interrupt is
@@ -4392,12 +4389,12 @@
 	 * dev_watchdog detecting timeout on those netdev_queue,
 	 * hence proactively trigger SW interrupt.
 	 */
-	if (tx_pending) {
+	if (tx_pending_hw && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) {
 		/* NAPI Poll didn't run and clear since it was set */
 		if (test_and_clear_bit(I40E_Q_VECTOR_HUNG_DETECT,
 				       &tx_ring->q_vector->hung_detected)) {
-			netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n",
-				    vsi->seid, q_idx, tx_pending,
+			netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending_hw: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n",
+				    vsi->seid, q_idx, tx_pending_hw,
 				    tx_ring->next_to_clean, head,
 				    tx_ring->next_to_use,
 				    readl(tx_ring->tail));
@@ -4410,6 +4407,17 @@
 				&tx_ring->q_vector->hung_detected);
 		}
 	}
+
+	/* This is the case where we have interrupts missing,
+	 * so the tx_pending in HW will most likely be 0, but we
+	 * will have tx_pending in SW since the WB happened but the
+	 * interrupt got lost.
+	 */
+	if ((!tx_pending_hw) && i40e_get_tx_pending(tx_ring, true) &&
+	    (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) {
+		if (napi_reschedule(&tx_ring->q_vector->napi))
+			tx_ring->tx_stats.tx_lost_interrupt++;
+	}
 }
 
 /**