mv643xx_eth: fix TX hang erratum workaround

The previously merged TX hang erratum workaround ("mv643xx_eth:
work around TX hang hardware issue") assumes that TX_END interrupts
are delivered simultaneously with or after their corresponding TX
interrupts, but this is not always true in practise.

In particular, it appears that TX_END interrupts are issued as soon
as descriptor fetch returns an invalid descriptor, which may happen
before earlier descriptors have been fully transmitted and written
back to memory as being done.

This hardware behavior can lead to a situation where the current
driver code mistakenly assumes that the MAC has given up transmitting
before noticing the packets that it is in fact still currently working
on, causing the driver to re-kick the transmit queue, which will only
cause the MAC to re-fetch the invalid head descriptor, and generate
another TX_END interrupt, et cetera, until the packets in the pipe
finally finish transmitting and have their descriptors written back
to memory, which will then finally break the loop.

Fix this by having the erratum workaround not check the 'number of
unfinished descriptor', but instead, to compare the software's idea
of what the head descriptor pointer should be to the hardware's head
descriptor pointer (which is updated on the same conditions as the
TX_END interupt is generated on, i.e. possibly before all previous
descriptors have been transmitted and written back).

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 8a97a00..910920e 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -96,6 +96,7 @@
 #define TX_BW_MTU(p)			(0x0458 + ((p) << 10))
 #define TX_BW_BURST(p)			(0x045c + ((p) << 10))
 #define INT_CAUSE(p)			(0x0460 + ((p) << 10))
+#define  INT_TX_END_0			0x00080000
 #define  INT_TX_END			0x07f80000
 #define  INT_RX				0x0007fbfc
 #define  INT_EXT			0x00000002
@@ -706,6 +707,7 @@
 
 static void txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb)
 {
+	struct mv643xx_eth_private *mp = txq_to_mp(txq);
 	int nr_frags = skb_shinfo(skb)->nr_frags;
 	int tx_index;
 	struct tx_desc *desc;
@@ -759,6 +761,10 @@
 	wmb();
 	desc->cmd_sts = cmd_sts;
 
+	/* clear TX_END interrupt status */
+	wrl(mp, INT_CAUSE(mp->port_num), ~(INT_TX_END_0 << txq->index));
+	rdl(mp, INT_CAUSE(mp->port_num));
+
 	/* ensure all descriptors are written before poking hardware */
 	wmb();
 	txq_enable(txq);
@@ -1684,7 +1690,6 @@
 	struct mv643xx_eth_private *mp = netdev_priv(dev);
 	u32 int_cause;
 	u32 int_cause_ext;
-	u32 txq_active;
 
 	int_cause = rdl(mp, INT_CAUSE(mp->port_num)) &
 			(INT_TX_END | INT_RX | INT_EXT);
@@ -1743,8 +1748,6 @@
 	}
 #endif
 
-	txq_active = rdl(mp, TXQ_COMMAND(mp->port_num));
-
 	/*
 	 * TxBuffer or TxError set for any of the 8 queues?
 	 */
@@ -1754,6 +1757,14 @@
 		for (i = 0; i < 8; i++)
 			if (mp->txq_mask & (1 << i))
 				txq_reclaim(mp->txq + i, 0);
+
+		/*
+		 * Enough space again in the primary TX queue for a
+		 * full packet?
+		 */
+		spin_lock(&mp->lock);
+		__txq_maybe_wake(mp->txq + mp->txq_primary);
+		spin_unlock(&mp->lock);
 	}
 
 	/*
@@ -1763,19 +1774,25 @@
 		int i;
 
 		wrl(mp, INT_CAUSE(mp->port_num), ~(int_cause & INT_TX_END));
+
+		spin_lock(&mp->lock);
 		for (i = 0; i < 8; i++) {
 			struct tx_queue *txq = mp->txq + i;
-			if (txq->tx_desc_count && !((txq_active >> i) & 1))
+			u32 hw_desc_ptr;
+			u32 expected_ptr;
+
+			if ((int_cause & (INT_TX_END_0 << i)) == 0)
+				continue;
+
+			hw_desc_ptr =
+				rdl(mp, TXQ_CURRENT_DESC_PTR(mp->port_num, i));
+			expected_ptr = (u32)txq->tx_desc_dma +
+				txq->tx_curr_desc * sizeof(struct tx_desc);
+
+			if (hw_desc_ptr != expected_ptr)
 				txq_enable(txq);
 		}
-	}
-
-	/*
-	 * Enough space again in the primary TX queue for a full packet?
-	 */
-	if (int_cause_ext & INT_EXT_TX) {
-		struct tx_queue *txq = mp->txq + mp->txq_primary;
-		__txq_maybe_wake(txq);
+		spin_unlock(&mp->lock);
 	}
 
 	return IRQ_HANDLED;