ath10k: Improve performance by reducing tx_lock contention

During tx completion, tx_lock is held for longer than required, preventing
efficient refill of htt->pending_tx. Refactor the code so that only MSDU
related operations are protected by the lock.

Improves downstream performance on a dual-core ARM Freescale LS1024A
(f.k.a. Mindspeed Comcerto 2000) AP with a 3x3 client from 495 to 580 Mbps.
Other CPU bound multicore systems may also benefit.

Signed-off-by: Denton Gentry <dgentry@google.com>
Signed-off-by: Avery Pennarun <apenwarr@google.com>
[mfaltesek@google.com: removed conflicting code for tracking msdu_ids.]
Signed-off-by: Marty Faltesek <mfaltesek@google.com>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 61a2626..5e54c39 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1631,8 +1631,6 @@
 	__le16 msdu_id;
 	int i;
 
-	lockdep_assert_held(&htt->tx_lock);
-
 	switch (status) {
 	case HTT_DATA_TX_STATUS_NO_ACK:
 		tx_done.no_ack = true;
@@ -1998,15 +1996,11 @@
 			break;
 		}
 
-		spin_lock_bh(&htt->tx_lock);
 		ath10k_txrx_tx_unref(htt, &tx_done);
-		spin_unlock_bh(&htt->tx_lock);
 		break;
 	}
 	case HTT_T2H_MSG_TYPE_TX_COMPL_IND:
-		spin_lock_bh(&htt->tx_lock);
-		__skb_queue_tail(&htt->tx_compl_q, skb);
-		spin_unlock_bh(&htt->tx_lock);
+		skb_queue_tail(&htt->tx_compl_q, skb);
 		tasklet_schedule(&htt->txrx_compl_task);
 		return;
 	case HTT_T2H_MSG_TYPE_SEC_IND: {
@@ -2095,12 +2089,10 @@
 	struct htt_resp *resp;
 	struct sk_buff *skb;
 
-	spin_lock_bh(&htt->tx_lock);
-	while ((skb = __skb_dequeue(&htt->tx_compl_q))) {
+	while ((skb = skb_dequeue(&htt->tx_compl_q))) {
 		ath10k_htt_rx_frm_tx_compl(htt->ar, skb);
 		dev_kfree_skb_any(skb);
 	}
-	spin_unlock_bh(&htt->tx_lock);
 
 	spin_lock_bh(&htt->rx_ring.lock);
 	while ((skb = __skb_dequeue(&htt->rx_compl_q))) {
diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c
index a97dd9d..2c0627b 100644
--- a/drivers/net/wireless/ath/ath10k/htt_tx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
@@ -134,9 +134,7 @@
 	tx_done.discard = 1;
 	tx_done.msdu_id = msdu_id;
 
-	spin_lock_bh(&htt->tx_lock);
 	ath10k_txrx_tx_unref(htt, &tx_done);
-	spin_unlock_bh(&htt->tx_lock);
 
 	return 0;
 }
@@ -429,12 +427,11 @@
 
 	spin_lock_bh(&htt->tx_lock);
 	res = ath10k_htt_tx_alloc_msdu_id(htt, msdu);
+	spin_unlock_bh(&htt->tx_lock);
 	if (res < 0) {
-		spin_unlock_bh(&htt->tx_lock);
 		goto err_tx_dec;
 	}
 	msdu_id = res;
-	spin_unlock_bh(&htt->tx_lock);
 
 	txdesc = ath10k_htc_alloc_skb(ar, len);
 	if (!txdesc) {
@@ -506,12 +503,11 @@
 
 	spin_lock_bh(&htt->tx_lock);
 	res = ath10k_htt_tx_alloc_msdu_id(htt, msdu);
+	spin_unlock_bh(&htt->tx_lock);
 	if (res < 0) {
-		spin_unlock_bh(&htt->tx_lock);
 		goto err_tx_dec;
 	}
 	msdu_id = res;
-	spin_unlock_bh(&htt->tx_lock);
 
 	prefetch_len = min(htt->prefetch_len, msdu->len);
 	prefetch_len = roundup(prefetch_len, 4);
diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c
index 6cf2891..e4a9c4c 100644
--- a/drivers/net/wireless/ath/ath10k/txrx.c
+++ b/drivers/net/wireless/ath/ath10k/txrx.c
@@ -53,8 +53,6 @@
 	struct ath10k_skb_cb *skb_cb;
 	struct sk_buff *msdu;
 
-	lockdep_assert_held(&htt->tx_lock);
-
 	ath10k_dbg(ar, ATH10K_DBG_HTT,
 		   "htt tx completion msdu_id %u discard %d no_ack %d success %d\n",
 		   tx_done->msdu_id, !!tx_done->discard,
@@ -66,12 +64,19 @@
 		return;
 	}
 
+	spin_lock_bh(&htt->tx_lock);
 	msdu = idr_find(&htt->pending_tx, tx_done->msdu_id);
 	if (!msdu) {
 		ath10k_warn(ar, "received tx completion for invalid msdu_id: %d\n",
 			    tx_done->msdu_id);
+		spin_unlock_bh(&htt->tx_lock);
 		return;
 	}
+	ath10k_htt_tx_free_msdu_id(htt, tx_done->msdu_id);
+	__ath10k_htt_tx_dec_pending(htt);
+	if (htt->num_pending_tx == 0)
+		wake_up(&htt->empty_tx_wq);
+	spin_unlock_bh(&htt->tx_lock);
 
 	skb_cb = ATH10K_SKB_CB(msdu);
 
@@ -90,7 +95,7 @@
 
 	if (tx_done->discard) {
 		ieee80211_free_txskb(htt->ar->hw, msdu);
-		goto exit;
+		return;
 	}
 
 	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
@@ -104,12 +109,6 @@
 
 	ieee80211_tx_status(htt->ar->hw, msdu);
 	/* we do not own the msdu anymore */
-
-exit:
-	ath10k_htt_tx_free_msdu_id(htt, tx_done->msdu_id);
-	__ath10k_htt_tx_dec_pending(htt);
-	if (htt->num_pending_tx == 0)
-		wake_up(&htt->empty_tx_wq);
 }
 
 struct ath10k_peer *ath10k_peer_find(struct ath10k *ar, int vdev_id,