ath9k: fix retry count for A-MPDU rate control status reports

The 'bf_retries' field of the ath_buf structure was used for both
software retries (AMPDU subframes) and hardware retries (legacy
frames). This led to a wrong retry count being reported for the A-MPDU
rate control stats.
This patch changes the code to no longer use bf_retries for reporting
retry counts, but instead always using the real on-chip retry count
from the ath_tx_status.
Additionally, if the first subframe of an A-MPDU was not acked, the tx
status report is submitted along with the first acked subframe, which
may not contain the correct rates in the tx info.
This is easily corrected by saving the tx rate info before looping over
subframes, and then copying it back once the A-MPDU status report is
submitted.
In my tests this change improves throughput visibly.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Reported-by: Björn Smedman <bjorn.smedman@venatech.se>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index c41185b..c3681a1 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -328,6 +328,7 @@
 	u32 ba[WME_BA_BMP_SIZE >> 5];
 	int isaggr, txfail, txpending, sendbar = 0, needreset = 0, nbad = 0;
 	bool rc_update = true;
+	struct ieee80211_tx_rate rates[4];
 
 	skb = bf->bf_mpdu;
 	hdr = (struct ieee80211_hdr *)skb->data;
@@ -335,6 +336,8 @@
 	tx_info = IEEE80211_SKB_CB(skb);
 	hw = bf->aphy->hw;
 
+	memcpy(rates, tx_info->control.rates, sizeof(rates));
+
 	rcu_read_lock();
 
 	/* XXX: use ieee80211_find_sta! */
@@ -375,6 +378,9 @@
 		txfail = txpending = 0;
 		bf_next = bf->bf_next;
 
+		skb = bf->bf_mpdu;
+		tx_info = IEEE80211_SKB_CB(skb);
+
 		if (ATH_BA_ISSET(ba, ATH_BA_INDEX(seq_st, bf->bf_seqno))) {
 			/* transmit completion, subframe is
 			 * acked by block ack */
@@ -428,6 +434,7 @@
 			spin_unlock_bh(&txq->axq_lock);
 
 			if (rc_update && (acked_cnt == 1 || txfail_cnt == 1)) {
+				memcpy(tx_info->control.rates, rates, sizeof(rates));
 				ath_tx_rc_status(bf, ts, nbad, txok, true);
 				rc_update = false;
 			} else {
@@ -2033,7 +2040,7 @@
 		tx_info->status.rates[i].idx = -1;
 	}
 
-	tx_info->status.rates[tx_rateindex].count = bf->bf_retries + 1;
+	tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1;
 }
 
 static void ath_wake_mac80211_queue(struct ath_softc *sc, struct ath_txq *txq)
@@ -2144,7 +2151,6 @@
 			 * This frame is sent out as a single frame.
 			 * Use hardware retry status for this frame.
 			 */
-			bf->bf_retries = ts.ts_longretry;
 			if (ts.ts_status & ATH9K_TXERR_XRETRY)
 				bf->bf_state.bf_type |= BUF_XRETRY;
 			ath_tx_rc_status(bf, &ts, 0, txok, true);
@@ -2274,7 +2280,6 @@
 		}
 
 		if (!bf_isampdu(bf)) {
-			bf->bf_retries = txs.ts_longretry;
 			if (txs.ts_status & ATH9K_TXERR_XRETRY)
 				bf->bf_state.bf_type |= BUF_XRETRY;
 			ath_tx_rc_status(bf, &txs, 0, txok, true);