ath9k_hw: optimize tx status descriptor processing

Disassembly shows, that at least on MIPS, the compiler generates a lot of
memory accesses to the same location in the descriptor field parsing.
Since it is operating on uncached memory, this can be quite expensive in
this hot path.
Change the code a bit to help the compiler optimize it properly, and get
rid of some unused fields in the ath_tx_status struct.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
diff --git a/drivers/net/wireless/ath/ath9k/ar9002_mac.c b/drivers/net/wireless/ath/ath9k/ar9002_mac.c
index f5ed73d..3b4c52c 100644
--- a/drivers/net/wireless/ath/ath9k/ar9002_mac.c
+++ b/drivers/net/wireless/ath/ath9k/ar9002_mac.c
@@ -208,77 +208,68 @@
 				 struct ath_tx_status *ts)
 {
 	struct ar5416_desc *ads = AR5416DESC(ds);
+	u32 status;
 
-	if ((ads->ds_txstatus9 & AR_TxDone) == 0)
+	status = ACCESS_ONCE(ads->ds_txstatus9);
+	if ((status & AR_TxDone) == 0)
 		return -EINPROGRESS;
 
-	ts->ts_seqnum = MS(ads->ds_txstatus9, AR_SeqNum);
 	ts->ts_tstamp = ads->AR_SendTimestamp;
 	ts->ts_status = 0;
 	ts->ts_flags = 0;
 
-	if (ads->ds_txstatus1 & AR_FrmXmitOK)
-		ts->ts_status |= ATH9K_TX_ACKED;
-	if (ads->ds_txstatus1 & AR_ExcessiveRetries)
-		ts->ts_status |= ATH9K_TXERR_XRETRY;
-	if (ads->ds_txstatus1 & AR_Filtered)
-		ts->ts_status |= ATH9K_TXERR_FILT;
-	if (ads->ds_txstatus1 & AR_FIFOUnderrun) {
-		ts->ts_status |= ATH9K_TXERR_FIFO;
-		ath9k_hw_updatetxtriglevel(ah, true);
-	}
-	if (ads->ds_txstatus9 & AR_TxOpExceeded)
+	if (status & AR_TxOpExceeded)
 		ts->ts_status |= ATH9K_TXERR_XTXOP;
-	if (ads->ds_txstatus1 & AR_TxTimerExpired)
-		ts->ts_status |= ATH9K_TXERR_TIMER_EXPIRED;
+	ts->tid = MS(status, AR_TxTid);
+	ts->ts_rateindex = MS(status, AR_FinalTxIdx);
+	ts->ts_seqnum = MS(status, AR_SeqNum);
 
-	if (ads->ds_txstatus1 & AR_DescCfgErr)
-		ts->ts_flags |= ATH9K_TX_DESC_CFG_ERR;
-	if (ads->ds_txstatus1 & AR_TxDataUnderrun) {
-		ts->ts_flags |= ATH9K_TX_DATA_UNDERRUN;
-		ath9k_hw_updatetxtriglevel(ah, true);
-	}
-	if (ads->ds_txstatus1 & AR_TxDelimUnderrun) {
-		ts->ts_flags |= ATH9K_TX_DELIM_UNDERRUN;
-		ath9k_hw_updatetxtriglevel(ah, true);
-	}
-	if (ads->ds_txstatus0 & AR_TxBaStatus) {
+	status = ACCESS_ONCE(ads->ds_txstatus0);
+	ts->ts_rssi_ctl0 = MS(status, AR_TxRSSIAnt00);
+	ts->ts_rssi_ctl1 = MS(status, AR_TxRSSIAnt01);
+	ts->ts_rssi_ctl2 = MS(status, AR_TxRSSIAnt02);
+	if (status & AR_TxBaStatus) {
 		ts->ts_flags |= ATH9K_TX_BA;
 		ts->ba_low = ads->AR_BaBitmapLow;
 		ts->ba_high = ads->AR_BaBitmapHigh;
 	}
 
-	ts->ts_rateindex = MS(ads->ds_txstatus9, AR_FinalTxIdx);
-	switch (ts->ts_rateindex) {
-	case 0:
-		ts->ts_ratecode = MS(ads->ds_ctl3, AR_XmitRate0);
-		break;
-	case 1:
-		ts->ts_ratecode = MS(ads->ds_ctl3, AR_XmitRate1);
-		break;
-	case 2:
-		ts->ts_ratecode = MS(ads->ds_ctl3, AR_XmitRate2);
-		break;
-	case 3:
-		ts->ts_ratecode = MS(ads->ds_ctl3, AR_XmitRate3);
-		break;
+	status = ACCESS_ONCE(ads->ds_txstatus1);
+	if (status & AR_FrmXmitOK)
+		ts->ts_status |= ATH9K_TX_ACKED;
+	if (status & AR_ExcessiveRetries)
+		ts->ts_status |= ATH9K_TXERR_XRETRY;
+	if (status & AR_Filtered)
+		ts->ts_status |= ATH9K_TXERR_FILT;
+	if (status & AR_FIFOUnderrun) {
+		ts->ts_status |= ATH9K_TXERR_FIFO;
+		ath9k_hw_updatetxtriglevel(ah, true);
 	}
+	if (status & AR_TxTimerExpired)
+		ts->ts_status |= ATH9K_TXERR_TIMER_EXPIRED;
+	if (status & AR_DescCfgErr)
+		ts->ts_flags |= ATH9K_TX_DESC_CFG_ERR;
+	if (status & AR_TxDataUnderrun) {
+		ts->ts_flags |= ATH9K_TX_DATA_UNDERRUN;
+		ath9k_hw_updatetxtriglevel(ah, true);
+	}
+	if (status & AR_TxDelimUnderrun) {
+		ts->ts_flags |= ATH9K_TX_DELIM_UNDERRUN;
+		ath9k_hw_updatetxtriglevel(ah, true);
+	}
+	ts->ts_shortretry = MS(status, AR_RTSFailCnt);
+	ts->ts_longretry = MS(status, AR_DataFailCnt);
+	ts->ts_virtcol = MS(status, AR_VirtRetryCnt);
 
-	ts->ts_rssi = MS(ads->ds_txstatus5, AR_TxRSSICombined);
-	ts->ts_rssi_ctl0 = MS(ads->ds_txstatus0, AR_TxRSSIAnt00);
-	ts->ts_rssi_ctl1 = MS(ads->ds_txstatus0, AR_TxRSSIAnt01);
-	ts->ts_rssi_ctl2 = MS(ads->ds_txstatus0, AR_TxRSSIAnt02);
-	ts->ts_rssi_ext0 = MS(ads->ds_txstatus5, AR_TxRSSIAnt10);
-	ts->ts_rssi_ext1 = MS(ads->ds_txstatus5, AR_TxRSSIAnt11);
-	ts->ts_rssi_ext2 = MS(ads->ds_txstatus5, AR_TxRSSIAnt12);
+	status = ACCESS_ONCE(ads->ds_txstatus5);
+	ts->ts_rssi = MS(status, AR_TxRSSICombined);
+	ts->ts_rssi_ext0 = MS(status, AR_TxRSSIAnt10);
+	ts->ts_rssi_ext1 = MS(status, AR_TxRSSIAnt11);
+	ts->ts_rssi_ext2 = MS(status, AR_TxRSSIAnt12);
+
 	ts->evm0 = ads->AR_TxEVM0;
 	ts->evm1 = ads->AR_TxEVM1;
 	ts->evm2 = ads->AR_TxEVM2;
-	ts->ts_shortretry = MS(ads->ds_txstatus1, AR_RTSFailCnt);
-	ts->ts_longretry = MS(ads->ds_txstatus1, AR_DataFailCnt);
-	ts->ts_virtcol = MS(ads->ds_txstatus1, AR_VirtRetryCnt);
-	ts->tid = MS(ads->ds_txstatus9, AR_TxTid);
-	ts->ts_antenna = 0;
 
 	return 0;
 }