ath9k_hw: optimize tx status descriptor processing
Disassembly shows, that at least on MIPS, the compiler generates a lot of
memory accesses to the same location in the descriptor field parsing.
Since it is operating on uncached memory, this can be quite expensive in
this hot path.
Change the code a bit to help the compiler optimize it properly, and get
rid of some unused fields in the ath_tx_status struct.
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
diff --git a/drivers/net/wireless/ath/ath9k/ar9002_mac.c b/drivers/net/wireless/ath/ath9k/ar9002_mac.c
index f5ed73d..3b4c52c 100644
--- a/drivers/net/wireless/ath/ath9k/ar9002_mac.c
+++ b/drivers/net/wireless/ath/ath9k/ar9002_mac.c
@@ -208,77 +208,68 @@
struct ath_tx_status *ts)
{
struct ar5416_desc *ads = AR5416DESC(ds);
+ u32 status;
- if ((ads->ds_txstatus9 & AR_TxDone) == 0)
+ status = ACCESS_ONCE(ads->ds_txstatus9);
+ if ((status & AR_TxDone) == 0)
return -EINPROGRESS;
- ts->ts_seqnum = MS(ads->ds_txstatus9, AR_SeqNum);
ts->ts_tstamp = ads->AR_SendTimestamp;
ts->ts_status = 0;
ts->ts_flags = 0;
- if (ads->ds_txstatus1 & AR_FrmXmitOK)
- ts->ts_status |= ATH9K_TX_ACKED;
- if (ads->ds_txstatus1 & AR_ExcessiveRetries)
- ts->ts_status |= ATH9K_TXERR_XRETRY;
- if (ads->ds_txstatus1 & AR_Filtered)
- ts->ts_status |= ATH9K_TXERR_FILT;
- if (ads->ds_txstatus1 & AR_FIFOUnderrun) {
- ts->ts_status |= ATH9K_TXERR_FIFO;
- ath9k_hw_updatetxtriglevel(ah, true);
- }
- if (ads->ds_txstatus9 & AR_TxOpExceeded)
+ if (status & AR_TxOpExceeded)
ts->ts_status |= ATH9K_TXERR_XTXOP;
- if (ads->ds_txstatus1 & AR_TxTimerExpired)
- ts->ts_status |= ATH9K_TXERR_TIMER_EXPIRED;
+ ts->tid = MS(status, AR_TxTid);
+ ts->ts_rateindex = MS(status, AR_FinalTxIdx);
+ ts->ts_seqnum = MS(status, AR_SeqNum);
- if (ads->ds_txstatus1 & AR_DescCfgErr)
- ts->ts_flags |= ATH9K_TX_DESC_CFG_ERR;
- if (ads->ds_txstatus1 & AR_TxDataUnderrun) {
- ts->ts_flags |= ATH9K_TX_DATA_UNDERRUN;
- ath9k_hw_updatetxtriglevel(ah, true);
- }
- if (ads->ds_txstatus1 & AR_TxDelimUnderrun) {
- ts->ts_flags |= ATH9K_TX_DELIM_UNDERRUN;
- ath9k_hw_updatetxtriglevel(ah, true);
- }
- if (ads->ds_txstatus0 & AR_TxBaStatus) {
+ status = ACCESS_ONCE(ads->ds_txstatus0);
+ ts->ts_rssi_ctl0 = MS(status, AR_TxRSSIAnt00);
+ ts->ts_rssi_ctl1 = MS(status, AR_TxRSSIAnt01);
+ ts->ts_rssi_ctl2 = MS(status, AR_TxRSSIAnt02);
+ if (status & AR_TxBaStatus) {
ts->ts_flags |= ATH9K_TX_BA;
ts->ba_low = ads->AR_BaBitmapLow;
ts->ba_high = ads->AR_BaBitmapHigh;
}
- ts->ts_rateindex = MS(ads->ds_txstatus9, AR_FinalTxIdx);
- switch (ts->ts_rateindex) {
- case 0:
- ts->ts_ratecode = MS(ads->ds_ctl3, AR_XmitRate0);
- break;
- case 1:
- ts->ts_ratecode = MS(ads->ds_ctl3, AR_XmitRate1);
- break;
- case 2:
- ts->ts_ratecode = MS(ads->ds_ctl3, AR_XmitRate2);
- break;
- case 3:
- ts->ts_ratecode = MS(ads->ds_ctl3, AR_XmitRate3);
- break;
+ status = ACCESS_ONCE(ads->ds_txstatus1);
+ if (status & AR_FrmXmitOK)
+ ts->ts_status |= ATH9K_TX_ACKED;
+ if (status & AR_ExcessiveRetries)
+ ts->ts_status |= ATH9K_TXERR_XRETRY;
+ if (status & AR_Filtered)
+ ts->ts_status |= ATH9K_TXERR_FILT;
+ if (status & AR_FIFOUnderrun) {
+ ts->ts_status |= ATH9K_TXERR_FIFO;
+ ath9k_hw_updatetxtriglevel(ah, true);
}
+ if (status & AR_TxTimerExpired)
+ ts->ts_status |= ATH9K_TXERR_TIMER_EXPIRED;
+ if (status & AR_DescCfgErr)
+ ts->ts_flags |= ATH9K_TX_DESC_CFG_ERR;
+ if (status & AR_TxDataUnderrun) {
+ ts->ts_flags |= ATH9K_TX_DATA_UNDERRUN;
+ ath9k_hw_updatetxtriglevel(ah, true);
+ }
+ if (status & AR_TxDelimUnderrun) {
+ ts->ts_flags |= ATH9K_TX_DELIM_UNDERRUN;
+ ath9k_hw_updatetxtriglevel(ah, true);
+ }
+ ts->ts_shortretry = MS(status, AR_RTSFailCnt);
+ ts->ts_longretry = MS(status, AR_DataFailCnt);
+ ts->ts_virtcol = MS(status, AR_VirtRetryCnt);
- ts->ts_rssi = MS(ads->ds_txstatus5, AR_TxRSSICombined);
- ts->ts_rssi_ctl0 = MS(ads->ds_txstatus0, AR_TxRSSIAnt00);
- ts->ts_rssi_ctl1 = MS(ads->ds_txstatus0, AR_TxRSSIAnt01);
- ts->ts_rssi_ctl2 = MS(ads->ds_txstatus0, AR_TxRSSIAnt02);
- ts->ts_rssi_ext0 = MS(ads->ds_txstatus5, AR_TxRSSIAnt10);
- ts->ts_rssi_ext1 = MS(ads->ds_txstatus5, AR_TxRSSIAnt11);
- ts->ts_rssi_ext2 = MS(ads->ds_txstatus5, AR_TxRSSIAnt12);
+ status = ACCESS_ONCE(ads->ds_txstatus5);
+ ts->ts_rssi = MS(status, AR_TxRSSICombined);
+ ts->ts_rssi_ext0 = MS(status, AR_TxRSSIAnt10);
+ ts->ts_rssi_ext1 = MS(status, AR_TxRSSIAnt11);
+ ts->ts_rssi_ext2 = MS(status, AR_TxRSSIAnt12);
+
ts->evm0 = ads->AR_TxEVM0;
ts->evm1 = ads->AR_TxEVM1;
ts->evm2 = ads->AR_TxEVM2;
- ts->ts_shortretry = MS(ads->ds_txstatus1, AR_RTSFailCnt);
- ts->ts_longretry = MS(ads->ds_txstatus1, AR_DataFailCnt);
- ts->ts_virtcol = MS(ads->ds_txstatus1, AR_VirtRetryCnt);
- ts->tid = MS(ads->ds_txstatus9, AR_TxTid);
- ts->ts_antenna = 0;
return 0;
}