ath9k_hw: optimize tx status descriptor processing
Disassembly shows, that at least on MIPS, the compiler generates a lot of
memory accesses to the same location in the descriptor field parsing.
Since it is operating on uncached memory, this can be quite expensive in
this hot path.
Change the code a bit to help the compiler optimize it properly, and get
rid of some unused fields in the ath_tx_status struct.
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
diff --git a/drivers/net/wireless/ath/ath9k/ar9002_mac.c b/drivers/net/wireless/ath/ath9k/ar9002_mac.c
index f5ed73d..3b4c52c 100644
--- a/drivers/net/wireless/ath/ath9k/ar9002_mac.c
+++ b/drivers/net/wireless/ath/ath9k/ar9002_mac.c
@@ -208,77 +208,68 @@
struct ath_tx_status *ts)
{
struct ar5416_desc *ads = AR5416DESC(ds);
+ u32 status;
- if ((ads->ds_txstatus9 & AR_TxDone) == 0)
+ status = ACCESS_ONCE(ads->ds_txstatus9);
+ if ((status & AR_TxDone) == 0)
return -EINPROGRESS;
- ts->ts_seqnum = MS(ads->ds_txstatus9, AR_SeqNum);
ts->ts_tstamp = ads->AR_SendTimestamp;
ts->ts_status = 0;
ts->ts_flags = 0;
- if (ads->ds_txstatus1 & AR_FrmXmitOK)
- ts->ts_status |= ATH9K_TX_ACKED;
- if (ads->ds_txstatus1 & AR_ExcessiveRetries)
- ts->ts_status |= ATH9K_TXERR_XRETRY;
- if (ads->ds_txstatus1 & AR_Filtered)
- ts->ts_status |= ATH9K_TXERR_FILT;
- if (ads->ds_txstatus1 & AR_FIFOUnderrun) {
- ts->ts_status |= ATH9K_TXERR_FIFO;
- ath9k_hw_updatetxtriglevel(ah, true);
- }
- if (ads->ds_txstatus9 & AR_TxOpExceeded)
+ if (status & AR_TxOpExceeded)
ts->ts_status |= ATH9K_TXERR_XTXOP;
- if (ads->ds_txstatus1 & AR_TxTimerExpired)
- ts->ts_status |= ATH9K_TXERR_TIMER_EXPIRED;
+ ts->tid = MS(status, AR_TxTid);
+ ts->ts_rateindex = MS(status, AR_FinalTxIdx);
+ ts->ts_seqnum = MS(status, AR_SeqNum);
- if (ads->ds_txstatus1 & AR_DescCfgErr)
- ts->ts_flags |= ATH9K_TX_DESC_CFG_ERR;
- if (ads->ds_txstatus1 & AR_TxDataUnderrun) {
- ts->ts_flags |= ATH9K_TX_DATA_UNDERRUN;
- ath9k_hw_updatetxtriglevel(ah, true);
- }
- if (ads->ds_txstatus1 & AR_TxDelimUnderrun) {
- ts->ts_flags |= ATH9K_TX_DELIM_UNDERRUN;
- ath9k_hw_updatetxtriglevel(ah, true);
- }
- if (ads->ds_txstatus0 & AR_TxBaStatus) {
+ status = ACCESS_ONCE(ads->ds_txstatus0);
+ ts->ts_rssi_ctl0 = MS(status, AR_TxRSSIAnt00);
+ ts->ts_rssi_ctl1 = MS(status, AR_TxRSSIAnt01);
+ ts->ts_rssi_ctl2 = MS(status, AR_TxRSSIAnt02);
+ if (status & AR_TxBaStatus) {
ts->ts_flags |= ATH9K_TX_BA;
ts->ba_low = ads->AR_BaBitmapLow;
ts->ba_high = ads->AR_BaBitmapHigh;
}
- ts->ts_rateindex = MS(ads->ds_txstatus9, AR_FinalTxIdx);
- switch (ts->ts_rateindex) {
- case 0:
- ts->ts_ratecode = MS(ads->ds_ctl3, AR_XmitRate0);
- break;
- case 1:
- ts->ts_ratecode = MS(ads->ds_ctl3, AR_XmitRate1);
- break;
- case 2:
- ts->ts_ratecode = MS(ads->ds_ctl3, AR_XmitRate2);
- break;
- case 3:
- ts->ts_ratecode = MS(ads->ds_ctl3, AR_XmitRate3);
- break;
+ status = ACCESS_ONCE(ads->ds_txstatus1);
+ if (status & AR_FrmXmitOK)
+ ts->ts_status |= ATH9K_TX_ACKED;
+ if (status & AR_ExcessiveRetries)
+ ts->ts_status |= ATH9K_TXERR_XRETRY;
+ if (status & AR_Filtered)
+ ts->ts_status |= ATH9K_TXERR_FILT;
+ if (status & AR_FIFOUnderrun) {
+ ts->ts_status |= ATH9K_TXERR_FIFO;
+ ath9k_hw_updatetxtriglevel(ah, true);
}
+ if (status & AR_TxTimerExpired)
+ ts->ts_status |= ATH9K_TXERR_TIMER_EXPIRED;
+ if (status & AR_DescCfgErr)
+ ts->ts_flags |= ATH9K_TX_DESC_CFG_ERR;
+ if (status & AR_TxDataUnderrun) {
+ ts->ts_flags |= ATH9K_TX_DATA_UNDERRUN;
+ ath9k_hw_updatetxtriglevel(ah, true);
+ }
+ if (status & AR_TxDelimUnderrun) {
+ ts->ts_flags |= ATH9K_TX_DELIM_UNDERRUN;
+ ath9k_hw_updatetxtriglevel(ah, true);
+ }
+ ts->ts_shortretry = MS(status, AR_RTSFailCnt);
+ ts->ts_longretry = MS(status, AR_DataFailCnt);
+ ts->ts_virtcol = MS(status, AR_VirtRetryCnt);
- ts->ts_rssi = MS(ads->ds_txstatus5, AR_TxRSSICombined);
- ts->ts_rssi_ctl0 = MS(ads->ds_txstatus0, AR_TxRSSIAnt00);
- ts->ts_rssi_ctl1 = MS(ads->ds_txstatus0, AR_TxRSSIAnt01);
- ts->ts_rssi_ctl2 = MS(ads->ds_txstatus0, AR_TxRSSIAnt02);
- ts->ts_rssi_ext0 = MS(ads->ds_txstatus5, AR_TxRSSIAnt10);
- ts->ts_rssi_ext1 = MS(ads->ds_txstatus5, AR_TxRSSIAnt11);
- ts->ts_rssi_ext2 = MS(ads->ds_txstatus5, AR_TxRSSIAnt12);
+ status = ACCESS_ONCE(ads->ds_txstatus5);
+ ts->ts_rssi = MS(status, AR_TxRSSICombined);
+ ts->ts_rssi_ext0 = MS(status, AR_TxRSSIAnt10);
+ ts->ts_rssi_ext1 = MS(status, AR_TxRSSIAnt11);
+ ts->ts_rssi_ext2 = MS(status, AR_TxRSSIAnt12);
+
ts->evm0 = ads->AR_TxEVM0;
ts->evm1 = ads->AR_TxEVM1;
ts->evm2 = ads->AR_TxEVM2;
- ts->ts_shortretry = MS(ads->ds_txstatus1, AR_RTSFailCnt);
- ts->ts_longretry = MS(ads->ds_txstatus1, AR_DataFailCnt);
- ts->ts_virtcol = MS(ads->ds_txstatus1, AR_VirtRetryCnt);
- ts->tid = MS(ads->ds_txstatus9, AR_TxTid);
- ts->ts_antenna = 0;
return 0;
}
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_mac.c b/drivers/net/wireless/ath/ath9k/ar9003_mac.c
index 3b424ca..10c812e 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_mac.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_mac.c
@@ -237,10 +237,12 @@
struct ath_tx_status *ts)
{
struct ar9003_txs *ads;
+ u32 status;
ads = &ah->ts_ring[ah->ts_tail];
- if ((ads->status8 & AR_TxDone) == 0)
+ status = ACCESS_ONCE(ads->status8);
+ if ((status & AR_TxDone) == 0)
return -EINPROGRESS;
ah->ts_tail = (ah->ts_tail + 1) % ah->ts_size;
@@ -253,57 +255,58 @@
return -EIO;
}
+ if (status & AR_TxOpExceeded)
+ ts->ts_status |= ATH9K_TXERR_XTXOP;
+ ts->ts_rateindex = MS(status, AR_FinalTxIdx);
+ ts->ts_seqnum = MS(status, AR_SeqNum);
+ ts->tid = MS(status, AR_TxTid);
+
ts->qid = MS(ads->ds_info, AR_TxQcuNum);
ts->desc_id = MS(ads->status1, AR_TxDescId);
- ts->ts_seqnum = MS(ads->status8, AR_SeqNum);
ts->ts_tstamp = ads->status4;
ts->ts_status = 0;
ts->ts_flags = 0;
- if (ads->status3 & AR_ExcessiveRetries)
- ts->ts_status |= ATH9K_TXERR_XRETRY;
- if (ads->status3 & AR_Filtered)
- ts->ts_status |= ATH9K_TXERR_FILT;
- if (ads->status3 & AR_FIFOUnderrun) {
- ts->ts_status |= ATH9K_TXERR_FIFO;
- ath9k_hw_updatetxtriglevel(ah, true);
- }
- if (ads->status8 & AR_TxOpExceeded)
- ts->ts_status |= ATH9K_TXERR_XTXOP;
- if (ads->status3 & AR_TxTimerExpired)
- ts->ts_status |= ATH9K_TXERR_TIMER_EXPIRED;
-
- if (ads->status3 & AR_DescCfgErr)
- ts->ts_flags |= ATH9K_TX_DESC_CFG_ERR;
- if (ads->status3 & AR_TxDataUnderrun) {
- ts->ts_flags |= ATH9K_TX_DATA_UNDERRUN;
- ath9k_hw_updatetxtriglevel(ah, true);
- }
- if (ads->status3 & AR_TxDelimUnderrun) {
- ts->ts_flags |= ATH9K_TX_DELIM_UNDERRUN;
- ath9k_hw_updatetxtriglevel(ah, true);
- }
- if (ads->status2 & AR_TxBaStatus) {
+ status = ACCESS_ONCE(ads->status2);
+ ts->ts_rssi_ctl0 = MS(status, AR_TxRSSIAnt00);
+ ts->ts_rssi_ctl1 = MS(status, AR_TxRSSIAnt01);
+ ts->ts_rssi_ctl2 = MS(status, AR_TxRSSIAnt02);
+ if (status & AR_TxBaStatus) {
ts->ts_flags |= ATH9K_TX_BA;
ts->ba_low = ads->status5;
ts->ba_high = ads->status6;
}
- ts->ts_rateindex = MS(ads->status8, AR_FinalTxIdx);
+ status = ACCESS_ONCE(ads->status3);
+ if (status & AR_ExcessiveRetries)
+ ts->ts_status |= ATH9K_TXERR_XRETRY;
+ if (status & AR_Filtered)
+ ts->ts_status |= ATH9K_TXERR_FILT;
+ if (status & AR_FIFOUnderrun) {
+ ts->ts_status |= ATH9K_TXERR_FIFO;
+ ath9k_hw_updatetxtriglevel(ah, true);
+ }
+ if (status & AR_TxTimerExpired)
+ ts->ts_status |= ATH9K_TXERR_TIMER_EXPIRED;
+ if (status & AR_DescCfgErr)
+ ts->ts_flags |= ATH9K_TX_DESC_CFG_ERR;
+ if (status & AR_TxDataUnderrun) {
+ ts->ts_flags |= ATH9K_TX_DATA_UNDERRUN;
+ ath9k_hw_updatetxtriglevel(ah, true);
+ }
+ if (status & AR_TxDelimUnderrun) {
+ ts->ts_flags |= ATH9K_TX_DELIM_UNDERRUN;
+ ath9k_hw_updatetxtriglevel(ah, true);
+ }
+ ts->ts_shortretry = MS(status, AR_RTSFailCnt);
+ ts->ts_longretry = MS(status, AR_DataFailCnt);
+ ts->ts_virtcol = MS(status, AR_VirtRetryCnt);
- ts->ts_rssi = MS(ads->status7, AR_TxRSSICombined);
- ts->ts_rssi_ctl0 = MS(ads->status2, AR_TxRSSIAnt00);
- ts->ts_rssi_ctl1 = MS(ads->status2, AR_TxRSSIAnt01);
- ts->ts_rssi_ctl2 = MS(ads->status2, AR_TxRSSIAnt02);
- ts->ts_rssi_ext0 = MS(ads->status7, AR_TxRSSIAnt10);
- ts->ts_rssi_ext1 = MS(ads->status7, AR_TxRSSIAnt11);
- ts->ts_rssi_ext2 = MS(ads->status7, AR_TxRSSIAnt12);
- ts->ts_shortretry = MS(ads->status3, AR_RTSFailCnt);
- ts->ts_longretry = MS(ads->status3, AR_DataFailCnt);
- ts->ts_virtcol = MS(ads->status3, AR_VirtRetryCnt);
- ts->ts_antenna = 0;
-
- ts->tid = MS(ads->status8, AR_TxTid);
+ status = ACCESS_ONCE(ads->status7);
+ ts->ts_rssi = MS(status, AR_TxRSSICombined);
+ ts->ts_rssi_ext0 = MS(status, AR_TxRSSIAnt10);
+ ts->ts_rssi_ext1 = MS(status, AR_TxRSSIAnt11);
+ ts->ts_rssi_ext2 = MS(status, AR_TxRSSIAnt12);
memset(ads, 0, sizeof(*ads));
diff --git a/drivers/net/wireless/ath/ath9k/mac.h b/drivers/net/wireless/ath/ath9k/mac.h
index 538c676..fdc2507 100644
--- a/drivers/net/wireless/ath/ath9k/mac.h
+++ b/drivers/net/wireless/ath/ath9k/mac.h
@@ -104,13 +104,11 @@
u32 ts_tstamp;
u16 ts_seqnum;
u8 ts_status;
- u8 ts_ratecode;
u8 ts_rateindex;
int8_t ts_rssi;
u8 ts_shortretry;
u8 ts_longretry;
u8 ts_virtcol;
- u8 ts_antenna;
u8 ts_flags;
int8_t ts_rssi_ctl0;
int8_t ts_rssi_ctl1;
@@ -121,7 +119,6 @@
u8 qid;
u16 desc_id;
u8 tid;
- u8 pad[2];
u32 ba_low;
u32 ba_high;
u32 evm0;