ath9k: rework tx queue selection and fix queue stopping/waking
The current ath9k tx queue handling code has a few problems that can
lead to locking issues, tx stalls due to stopped queues, and possibly
even DMA issues.
The main source of these problems is that in some places the tx queue
is selected via the skb queue mapping at a point where this mapping
may no longer be valid. One such case is data frames transmitted via
the CAB queue (for powersave buffered frames). This is made even worse
by looking up the WMM AC from the assigned tx queue, which is
undefined for the CAB queue.
This messes up the pending frame counting, which in turn causes queues
to be stopped but never woken again.
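
The failure mode can be sketched in a small standalone C program
(heavily simplified; the names below are illustrative and not the
driver's exact code): frames are counted against the AC taken from the
skb queue mapping, while the wake decision is keyed to the hardware tx
queue, and for the CAB queue that lookup is undefined, so the wake
check is never reached.

/* Standalone sketch of the old accounting drift; all names are
 * illustrative, not taken verbatim from the driver. */
#include <stdio.h>

#define MAX_QDEPTH 4
#define NUM_AC     4

static int pending[NUM_AC];     /* indexed by the skb queue mapping */
static int stopped[NUM_AC];

/* tx path: count (and possibly stop) the mac80211 queue derived from
 * the skb mapping, even if the frame actually goes out on the CAB
 * hardware queue */
static void tx_frame(int skb_q)
{
        if (++pending[skb_q] > MAX_QDEPTH)
                stopped[skb_q] = 1;
}

/* completion/wake path: the AC is derived from the hardware tx queue;
 * for the CAB queue that lookup has no defined result (-1 here), so
 * the wake check never runs and the queue stays stopped */
static void tx_complete(int hw_ac)
{
        if (hw_ac < 0)
                return;
        if (--pending[hw_ac] < MAX_QDEPTH)
                stopped[hw_ac] = 0;
}

int main(void)
{
        for (int i = 0; i < 10; i++) {
                tx_frame(0);     /* mapped to AC 0 ...        */
                tx_complete(-1); /* ... but completed via CAB */
        }
        printf("pending[0]=%d stopped[0]=%d\n", pending[0], stopped[0]);
        return 0;
}

After enough such frames the mac80211 queue is stopped and never
woken, which matches the tx stalls described above.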
To fix these issues, this patch removes an unnecessary abstraction
separating a driver-internal queue number from the skb queue number
(not to be confused with the hardware queue number).
It seems that this abstraction may have been necessary because of the
tx queue preinitialization from the initvals. This patch avoids
breakage here by pushing the software <-> hardware queue mapping into
the function that assigns the tx queues and by redefining the WMM AC
constants to match the queue numbers used by mac80211 (this also
affects ath9k_htc).
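
The numbering change can be sketched as follows (a minimal sketch: the
ATH_TXQ_AC_* values are assumed here to keep the old hardware subtype
order, and the exact definitions in ath9k.h may differ):

#include <stdio.h>

/* WMM AC constants redefined to equal mac80211's queue numbering,
 * so the skb queue mapping can be used directly as an AC index. */
enum {
        WME_AC_VO = 0,          /* skb_get_queue_mapping() == 0 */
        WME_AC_VI = 1,
        WME_AC_BE = 2,
        WME_AC_BK = 3,
        WME_NUM_AC = 4,
};

/* Assumed hardware queue subtype order (kept from the old values). */
enum {
        ATH_TXQ_AC_BE = 0,
        ATH_TXQ_AC_BK = 1,
        ATH_TXQ_AC_VI = 2,
        ATH_TXQ_AC_VO = 3,
};

/* Translation table used when the tx queues are assigned, so the
 * initvals-based preinitialization keeps seeing the old subtype
 * numbers. */
static const int subtype_txq_to_hwq[WME_NUM_AC] = {
        [WME_AC_BE] = ATH_TXQ_AC_BE,
        [WME_AC_BK] = ATH_TXQ_AC_BK,
        [WME_AC_VI] = ATH_TXQ_AC_VI,
        [WME_AC_VO] = ATH_TXQ_AC_VO,
};

int main(void)
{
        for (int ac = 0; ac < WME_NUM_AC; ac++)
                printf("mac80211 queue %d -> hw subtype %d\n",
                       ac, subtype_txq_to_hwq[ac]);
        return 0;
}

With this, sc->tx.txq_map[] can be indexed directly by the skb queue
mapping, and the AC is translated to the hardware subtype via a table
like the one above (as in the qi.tqi_subtype hunk below).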
To ensure consistent pending frame count tracking, these counters are
moved to the ath_txq struct and updated with the txq lock held, but
only when the tx queue selected by the skb queue map actually matches
the tx queue used by the driver for the frame.
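
The resulting accounting rule, as a simplified standalone sketch (the
real driver keeps the counter in struct ath_txq and holds
txq->axq_lock around the updates; the names below are illustrative):

#include <stdio.h>

#define MAX_QDEPTH 4
#define NUM_AC     4

struct txq {
        int pending_frames;
        int stopped;
};

static struct txq txqs[NUM_AC + 1];     /* per-AC queues + a CAB queue   */
static struct txq *txq_map[NUM_AC];     /* skb queue mapping -> tx queue */

/* tx path: only count the frame when the queue it is sent on is the
 * one the skb queue mapping points to */
static void tx_frame(struct txq *txq, int skb_q)
{
        if (txq == txq_map[skb_q] &&
            ++txq->pending_frames > MAX_QDEPTH)
                txq->stopped = 1;       /* stop mac80211 queue skb_q */
}

/* completion path: same guard, so increment and decrement always hit
 * the same counter */
static void tx_complete(struct txq *txq, int skb_q)
{
        if (txq != txq_map[skb_q])
                return;                 /* e.g. sent via the CAB queue */
        if (--txq->pending_frames < MAX_QDEPTH)
                txq->stopped = 0;       /* wake mac80211 queue skb_q */
}

int main(void)
{
        struct txq *cab = &txqs[NUM_AC];
        int i;

        for (i = 0; i < NUM_AC; i++)
                txq_map[i] = &txqs[i];

        tx_frame(cab, 0);               /* CAB frame: not counted */
        tx_frame(txq_map[0], 0);        /* normal frame: counted  */
        tx_complete(txq_map[0], 0);     /* and credited back      */
        printf("pending_frames=%d\n", txq_map[0]->pending_frames);
        return 0;
}

Frames sent on a queue other than the one mapped from the skb (such as
CAB-queued powersave frames) are simply left out of the count, so the
counter can no longer drift and the wake check always sees a
consistent value.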
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Reported-by: Björn Smedman <bjorn.smedman@venatech.se>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index 2bc422e..6380bbd 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -124,7 +124,7 @@
static void ath_tx_resume_tid(struct ath_softc *sc, struct ath_atx_tid *tid)
{
- struct ath_txq *txq = &sc->tx.txq[tid->ac->qnum];
+ struct ath_txq *txq = tid->ac->txq;
WARN_ON(!tid->paused);
@@ -142,7 +142,7 @@
static void ath_tx_flush_tid(struct ath_softc *sc, struct ath_atx_tid *tid)
{
- struct ath_txq *txq = &sc->tx.txq[tid->ac->qnum];
+ struct ath_txq *txq = tid->ac->txq;
struct ath_buf *bf;
struct list_head bf_head;
struct ath_tx_status ts;
@@ -817,7 +817,7 @@
{
struct ath_node *an = (struct ath_node *)sta->drv_priv;
struct ath_atx_tid *txtid = ATH_AN_2_TID(an, tid);
- struct ath_txq *txq = &sc->tx.txq[txtid->ac->qnum];
+ struct ath_txq *txq = txtid->ac->txq;
if (txtid->state & AGGR_CLEANUP)
return;
@@ -888,10 +888,16 @@
struct ath_hw *ah = sc->sc_ah;
struct ath_common *common = ath9k_hw_common(ah);
struct ath9k_tx_queue_info qi;
+ static const int subtype_txq_to_hwq[] = {
+ [WME_AC_BE] = ATH_TXQ_AC_BE,
+ [WME_AC_BK] = ATH_TXQ_AC_BK,
+ [WME_AC_VI] = ATH_TXQ_AC_VI,
+ [WME_AC_VO] = ATH_TXQ_AC_VO,
+ };
int qnum, i;
memset(&qi, 0, sizeof(qi));
- qi.tqi_subtype = subtype;
+ qi.tqi_subtype = subtype_txq_to_hwq[subtype];
qi.tqi_aifs = ATH9K_TXQ_USEDEFAULT;
qi.tqi_cwmin = ATH9K_TXQ_USEDEFAULT;
qi.tqi_cwmax = ATH9K_TXQ_USEDEFAULT;
@@ -940,7 +946,6 @@
if (!ATH_TXQ_SETUP(sc, qnum)) {
struct ath_txq *txq = &sc->tx.txq[qnum];
- txq->axq_class = subtype;
txq->axq_qnum = qnum;
txq->axq_link = NULL;
INIT_LIST_HEAD(&txq->axq_q);
@@ -1210,24 +1215,6 @@
}
}
-int ath_tx_setup(struct ath_softc *sc, int haltype)
-{
- struct ath_txq *txq;
-
- if (haltype >= ARRAY_SIZE(sc->tx.hwq_map)) {
- ath_print(ath9k_hw_common(sc->sc_ah), ATH_DBG_FATAL,
- "HAL AC %u out of range, max %zu!\n",
- haltype, ARRAY_SIZE(sc->tx.hwq_map));
- return 0;
- }
- txq = ath_txq_setup(sc, ATH9K_TX_QUEUE_DATA, haltype);
- if (txq != NULL) {
- sc->tx.hwq_map[haltype] = txq->axq_qnum;
- return 1;
- } else
- return 0;
-}
-
/***********/
/* TX, DMA */
/***********/
@@ -1708,6 +1695,7 @@
goto tx_done;
}
+ WARN_ON(tid->ac->txq != txctl->txq);
if (tx_info->flags & IEEE80211_TX_CTL_AMPDU) {
/*
* Try aggregation if it's a unicast data frame
@@ -1747,6 +1735,7 @@
return -1;
}
+ q = skb_get_queue_mapping(skb);
r = ath_tx_setup_buffer(hw, bf, skb, txctl);
if (unlikely(r)) {
ath_print(common, ATH_DBG_FATAL, "TX mem alloc failure\n");
@@ -1756,8 +1745,9 @@
* we will at least have to run TX completionon one buffer
* on the queue */
spin_lock_bh(&txq->axq_lock);
- if (!txq->stopped && txq->axq_depth > 1) {
- ath_mac80211_stop_queue(sc, skb_get_queue_mapping(skb));
+ if (txq == sc->tx.txq_map[q] && !txq->stopped &&
+ txq->axq_depth > 1) {
+ ath_mac80211_stop_queue(sc, q);
txq->stopped = 1;
}
spin_unlock_bh(&txq->axq_lock);
@@ -1767,13 +1757,10 @@
return r;
}
- q = skb_get_queue_mapping(skb);
- if (q >= 4)
- q = 0;
-
spin_lock_bh(&txq->axq_lock);
- if (++sc->tx.pending_frames[q] > ATH_MAX_QDEPTH && !txq->stopped) {
- ath_mac80211_stop_queue(sc, skb_get_queue_mapping(skb));
+ if (txq == sc->tx.txq_map[q] &&
+ ++txq->pending_frames > ATH_MAX_QDEPTH && !txq->stopped) {
+ ath_mac80211_stop_queue(sc, q);
txq->stopped = 1;
}
spin_unlock_bh(&txq->axq_lock);
@@ -1841,7 +1828,8 @@
/*****************/
static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb,
- struct ath_wiphy *aphy, int tx_flags)
+ struct ath_wiphy *aphy, int tx_flags,
+ struct ath_txq *txq)
{
struct ieee80211_hw *hw = sc->hw;
struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
@@ -1888,11 +1876,12 @@
ath9k_tx_status(hw, skb);
else {
q = skb_get_queue_mapping(skb);
- if (q >= 4)
- q = 0;
-
- if (--sc->tx.pending_frames[q] < 0)
- sc->tx.pending_frames[q] = 0;
+ if (txq == sc->tx.txq_map[q]) {
+ spin_lock_bh(&txq->axq_lock);
+ if (WARN_ON(--txq->pending_frames < 0))
+ txq->pending_frames = 0;
+ spin_unlock_bh(&txq->axq_lock);
+ }
ieee80211_tx_status(hw, skb);
}
@@ -1927,8 +1916,8 @@
else
complete(&sc->paprd_complete);
} else {
- ath_debug_stat_tx(sc, txq, bf, ts);
- ath_tx_complete(sc, skb, bf->aphy, tx_flags);
+ ath_debug_stat_tx(sc, bf, ts);
+ ath_tx_complete(sc, skb, bf->aphy, tx_flags, txq);
}
/* At this point, skb (bf->bf_mpdu) is consumed...make sure we don't
* accidentally reference it later.
@@ -2018,16 +2007,13 @@
tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1;
}
-static void ath_wake_mac80211_queue(struct ath_softc *sc, struct ath_txq *txq)
+static void ath_wake_mac80211_queue(struct ath_softc *sc, int qnum)
{
- int qnum;
+ struct ath_txq *txq;
- qnum = ath_get_mac80211_qnum(txq->axq_class, sc);
- if (qnum == -1)
- return;
-
+ txq = sc->tx.txq_map[qnum];
spin_lock_bh(&txq->axq_lock);
- if (txq->stopped && sc->tx.pending_frames[qnum] < ATH_MAX_QDEPTH) {
+ if (txq->stopped && txq->pending_frames < ATH_MAX_QDEPTH) {
if (ath_mac80211_start_queue(sc, qnum))
txq->stopped = 0;
}
@@ -2044,6 +2030,7 @@
struct ath_tx_status ts;
int txok;
int status;
+ int qnum;
ath_print(common, ATH_DBG_QUEUE, "tx queue %d (%x), link %p\n",
txq->axq_qnum, ath9k_hw_gettxbuf(sc->sc_ah, txq->axq_qnum),
@@ -2119,12 +2106,15 @@
ath_tx_rc_status(bf, &ts, txok ? 0 : 1, txok, true);
}
+ qnum = skb_get_queue_mapping(bf->bf_mpdu);
+
if (bf_isampdu(bf))
ath_tx_complete_aggr(sc, txq, bf, &bf_head, &ts, txok);
else
ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, txok, 0);
- ath_wake_mac80211_queue(sc, txq);
+ if (txq == sc->tx.txq_map[qnum])
+ ath_wake_mac80211_queue(sc, qnum);
spin_lock_bh(&txq->axq_lock);
if (sc->sc_flags & SC_OP_TXAGGR)
@@ -2194,6 +2184,7 @@
struct list_head bf_head;
int status;
int txok;
+ int qnum;
for (;;) {
status = ath9k_hw_txprocdesc(ah, NULL, (void *)&txs);
@@ -2237,13 +2228,16 @@
ath_tx_rc_status(bf, &txs, txok ? 0 : 1, txok, true);
}
+ qnum = skb_get_queue_mapping(bf->bf_mpdu);
+
if (bf_isampdu(bf))
ath_tx_complete_aggr(sc, txq, bf, &bf_head, &txs, txok);
else
ath_tx_complete_buf(sc, bf, txq, &bf_head,
&txs, txok, 0);
- ath_wake_mac80211_queue(sc, txq);
+ if (txq == sc->tx.txq_map[qnum])
+ ath_wake_mac80211_queue(sc, qnum);
spin_lock_bh(&txq->axq_lock);
if (!list_empty(&txq->txq_fifo_pending)) {
@@ -2375,7 +2369,7 @@
for (acno = 0, ac = &an->ac[acno];
acno < WME_NUM_AC; acno++, ac++) {
ac->sched = false;
- ac->qnum = sc->tx.hwq_map[acno];
+ ac->txq = sc->tx.txq_map[acno];
INIT_LIST_HEAD(&ac->tid_q);
}
}
@@ -2385,17 +2379,13 @@
struct ath_atx_ac *ac;
struct ath_atx_tid *tid;
struct ath_txq *txq;
- int i, tidno;
+ int tidno;
for (tidno = 0, tid = &an->tid[tidno];
tidno < WME_NUM_TID; tidno++, tid++) {
- i = tid->ac->qnum;
- if (!ATH_TXQ_SETUP(sc, i))
- continue;
-
- txq = &sc->tx.txq[i];
ac = tid->ac;
+ txq = ac->txq;
spin_lock_bh(&txq->axq_lock);