mac80211: fix aggregation for hardware with ampdu queues

Hardware with AMPDU queues currently has broken aggregation.

This patch fixes it by making all A-MPDUs go over the regular AC queues,
but keeping track of the hardware queues in mac80211. As a first rough
version, it actually stops the AC queue for extended periods of time,
which can be removed by adding buffering internal to mac80211, but is
currently not a huge problem because people rarely use multiple TIDs
that are in the same AC (and iwlwifi currently doesn't operate as AP).

This is a short-term fix, my current medium-term plan, which I hope to
execute soon as well, but am not sure can finish before .30, looks like
this:
 1) rework the internal queuing layer in mac80211 that we use for
    fragments if the driver stopped queue in the middle of a fragmented
    frame to be able to queue more frames at once (rather than just a
    single frame with its fragments)
 2) instead of stopping the entire AC queue, queue up the frames in a
    per-station/per-TID queue during aggregation session initiation,
    when the session has come up take all those frames and put them
    onto the queue from 1)
 3) push the ampdu queue layer abstraction this patch introduces in
    mac80211 into the driver, and remove the virtual queue stuff from
    mac80211 again

This plan will probably also affect ath9k in that mac80211 queues the
frames instead of passing them down, even when there are no ampdu queues.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 73c7d73..92ea177 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -344,15 +344,36 @@
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	/* we don't need to track ampdu queues */
-	if (queue < ieee80211_num_regular_queues(hw)) {
-		__clear_bit(reason, &local->queue_stop_reasons[queue]);
-
-		if (local->queue_stop_reasons[queue] != 0)
-			/* someone still has this queue stopped */
+	if (queue >= hw->queues) {
+		if (local->ampdu_ac_queue[queue - hw->queues] < 0)
 			return;
+
+		/*
+		 * for virtual aggregation queues, we need to refcount the
+		 * internal mac80211 disable (multiple times!), keep track of
+		 * driver disable _and_ make sure the regular queue is
+		 * actually enabled.
+		 */
+		if (reason == IEEE80211_QUEUE_STOP_REASON_AGGREGATION)
+			local->amdpu_ac_stop_refcnt[queue - hw->queues]--;
+		else
+			__clear_bit(reason, &local->queue_stop_reasons[queue]);
+
+		if (local->queue_stop_reasons[queue] ||
+		    local->amdpu_ac_stop_refcnt[queue - hw->queues])
+			return;
+
+		/* now go on to treat the corresponding regular queue */
+		queue = local->ampdu_ac_queue[queue - hw->queues];
+		reason = IEEE80211_QUEUE_STOP_REASON_AGGREGATION;
 	}
 
+	__clear_bit(reason, &local->queue_stop_reasons[queue]);
+
+	if (local->queue_stop_reasons[queue] != 0)
+		/* someone still has this queue stopped */
+		return;
+
 	if (test_bit(queue, local->queues_pending)) {
 		set_bit(queue, local->queues_pending_run);
 		tasklet_schedule(&local->tx_pending_tasklet);
@@ -361,8 +382,8 @@
 	}
 }
 
-static void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
-					   enum queue_stop_reason reason)
+void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
+				    enum queue_stop_reason reason)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	unsigned long flags;
@@ -384,15 +405,33 @@
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	/* we don't need to track ampdu queues */
-	if (queue < ieee80211_num_regular_queues(hw))
-		__set_bit(reason, &local->queue_stop_reasons[queue]);
+	if (queue >= hw->queues) {
+		if (local->ampdu_ac_queue[queue - hw->queues] < 0)
+			return;
+
+		/*
+		 * for virtual aggregation queues, we need to refcount the
+		 * internal mac80211 disable (multiple times!), keep track of
+		 * driver disable _and_ make sure the regular queue is
+		 * actually enabled.
+		 */
+		if (reason == IEEE80211_QUEUE_STOP_REASON_AGGREGATION)
+			local->amdpu_ac_stop_refcnt[queue - hw->queues]++;
+		else
+			__set_bit(reason, &local->queue_stop_reasons[queue]);
+
+		/* now go on to treat the corresponding regular queue */
+		queue = local->ampdu_ac_queue[queue - hw->queues];
+		reason = IEEE80211_QUEUE_STOP_REASON_AGGREGATION;
+	}
+
+	__set_bit(reason, &local->queue_stop_reasons[queue]);
 
 	netif_stop_subqueue(local->mdev, queue);
 }
 
-static void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue,
-					   enum queue_stop_reason reason)
+void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue,
+				    enum queue_stop_reason reason)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	unsigned long flags;
@@ -418,7 +457,7 @@
 
 	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
 
-	for (i = 0; i < ieee80211_num_queues(hw); i++)
+	for (i = 0; i < hw->queues; i++)
 		__ieee80211_stop_queue(hw, i, reason);
 
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
@@ -434,6 +473,16 @@
 int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
+	unsigned long flags;
+
+	if (queue >= hw->queues) {
+		spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
+		queue = local->ampdu_ac_queue[queue - hw->queues];
+		spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+		if (queue < 0)
+			return true;
+	}
+
 	return __netif_subqueue_stopped(local->mdev, queue);
 }
 EXPORT_SYMBOL(ieee80211_queue_stopped);