netdev: Move queue_lock into struct netdev_queue.
The lock is now an attribute of the device queue.
One thing to notice is that "suspicious" places
emerge which will need specific attention once
multiple queues are handled. They are marked as
such with explicit "netdev->rx_queue" and
"netdev->tx_queue" references.
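A simplified before/after sketch of the structures involved (only the
members relevant to the lock move are shown; the real definitions carry
many more fields):

	/* Before: the lock was a member of struct net_device itself. */
	struct net_device {
		/* ... */
		spinlock_t		queue_lock;
		struct Qdisc		*qdisc;
		/* ... */
	};

	/* After: the lock lives in the per-queue structure. */
	struct netdev_queue {
		spinlock_t		lock;
		struct net_device	*dev;
	};

	/* Callers now reach the lock through the queue, e.g.: */
	struct netdev_queue *txq = &dev->tx_queue;

	spin_lock_bh(&txq->lock);
	/* ... serialized qdisc / device queue work ... */
	spin_unlock_bh(&txq->lock);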
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index af233b5..bc3de27 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -229,12 +229,12 @@
MODULE_PARM_DESC(numifbs, "Number of ifb devices");
/*
- * dev_ifb->queue_lock is usually taken after dev->ingress_lock,
+ * dev_ifb->tx_queue.lock is usually taken after dev->ingress_lock,
* reversely to e.g. qdisc_lock_tree(). It should be safe until
- * ifb doesn't take dev->queue_lock with dev_ifb->ingress_lock.
+ * ifb doesn't take dev->tx_queue.lock with dev_ifb->ingress_lock.
* But lockdep should know that ifb has different locks from dev.
*/
-static struct lock_class_key ifb_queue_lock_key;
+static struct lock_class_key ifb_tx_queue_lock_key;
static struct lock_class_key ifb_ingress_lock_key;
@@ -258,7 +258,7 @@
if (err < 0)
goto err;
- lockdep_set_class(&dev_ifb->queue_lock, &ifb_queue_lock_key);
+ lockdep_set_class(&dev_ifb->tx_queue.lock, &ifb_tx_queue_lock_key);
lockdep_set_class(&dev_ifb->ingress_lock, &ifb_ingress_lock_key);
return 0;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 515fd25..e835aca 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -449,6 +449,7 @@
#endif
struct netdev_queue {
+ spinlock_t lock;
struct net_device *dev;
};
@@ -629,7 +630,7 @@
unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */
struct netdev_queue rx_queue;
- struct netdev_queue tx_queue;
+ struct netdev_queue tx_queue ____cacheline_aligned_in_smp;
/* ingress path synchronizer */
spinlock_t ingress_lock;
@@ -639,7 +640,6 @@
* Cache line mostly used on queue transmit path (qdisc)
*/
/* device queue lock */
- spinlock_t queue_lock ____cacheline_aligned_in_smp;
struct Qdisc *qdisc;
struct Qdisc *qdisc_sleeping;
struct list_head qdisc_list;
diff --git a/net/core/dev.c b/net/core/dev.c
index 9b281c9..0501104 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1667,6 +1667,7 @@
int dev_queue_xmit(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
+ struct netdev_queue *txq;
struct Qdisc *q;
int rc = -ENOMEM;
@@ -1699,14 +1700,15 @@
}
gso:
- spin_lock_prefetch(&dev->queue_lock);
+ txq = &dev->tx_queue;
+ spin_lock_prefetch(&txq->lock);
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
rcu_read_lock_bh();
- /* Updates of qdisc are serialized by queue_lock.
+ /* Updates of qdisc are serialized by queue->lock.
* The struct Qdisc which is pointed to by qdisc is now a
* rcu structure - it may be accessed without acquiring
* a lock (but the structure may be stale.) The freeing of the
@@ -1714,7 +1716,7 @@
* more references to it.
*
* If the qdisc has an enqueue function, we still need to
- * hold the queue_lock before calling it, since queue_lock
+ * hold the queue->lock before calling it, since queue->lock
* also serializes access to the device queue.
*/
@@ -1724,19 +1726,19 @@
#endif
if (q->enqueue) {
/* Grab device queue */
- spin_lock(&dev->queue_lock);
+ spin_lock(&txq->lock);
q = dev->qdisc;
if (q->enqueue) {
/* reset queue_mapping to zero */
skb_set_queue_mapping(skb, 0);
rc = q->enqueue(skb, q);
qdisc_run(dev);
- spin_unlock(&dev->queue_lock);
+ spin_unlock(&txq->lock);
rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
goto out;
}
- spin_unlock(&dev->queue_lock);
+ spin_unlock(&txq->lock);
}
/* The device has no queue. Common case for software devices:
@@ -1919,14 +1921,17 @@
while (head) {
struct net_device *dev = head;
+ struct netdev_queue *txq;
head = head->next_sched;
+ txq = &dev->tx_queue;
+
smp_mb__before_clear_bit();
clear_bit(__LINK_STATE_SCHED, &dev->state);
- if (spin_trylock(&dev->queue_lock)) {
+ if (spin_trylock(&txq->lock)) {
qdisc_run(dev);
- spin_unlock(&dev->queue_lock);
+ spin_unlock(&txq->lock);
} else {
netif_schedule(dev);
}
@@ -3787,7 +3792,6 @@
BUG_ON(!dev_net(dev));
net = dev_net(dev);
- spin_lock_init(&dev->queue_lock);
spin_lock_init(&dev->_xmit_lock);
netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
dev->xmit_lock_owner = -1;
@@ -4072,10 +4076,17 @@
return &dev->stats;
}
+static void netdev_init_one_queue(struct net_device *dev,
+ struct netdev_queue *queue)
+{
+ spin_lock_init(&queue->lock);
+ queue->dev = dev;
+}
+
static void netdev_init_queues(struct net_device *dev)
{
- dev->rx_queue.dev = dev;
- dev->tx_queue.dev = dev;
+ netdev_init_one_queue(dev, &dev->rx_queue);
+ netdev_init_one_queue(dev, &dev->tx_queue);
}
/**
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index cf477ad..12aeaf7 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -636,7 +636,7 @@
/* ensure that TX flow won't interrupt us
* until the end of the call to requeue function */
- spin_lock_bh(&local->mdev->queue_lock);
+ spin_lock_bh(&local->mdev->tx_queue.lock);
/* create a new queue for this aggregation */
ret = ieee80211_ht_agg_queue_add(local, sta, tid);
@@ -675,7 +675,7 @@
/* Will put all the packets in the new SW queue */
ieee80211_requeue(local, ieee802_1d_to_ac[tid]);
- spin_unlock_bh(&local->mdev->queue_lock);
+ spin_unlock_bh(&local->mdev->tx_queue.lock);
spin_unlock_bh(&sta->lock);
/* send an addBA request */
@@ -701,7 +701,7 @@
err_unlock_queue:
kfree(sta->ampdu_mlme.tid_tx[tid]);
sta->ampdu_mlme.tid_tx[tid] = NULL;
- spin_unlock_bh(&local->mdev->queue_lock);
+ spin_unlock_bh(&local->mdev->tx_queue.lock);
ret = -EBUSY;
err_unlock_sta:
spin_unlock_bh(&sta->lock);
@@ -875,10 +875,10 @@
/* avoid ordering issues: we are the only one that can modify
* the content of the qdiscs */
- spin_lock_bh(&local->mdev->queue_lock);
+ spin_lock_bh(&local->mdev->tx_queue.lock);
/* remove the queue for this aggregation */
ieee80211_ht_agg_queue_remove(local, sta, tid, 1);
- spin_unlock_bh(&local->mdev->queue_lock);
+ spin_unlock_bh(&local->mdev->tx_queue.lock);
/* we just requeued the all the frames that were in the removed
* queue, and since we might miss a softirq we do netif_schedule.
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 2fbc171..59ed9ca 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -648,7 +648,7 @@
}
/**
- * the caller needs to hold local->mdev->queue_lock
+ * the caller needs to hold local->mdev->tx_queue.lock
*/
void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
struct sta_info *sta, u16 tid,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1f89308..2a1834f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -606,7 +606,7 @@
sch->stats_lock = &dev->ingress_lock;
handle = TC_H_MAKE(TC_H_INGRESS, 0);
} else {
- sch->stats_lock = &dev->queue_lock;
+ sch->stats_lock = &dev_queue->lock;
if (handle == 0) {
handle = qdisc_alloc_handle(dev);
err = -ENOMEM;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 9f2ace5..99ce3da 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1746,10 +1746,10 @@
#ifdef CONFIG_NET_CLS_ACT
struct cbq_sched_data *q = qdisc_priv(sch);
- spin_lock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_lock_bh(&sch->dev_queue->lock);
if (q->rx_class == cl)
q->rx_class = NULL;
- spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_unlock_bh(&sch->dev_queue->lock);
#endif
cbq_destroy_class(sch, cl);
@@ -1828,7 +1828,7 @@
if (tca[TCA_RATE])
gen_replace_estimator(&cl->bstats, &cl->rate_est,
- &qdisc_dev(sch)->queue_lock,
+ &sch->dev_queue->lock,
tca[TCA_RATE]);
return 0;
}
@@ -1919,7 +1919,7 @@
if (tca[TCA_RATE])
gen_new_estimator(&cl->bstats, &cl->rate_est,
- &qdisc_dev(sch)->queue_lock, tca[TCA_RATE]);
+ &sch->dev_queue->lock, tca[TCA_RATE]);
*arg = (unsigned long)cl;
return 0;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index b626a4f..ee8f9f7 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -29,31 +29,31 @@
/* Main transmission queue. */
/* Modifications to data participating in scheduling must be protected with
- * dev->queue_lock spinlock.
+ * queue->lock spinlock.
*
* The idea is the following:
* - enqueue, dequeue are serialized via top level device
- * spinlock dev->queue_lock.
+ * spinlock queue->lock.
* - ingress filtering is serialized via top level device
* spinlock dev->ingress_lock.
* - updates to tree and tree walking are only done under the rtnl mutex.
*/
void qdisc_lock_tree(struct net_device *dev)
- __acquires(dev->queue_lock)
+ __acquires(dev->tx_queue.lock)
__acquires(dev->ingress_lock)
{
- spin_lock_bh(&dev->queue_lock);
+ spin_lock_bh(&dev->tx_queue.lock);
spin_lock(&dev->ingress_lock);
}
EXPORT_SYMBOL(qdisc_lock_tree);
void qdisc_unlock_tree(struct net_device *dev)
__releases(dev->ingress_lock)
- __releases(dev->queue_lock)
+ __releases(dev->tx_queue.lock)
{
spin_unlock(&dev->ingress_lock);
- spin_unlock_bh(&dev->queue_lock);
+ spin_unlock_bh(&dev->tx_queue.lock);
}
EXPORT_SYMBOL(qdisc_unlock_tree);
@@ -118,15 +118,15 @@
}
/*
- * NOTE: Called under dev->queue_lock with locally disabled BH.
+ * NOTE: Called under queue->lock with locally disabled BH.
*
* __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this
- * device at a time. dev->queue_lock serializes queue accesses for
+ * device at a time. queue->lock serializes queue accesses for
* this device AND dev->qdisc pointer itself.
*
* netif_tx_lock serializes accesses to device driver.
*
- * dev->queue_lock and netif_tx_lock are mutually exclusive,
+ * queue->lock and netif_tx_lock are mutually exclusive,
* if one is grabbed, another must be free.
*
* Note, that this procedure can be called by a watchdog timer
@@ -148,14 +148,14 @@
/* And release queue */
- spin_unlock(&dev->queue_lock);
+ spin_unlock(&q->dev_queue->lock);
HARD_TX_LOCK(dev, smp_processor_id());
if (!netif_subqueue_stopped(dev, skb))
ret = dev_hard_start_xmit(skb, dev);
HARD_TX_UNLOCK(dev);
- spin_lock(&dev->queue_lock);
+ spin_lock(&q->dev_queue->lock);
q = dev->qdisc;
switch (ret) {
@@ -482,7 +482,7 @@
sch = qdisc_alloc(dev_queue, ops);
if (IS_ERR(sch))
goto errout;
- sch->stats_lock = &dev->queue_lock;
+ sch->stats_lock = &dev_queue->lock;
sch->parent = parentid;
if (!ops->init || ops->init(sch, NULL) == 0)
@@ -494,7 +494,7 @@
}
EXPORT_SYMBOL(qdisc_create_dflt);
-/* Under dev->queue_lock and BH! */
+/* Under queue->lock and BH! */
void qdisc_reset(struct Qdisc *qdisc)
{
@@ -514,7 +514,7 @@
kfree((char *) qdisc - qdisc->padded);
}
-/* Under dev->queue_lock and BH! */
+/* Under queue->lock and BH! */
void qdisc_destroy(struct Qdisc *qdisc)
{
@@ -566,13 +566,13 @@
/* Delay activation until next carrier-on event */
return;
- spin_lock_bh(&dev->queue_lock);
+ spin_lock_bh(&dev->tx_queue.lock);
rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping);
if (dev->qdisc != &noqueue_qdisc) {
dev->trans_start = jiffies;
dev_watchdog_up(dev);
}
- spin_unlock_bh(&dev->queue_lock);
+ spin_unlock_bh(&dev->tx_queue.lock);
}
void dev_deactivate(struct net_device *dev)
@@ -581,7 +581,7 @@
struct sk_buff *skb;
int running;
- spin_lock_bh(&dev->queue_lock);
+ spin_lock_bh(&dev->tx_queue.lock);
qdisc = dev->qdisc;
dev->qdisc = &noop_qdisc;
@@ -589,7 +589,7 @@
skb = dev->gso_skb;
dev->gso_skb = NULL;
- spin_unlock_bh(&dev->queue_lock);
+ spin_unlock_bh(&dev->tx_queue.lock);
kfree_skb(skb);
@@ -607,9 +607,9 @@
* Double-check inside queue lock to ensure that all effects
* of the queue run are visible when we return.
*/
- spin_lock_bh(&dev->queue_lock);
+ spin_lock_bh(&dev->tx_queue.lock);
running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
- spin_unlock_bh(&dev->queue_lock);
+ spin_unlock_bh(&dev->tx_queue.lock);
/*
* The running flag should never be set at this point because
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3335254..997d520 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1045,7 +1045,7 @@
if (tca[TCA_RATE])
gen_replace_estimator(&cl->bstats, &cl->rate_est,
- &qdisc_dev(sch)->queue_lock,
+ &sch->dev_queue->lock,
tca[TCA_RATE]);
return 0;
}
@@ -1104,7 +1104,7 @@
if (tca[TCA_RATE])
gen_new_estimator(&cl->bstats, &cl->rate_est,
- &qdisc_dev(sch)->queue_lock, tca[TCA_RATE]);
+ &sch->dev_queue->lock, tca[TCA_RATE]);
*arg = (unsigned long)cl;
return 0;
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 31f7d15..c8ca54c 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1043,7 +1043,7 @@
struct nlattr *nest;
struct tc_htb_glob gopt;
- spin_lock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_lock_bh(&sch->dev_queue->lock);
gopt.direct_pkts = q->direct_pkts;
gopt.version = HTB_VER;
@@ -1057,11 +1057,11 @@
NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
nla_nest_end(skb, nest);
- spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_unlock_bh(&sch->dev_queue->lock);
return skb->len;
nla_put_failure:
- spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_unlock_bh(&sch->dev_queue->lock);
nla_nest_cancel(skb, nest);
return -1;
}
@@ -1073,7 +1073,7 @@
struct nlattr *nest;
struct tc_htb_opt opt;
- spin_lock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_lock_bh(&sch->dev_queue->lock);
tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
tcm->tcm_handle = cl->common.classid;
if (!cl->level && cl->un.leaf.q)
@@ -1095,11 +1095,11 @@
NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
nla_nest_end(skb, nest);
- spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_unlock_bh(&sch->dev_queue->lock);
return skb->len;
nla_put_failure:
- spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_unlock_bh(&sch->dev_queue->lock);
nla_nest_cancel(skb, nest);
return -1;
}
@@ -1365,7 +1365,7 @@
goto failure;
gen_new_estimator(&cl->bstats, &cl->rate_est,
- &qdisc_dev(sch)->queue_lock,
+ &sch->dev_queue->lock,
tca[TCA_RATE] ? : &est.nla);
cl->refcnt = 1;
cl->children = 0;
@@ -1420,7 +1420,7 @@
} else {
if (tca[TCA_RATE])
gen_replace_estimator(&cl->bstats, &cl->rate_est,
- &qdisc_dev(sch)->queue_lock,
+ &sch->dev_queue->lock,
tca[TCA_RATE]);
sch_tree_lock(sch);
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 7905829..71b73c5 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -333,9 +333,9 @@
for (i = 0; i < n; i++)
d->table[i] = data[i];
- spin_lock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_lock_bh(&sch->dev_queue->lock);
d = xchg(&q->delay_dist, d);
- spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_unlock_bh(&sch->dev_queue->lock);
kfree(d);
return 0;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index b3fc826..4f3054e 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -154,9 +154,9 @@
master->slaves = NEXT_SLAVE(q);
if (q == master->slaves) {
master->slaves = NULL;
- spin_lock_bh(&master->dev->queue_lock);
+ spin_lock_bh(&master->dev->tx_queue.lock);
qdisc_reset(master->dev->qdisc);
- spin_unlock_bh(&master->dev->queue_lock);
+ spin_unlock_bh(&master->dev->tx_queue.lock);
}
}
skb_queue_purge(&dat->q);