blob: b8031aa5327262e10b0777e8de9efd2ed591ae76 [file] [log] [blame]
/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */
13
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/bitops.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070015#include <linux/module.h>
16#include <linux/types.h>
17#include <linux/kernel.h>
18#include <linux/sched.h>
19#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/netdevice.h>
22#include <linux/skbuff.h>
23#include <linux/rtnetlink.h>
24#include <linux/init.h>
25#include <linux/rcupdate.h>
26#include <linux/list.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090027#include <linux/slab.h>
nikolay@redhat.com07ce76a2013-08-03 22:07:47 +020028#include <linux/if_vlan.h>
Jiri Pirko292f1c72013-02-12 00:12:03 +000029#include <net/sch_generic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <net/pkt_sched.h>
Eric Dumazet7fee2262010-05-11 23:19:48 +000031#include <net/dst.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
stephen hemminger34aedd32013-08-31 10:15:33 -070033/* Qdisc to use by default */
34const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
35EXPORT_SYMBOL(default_qdisc_ops);
36
/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
David S. Miller37437bb2008-07-16 02:15:04 -070048static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
Jamal Hadi Salimc716a812007-06-10 17:31:24 -070049{
Jarek Poplawski62523522008-10-06 10:41:50 -070050 q->gso_skb = skb;
Jarek Poplawski53e91502008-10-08 11:36:22 -070051 q->qstats.requeues++;
WANG Conga27758f2016-06-03 15:05:57 -070052 qdisc_qstats_backlog_inc(q, skb);
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +000053 q->q.qlen++; /* it's still part of the queue */
David S. Miller37437bb2008-07-16 02:15:04 -070054 __netif_schedule(q);
Jarek Poplawski62523522008-10-06 10:41:50 -070055
Jamal Hadi Salimc716a812007-06-10 17:31:24 -070056 return 0;
57}
58
Eric Dumazet55a93b32014-10-03 15:31:07 -070059static void try_bulk_dequeue_skb(struct Qdisc *q,
60 struct sk_buff *skb,
Jesper Dangaard Brouerb8358d72014-10-09 12:18:10 +020061 const struct netdev_queue *txq,
62 int *packets)
Jesper Dangaard Brouer5772e9a2014-10-01 22:35:59 +020063{
Eric Dumazet55a93b32014-10-03 15:31:07 -070064 int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
Jesper Dangaard Brouer5772e9a2014-10-01 22:35:59 +020065
66 while (bytelimit > 0) {
Eric Dumazet55a93b32014-10-03 15:31:07 -070067 struct sk_buff *nskb = q->dequeue(q);
68
69 if (!nskb)
Jesper Dangaard Brouer5772e9a2014-10-01 22:35:59 +020070 break;
71
Eric Dumazet55a93b32014-10-03 15:31:07 -070072 bytelimit -= nskb->len; /* covers GSO len */
73 skb->next = nskb;
74 skb = nskb;
Jesper Dangaard Brouerb8358d72014-10-09 12:18:10 +020075 (*packets)++; /* GSO counts as one pkt */
Jesper Dangaard Brouer5772e9a2014-10-01 22:35:59 +020076 }
Eric Dumazet55a93b32014-10-03 15:31:07 -070077 skb->next = NULL;
Jesper Dangaard Brouer5772e9a2014-10-01 22:35:59 +020078}
79
Eric Dumazet4d202a02016-06-21 23:16:52 -070080/* This variant of try_bulk_dequeue_skb() makes sure
81 * all skbs in the chain are for the same txq
82 */
83static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
84 struct sk_buff *skb,
85 int *packets)
86{
87 int mapping = skb_get_queue_mapping(skb);
88 struct sk_buff *nskb;
89 int cnt = 0;
90
91 do {
92 nskb = q->dequeue(q);
93 if (!nskb)
94 break;
95 if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
96 q->skb_bad_txq = nskb;
97 qdisc_qstats_backlog_inc(q, nskb);
98 q->q.qlen++;
99 break;
100 }
101 skb->next = nskb;
102 skb = nskb;
103 } while (++cnt < 8);
104 (*packets) += cnt;
105 skb->next = NULL;
106}
107
Jesper Dangaard Brouer5772e9a2014-10-01 22:35:59 +0200108/* Note that dequeue_skb can possibly return a SKB list (via skb->next).
109 * A requeued skb (via q->gso_skb) can also be a SKB list.
110 */
Jesper Dangaard Brouerb8358d72014-10-09 12:18:10 +0200111static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
112 int *packets)
Jamal Hadi Salimc716a812007-06-10 17:31:24 -0700113{
Jarek Poplawski554794d2008-10-06 09:54:39 -0700114 struct sk_buff *skb = q->gso_skb;
Eric Dumazet1abbe132012-12-11 15:54:33 +0000115 const struct netdev_queue *txq = q->dev_queue;
Jarek Poplawski554794d2008-10-06 09:54:39 -0700116
Jesper Dangaard Brouerb8358d72014-10-09 12:18:10 +0200117 *packets = 1;
Jarek Poplawskiebf05982008-09-22 22:16:23 -0700118 if (unlikely(skb)) {
Eric Dumazet4d202a02016-06-21 23:16:52 -0700119 /* skb in gso_skb were already validated */
120 *validate = false;
Jarek Poplawskiebf05982008-09-22 22:16:23 -0700121 /* check the reason of requeuing without tx lock first */
Daniel Borkmann10c51b56232014-08-27 11:11:27 +0200122 txq = skb_get_tx_queue(txq->dev, skb);
Tom Herbert734664982011-11-28 16:32:44 +0000123 if (!netif_xmit_frozen_or_stopped(txq)) {
Jarek Poplawski62523522008-10-06 10:41:50 -0700124 q->gso_skb = NULL;
WANG Conga27758f2016-06-03 15:05:57 -0700125 qdisc_qstats_backlog_dec(q, skb);
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +0000126 q->q.qlen--;
127 } else
Jarek Poplawskiebf05982008-09-22 22:16:23 -0700128 skb = NULL;
Eric Dumazet4d202a02016-06-21 23:16:52 -0700129 return skb;
130 }
131 *validate = true;
132 skb = q->skb_bad_txq;
133 if (unlikely(skb)) {
134 /* check the reason of requeuing without tx lock first */
135 txq = skb_get_tx_queue(txq->dev, skb);
136 if (!netif_xmit_frozen_or_stopped(txq)) {
137 q->skb_bad_txq = NULL;
138 qdisc_qstats_backlog_dec(q, skb);
139 q->q.qlen--;
140 goto bulk;
David S. Miller50cbe9a2014-08-30 19:13:51 -0700141 }
Eric Dumazet4d202a02016-06-21 23:16:52 -0700142 return NULL;
143 }
144 if (!(q->flags & TCQ_F_ONETXQUEUE) ||
145 !netif_xmit_frozen_or_stopped(txq))
146 skb = q->dequeue(q);
147 if (skb) {
148bulk:
149 if (qdisc_may_bulk(q))
150 try_bulk_dequeue_skb(q, skb, txq, packets);
151 else
152 try_bulk_dequeue_skb_slow(q, skb, packets);
Jarek Poplawskiebf05982008-09-22 22:16:23 -0700153 }
Jamal Hadi Salimc716a812007-06-10 17:31:24 -0700154 return skb;
155}
156
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900157/*
Jesper Dangaard Brouer10770bc2014-09-02 16:35:33 +0200158 * Transmit possibly several skbs, and handle the return status as
Eric Dumazetf9eb8ae2016-06-06 09:37:15 -0700159 * required. Owning running seqcount bit guarantees that
Jesper Dangaard Brouer10770bc2014-09-02 16:35:33 +0200160 * only one CPU can execute this function.
Krishna Kumar6c1361a2007-06-24 19:56:09 -0700161 *
162 * Returns to the caller:
163 * 0 - queue is empty or throttled.
164 * >0 - queue is not empty.
Krishna Kumar6c1361a2007-06-24 19:56:09 -0700165 */
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +0000166int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
167 struct net_device *dev, struct netdev_queue *txq,
Eric Dumazet55a93b32014-10-03 15:31:07 -0700168 spinlock_t *root_lock, bool validate)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169{
Peter P Waskiewicz Jr5f1a4852007-11-13 20:40:55 -0800170 int ret = NETDEV_TX_BUSY;
David S. Miller7698b4f2008-07-16 01:42:40 -0700171
172 /* And release qdisc */
173 spin_unlock(root_lock);
Herbert Xud90df3a2007-05-10 04:55:14 -0700174
Eric Dumazet55a93b32014-10-03 15:31:07 -0700175 /* Note that we validate skb (GSO, checksum, ...) outside of locks */
176 if (validate)
177 skb = validate_xmit_skb_list(skb, dev);
Patrick McHardy572a9d72009-11-10 06:14:14 +0000178
Lars Persson3dcd493fb2016-04-12 08:45:52 +0200179 if (likely(skb)) {
Eric Dumazet55a93b32014-10-03 15:31:07 -0700180 HARD_TX_LOCK(dev, txq, smp_processor_id());
Mohammed Javid7ea99462015-10-23 17:27:00 +0530181 if (!netif_xmit_frozen_or_stopped(txq)) {
182 if (unlikely(skb->fast_forwarded))
183 skb = dev_hard_start_xmit_list(skb, dev,
184 txq, &ret);
185 else
186 skb = dev_hard_start_xmit(skb, dev, txq, &ret);
187 }
Jamal Hadi Salimc716a812007-06-10 17:31:24 -0700188
Eric Dumazet55a93b32014-10-03 15:31:07 -0700189 HARD_TX_UNLOCK(dev, txq);
Lars Persson3dcd493fb2016-04-12 08:45:52 +0200190 } else {
Eric Dumazet52fbb292016-06-09 07:45:11 -0700191 spin_lock(root_lock);
Lars Persson3dcd493fb2016-04-12 08:45:52 +0200192 return qdisc_qlen(q);
Eric Dumazet55a93b32014-10-03 15:31:07 -0700193 }
Eric Dumazet52fbb292016-06-09 07:45:11 -0700194 spin_lock(root_lock);
Jamal Hadi Salimc716a812007-06-10 17:31:24 -0700195
Jarek Poplawski9a1654b2009-11-15 07:20:12 +0000196 if (dev_xmit_complete(ret)) {
197 /* Driver sent out skb successfully or skb was consumed */
Krishna Kumar6c1361a2007-06-24 19:56:09 -0700198 ret = qdisc_qlen(q);
Jarek Poplawski9a1654b2009-11-15 07:20:12 +0000199 } else {
Krishna Kumar6c1361a2007-06-24 19:56:09 -0700200 /* Driver returned NETDEV_TX_BUSY - requeue skb */
Joe Perchese87cc472012-05-13 21:56:26 +0000201 if (unlikely(ret != NETDEV_TX_BUSY))
202 net_warn_ratelimited("BUG %s code %d qlen %d\n",
203 dev->name, ret, q->q.qlen);
Krishna Kumar6c1361a2007-06-24 19:56:09 -0700204
David S. Miller37437bb2008-07-16 02:15:04 -0700205 ret = dev_requeue_skb(skb, q);
Krishna Kumar6c1361a2007-06-24 19:56:09 -0700206 }
207
Tom Herbert734664982011-11-28 16:32:44 +0000208 if (ret && netif_xmit_frozen_or_stopped(txq))
David S. Miller37437bb2008-07-16 02:15:04 -0700209 ret = 0;
210
Krishna Kumar6c1361a2007-06-24 19:56:09 -0700211 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212}
213
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +0000214/*
215 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
216 *
Eric Dumazetf9eb8ae2016-06-06 09:37:15 -0700217 * running seqcount guarantees only one CPU can process
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +0000218 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
219 * this queue.
220 *
221 * netif_tx_lock serializes accesses to device driver.
222 *
223 * qdisc_lock(q) and netif_tx_lock are mutually exclusive,
224 * if one is grabbed, another must be free.
225 *
226 * Note, that this procedure can be called by a watchdog timer
227 *
228 * Returns to the caller:
229 * 0 - queue is empty or throttled.
230 * >0 - queue is not empty.
231 *
232 */
Jesper Dangaard Brouerb8358d72014-10-09 12:18:10 +0200233static inline int qdisc_restart(struct Qdisc *q, int *packets)
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +0000234{
235 struct netdev_queue *txq;
236 struct net_device *dev;
237 spinlock_t *root_lock;
238 struct sk_buff *skb;
Eric Dumazet55a93b32014-10-03 15:31:07 -0700239 bool validate;
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +0000240
241 /* Dequeue packet */
Jesper Dangaard Brouerb8358d72014-10-09 12:18:10 +0200242 skb = dequeue_skb(q, &validate, packets);
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +0000243 if (unlikely(!skb))
244 return 0;
Daniel Borkmann10c51b56232014-08-27 11:11:27 +0200245
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +0000246 root_lock = qdisc_lock(q);
247 dev = qdisc_dev(q);
Daniel Borkmann10c51b56232014-08-27 11:11:27 +0200248 txq = skb_get_tx_queue(dev, skb);
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +0000249
Eric Dumazet55a93b32014-10-03 15:31:07 -0700250 return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +0000251}
252
David S. Miller37437bb2008-07-16 02:15:04 -0700253void __qdisc_run(struct Qdisc *q)
Herbert Xu48d83322006-06-19 23:57:59 -0700254{
jamald5b8aa12011-06-26 08:13:54 +0000255 int quota = weight_p;
Jesper Dangaard Brouerb8358d72014-10-09 12:18:10 +0200256 int packets;
Herbert Xu2ba25062008-03-28 16:25:26 -0700257
Jesper Dangaard Brouerb8358d72014-10-09 12:18:10 +0200258 while (qdisc_restart(q, &packets)) {
Herbert Xu2ba25062008-03-28 16:25:26 -0700259 /*
jamald5b8aa12011-06-26 08:13:54 +0000260 * Ordered by possible occurrence: Postpone processing if
261 * 1. we've exceeded packet quota
262 * 2. another process needs the CPU;
Herbert Xu2ba25062008-03-28 16:25:26 -0700263 */
Jesper Dangaard Brouerb8358d72014-10-09 12:18:10 +0200264 quota -= packets;
265 if (quota <= 0 || need_resched()) {
David S. Miller37437bb2008-07-16 02:15:04 -0700266 __netif_schedule(q);
Herbert Xu2ba25062008-03-28 16:25:26 -0700267 break;
268 }
269 }
Herbert Xu48d83322006-06-19 23:57:59 -0700270
Eric Dumazetbc135b22010-06-02 03:23:51 -0700271 qdisc_run_end(q);
Herbert Xu48d83322006-06-19 23:57:59 -0700272}
273
Eric Dumazet9d214932009-05-17 20:55:16 -0700274unsigned long dev_trans_start(struct net_device *dev)
275{
nikolay@redhat.com07ce76a2013-08-03 22:07:47 +0200276 unsigned long val, res;
Eric Dumazet9d214932009-05-17 20:55:16 -0700277 unsigned int i;
278
nikolay@redhat.com07ce76a2013-08-03 22:07:47 +0200279 if (is_vlan_dev(dev))
280 dev = vlan_dev_real_dev(dev);
Florian Westphal9b366272016-05-03 16:33:14 +0200281 res = netdev_get_tx_queue(dev, 0)->trans_start;
282 for (i = 1; i < dev->num_tx_queues; i++) {
Eric Dumazet9d214932009-05-17 20:55:16 -0700283 val = netdev_get_tx_queue(dev, i)->trans_start;
284 if (val && time_after(val, res))
285 res = val;
286 }
nikolay@redhat.com07ce76a2013-08-03 22:07:47 +0200287
Eric Dumazet9d214932009-05-17 20:55:16 -0700288 return res;
289}
290EXPORT_SYMBOL(dev_trans_start);
291
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292static void dev_watchdog(unsigned long arg)
293{
294 struct net_device *dev = (struct net_device *)arg;
295
Herbert Xu932ff272006-06-09 12:20:56 -0700296 netif_tx_lock(dev);
David S. Millere8a04642008-07-17 00:34:19 -0700297 if (!qdisc_tx_is_noop(dev)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298 if (netif_device_present(dev) &&
299 netif_running(dev) &&
300 netif_carrier_ok(dev)) {
Eric Dumazet9d214932009-05-17 20:55:16 -0700301 int some_queue_timedout = 0;
David S. Millere8a04642008-07-17 00:34:19 -0700302 unsigned int i;
Eric Dumazet9d214932009-05-17 20:55:16 -0700303 unsigned long trans_start;
Stephen Hemminger338f7562006-05-16 15:02:12 -0700304
David S. Millere8a04642008-07-17 00:34:19 -0700305 for (i = 0; i < dev->num_tx_queues; i++) {
306 struct netdev_queue *txq;
307
308 txq = netdev_get_tx_queue(dev, i);
Florian Westphal9b366272016-05-03 16:33:14 +0200309 trans_start = txq->trans_start;
Tom Herbert734664982011-11-28 16:32:44 +0000310 if (netif_xmit_stopped(txq) &&
Eric Dumazet9d214932009-05-17 20:55:16 -0700311 time_after(jiffies, (trans_start +
312 dev->watchdog_timeo))) {
313 some_queue_timedout = 1;
david decotignyccf5ff62011-11-16 12:15:10 +0000314 txq->trans_timeout++;
David S. Millere8a04642008-07-17 00:34:19 -0700315 break;
316 }
317 }
318
Eric Dumazet9d214932009-05-17 20:55:16 -0700319 if (some_queue_timedout) {
Eric Dumazet9d214932009-05-17 20:55:16 -0700320 WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
David S. Miller3019de12011-06-06 16:41:33 -0700321 dev->name, netdev_drivername(dev), i);
Stephen Hemmingerd3147742008-11-19 21:32:24 -0800322 dev->netdev_ops->ndo_tx_timeout(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 }
David S. Millere8a04642008-07-17 00:34:19 -0700324 if (!mod_timer(&dev->watchdog_timer,
325 round_jiffies(jiffies +
326 dev->watchdog_timeo)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327 dev_hold(dev);
328 }
329 }
Herbert Xu932ff272006-06-09 12:20:56 -0700330 netif_tx_unlock(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331
332 dev_put(dev);
333}
334
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335void __netdev_watchdog_up(struct net_device *dev)
336{
Stephen Hemmingerd3147742008-11-19 21:32:24 -0800337 if (dev->netdev_ops->ndo_tx_timeout) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338 if (dev->watchdog_timeo <= 0)
339 dev->watchdog_timeo = 5*HZ;
Venkatesh Pallipadi60468d52007-05-31 21:28:44 -0700340 if (!mod_timer(&dev->watchdog_timer,
341 round_jiffies(jiffies + dev->watchdog_timeo)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342 dev_hold(dev);
343 }
344}
345
/* File-local alias for __netdev_watchdog_up(). */
static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}
350
351static void dev_watchdog_down(struct net_device *dev)
352{
Herbert Xu932ff272006-06-09 12:20:56 -0700353 netif_tx_lock_bh(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 if (del_timer(&dev->watchdog_timer))
Stephen Hemminger15333062006-03-20 22:32:28 -0800355 dev_put(dev);
Herbert Xu932ff272006-06-09 12:20:56 -0700356 netif_tx_unlock_bh(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357}
358
Stephen Hemmingerbea33482007-10-03 16:41:36 -0700359/**
360 * netif_carrier_on - set carrier
361 * @dev: network device
362 *
363 * Device has detected that carrier.
364 */
Denis Vlasenko0a242ef2005-08-11 15:32:53 -0700365void netif_carrier_on(struct net_device *dev)
366{
Jeff Garzikbfaae0f2007-10-17 23:26:43 -0700367 if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
David S. Millerb4730012008-11-19 15:33:54 -0800368 if (dev->reg_state == NETREG_UNINITIALIZED)
369 return;
david decotigny2d3b4792014-03-29 09:48:35 -0700370 atomic_inc(&dev->carrier_changes);
Denis Vlasenko0a242ef2005-08-11 15:32:53 -0700371 linkwatch_fire_event(dev);
Jeff Garzikbfaae0f2007-10-17 23:26:43 -0700372 if (netif_running(dev))
373 __netdev_watchdog_up(dev);
374 }
Denis Vlasenko0a242ef2005-08-11 15:32:53 -0700375}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800376EXPORT_SYMBOL(netif_carrier_on);
Denis Vlasenko0a242ef2005-08-11 15:32:53 -0700377
Stephen Hemmingerbea33482007-10-03 16:41:36 -0700378/**
379 * netif_carrier_off - clear carrier
380 * @dev: network device
381 *
382 * Device has detected loss of carrier.
383 */
Denis Vlasenko0a242ef2005-08-11 15:32:53 -0700384void netif_carrier_off(struct net_device *dev)
385{
David S. Millerb4730012008-11-19 15:33:54 -0800386 if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
387 if (dev->reg_state == NETREG_UNINITIALIZED)
388 return;
david decotigny2d3b4792014-03-29 09:48:35 -0700389 atomic_inc(&dev->carrier_changes);
Denis Vlasenko0a242ef2005-08-11 15:32:53 -0700390 linkwatch_fire_event(dev);
David S. Millerb4730012008-11-19 15:33:54 -0800391 }
Denis Vlasenko0a242ef2005-08-11 15:32:53 -0700392}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800393EXPORT_SYMBOL(netif_carrier_off);
Denis Vlasenko0a242ef2005-08-11 15:32:53 -0700394
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
396 under all circumstances. It is difficult to invent anything faster or
397 cheaper.
398 */
399
Eric Dumazet520ac302016-06-21 23:16:49 -0700400static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
401 struct sk_buff **to_free)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402{
Eric Dumazet520ac302016-06-21 23:16:49 -0700403 __qdisc_drop(skb, to_free);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 return NET_XMIT_CN;
405}
406
Yang Yingliang82d567c2013-12-10 20:55:31 +0800407static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408{
409 return NULL;
410}
411
Eric Dumazet20fea082007-11-14 01:44:41 -0800412struct Qdisc_ops noop_qdisc_ops __read_mostly = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413 .id = "noop",
414 .priv_size = 0,
415 .enqueue = noop_enqueue,
416 .dequeue = noop_dequeue,
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700417 .peek = noop_dequeue,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 .owner = THIS_MODULE,
419};
420
David S. Miller7698b4f2008-07-16 01:42:40 -0700421static struct netdev_queue noop_netdev_queue = {
David S. Miller7698b4f2008-07-16 01:42:40 -0700422 .qdisc = &noop_qdisc,
Jarek Poplawski9f3ffae2008-10-19 23:37:47 -0700423 .qdisc_sleeping = &noop_qdisc,
David S. Miller7698b4f2008-07-16 01:42:40 -0700424};
425
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426struct Qdisc noop_qdisc = {
427 .enqueue = noop_enqueue,
428 .dequeue = noop_dequeue,
429 .flags = TCQ_F_BUILTIN,
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900430 .ops = &noop_qdisc_ops,
David S. Miller83874002008-07-17 00:53:03 -0700431 .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
David S. Miller7698b4f2008-07-16 01:42:40 -0700432 .dev_queue = &noop_netdev_queue,
Eric Dumazetf9eb8ae2016-06-06 09:37:15 -0700433 .running = SEQCNT_ZERO(noop_qdisc.running),
Eric Dumazet7b5edbc2010-10-15 19:22:34 +0000434 .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435};
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800436EXPORT_SYMBOL(noop_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437
Phil Sutterd66d6c32015-08-27 21:21:38 +0200438static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt)
439{
440 /* register_qdisc() assigns a default of noop_enqueue if unset,
441 * but __dev_queue_xmit() treats noqueue only as such
442 * if this is NULL - so clear it here. */
443 qdisc->enqueue = NULL;
444 return 0;
445}
446
447struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 .id = "noqueue",
449 .priv_size = 0,
Phil Sutterd66d6c32015-08-27 21:21:38 +0200450 .init = noqueue_init,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 .enqueue = noop_enqueue,
452 .dequeue = noop_dequeue,
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700453 .peek = noop_dequeue,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454 .owner = THIS_MODULE,
455};
456
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000457static const u8 prio2band[TC_PRIO_MAX + 1] = {
458 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
459};
Thomas Graf321090e2005-06-18 22:58:35 -0700460
/* 3-band FIFO queue: old style, but should be a bit faster than
 * generic prio+fifo combination.
 */

/* Number of bands in a pfifo_fast qdisc. */
#define PFIFO_FAST_BANDS 3
466
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000467/*
468 * Private data for a pfifo_fast scheduler containing:
469 * - queues for the three band
470 * - bitmap indicating which of the bands contain skbs
471 */
472struct pfifo_fast_priv {
473 u32 bitmap;
Florian Westphal48da34b2016-09-18 00:57:34 +0200474 struct qdisc_skb_head q[PFIFO_FAST_BANDS];
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000475};
476
/*
 * Lookup table from the band bitmap to the lowest-numbered band that has
 * an skb queued:
 *	bitmap=0 - no skbs on any band (-1).
 *	bitmap=1 - an skb on band 0.
 *	bitmap=7 - skbs on all 3 bands, etc.
 */
static const int bitmap2band[] = {
	-1,	/* 000 */
	0,	/* 001 */
	1,	/* 010 */
	0,	/* 011 */
	2,	/* 100 */
	0,	/* 101 */
	1,	/* 110 */
	0,	/* 111 */
};
484
Florian Westphal48da34b2016-09-18 00:57:34 +0200485static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv,
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000486 int band)
David S. Millerd3678b42008-07-21 09:56:13 -0700487{
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000488 return priv->q + band;
David S. Millerd3678b42008-07-21 09:56:13 -0700489}
490
Eric Dumazet520ac302016-06-21 23:16:49 -0700491static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
492 struct sk_buff **to_free)
David S. Millerd3678b42008-07-21 09:56:13 -0700493{
Florian Westphal97d06782016-09-18 00:57:31 +0200494 if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) {
Krishna Kumara453e062009-08-30 22:20:28 -0700495 int band = prio2band[skb->priority & TC_PRIO_MAX];
496 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
Florian Westphal48da34b2016-09-18 00:57:34 +0200497 struct qdisc_skb_head *list = band2list(priv, band);
David S. Millerd3678b42008-07-21 09:56:13 -0700498
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000499 priv->bitmap |= (1 << band);
David S. Millerd3678b42008-07-21 09:56:13 -0700500 qdisc->q.qlen++;
Thomas Graf821d24a2005-06-18 22:58:15 -0700501 return __qdisc_enqueue_tail(skb, qdisc, list);
David S. Millerd3678b42008-07-21 09:56:13 -0700502 }
Thomas Graf821d24a2005-06-18 22:58:15 -0700503
Eric Dumazet520ac302016-06-21 23:16:49 -0700504 return qdisc_drop(skb, qdisc, to_free);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505}
506
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000507static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508{
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000509 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
510 int band = bitmap2band[priv->bitmap];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000512 if (likely(band >= 0)) {
Florian Westphal48da34b2016-09-18 00:57:34 +0200513 struct qdisc_skb_head *qh = band2list(priv, band);
514 struct sk_buff *skb = __qdisc_dequeue_head(qh);
Florian Westphalec323362016-09-18 00:57:32 +0200515
516 if (likely(skb != NULL)) {
517 qdisc_qstats_backlog_dec(qdisc, skb);
518 qdisc_bstats_update(qdisc, skb);
519 }
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000520
521 qdisc->q.qlen--;
Florian Westphal48da34b2016-09-18 00:57:34 +0200522 if (qh->qlen == 0)
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000523 priv->bitmap &= ~(1 << band);
524
525 return skb;
David S. Millerd3678b42008-07-21 09:56:13 -0700526 }
Thomas Graff87a9c32005-06-18 22:58:53 -0700527
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528 return NULL;
529}
530
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000531static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700532{
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000533 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
534 int band = bitmap2band[priv->bitmap];
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700535
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000536 if (band >= 0) {
Florian Westphal48da34b2016-09-18 00:57:34 +0200537 struct qdisc_skb_head *qh = band2list(priv, band);
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000538
Florian Westphal48da34b2016-09-18 00:57:34 +0200539 return qh->head;
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700540 }
541
542 return NULL;
543}
544
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000545static void pfifo_fast_reset(struct Qdisc *qdisc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546{
David S. Millerd3678b42008-07-21 09:56:13 -0700547 int prio;
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000548 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
David S. Millerd3678b42008-07-21 09:56:13 -0700549
550 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
Eric Dumazet1b5c5492016-06-13 20:21:50 -0700551 __qdisc_reset_queue(band2list(priv, prio));
David S. Millerd3678b42008-07-21 09:56:13 -0700552
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000553 priv->bitmap = 0;
Thomas Graf821d24a2005-06-18 22:58:15 -0700554 qdisc->qstats.backlog = 0;
David S. Millerd3678b42008-07-21 09:56:13 -0700555 qdisc->q.qlen = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556}
557
David S. Millerd3678b42008-07-21 09:56:13 -0700558static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
559{
560 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
561
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000562 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
David S. Miller1b34ec42012-03-29 05:11:39 -0400563 if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
564 goto nla_put_failure;
David S. Millerd3678b42008-07-21 09:56:13 -0700565 return skb->len;
566
567nla_put_failure:
568 return -1;
569}
570
571static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
572{
573 int prio;
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000574 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
David S. Millerd3678b42008-07-21 09:56:13 -0700575
576 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
Florian Westphal48da34b2016-09-18 00:57:34 +0200577 qdisc_skb_head_init(band2list(priv, prio));
David S. Millerd3678b42008-07-21 09:56:13 -0700578
Eric Dumazet23624932011-01-21 16:26:09 -0800579 /* Can by-pass the queue discipline */
580 qdisc->flags |= TCQ_F_CAN_BYPASS;
David S. Millerd3678b42008-07-21 09:56:13 -0700581 return 0;
582}
583
David S. Miller6ec1c692009-09-06 01:58:51 -0700584struct Qdisc_ops pfifo_fast_ops __read_mostly = {
David S. Millerd3678b42008-07-21 09:56:13 -0700585 .id = "pfifo_fast",
Krishna Kumarfd3ae5e2009-08-18 21:55:59 +0000586 .priv_size = sizeof(struct pfifo_fast_priv),
David S. Millerd3678b42008-07-21 09:56:13 -0700587 .enqueue = pfifo_fast_enqueue,
588 .dequeue = pfifo_fast_dequeue,
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700589 .peek = pfifo_fast_peek,
David S. Millerd3678b42008-07-21 09:56:13 -0700590 .init = pfifo_fast_init,
591 .reset = pfifo_fast_reset,
592 .dump = pfifo_fast_dump,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593 .owner = THIS_MODULE,
594};
Eric Dumazet1f27cde2016-03-02 08:21:43 -0800595EXPORT_SYMBOL(pfifo_fast_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596
Eric Dumazet23d3b8b2012-09-05 01:02:56 +0000597static struct lock_class_key qdisc_tx_busylock;
Eric Dumazetf9eb8ae2016-06-06 09:37:15 -0700598static struct lock_class_key qdisc_running_key;
Eric Dumazet23d3b8b2012-09-05 01:02:56 +0000599
David S. Miller5ce2d482008-07-08 17:06:30 -0700600struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
stephen hemmingerd2a7f262013-08-31 10:15:50 -0700601 const struct Qdisc_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602{
603 void *p;
604 struct Qdisc *sch;
Eric Dumazetd2760552011-03-03 11:10:02 -0800605 unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
Thomas Graf3d54b822005-07-05 14:15:09 -0700606 int err = -ENOBUFS;
Eric Dumazet23d3b8b2012-09-05 01:02:56 +0000607 struct net_device *dev = dev_queue->dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608
Eric Dumazetf2cd2d32010-11-29 08:14:37 +0000609 p = kzalloc_node(size, GFP_KERNEL,
610 netdev_queue_numa_node_read(dev_queue));
611
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612 if (!p)
Thomas Graf3d54b822005-07-05 14:15:09 -0700613 goto errout;
Thomas Graf3d54b822005-07-05 14:15:09 -0700614 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
Eric Dumazetd2760552011-03-03 11:10:02 -0800615 /* if we got non aligned memory, ask more and do alignment ourself */
616 if (sch != p) {
617 kfree(p);
618 p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL,
619 netdev_queue_numa_node_read(dev_queue));
620 if (!p)
621 goto errout;
622 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
623 sch->padded = (char *) sch - (char *) p;
624 }
Florian Westphal48da34b2016-09-18 00:57:34 +0200625 qdisc_skb_head_init(&sch->q);
626 spin_lock_init(&sch->q.lock);
Eric Dumazet23d3b8b2012-09-05 01:02:56 +0000627
Eric Dumazet79640a42010-06-02 05:09:29 -0700628 spin_lock_init(&sch->busylock);
Eric Dumazet23d3b8b2012-09-05 01:02:56 +0000629 lockdep_set_class(&sch->busylock,
630 dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
631
Eric Dumazetf9eb8ae2016-06-06 09:37:15 -0700632 seqcount_init(&sch->running);
633 lockdep_set_class(&sch->running,
634 dev->qdisc_running_key ?: &qdisc_running_key);
635
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 sch->ops = ops;
637 sch->enqueue = ops->enqueue;
638 sch->dequeue = ops->dequeue;
David S. Millerbb949fb2008-07-08 16:55:56 -0700639 sch->dev_queue = dev_queue;
Eric Dumazet23d3b8b2012-09-05 01:02:56 +0000640 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641 atomic_set(&sch->refcnt, 1);
Thomas Graf3d54b822005-07-05 14:15:09 -0700642
643 return sch;
644errout:
WANG Cong01e123d2008-06-27 19:51:35 -0700645 return ERR_PTR(err);
Thomas Graf3d54b822005-07-05 14:15:09 -0700646}
647
Changli Gao3511c912010-10-16 13:04:08 +0000648struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
stephen hemmingerd2a7f262013-08-31 10:15:50 -0700649 const struct Qdisc_ops *ops,
650 unsigned int parentid)
Thomas Graf3d54b822005-07-05 14:15:09 -0700651{
652 struct Qdisc *sch;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900653
stephen hemminger6da7c8f2013-08-27 16:19:08 -0700654 if (!try_module_get(ops->owner))
Eric Dumazet166ee5b2016-08-24 09:39:02 -0700655 return NULL;
stephen hemminger6da7c8f2013-08-27 16:19:08 -0700656
David S. Miller5ce2d482008-07-08 17:06:30 -0700657 sch = qdisc_alloc(dev_queue, ops);
Eric Dumazet166ee5b2016-08-24 09:39:02 -0700658 if (IS_ERR(sch)) {
659 module_put(ops->owner);
660 return NULL;
661 }
Patrick McHardy9f9afec2006-11-29 17:35:18 -0800662 sch->parent = parentid;
Thomas Graf3d54b822005-07-05 14:15:09 -0700663
Linus Torvalds1da177e2005-04-16 15:20:36 -0700664 if (!ops->init || ops->init(sch, NULL) == 0)
665 return sch;
666
Thomas Graf0fbbeb12005-08-23 10:12:44 -0700667 qdisc_destroy(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 return NULL;
669}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800670EXPORT_SYMBOL(qdisc_create_dflt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671
David S. Miller5fb66222008-08-02 20:02:43 -0700672/* Under qdisc_lock(qdisc) and BH! */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673
674void qdisc_reset(struct Qdisc *qdisc)
675{
Eric Dumazet20fea082007-11-14 01:44:41 -0800676 const struct Qdisc_ops *ops = qdisc->ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677
678 if (ops->reset)
679 ops->reset(qdisc);
Jarek Poplawski67305eb2008-11-03 02:52:50 -0800680
Eric Dumazet4d202a02016-06-21 23:16:52 -0700681 kfree_skb(qdisc->skb_bad_txq);
682 qdisc->skb_bad_txq = NULL;
683
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +0000684 if (qdisc->gso_skb) {
Jesper Dangaard Brouer3f3c7ee2014-09-03 12:12:50 +0200685 kfree_skb_list(qdisc->gso_skb);
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +0000686 qdisc->gso_skb = NULL;
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +0000687 }
Eric Dumazet4d202a02016-06-21 23:16:52 -0700688 qdisc->q.qlen = 0;
Konstantin Khlebnikov5600c752017-09-20 15:45:36 +0300689 qdisc->qstats.backlog = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800691EXPORT_SYMBOL(qdisc_reset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692
Eric Dumazet5d944c62010-03-31 07:06:04 +0000693static void qdisc_rcu_free(struct rcu_head *head)
694{
695 struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
696
John Fastabend73c20a82016-01-05 09:11:36 -0800697 if (qdisc_is_percpu_stats(qdisc)) {
John Fastabend22e0f8b2014-09-28 11:52:56 -0700698 free_percpu(qdisc->cpu_bstats);
John Fastabend73c20a82016-01-05 09:11:36 -0800699 free_percpu(qdisc->cpu_qstats);
700 }
John Fastabend22e0f8b2014-09-28 11:52:56 -0700701
Eric Dumazet5d944c62010-03-31 07:06:04 +0000702 kfree((char *) qdisc - qdisc->padded);
703}
704
David S. Miller1e0d5a52008-08-17 22:31:26 -0700705void qdisc_destroy(struct Qdisc *qdisc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706{
Eric Dumazet20fea082007-11-14 01:44:41 -0800707 const struct Qdisc_ops *ops = qdisc->ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708
David S. Miller1e0d5a52008-08-17 22:31:26 -0700709 if (qdisc->flags & TCQ_F_BUILTIN ||
710 !atomic_dec_and_test(&qdisc->refcnt))
711 return;
712
David S. Miller3a682fb2008-07-20 18:13:01 -0700713#ifdef CONFIG_NET_SCHED
Jiri Kosina59cc1f62016-08-10 11:05:15 +0200714 qdisc_hash_del(qdisc);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700715
Eric Dumazeta2da5702011-01-20 03:48:19 +0000716 qdisc_put_stab(rtnl_dereference(qdisc->stab));
David S. Miller3a682fb2008-07-20 18:13:01 -0700717#endif
Patrick McHardy85670cc2006-09-27 16:45:45 -0700718 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
Patrick McHardy85670cc2006-09-27 16:45:45 -0700719 if (ops->reset)
720 ops->reset(qdisc);
721 if (ops->destroy)
722 ops->destroy(qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723
Patrick McHardy85670cc2006-09-27 16:45:45 -0700724 module_put(ops->owner);
David S. Miller5ce2d482008-07-08 17:06:30 -0700725 dev_put(qdisc_dev(qdisc));
David S. Miller8a34c5d2008-07-17 00:47:45 -0700726
Jesper Dangaard Brouer3f3c7ee2014-09-03 12:12:50 +0200727 kfree_skb_list(qdisc->gso_skb);
Eric Dumazet4d202a02016-06-21 23:16:52 -0700728 kfree_skb(qdisc->skb_bad_txq);
Eric Dumazet5d944c62010-03-31 07:06:04 +0000729 /*
730 * gen_estimator est_timer() might access qdisc->q.lock,
731 * wait a RCU grace period before freeing qdisc.
732 */
733 call_rcu(&qdisc->rcu_head, qdisc_rcu_free);
David S. Miller8a34c5d2008-07-17 00:47:45 -0700734}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800735EXPORT_SYMBOL(qdisc_destroy);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736
Patrick McHardy589983c2009-09-04 06:41:20 +0000737/* Attach toplevel qdisc to device queue. */
738struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
739 struct Qdisc *qdisc)
740{
741 struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
742 spinlock_t *root_lock;
743
744 root_lock = qdisc_lock(oqdisc);
745 spin_lock_bh(root_lock);
746
747 /* Prune old scheduler */
748 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
749 qdisc_reset(oqdisc);
750
751 /* ... and graft new one */
752 if (qdisc == NULL)
753 qdisc = &noop_qdisc;
754 dev_queue->qdisc_sleeping = qdisc;
755 rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
756
757 spin_unlock_bh(root_lock);
758
759 return oqdisc;
760}
John Fastabendb8970f02011-01-17 08:06:09 +0000761EXPORT_SYMBOL(dev_graft_qdisc);
Patrick McHardy589983c2009-09-04 06:41:20 +0000762
David S. Millere8a04642008-07-17 00:34:19 -0700763static void attach_one_default_qdisc(struct net_device *dev,
764 struct netdev_queue *dev_queue,
765 void *_unused)
766{
Phil Sutter3e692f22015-08-27 21:21:39 +0200767 struct Qdisc *qdisc;
768 const struct Qdisc_ops *ops = default_qdisc_ops;
David S. Millere8a04642008-07-17 00:34:19 -0700769
Phil Sutter3e692f22015-08-27 21:21:39 +0200770 if (dev->priv_flags & IFF_NO_QUEUE)
771 ops = &noqueue_qdisc_ops;
772
773 qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT);
774 if (!qdisc) {
775 netdev_info(dev, "activation failed\n");
776 return;
David S. Millere8a04642008-07-17 00:34:19 -0700777 }
Phil Sutter3e692f22015-08-27 21:21:39 +0200778 if (!netif_is_multiqueue(dev))
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800779 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
David S. Millere8a04642008-07-17 00:34:19 -0700780 dev_queue->qdisc_sleeping = qdisc;
781}
782
David S. Miller6ec1c692009-09-06 01:58:51 -0700783static void attach_default_qdiscs(struct net_device *dev)
784{
785 struct netdev_queue *txq;
786 struct Qdisc *qdisc;
787
788 txq = netdev_get_tx_queue(dev, 0);
789
Phil Sutter4b469952015-08-13 19:01:07 +0200790 if (!netif_is_multiqueue(dev) ||
Phil Sutter4b469952015-08-13 19:01:07 +0200791 dev->priv_flags & IFF_NO_QUEUE) {
David S. Miller6ec1c692009-09-06 01:58:51 -0700792 netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
793 dev->qdisc = txq->qdisc_sleeping;
794 atomic_inc(&dev->qdisc->refcnt);
795 } else {
Changli Gao3511c912010-10-16 13:04:08 +0000796 qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
David S. Miller6ec1c692009-09-06 01:58:51 -0700797 if (qdisc) {
David S. Miller6ec1c692009-09-06 01:58:51 -0700798 dev->qdisc = qdisc;
Eric Dumazete57a7842013-12-12 15:41:56 -0800799 qdisc->ops->attach(qdisc);
David S. Miller6ec1c692009-09-06 01:58:51 -0700800 }
801 }
Jiri Kosina59cc1f62016-08-10 11:05:15 +0200802#ifdef CONFIG_NET_SCHED
803 if (dev->qdisc)
804 qdisc_hash_add(dev->qdisc);
805#endif
David S. Miller6ec1c692009-09-06 01:58:51 -0700806}
807
David S. Millere8a04642008-07-17 00:34:19 -0700808static void transition_one_qdisc(struct net_device *dev,
809 struct netdev_queue *dev_queue,
810 void *_need_watchdog)
811{
David S. Miller83874002008-07-17 00:53:03 -0700812 struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
David S. Millere8a04642008-07-17 00:34:19 -0700813 int *need_watchdog_p = _need_watchdog;
814
David S. Millera9312ae2008-08-17 21:51:03 -0700815 if (!(new_qdisc->flags & TCQ_F_BUILTIN))
816 clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
817
David S. Miller83874002008-07-17 00:53:03 -0700818 rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
Phil Sutter3e692f22015-08-27 21:21:39 +0200819 if (need_watchdog_p) {
Eric Dumazet9d214932009-05-17 20:55:16 -0700820 dev_queue->trans_start = 0;
David S. Millere8a04642008-07-17 00:34:19 -0700821 *need_watchdog_p = 1;
Eric Dumazet9d214932009-05-17 20:55:16 -0700822 }
David S. Millere8a04642008-07-17 00:34:19 -0700823}
824
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825void dev_activate(struct net_device *dev)
826{
David S. Millere8a04642008-07-17 00:34:19 -0700827 int need_watchdog;
David S. Millerb0e1e642008-07-08 17:42:10 -0700828
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829 /* No queueing discipline is attached to device;
stephen hemminger6da7c8f2013-08-27 16:19:08 -0700830 * create default one for devices, which need queueing
831 * and noqueue_qdisc for virtual interfaces
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832 */
833
David S. Miller6ec1c692009-09-06 01:58:51 -0700834 if (dev->qdisc == &noop_qdisc)
835 attach_default_qdiscs(dev);
Patrick McHardyaf356af2009-09-04 06:41:18 +0000836
Tommy S. Christensencacaddf2005-05-03 16:18:52 -0700837 if (!netif_carrier_ok(dev))
838 /* Delay activation until next carrier-on event */
839 return;
840
David S. Millere8a04642008-07-17 00:34:19 -0700841 need_watchdog = 0;
842 netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
Eric Dumazet24824a02010-10-02 06:11:55 +0000843 if (dev_ingress_queue(dev))
844 transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
David S. Millere8a04642008-07-17 00:34:19 -0700845
846 if (need_watchdog) {
Florian Westphal860e9532016-05-03 16:33:13 +0200847 netif_trans_update(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 dev_watchdog_up(dev);
849 }
David S. Millerb0e1e642008-07-08 17:42:10 -0700850}
John Fastabendb8970f02011-01-17 08:06:09 +0000851EXPORT_SYMBOL(dev_activate);
David S. Millerb0e1e642008-07-08 17:42:10 -0700852
David S. Millere8a04642008-07-17 00:34:19 -0700853static void dev_deactivate_queue(struct net_device *dev,
854 struct netdev_queue *dev_queue,
855 void *_qdisc_default)
David S. Millerb0e1e642008-07-08 17:42:10 -0700856{
David S. Millere8a04642008-07-17 00:34:19 -0700857 struct Qdisc *qdisc_default = _qdisc_default;
David S. Miller970565b2008-07-08 23:10:33 -0700858 struct Qdisc *qdisc;
David S. Millerb0e1e642008-07-08 17:42:10 -0700859
John Fastabend46e5da42014-09-12 20:04:52 -0700860 qdisc = rtnl_dereference(dev_queue->qdisc);
David S. Millerb0e1e642008-07-08 17:42:10 -0700861 if (qdisc) {
David S. Miller83874002008-07-17 00:53:03 -0700862 spin_lock_bh(qdisc_lock(qdisc));
863
David S. Millera9312ae2008-08-17 21:51:03 -0700864 if (!(qdisc->flags & TCQ_F_BUILTIN))
865 set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
866
Jarek Poplawskif7a54c12008-08-27 02:22:07 -0700867 rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
David S. Millerb0e1e642008-07-08 17:42:10 -0700868 qdisc_reset(qdisc);
David S. Millerd3b753d2008-07-15 20:14:35 -0700869
David S. Miller83874002008-07-17 00:53:03 -0700870 spin_unlock_bh(qdisc_lock(qdisc));
David S. Millerb0e1e642008-07-08 17:42:10 -0700871 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872}
873
David S. Miller4335cd22008-08-17 21:58:07 -0700874static bool some_qdisc_is_busy(struct net_device *dev)
David S. Millere8a04642008-07-17 00:34:19 -0700875{
876 unsigned int i;
877
878 for (i = 0; i < dev->num_tx_queues; i++) {
879 struct netdev_queue *dev_queue;
David S. Miller7698b4f2008-07-16 01:42:40 -0700880 spinlock_t *root_lock;
David S. Millere2627c82008-07-16 00:56:32 -0700881 struct Qdisc *q;
David S. Millere8a04642008-07-17 00:34:19 -0700882 int val;
883
884 dev_queue = netdev_get_tx_queue(dev, i);
David S. Millerb9a3b112008-08-13 15:18:38 -0700885 q = dev_queue->qdisc_sleeping;
David S. Miller5fb66222008-08-02 20:02:43 -0700886 root_lock = qdisc_lock(q);
David S. Millere8a04642008-07-17 00:34:19 -0700887
David S. Miller4335cd22008-08-17 21:58:07 -0700888 spin_lock_bh(root_lock);
David S. Millere8a04642008-07-17 00:34:19 -0700889
Eric Dumazetbc135b22010-06-02 03:23:51 -0700890 val = (qdisc_is_running(q) ||
David S. Millerb9a3b112008-08-13 15:18:38 -0700891 test_bit(__QDISC_STATE_SCHED, &q->state));
David S. Millere8a04642008-07-17 00:34:19 -0700892
David S. Miller4335cd22008-08-17 21:58:07 -0700893 spin_unlock_bh(root_lock);
David S. Millere8a04642008-07-17 00:34:19 -0700894
895 if (val)
896 return true;
897 }
898 return false;
899}
900
Eric Dumazet31376632011-05-19 23:42:09 +0000901/**
902 * dev_deactivate_many - deactivate transmissions on several devices
903 * @head: list of devices to deactivate
904 *
905 * This function returns only when all outstanding transmissions
906 * have completed, unless all devices are in dismantle phase.
907 */
Octavian Purdila44345722010-12-13 12:44:07 +0000908void dev_deactivate_many(struct list_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909{
Octavian Purdila44345722010-12-13 12:44:07 +0000910 struct net_device *dev;
Eric Dumazet31376632011-05-19 23:42:09 +0000911 bool sync_needed = false;
Herbert Xu41a23b02007-05-10 14:12:47 -0700912
Eric W. Biederman5cde2822013-10-05 19:26:05 -0700913 list_for_each_entry(dev, head, close_list) {
Octavian Purdila44345722010-12-13 12:44:07 +0000914 netdev_for_each_tx_queue(dev, dev_deactivate_queue,
915 &noop_qdisc);
916 if (dev_ingress_queue(dev))
917 dev_deactivate_queue(dev, dev_ingress_queue(dev),
918 &noop_qdisc);
919
920 dev_watchdog_down(dev);
Eric Dumazet31376632011-05-19 23:42:09 +0000921 sync_needed |= !dev->dismantle;
Octavian Purdila44345722010-12-13 12:44:07 +0000922 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923
Eric Dumazet31376632011-05-19 23:42:09 +0000924 /* Wait for outstanding qdisc-less dev_queue_xmit calls.
925 * This is avoided if all devices are in dismantle phase :
926 * Caller will call synchronize_net() for us
927 */
928 if (sync_needed)
929 synchronize_net();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930
Herbert Xud4828d82006-06-22 02:28:18 -0700931 /* Wait for outstanding qdisc_run calls. */
Eric W. Biederman5cde2822013-10-05 19:26:05 -0700932 list_for_each_entry(dev, head, close_list)
Octavian Purdila44345722010-12-13 12:44:07 +0000933 while (some_qdisc_is_busy(dev))
934 yield();
935}
936
937void dev_deactivate(struct net_device *dev)
938{
939 LIST_HEAD(single);
940
Eric W. Biederman5cde2822013-10-05 19:26:05 -0700941 list_add(&dev->close_list, &single);
Octavian Purdila44345722010-12-13 12:44:07 +0000942 dev_deactivate_many(&single);
Eric W. Biederman5f04d502011-02-20 11:49:45 -0800943 list_del(&single);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944}
John Fastabendb8970f02011-01-17 08:06:09 +0000945EXPORT_SYMBOL(dev_deactivate);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946
David S. Millerb0e1e642008-07-08 17:42:10 -0700947static void dev_init_scheduler_queue(struct net_device *dev,
948 struct netdev_queue *dev_queue,
David S. Millere8a04642008-07-17 00:34:19 -0700949 void *_qdisc)
David S. Millerb0e1e642008-07-08 17:42:10 -0700950{
David S. Millere8a04642008-07-17 00:34:19 -0700951 struct Qdisc *qdisc = _qdisc;
952
John Fastabend46e5da42014-09-12 20:04:52 -0700953 rcu_assign_pointer(dev_queue->qdisc, qdisc);
David S. Millerb0e1e642008-07-08 17:42:10 -0700954 dev_queue->qdisc_sleeping = qdisc;
David S. Millerb0e1e642008-07-08 17:42:10 -0700955}
956
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957void dev_init_scheduler(struct net_device *dev)
958{
Patrick McHardyaf356af2009-09-04 06:41:18 +0000959 dev->qdisc = &noop_qdisc;
David S. Millere8a04642008-07-17 00:34:19 -0700960 netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
Eric Dumazet24824a02010-10-02 06:11:55 +0000961 if (dev_ingress_queue(dev))
962 dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963
Pavel Emelyanovb24b8a22008-01-23 21:20:07 -0800964 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965}
966
David S. Millere8a04642008-07-17 00:34:19 -0700967static void shutdown_scheduler_queue(struct net_device *dev,
968 struct netdev_queue *dev_queue,
969 void *_qdisc_default)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970{
David S. Millerb0e1e642008-07-08 17:42:10 -0700971 struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
David S. Millere8a04642008-07-17 00:34:19 -0700972 struct Qdisc *qdisc_default = _qdisc_default;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700973
David S. Millerb0e1e642008-07-08 17:42:10 -0700974 if (qdisc) {
Jarek Poplawskif7a54c12008-08-27 02:22:07 -0700975 rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
David S. Millerb0e1e642008-07-08 17:42:10 -0700976 dev_queue->qdisc_sleeping = qdisc_default;
977
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 qdisc_destroy(qdisc);
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900979 }
David S. Millerb0e1e642008-07-08 17:42:10 -0700980}
981
982void dev_shutdown(struct net_device *dev)
983{
David S. Millere8a04642008-07-17 00:34:19 -0700984 netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
Eric Dumazet24824a02010-10-02 06:11:55 +0000985 if (dev_ingress_queue(dev))
986 shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
Patrick McHardyaf356af2009-09-04 06:41:18 +0000987 qdisc_destroy(dev->qdisc);
988 dev->qdisc = &noop_qdisc;
989
Ilpo Järvinen547b7922008-07-25 21:43:18 -0700990 WARN_ON(timer_pending(&dev->watchdog_timer));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991}
Jiri Pirko292f1c72013-02-12 00:12:03 +0000992
Eric Dumazet01cb71d2013-06-02 13:55:05 +0000993void psched_ratecfg_precompute(struct psched_ratecfg *r,
Eric Dumazet3e1e3aa2013-09-19 09:10:03 -0700994 const struct tc_ratespec *conf,
995 u64 rate64)
Jiri Pirko292f1c72013-02-12 00:12:03 +0000996{
Eric Dumazet01cb71d2013-06-02 13:55:05 +0000997 memset(r, 0, sizeof(*r));
998 r->overhead = conf->overhead;
Eric Dumazet3e1e3aa2013-09-19 09:10:03 -0700999 r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
Jesper Dangaard Brouer8a8e3d82013-08-14 23:47:11 +02001000 r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
Jiri Pirko292f1c72013-02-12 00:12:03 +00001001 r->mult = 1;
1002 /*
Eric Dumazet130d3d62013-06-06 13:56:19 -07001003 * The deal here is to replace a divide by a reciprocal one
1004 * in fast path (a reciprocal divide is a multiply and a shift)
1005 *
1006 * Normal formula would be :
1007 * time_in_ns = (NSEC_PER_SEC * len) / rate_bps
1008 *
1009 * We compute mult/shift to use instead :
1010 * time_in_ns = (len * mult) >> shift;
1011 *
1012 * We try to get the highest possible mult value for accuracy,
1013 * but have to make sure no overflows will ever happen.
Jiri Pirko292f1c72013-02-12 00:12:03 +00001014 */
Eric Dumazet130d3d62013-06-06 13:56:19 -07001015 if (r->rate_bytes_ps > 0) {
1016 u64 factor = NSEC_PER_SEC;
Jiri Pirko292f1c72013-02-12 00:12:03 +00001017
Eric Dumazet130d3d62013-06-06 13:56:19 -07001018 for (;;) {
1019 r->mult = div64_u64(factor, r->rate_bytes_ps);
1020 if (r->mult & (1U << 31) || factor & (1ULL << 63))
1021 break;
1022 factor <<= 1;
1023 r->shift++;
1024 }
Jiri Pirko292f1c72013-02-12 00:12:03 +00001025 }
1026}
1027EXPORT_SYMBOL(psched_ratecfg_precompute);