blob: 11292adce4121022a86aff0aed537be3d9319490 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_red.c Random Early Detection queue.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Changes:
Thomas Grafdba051f2005-11-05 21:14:08 +010012 * J Hadi Salim 980914: computation fixes
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
Thomas Grafdba051f2005-11-05 21:14:08 +010014 * J Hadi Salim 980816: ECN support
Linus Torvalds1da177e2005-04-16 15:20:36 -070015 */
16
Linus Torvalds1da177e2005-04-16 15:20:36 -070017#include <linux/module.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/types.h>
19#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <net/pkt_sched.h>
22#include <net/inet_ecn.h>
Thomas Graf6b31b282005-11-05 21:14:05 +010023#include <net/red.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024
25
Thomas Graf6b31b282005-11-05 21:14:05 +010026/* Parameters, settable by user:
Linus Torvalds1da177e2005-04-16 15:20:36 -070027 -----------------------------
28
29 limit - bytes (must be > qth_max + burst)
30
31 Hard limit on queue length, should be chosen >qth_max
32 to allow packet bursts. This parameter does not
33 affect the algorithms behaviour and can be chosen
34 arbitrarily high (well, less than ram size)
35 Really, this limit will never be reached
36 if RED works correctly.
Linus Torvalds1da177e2005-04-16 15:20:36 -070037 */
38
Eric Dumazetcc7ec452011-01-19 19:26:56 +000039struct red_sched_data {
Thomas Graf6b31b282005-11-05 21:14:05 +010040 u32 limit; /* HARD maximal queue length */
41 unsigned char flags;
Eric Dumazet8af2a212011-12-08 06:06:03 +000042 struct timer_list adapt_timer;
Thomas Graf6b31b282005-11-05 21:14:05 +010043 struct red_parms parms;
Eric Dumazeteeca6682012-01-05 02:25:16 +000044 struct red_vars vars;
Thomas Graf6b31b282005-11-05 21:14:05 +010045 struct red_stats stats;
Patrick McHardyf38c39d2006-03-20 19:20:44 -080046 struct Qdisc *qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -070047};
48
Thomas Graf6b31b282005-11-05 21:14:05 +010049static inline int red_use_ecn(struct red_sched_data *q)
Linus Torvalds1da177e2005-04-16 15:20:36 -070050{
Thomas Graf6b31b282005-11-05 21:14:05 +010051 return q->flags & TC_RED_ECN;
Linus Torvalds1da177e2005-04-16 15:20:36 -070052}
53
Thomas Grafbdc450a2005-11-05 21:14:28 +010054static inline int red_use_harddrop(struct red_sched_data *q)
55{
56 return q->flags & TC_RED_HARDDROP;
57}
58
Eric Dumazet520ac302016-06-21 23:16:49 -070059static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
60 struct sk_buff **to_free)
Linus Torvalds1da177e2005-04-16 15:20:36 -070061{
62 struct red_sched_data *q = qdisc_priv(sch);
Patrick McHardyf38c39d2006-03-20 19:20:44 -080063 struct Qdisc *child = q->qdisc;
64 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -070065
Eric Dumazeteeca6682012-01-05 02:25:16 +000066 q->vars.qavg = red_calc_qavg(&q->parms,
67 &q->vars,
68 child->qstats.backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -070069
Eric Dumazeteeca6682012-01-05 02:25:16 +000070 if (red_is_idling(&q->vars))
71 red_end_of_idle_period(&q->vars);
Linus Torvalds1da177e2005-04-16 15:20:36 -070072
Eric Dumazeteeca6682012-01-05 02:25:16 +000073 switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +000074 case RED_DONT_MARK:
75 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
Eric Dumazetcc7ec452011-01-19 19:26:56 +000077 case RED_PROB_MARK:
John Fastabend25331d62014-09-28 11:53:29 -070078 qdisc_qstats_overlimit(sch);
Eric Dumazetcc7ec452011-01-19 19:26:56 +000079 if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
80 q->stats.prob_drop++;
81 goto congestion_drop;
82 }
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
Eric Dumazetcc7ec452011-01-19 19:26:56 +000084 q->stats.prob_mark++;
85 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -070086
Eric Dumazetcc7ec452011-01-19 19:26:56 +000087 case RED_HARD_MARK:
John Fastabend25331d62014-09-28 11:53:29 -070088 qdisc_qstats_overlimit(sch);
Eric Dumazetcc7ec452011-01-19 19:26:56 +000089 if (red_use_harddrop(q) || !red_use_ecn(q) ||
90 !INET_ECN_set_ce(skb)) {
91 q->stats.forced_drop++;
92 goto congestion_drop;
93 }
Linus Torvalds1da177e2005-04-16 15:20:36 -070094
Eric Dumazetcc7ec452011-01-19 19:26:56 +000095 q->stats.forced_mark++;
96 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -070097 }
98
Eric Dumazet520ac302016-06-21 23:16:49 -070099 ret = qdisc_enqueue(skb, child, to_free);
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800100 if (likely(ret == NET_XMIT_SUCCESS)) {
WANG Congd7f4f332016-06-01 16:15:18 -0700101 qdisc_qstats_backlog_inc(sch, skb);
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800102 sch->q.qlen++;
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700103 } else if (net_xmit_drop_count(ret)) {
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800104 q->stats.pdrop++;
John Fastabend25331d62014-09-28 11:53:29 -0700105 qdisc_qstats_drop(sch);
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800106 }
107 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108
Thomas Graf6b31b282005-11-05 21:14:05 +0100109congestion_drop:
Eric Dumazet520ac302016-06-21 23:16:49 -0700110 qdisc_drop(skb, sch, to_free);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111 return NET_XMIT_CN;
112}
113
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000114static struct sk_buff *red_dequeue(struct Qdisc *sch)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115{
116 struct sk_buff *skb;
117 struct red_sched_data *q = qdisc_priv(sch);
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800118 struct Qdisc *child = q->qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800120 skb = child->dequeue(child);
Eric Dumazet9190b3b2011-01-20 23:31:33 -0800121 if (skb) {
122 qdisc_bstats_update(sch, skb);
WANG Congd7f4f332016-06-01 16:15:18 -0700123 qdisc_qstats_backlog_dec(sch, skb);
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800124 sch->q.qlen--;
Eric Dumazet9190b3b2011-01-20 23:31:33 -0800125 } else {
Eric Dumazeteeca6682012-01-05 02:25:16 +0000126 if (!red_is_idling(&q->vars))
127 red_start_of_idle_period(&q->vars);
Eric Dumazet9190b3b2011-01-20 23:31:33 -0800128 }
Thomas Graf9e178ff2005-11-05 21:14:06 +0100129 return skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130}
131
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000132static struct sk_buff *red_peek(struct Qdisc *sch)
Jarek Poplawski8e3af972008-10-31 00:45:55 -0700133{
134 struct red_sched_data *q = qdisc_priv(sch);
135 struct Qdisc *child = q->qdisc;
136
137 return child->ops->peek(child);
138}
139
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000140static void red_reset(struct Qdisc *sch)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141{
142 struct red_sched_data *q = qdisc_priv(sch);
143
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800144 qdisc_reset(q->qdisc);
WANG Congd7f4f332016-06-01 16:15:18 -0700145 sch->qstats.backlog = 0;
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800146 sch->q.qlen = 0;
Eric Dumazeteeca6682012-01-05 02:25:16 +0000147 red_restart(&q->vars);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148}
149
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800150static void red_destroy(struct Qdisc *sch)
151{
152 struct red_sched_data *q = qdisc_priv(sch);
Eric Dumazet8af2a212011-12-08 06:06:03 +0000153
154 del_timer_sync(&q->adapt_timer);
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800155 qdisc_destroy(q->qdisc);
156}
157
Patrick McHardy27a34212008-01-23 20:35:39 -0800158static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
159 [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
160 [TCA_RED_STAB] = { .len = RED_STAB_SIZE },
Eric Dumazeta73ed262011-12-09 02:46:45 +0000161 [TCA_RED_MAX_P] = { .type = NLA_U32 },
Patrick McHardy27a34212008-01-23 20:35:39 -0800162};
163
Patrick McHardy1e904742008-01-22 22:11:17 -0800164static int red_change(struct Qdisc *sch, struct nlattr *opt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165{
166 struct red_sched_data *q = qdisc_priv(sch);
Patrick McHardy1e904742008-01-22 22:11:17 -0800167 struct nlattr *tb[TCA_RED_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168 struct tc_red_qopt *ctl;
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800169 struct Qdisc *child = NULL;
Patrick McHardycee63722008-01-23 20:33:32 -0800170 int err;
Eric Dumazeta73ed262011-12-09 02:46:45 +0000171 u32 max_P;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172
Patrick McHardycee63722008-01-23 20:33:32 -0800173 if (opt == NULL)
Thomas Grafdba051f2005-11-05 21:14:08 +0100174 return -EINVAL;
175
Johannes Bergfceb6432017-04-12 14:34:07 +0200176 err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
Patrick McHardycee63722008-01-23 20:33:32 -0800177 if (err < 0)
178 return err;
179
Patrick McHardy1e904742008-01-22 22:11:17 -0800180 if (tb[TCA_RED_PARMS] == NULL ||
Patrick McHardy27a34212008-01-23 20:35:39 -0800181 tb[TCA_RED_STAB] == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 return -EINVAL;
183
Eric Dumazeta73ed262011-12-09 02:46:45 +0000184 max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
185
Patrick McHardy1e904742008-01-22 22:11:17 -0800186 ctl = nla_data(tb[TCA_RED_PARMS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800188 if (ctl->limit > 0) {
Patrick McHardyfb0305c2008-07-05 23:40:21 -0700189 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
190 if (IS_ERR(child))
191 return PTR_ERR(child);
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800192 }
193
Jiri Kosina49b49972017-03-08 16:03:32 +0100194 if (child != &noop_qdisc)
195 qdisc_hash_add(child, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 sch_tree_lock(sch);
197 q->flags = ctl->flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198 q->limit = ctl->limit;
Patrick McHardy5e50da02006-11-29 17:36:20 -0800199 if (child) {
WANG Cong2ccccf52016-02-25 14:55:01 -0800200 qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
201 q->qdisc->qstats.backlog);
Patrick McHardyb94c8af2008-11-20 04:11:36 -0800202 qdisc_destroy(q->qdisc);
203 q->qdisc = child;
Patrick McHardy5e50da02006-11-29 17:36:20 -0800204 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205
Eric Dumazeteeca6682012-01-05 02:25:16 +0000206 red_set_parms(&q->parms,
207 ctl->qth_min, ctl->qth_max, ctl->Wlog,
Eric Dumazeta73ed262011-12-09 02:46:45 +0000208 ctl->Plog, ctl->Scell_log,
209 nla_data(tb[TCA_RED_STAB]),
210 max_P);
Eric Dumazeteeca6682012-01-05 02:25:16 +0000211 red_set_vars(&q->vars);
Thomas Graf6b31b282005-11-05 21:14:05 +0100212
Eric Dumazet8af2a212011-12-08 06:06:03 +0000213 del_timer(&q->adapt_timer);
214 if (ctl->flags & TC_RED_ADAPTATIVE)
215 mod_timer(&q->adapt_timer, jiffies + HZ/2);
216
Eric Dumazet1ee5fa12011-12-01 11:06:34 +0000217 if (!q->qdisc->q.qlen)
Eric Dumazeteeca6682012-01-05 02:25:16 +0000218 red_start_of_idle_period(&q->vars);
Thomas Grafdba051f2005-11-05 21:14:08 +0100219
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220 sch_tree_unlock(sch);
221 return 0;
222}
223
Eric Dumazet8af2a212011-12-08 06:06:03 +0000224static inline void red_adaptative_timer(unsigned long arg)
225{
226 struct Qdisc *sch = (struct Qdisc *)arg;
227 struct red_sched_data *q = qdisc_priv(sch);
228 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
229
230 spin_lock(root_lock);
Eric Dumazeteeca6682012-01-05 02:25:16 +0000231 red_adaptative_algo(&q->parms, &q->vars);
Eric Dumazet8af2a212011-12-08 06:06:03 +0000232 mod_timer(&q->adapt_timer, jiffies + HZ/2);
233 spin_unlock(root_lock);
234}
235
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000236static int red_init(struct Qdisc *sch, struct nlattr *opt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237{
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800238 struct red_sched_data *q = qdisc_priv(sch);
239
240 q->qdisc = &noop_qdisc;
Eric Dumazet8af2a212011-12-08 06:06:03 +0000241 setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 return red_change(sch, opt);
243}
244
245static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
246{
247 struct red_sched_data *q = qdisc_priv(sch);
Patrick McHardy1e904742008-01-22 22:11:17 -0800248 struct nlattr *opts = NULL;
Thomas Graf6b31b282005-11-05 21:14:05 +0100249 struct tc_red_qopt opt = {
250 .limit = q->limit,
251 .flags = q->flags,
252 .qth_min = q->parms.qth_min >> q->parms.Wlog,
253 .qth_max = q->parms.qth_max >> q->parms.Wlog,
254 .Wlog = q->parms.Wlog,
255 .Plog = q->parms.Plog,
256 .Scell_log = q->parms.Scell_log,
257 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258
Eric Dumazet0dfb33a2011-01-03 08:11:38 +0000259 sch->qstats.backlog = q->qdisc->qstats.backlog;
Patrick McHardy1e904742008-01-22 22:11:17 -0800260 opts = nla_nest_start(skb, TCA_OPTIONS);
261 if (opts == NULL)
262 goto nla_put_failure;
David S. Miller1b34ec42012-03-29 05:11:39 -0400263 if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
264 nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
265 goto nla_put_failure;
Patrick McHardy1e904742008-01-22 22:11:17 -0800266 return nla_nest_end(skb, opts);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267
Patrick McHardy1e904742008-01-22 22:11:17 -0800268nla_put_failure:
Thomas Grafbc3ed282008-06-03 16:36:54 -0700269 nla_nest_cancel(skb, opts);
270 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271}
272
273static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
274{
275 struct red_sched_data *q = qdisc_priv(sch);
Thomas Graf6b31b282005-11-05 21:14:05 +0100276 struct tc_red_xstats st = {
277 .early = q->stats.prob_drop + q->stats.forced_drop,
278 .pdrop = q->stats.pdrop,
279 .other = q->stats.other,
280 .marked = q->stats.prob_mark + q->stats.forced_mark,
281 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282
Thomas Graf6b31b282005-11-05 21:14:05 +0100283 return gnet_stats_copy_app(d, &st, sizeof(st));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284}
285
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800286static int red_dump_class(struct Qdisc *sch, unsigned long cl,
287 struct sk_buff *skb, struct tcmsg *tcm)
288{
289 struct red_sched_data *q = qdisc_priv(sch);
290
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800291 tcm->tcm_handle |= TC_H_MIN(1);
292 tcm->tcm_info = q->qdisc->handle;
293 return 0;
294}
295
296static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
297 struct Qdisc **old)
298{
299 struct red_sched_data *q = qdisc_priv(sch);
300
301 if (new == NULL)
302 new = &noop_qdisc;
303
WANG Cong86a79962016-02-25 14:55:00 -0800304 *old = qdisc_replace(sch, new, &q->qdisc);
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800305 return 0;
306}
307
308static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
309{
310 struct red_sched_data *q = qdisc_priv(sch);
311 return q->qdisc;
312}
313
314static unsigned long red_get(struct Qdisc *sch, u32 classid)
315{
316 return 1;
317}
318
319static void red_put(struct Qdisc *sch, unsigned long arg)
320{
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800321}
322
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800323static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
324{
325 if (!walker->stop) {
326 if (walker->count >= walker->skip)
327 if (walker->fn(sch, 1, walker) < 0) {
328 walker->stop = 1;
329 return;
330 }
331 walker->count++;
332 }
333}
334
Eric Dumazet20fea082007-11-14 01:44:41 -0800335static const struct Qdisc_class_ops red_class_ops = {
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800336 .graft = red_graft,
337 .leaf = red_leaf,
338 .get = red_get,
339 .put = red_put,
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800340 .walk = red_walk,
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800341 .dump = red_dump_class,
342};
343
Eric Dumazet20fea082007-11-14 01:44:41 -0800344static struct Qdisc_ops red_qdisc_ops __read_mostly = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 .id = "red",
346 .priv_size = sizeof(struct red_sched_data),
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800347 .cl_ops = &red_class_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348 .enqueue = red_enqueue,
349 .dequeue = red_dequeue,
Jarek Poplawski8e3af972008-10-31 00:45:55 -0700350 .peek = red_peek,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351 .init = red_init,
352 .reset = red_reset,
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800353 .destroy = red_destroy,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 .change = red_change,
355 .dump = red_dump,
356 .dump_stats = red_dump_stats,
357 .owner = THIS_MODULE,
358};
359
360static int __init red_module_init(void)
361{
362 return register_qdisc(&red_qdisc_ops);
363}
Thomas Grafdba051f2005-11-05 21:14:08 +0100364
365static void __exit red_module_exit(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366{
367 unregister_qdisc(&red_qdisc_ops);
368}
Thomas Grafdba051f2005-11-05 21:14:08 +0100369
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370module_init(red_module_init)
371module_exit(red_module_exit)
Thomas Grafdba051f2005-11-05 21:14:08 +0100372
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373MODULE_LICENSE("GPL");