blob: 2be563cba72bce95879fabd9245a4790dae1bf1a [file] [log] [blame]
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */
16
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>
27
/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts.  This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size).
	Really, this limit will never be reached
	if RED works correctly.
 */
/* Per-qdisc private state for the RED scheduler. */
struct red_sched_data
{
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;		/* user flags: TC_RED_ECN, TC_RED_HARDDROP */
	struct red_parms	parms;		/* RED parameters and average-queue (EWMA) state */
	struct red_stats	stats;		/* drop/mark counters exported via xstats */
	struct Qdisc		*qdisc;		/* child qdisc holding the packets (bfifo by default) */
};
49
/* Nonzero if the user enabled ECN marking instead of early drops. */
static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}
54
/* Nonzero if packets past the hard-mark threshold must be dropped even with ECN. */
static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}
59
/*
 * Enqueue a packet: refresh the average queue estimate, run the RED
 * decision, then either pass the packet to the child unmarked, ECN-mark
 * it, or drop it as congestion feedback.  Returns a NET_XMIT_* verdict.
 */
static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	/* Fold the child's current backlog (bytes) into the EWMA. */
	q->parms.qavg = red_calc_qavg(&q->parms, child->qstats.backlog);

	if (red_is_idling(&q->parms))
		red_end_of_idle_period(&q->parms);

	switch (red_action(&q->parms, q->parms.qavg)) {
		case RED_DONT_MARK:
			break;

		case RED_PROB_MARK:
			/* Probabilistic region: mark if ECN is on and the
			 * packet is markable, otherwise early-drop. */
			sch->qstats.overlimits++;
			if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
				q->stats.prob_drop++;
				goto congestion_drop;
			}

			q->stats.prob_mark++;
			break;

		case RED_HARD_MARK:
			/* Above qth_max: drop unless ECN marking is both
			 * allowed (no harddrop flag) and possible. */
			sch->qstats.overlimits++;
			if (red_use_harddrop(q) || !red_use_ecn(q) ||
			    !INET_ECN_set_ce(skb)) {
				q->stats.forced_drop++;
				goto congestion_drop;
			}

			q->stats.forced_mark++;
			break;
	}

	ret = child->enqueue(skb, child);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->bstats.bytes += skb->len;
		sch->bstats.packets++;
		sch->q.qlen++;
	} else {
		/* Child refused the packet (e.g. its own limit). */
		q->stats.pdrop++;
		sch->qstats.drops++;
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch);
	/* NET_XMIT_CN tells the caller this drop signals congestion. */
	return NET_XMIT_CN;
}
112
/*
 * Put a packet back at the head of the child qdisc (e.g. after a failed
 * transmit).  Requeued packets end an idle period but are never subject
 * to the RED drop/mark decision.
 */
static int red_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	if (red_is_idling(&q->parms))
		red_end_of_idle_period(&q->parms);

	ret = child->ops->requeue(skb, child);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->qstats.requeues++;
		sch->q.qlen++;
	}
	return ret;
}
129
Thomas Grafdba051f2005-11-05 21:14:08 +0100130static struct sk_buff * red_dequeue(struct Qdisc* sch)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131{
132 struct sk_buff *skb;
133 struct red_sched_data *q = qdisc_priv(sch);
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800134 struct Qdisc *child = q->qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800136 skb = child->dequeue(child);
137 if (skb)
138 sch->q.qlen--;
139 else if (!red_is_idling(&q->parms))
Thomas Graf9e178ff2005-11-05 21:14:06 +0100140 red_start_of_idle_period(&q->parms);
141
142 return skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143}
144
145static unsigned int red_drop(struct Qdisc* sch)
146{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 struct red_sched_data *q = qdisc_priv(sch);
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800148 struct Qdisc *child = q->qdisc;
149 unsigned int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800151 if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
Thomas Graf6b31b282005-11-05 21:14:05 +0100152 q->stats.other++;
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800153 sch->qstats.drops++;
154 sch->q.qlen--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155 return len;
156 }
Thomas Graf6b31b282005-11-05 21:14:05 +0100157
Thomas Graf6a1b63d2005-11-05 21:14:07 +0100158 if (!red_is_idling(&q->parms))
159 red_start_of_idle_period(&q->parms);
160
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161 return 0;
162}
163
164static void red_reset(struct Qdisc* sch)
165{
166 struct red_sched_data *q = qdisc_priv(sch);
167
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800168 qdisc_reset(q->qdisc);
169 sch->q.qlen = 0;
Thomas Graf6b31b282005-11-05 21:14:05 +0100170 red_restart(&q->parms);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171}
172
/* Release the child qdisc when this qdisc is torn down. */
static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	qdisc_destroy(q->qdisc);
}
178
179static struct Qdisc *red_create_dflt(struct net_device *dev, u32 limit)
180{
181 struct Qdisc *q = qdisc_create_dflt(dev, &bfifo_qdisc_ops);
182 struct rtattr *rta;
183 int ret;
184
185 if (q) {
186 rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)),
187 GFP_KERNEL);
188 if (rta) {
189 rta->rta_type = RTM_NEWQDISC;
190 rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
191 ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
192
193 ret = q->ops->change(q, rta);
194 kfree(rta);
195
196 if (ret == 0)
197 return q;
198 }
199 qdisc_destroy(q);
200 }
201 return NULL;
202}
203
/*
 * Apply a new configuration from a TCA_OPTIONS netlink blob.  Expects a
 * TCA_RED_PARMS struct and a RED_STAB_SIZE lookup table; both are
 * mandatory.  A fresh bfifo child sized to ctl->limit replaces the old
 * child.  Returns 0, -EINVAL on malformed input, or -ENOMEM.
 */
static int red_change(struct Qdisc *sch, struct rtattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct rtattr *tb[TCA_RED_MAX];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;

	if (opt == NULL || rtattr_parse_nested(tb, TCA_RED_MAX, opt))
		return -EINVAL;

	if (tb[TCA_RED_PARMS-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) ||
	    tb[TCA_RED_STAB-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < RED_STAB_SIZE)
		return -EINVAL;

	ctl = RTA_DATA(tb[TCA_RED_PARMS-1]);

	/* Allocate the replacement child before taking the tree lock. */
	if (ctl->limit > 0) {
		child = red_create_dflt(sch->dev, ctl->limit);
		if (child == NULL)
			return -ENOMEM;
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child)
		/* NOTE(review): sch->q.qlen is not adjusted for packets
		 * destroyed with the old child here — verify against the
		 * qdisc tree accounting expected by callers. */
		qdisc_destroy(xchg(&q->qdisc, child));

	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
				 ctl->Plog, ctl->Scell_log,
				 RTA_DATA(tb[TCA_RED_STAB-1]));

	if (skb_queue_empty(&sch->q))
		red_end_of_idle_period(&q->parms);

	sch_tree_unlock(sch);
	return 0;
}
244
/*
 * Initialise a new RED qdisc.  The child starts as the global
 * noop_qdisc placeholder; red_change() installs the real child from
 * the user-supplied options.
 */
static int red_init(struct Qdisc* sch, struct rtattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	return red_change(sch, opt);
}
252
/*
 * Dump the current configuration as a nested TCA_OPTIONS attribute
 * holding a TCA_RED_PARMS struct.  Thresholds are scaled back from
 * their internal fixed-point form (>> Wlog) before export.
 * Note: RTA_PUT jumps to rtattr_failure when the skb runs out of room.
 */
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct rtattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};

	opts = RTA_NEST(skb, TCA_OPTIONS);
	RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
	return RTA_NEST_END(skb, opts);

rtattr_failure:
	return RTA_NEST_CANCEL(skb, opts);
}
274
275static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
276{
277 struct red_sched_data *q = qdisc_priv(sch);
Thomas Graf6b31b282005-11-05 21:14:05 +0100278 struct tc_red_xstats st = {
279 .early = q->stats.prob_drop + q->stats.forced_drop,
280 .pdrop = q->stats.pdrop,
281 .other = q->stats.other,
282 .marked = q->stats.prob_mark + q->stats.forced_mark,
283 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284
Thomas Graf6b31b282005-11-05 21:14:05 +0100285 return gnet_stats_copy_app(d, &st, sizeof(st));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286}
287
Patrick McHardyf38c39d2006-03-20 19:20:44 -0800288static int red_dump_class(struct Qdisc *sch, unsigned long cl,
289 struct sk_buff *skb, struct tcmsg *tcm)
290{
291 struct red_sched_data *q = qdisc_priv(sch);
292
293 if (cl != 1)
294 return -ENOENT;
295 tcm->tcm_handle |= TC_H_MIN(1);
296 tcm->tcm_info = q->qdisc->handle;
297 return 0;
298}
299
/*
 * Replace the child qdisc (class graft operation).  The previous child
 * is reset under the tree lock and handed back in *old for the caller
 * to destroy.  A NULL replacement installs the noop_qdisc placeholder.
 */
static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = xchg(&q->qdisc, new);
	qdisc_reset(*old);
	sch->q.qlen = 0;
	sch_tree_unlock(sch);
	return 0;
}
315
/* Return the child qdisc attached to the (only) class. */
static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}
321
/* Look up a class: every classid maps to the single class, id 1. */
static unsigned long red_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}
326
/* Class handles from red_get() carry no reference; nothing to release. */
static void red_put(struct Qdisc *sch, unsigned long arg)
{
	return;
}
331
/* The single pseudo-class is fixed; it cannot be created or modified. */
static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct rtattr **tca, unsigned long *arg)
{
	return -ENOSYS;
}
337
/* The single pseudo-class is fixed; it cannot be deleted. */
static int red_delete(struct Qdisc *sch, unsigned long cl)
{
	return -ENOSYS;
}
342
343static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
344{
345 if (!walker->stop) {
346 if (walker->count >= walker->skip)
347 if (walker->fn(sch, 1, walker) < 0) {
348 walker->stop = 1;
349 return;
350 }
351 walker->count++;
352 }
353}
354
/* RED attaches no classifier chain; always report none. */
static struct tcf_proto **red_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	return NULL;
}
359
/* Class operations: RED exposes exactly one pseudo-class wrapping the child. */
static struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.get		=	red_get,
	.put		=	red_put,
	.change		=	red_change_class,
	.delete		=	red_delete,
	.walk		=	red_walk,
	.tcf_chain	=	red_find_tcf,
	.dump		=	red_dump_class,
};
371
/* Qdisc operations table registered under the "red" id. */
static struct Qdisc_ops red_qdisc_ops = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.requeue	=	red_requeue,
	.drop		=	red_drop,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};
388
/* Register the "red" qdisc with the packet scheduler core on load. */
static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}
Thomas Grafdba051f2005-11-05 21:14:08 +0100393
/* Unregister the "red" qdisc on module unload. */
static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}
Thomas Grafdba051f2005-11-05 21:14:08 +0100398
/* Module entry/exit hooks and license declaration. */
module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");