/*
 * net/sched/sch_netem.c        Network emulator
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License.
 *
 *              Many of the algorithms and ideas for this came from
 *              NIST Net which is not copyrighted.
 *
 * Authors:     Stephen Hemminger <shemminger@osdl.org>
 *              Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>

#define VERSION "1.2"

/*      Network Emulation Queuing algorithm.
        ====================================

        Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
                 Network Emulation Tool"
                 [2] Luigi Rizzo, DummyNet for FreeBSD

         ----------------------------------------------------------------

         This started out as a simple way to delay outgoing packets to
         test TCP but has grown to include most of the functionality
         of a full blown network emulator like NISTnet. It can delay
         packets and add random jitter (and correlation). The random
         distribution can also be loaded from a table to provide
         normal, Pareto, or experimental curves. Packet loss,
         duplication, and reordering can also be emulated.

         This qdisc does not do classification; that can be handled by
         layering other disciplines. It does not need to do bandwidth
         control either, since that can be handled by using token
         bucket or other rate control.

         The simulator is limited by the Linux timer resolution
         and will create packet bursts on the HZ boundary (1ms).
*/
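
/* Configuration is normally done from userspace with the iproute2 "tc"
 * tool; a minimal sketch (device name and numbers are only examples):
 *
 *      tc qdisc add dev eth0 root netem delay 100ms 10ms 25%
 *      tc qdisc change dev eth0 root netem loss 0.3% duplicate 1%
 *
 * These map onto the tc_netem_qopt and nested TCA_NETEM_* netlink
 * attributes parsed by netem_change() below.
 */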

struct netem_sched_data {
        struct Qdisc    *qdisc;
        struct qdisc_watchdog watchdog;

        psched_tdiff_t latency;
        psched_tdiff_t jitter;

        u32 loss;
        u32 limit;
        u32 counter;
        u32 gap;
        u32 duplicate;
        u32 reorder;
        u32 corrupt;

        struct crndstate {
                u32 last;
                u32 rho;
        } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

        struct disttable {
                u32  size;
                s16 table[0];
        } *delay_dist;
};

/* Time stamp put into socket buffer control block */
struct netem_skb_cb {
        psched_time_t   time_to_send;
};
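
/* time_to_send is the absolute psched time at which the packet becomes
 * eligible to leave; the tfifo child qdisc below keeps its queue sorted
 * on this field.
 */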

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
        state->rho = rho;
        state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
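/* A sketch of the recurrence below, reading rho roughly as the
 * 0.32 fixed-point fraction r = rho / 2^32:
 *
 *      answer = (1 - r) * random + r * last
 *
 * so successive values form an exponentially correlated sequence.
 */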
static u32 get_crandom(struct crndstate *state)
{
        u64 value, rho;
        unsigned long answer;

        if (state->rho == 0)    /* no correlation */
                return net_random();

        value = net_random();
        rho = (u64)state->rho + 1;
        answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
        state->last = answer;
        return answer;
}

/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
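/* The table entry t is a sample of the target distribution scaled by
 * NETEM_DIST_SCALE, so the arithmetic below effectively computes
 *
 *      mu + sigma * t / NETEM_DIST_SCALE
 *
 * with rounding, splitting sigma into quotient and remainder mod the
 * scale so the intermediate product cannot overflow.
 */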
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
                                struct crndstate *state,
                                const struct disttable *dist)
{
        psched_tdiff_t x;
        long t;
        u32 rnd;

        if (sigma == 0)
                return mu;

        rnd = get_crandom(state);

        /* default uniform distribution */
        if (dist == NULL)
                return (rnd % (2*sigma)) - sigma + mu;

        t = dist->table[rnd % dist->size];
        x = (sigma % NETEM_DIST_SCALE) * t;
        if (x >= 0)
                x += NETEM_DIST_SCALE/2;
        else
                x -= NETEM_DIST_SCALE/2;

        return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *      NET_XMIT_DROP: queue length didn't change.
 *      NET_XMIT_SUCCESS: one skb was queued.
 */
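/* Loss and duplication are decided up front with the correlated RNG:
 * count is the number of copies to deliver, so 0 means drop the packet,
 * 1 is the normal case, and 2 re-injects a clone at the root qdisc.
 */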
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        /* We don't fill cb now as skb_unshare() may invalidate it */
        struct netem_skb_cb *cb;
        struct sk_buff *skb2;
        int ret;
        int count = 1;

        pr_debug("netem_enqueue skb=%p\n", skb);

        /* Random duplication */
        if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
                ++count;

        /* Random packet drop 0 => none, ~0 => all */
        if (q->loss && q->loss >= get_crandom(&q->loss_cor))
                --count;

        if (count == 0) {
                sch->qstats.drops++;
                kfree_skb(skb);
                return NET_XMIT_BYPASS;
        }

        skb_orphan(skb);

        /*
         * If we need to duplicate packet, then re-insert at top of the
         * qdisc tree, since parent queuer expects that only one
         * skb will be queued.
         */
        if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
                struct Qdisc *rootq = sch->dev->qdisc;
                u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
                q->duplicate = 0;

                rootq->enqueue(skb2, rootq);
                q->duplicate = dupsave;
        }

        /*
         * Randomized packet corruption.
         * Make a copy if needed since we are modifying the data.
         * If the packet is going to be hardware checksummed, then
         * do it now in software before we mangle it.
         */
        if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
                if (!(skb = skb_unshare(skb, GFP_ATOMIC))
                    || (skb->ip_summed == CHECKSUM_PARTIAL
                        && skb_checksum_help(skb))) {
                        sch->qstats.drops++;
                        return NET_XMIT_DROP;
                }

                skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
        }

        cb = (struct netem_skb_cb *)skb->cb;
        if (q->gap == 0                 /* not doing reordering */
            || q->counter < q->gap      /* inside last reordering gap */
            || q->reorder < get_crandom(&q->reorder_cor)) {
                psched_time_t now;
                psched_tdiff_t delay;

                delay = tabledist(q->latency, q->jitter,
                                  &q->delay_cor, q->delay_dist);

                PSCHED_GET_TIME(now);
                PSCHED_TADD2(now, delay, cb->time_to_send);
                ++q->counter;
                ret = q->qdisc->enqueue(skb, q->qdisc);
        } else {
                /*
                 * Do re-ordering by putting one out of N packets at the front
                 * of the queue.
                 */
                PSCHED_GET_TIME(cb->time_to_send);
                q->counter = 0;
                ret = q->qdisc->ops->requeue(skb, q->qdisc);
        }

        if (likely(ret == NET_XMIT_SUCCESS)) {
                sch->q.qlen++;
                sch->bstats.bytes += skb->len;
                sch->bstats.packets++;
        } else
                sch->qstats.drops++;

        pr_debug("netem: enqueue ret %d\n", ret);
        return ret;
}

/* Requeue packets but don't change time stamp */
static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        int ret;

        if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
                sch->q.qlen++;
                sch->qstats.requeues++;
        }

        return ret;
}

static unsigned int netem_drop(struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        unsigned int len = 0;

        if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
                sch->q.qlen--;
                sch->qstats.drops++;
        }
        return len;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;

        skb = q->qdisc->dequeue(q->qdisc);
        if (skb) {
                const struct netem_skb_cb *cb
                        = (const struct netem_skb_cb *)skb->cb;
                psched_time_t now;

                /* has the packet's send time arrived? */
                PSCHED_GET_TIME(now);

                if (PSCHED_TLESS(cb->time_to_send, now)) {
                        pr_debug("netem_dequeue: return skb=%p\n", skb);
                        sch->q.qlen--;
                        sch->flags &= ~TCQ_F_THROTTLED;
                        return skb;
                } else {
                        qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);

                        if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
                                qdisc_tree_decrease_qlen(q->qdisc, 1);
                                sch->qstats.drops++;
                                printk(KERN_ERR "netem: queue discipline %s could not requeue\n",
                                       q->qdisc->ops->id);
                        }
                }
        }

        return NULL;
}

static void netem_reset(struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->q.qlen = 0;
        qdisc_watchdog_cancel(&q->watchdog);
}

/* Pass size change message down to embedded FIFO */
static int set_fifo_limit(struct Qdisc *q, int limit)
{
        struct rtattr *rta;
        int ret = -ENOMEM;

        /* Hack to avoid sending change message to non-FIFO */
        if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
                return 0;

        rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
        if (rta) {
                rta->rta_type = RTM_NEWQDISC;
                rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
                ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;

                ret = q->ops->change(q, rta);
                kfree(rta);
        }
        return ret;
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
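/* The tables themselves are built in userspace (iproute2 ships
 * pre-computed normal and Pareto tables, for example) and handed in
 * through the TCA_NETEM_DELAY_DIST attribute; the kernel only stores
 * and indexes them.
 */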
static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
        const __s16 *data = RTA_DATA(attr);
        struct disttable *d;
        int i;

        if (n > 65536)
                return -EINVAL;

        d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
        if (!d)
                return -ENOMEM;

        d->size = n;
        for (i = 0; i < n; i++)
                d->table[i] = data[i];

        spin_lock_bh(&sch->dev->queue_lock);
        d = xchg(&q->delay_dist, d);
        spin_unlock_bh(&sch->dev->queue_lock);

        kfree(d);
        return 0;
}

static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        const struct tc_netem_corr *c = RTA_DATA(attr);

        if (RTA_PAYLOAD(attr) != sizeof(*c))
                return -EINVAL;

        init_crandom(&q->delay_cor, c->delay_corr);
        init_crandom(&q->loss_cor, c->loss_corr);
        init_crandom(&q->dup_cor, c->dup_corr);
        return 0;
}

static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        const struct tc_netem_reorder *r = RTA_DATA(attr);

        if (RTA_PAYLOAD(attr) != sizeof(*r))
                return -EINVAL;

        q->reorder = r->probability;
        init_crandom(&q->reorder_cor, r->correlation);
        return 0;
}

static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        const struct tc_netem_corrupt *r = RTA_DATA(attr);

        if (RTA_PAYLOAD(attr) != sizeof(*r))
                return -EINVAL;

        q->corrupt = r->probability;
        init_crandom(&q->corrupt_cor, r->correlation);
        return 0;
}

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct rtattr *opt)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        struct tc_netem_qopt *qopt;
        int ret;

        if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
                return -EINVAL;

        qopt = RTA_DATA(opt);
        ret = set_fifo_limit(q->qdisc, qopt->limit);
        if (ret) {
                pr_debug("netem: can't set fifo limit\n");
                return ret;
        }

        q->latency = qopt->latency;
        q->jitter = qopt->jitter;
        q->limit = qopt->limit;
        q->gap = qopt->gap;
        q->counter = 0;
        q->loss = qopt->loss;
        q->duplicate = qopt->duplicate;

        /* For compatibility with earlier versions:
         * if gap is set, need to assume 100% probability.
         */
        if (q->gap)
                q->reorder = ~0;

        /* Handle nested options after initial queue options.
         * Should have put all options in nested format but too late now.
         */
        if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
                struct rtattr *tb[TCA_NETEM_MAX];
                if (rtattr_parse(tb, TCA_NETEM_MAX,
                                 RTA_DATA(opt) + sizeof(*qopt),
                                 RTA_PAYLOAD(opt) - sizeof(*qopt)))
                        return -EINVAL;

                if (tb[TCA_NETEM_CORR-1]) {
                        ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
                        if (ret)
                                return ret;
                }

                if (tb[TCA_NETEM_DELAY_DIST-1]) {
                        ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
                        if (ret)
                                return ret;
                }

                if (tb[TCA_NETEM_REORDER-1]) {
                        ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
                        if (ret)
                                return ret;
                }

                if (tb[TCA_NETEM_CORRUPT-1]) {
                        ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]);
                        if (ret)
                                return ret;
                }
        }

        return 0;
}

/*
 * Special case version of FIFO queue for use by netem.
 * It queues packets in order of their time_to_send timestamps.
 */
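/* A note on the insertion strategy: tfifo_enqueue() walks the queue
 * from the tail, on the assumption that most arriving packets carry a
 * time_to_send later than anything already queued, so the insertion
 * point is normally found near the tail.
 */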
struct fifo_sched_data {
        u32 limit;
};

static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
        struct fifo_sched_data *q = qdisc_priv(sch);
        struct sk_buff_head *list = &sch->q;
        const struct netem_skb_cb *ncb
                = (const struct netem_skb_cb *)nskb->cb;
        struct sk_buff *skb;

        if (likely(skb_queue_len(list) < q->limit)) {
                skb_queue_reverse_walk(list, skb) {
                        const struct netem_skb_cb *cb
                                = (const struct netem_skb_cb *)skb->cb;

                        if (!PSCHED_TLESS(ncb->time_to_send, cb->time_to_send))
                                break;
                }

                __skb_queue_after(list, skb, nskb);

                sch->qstats.backlog += nskb->len;
                sch->bstats.bytes += nskb->len;
                sch->bstats.packets++;

                return NET_XMIT_SUCCESS;
        }

        return qdisc_drop(nskb, sch);
}

static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
{
        struct fifo_sched_data *q = qdisc_priv(sch);

        if (opt) {
                struct tc_fifo_qopt *ctl = RTA_DATA(opt);
                if (RTA_PAYLOAD(opt) < sizeof(*ctl))
                        return -EINVAL;

                q->limit = ctl->limit;
        } else
                q->limit = max_t(u32, sch->dev->tx_queue_len, 1);

        return 0;
}

static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct fifo_sched_data *q = qdisc_priv(sch);
        struct tc_fifo_qopt opt = { .limit = q->limit };

        RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
        return skb->len;

rtattr_failure:
        return -1;
}

static struct Qdisc_ops tfifo_qdisc_ops = {
        .id        = "tfifo",
        .priv_size = sizeof(struct fifo_sched_data),
        .enqueue   = tfifo_enqueue,
        .dequeue   = qdisc_dequeue_head,
        .requeue   = qdisc_requeue,
        .drop      = qdisc_queue_drop,
        .init      = tfifo_init,
        .reset     = qdisc_reset_queue,
        .change    = tfifo_init,
        .dump      = tfifo_dump,
};

static int netem_init(struct Qdisc *sch, struct rtattr *opt)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        int ret;

        if (!opt)
                return -EINVAL;

        qdisc_watchdog_init(&q->watchdog, sch);

        q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops,
                                     TC_H_MAKE(sch->handle, 1));
        if (!q->qdisc) {
                pr_debug("netem: qdisc create failed\n");
                return -ENOMEM;
        }

        ret = netem_change(sch, opt);
        if (ret) {
                pr_debug("netem: change failed\n");
                qdisc_destroy(q->qdisc);
        }
        return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);

        qdisc_watchdog_cancel(&q->watchdog);
        qdisc_destroy(q->qdisc);
        kfree(q->delay_dist);
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        const struct netem_sched_data *q = qdisc_priv(sch);
        unsigned char *b = skb_tail_pointer(skb);
        struct rtattr *rta = (struct rtattr *) b;
        struct tc_netem_qopt qopt;
        struct tc_netem_corr cor;
        struct tc_netem_reorder reorder;
        struct tc_netem_corrupt corrupt;

        qopt.latency = q->latency;
        qopt.jitter = q->jitter;
        qopt.limit = q->limit;
        qopt.loss = q->loss;
        qopt.gap = q->gap;
        qopt.duplicate = q->duplicate;
        RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

        cor.delay_corr = q->delay_cor.rho;
        cor.loss_corr = q->loss_cor.rho;
        cor.dup_corr = q->dup_cor.rho;
        RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

        reorder.probability = q->reorder;
        reorder.correlation = q->reorder_cor.rho;
        RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

        corrupt.probability = q->corrupt;
        corrupt.correlation = q->corrupt_cor.rho;
        RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

        rta->rta_len = skb_tail_pointer(skb) - b;

        return skb->len;

rtattr_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
                            struct sk_buff *skb, struct tcmsg *tcm)
{
        struct netem_sched_data *q = qdisc_priv(sch);

        if (cl != 1)    /* only one class */
                return -ENOENT;

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;

        return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                       struct Qdisc **old)
{
        struct netem_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        sch_tree_lock(sch);
        *old = xchg(&q->qdisc, new);
        qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
        qdisc_reset(*old);
        sch_tree_unlock(sch);

        return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
                              struct rtattr **tca, unsigned long *arg)
{
        return -ENOSYS;
}

static int netem_delete(struct Qdisc *sch, unsigned long arg)
{
        return -ENOSYS;
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
{
        return NULL;
}

static struct Qdisc_class_ops netem_class_ops = {
        .graft     = netem_graft,
        .leaf      = netem_leaf,
        .get       = netem_get,
        .put       = netem_put,
        .change    = netem_change_class,
        .delete    = netem_delete,
        .walk      = netem_walk,
        .tcf_chain = netem_find_tcf,
        .dump      = netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops = {
        .id        = "netem",
        .cl_ops    = &netem_class_ops,
        .priv_size = sizeof(struct netem_sched_data),
        .enqueue   = netem_enqueue,
        .dequeue   = netem_dequeue,
        .requeue   = netem_requeue,
        .drop      = netem_drop,
        .init      = netem_init,
        .reset     = netem_reset,
        .destroy   = netem_destroy,
        .change    = netem_change,
        .dump      = netem_dump,
        .owner     = THIS_MODULE,
};


static int __init netem_module_init(void)
{
        pr_info("netem: version " VERSION "\n");
        return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
        unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");