/*
 * net/sched/sch_sfq.c	Stochastic Fairness Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <net/ip.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/*	Stochastic Fairness Queuing algorithm.
	=======================================

	Source:
	Paul E. McKenney "Stochastic Fairness Queuing",
	IEEE INFOCOM'90 Proceedings, San Francisco, 1990.

	Paul E. McKenney "Stochastic Fairness Queuing",
	"Interworking: Research and Experience", v.2, 1991, p.113-131.


	See also:
	M. Shreedhar and George Varghese "Efficient Fair
	Queuing using Deficit Round Robin", Proc. SIGCOMM 95.


	This is not the thing that is usually called (W)FQ nowadays.
	It does not use any timestamp mechanism, but instead
	processes queues in round-robin order.

	ADVANTAGE:

	- It is very cheap. Both CPU and memory requirements are minimal.

	DRAWBACKS:

	- "Stochastic" -> It is not 100% fair.
	When hash collisions occur, several flows are considered as one.

	- "Round-robin" -> It introduces larger delays than virtual clock
	based schemes, and should not be used for isolating interactive
	traffic from non-interactive. This means that this scheduler
	should be used as a leaf of CBQ or P3, which puts interactive
	traffic into a higher priority band.

	We still need true WFQ for top level CSZ, but using WFQ
	for the best effort traffic is absolutely pointless:
	SFQ is superior for this purpose.

	IMPLEMENTATION:
	This implementation limits the maximal queue length to 128;
	the maximal mtu to 2^15-1; the number of hash buckets to 1024.
	The only goal of these restrictions was that all data
	fit into one 4K page :-). Struct sfq_sched_data is
	organized in an anti-cache manner: all the data for a bucket
	are scattered over different locations. This is not good,
	but it allowed me to put it into 4K.

	It is easy to increase these values, but not at runtime. */

#define SFQ_DEPTH		128
#define SFQ_HASH_DIVISOR	1024

/* This type must be able to hold at least SFQ_DEPTH*2 distinct values */
typedef unsigned char sfq_index;

struct sfq_head
{
	sfq_index	next;
	sfq_index	prev;
};

struct sfq_sched_data
{
/* Parameters */
	int		perturb_period;
	unsigned	quantum;	/* Allotment per round: MUST BE >= MTU */
	int		limit;

/* Variables */
	struct tcf_proto *filter_list;
	struct timer_list perturb_timer;
	u32		perturbation;
	sfq_index	tail;		/* Index of current slot in round */
	sfq_index	max_depth;	/* Maximal depth */

	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
	sfq_index	next[SFQ_DEPTH];	/* Active slots link */
	short		allot[SFQ_DEPTH];	/* Current allotment per slot */
	unsigned short	hash[SFQ_DEPTH];	/* Hash value indexed by slots */
	struct sk_buff_head	qs[SFQ_DEPTH];	/* Slot queue */
	struct sfq_head	dep[SFQ_DEPTH*2];	/* Linked list of slots, indexed by depth */
};

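/*
 * Slot bookkeeping in a nutshell: ht[] maps a hash bucket to the slot
 * currently serving it (or SFQ_DEPTH when the bucket is idle), hash[]
 * is the reverse map, and dep[] packs SFQ_DEPTH per-slot list nodes
 * followed by SFQ_DEPTH list heads, so that dep[SFQ_DEPTH + d] heads
 * a doubly linked list of all slots whose queue length is exactly d.
 * This makes both "grab a free slot" (d == 0) and "find a longest
 * slot" (d == max_depth) O(1) operations.
 */
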
static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
{
	return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
}

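/*
 * Build a flow key for the packet: for IPv4/IPv6 roughly
 * (daddr, saddr, protocol), augmented for the common transports with
 * the first 4 bytes past the network header (the port pair, or the
 * ESP SPI); anything else falls back to (dst entry, socket).  The key
 * is folded into a bucket index by sfq_fold_hash() above.
 */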
static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
{
	u32 h, h2;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
	{
		const struct iphdr *iph;

		if (!pskb_network_may_pull(skb, sizeof(*iph)))
			goto err;
		iph = ip_hdr(skb);
		h = (__force u32)iph->daddr;
		h2 = (__force u32)iph->saddr ^ iph->protocol;
		if (!(iph->frag_off & htons(IP_MF | IP_OFFSET)) &&
		    (iph->protocol == IPPROTO_TCP ||
		     iph->protocol == IPPROTO_UDP ||
		     iph->protocol == IPPROTO_UDPLITE ||
		     iph->protocol == IPPROTO_SCTP ||
		     iph->protocol == IPPROTO_DCCP ||
		     iph->protocol == IPPROTO_ESP) &&
		    pskb_network_may_pull(skb, iph->ihl * 4 + 4))
			h2 ^= *(((u32 *)iph) + iph->ihl);
		break;
	}
	case htons(ETH_P_IPV6):
	{
		struct ipv6hdr *iph;

		if (!pskb_network_may_pull(skb, sizeof(*iph)))
			goto err;
		iph = ipv6_hdr(skb);
		h = (__force u32)iph->daddr.s6_addr32[3];
		h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
		if ((iph->nexthdr == IPPROTO_TCP ||
		     iph->nexthdr == IPPROTO_UDP ||
		     iph->nexthdr == IPPROTO_UDPLITE ||
		     iph->nexthdr == IPPROTO_SCTP ||
		     iph->nexthdr == IPPROTO_DCCP ||
		     iph->nexthdr == IPPROTO_ESP) &&
		    pskb_network_may_pull(skb, sizeof(*iph) + 4))
			h2 ^= *(u32 *)&iph[1];
		break;
	}
	default:
err:
		h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol;
		h2 = (unsigned long)skb->sk;
	}

	return sfq_fold_hash(q, h, h2);
}

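/*
 * Pick a hash bucket for the skb, 1-based so that 0 can mean "drop".
 * skb->priority wins when it addresses this qdisc directly; otherwise
 * any attached classifiers decide, and without classifiers the packet
 * is simply hashed on its flow key.
 */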
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
				 int *qerr)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	int result;

	if (TC_H_MAJ(skb->priority) == sch->handle &&
	    TC_H_MIN(skb->priority) > 0 &&
	    TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
		return TC_H_MIN(skb->priority);

	if (!q->filter_list)
		return sfq_hash(q, skb) + 1;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tc_classify(skb, q->filter_list, &res);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return 0;
		}
#endif
		if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
			return TC_H_MIN(res.classid);
	}
	return 0;
}

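/*
 * Put slot x at the head of the dep[] list matching its current queue
 * length.  E.g. a drained slot (qlen 0) lands on the list headed by
 * dep[SFQ_DEPTH], which is exactly where sfq_enqueue() looks when it
 * needs a free slot for a new flow.
 */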
static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	int d = q->qs[x].qlen + SFQ_DEPTH;

	p = d;
	n = q->dep[d].next;
	q->dep[x].next = n;
	q->dep[x].prev = p;
	q->dep[p].next = q->dep[n].prev = x;
}

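/*
 * A packet left slot x: unlink the slot from its old depth list, lower
 * max_depth if that list just became empty and was the deepest, then
 * relink the slot at its new, shorter depth.
 */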
static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;

	n = q->dep[x].next;
	p = q->dep[x].prev;
	q->dep[p].next = n;
	q->dep[n].prev = p;

	if (n == p && q->max_depth == q->qs[x].qlen + 1)
		q->max_depth--;

	sfq_link(q, x);
}

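/*
 * A packet joined slot x: relink the slot at its new depth and raise
 * max_depth if this queue is now the longest one.
 */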
static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	int d;

	n = q->dep[x].next;
	p = q->dep[x].prev;
	q->dep[p].next = n;
	q->dep[n].prev = p;
	d = q->qs[x].qlen;
	if (q->max_depth < d)
		q->max_depth = d;

	sfq_link(q, x);
}

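/*
 * Penalize the heaviest flow: drop the tail packet of a longest slot
 * and return the number of bytes freed (0 if nothing was dropped).
 * The d == 1 special case, where every active slot holds exactly one
 * packet, also retires the victim slot from the round-robin ring.
 */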
static unsigned int sfq_drop(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index d = q->max_depth;
	struct sk_buff *skb;
	unsigned int len;

	/* Queue is full! Find the longest slot and
	   drop a packet from it */

	if (d > 1) {
		sfq_index x = q->dep[d + SFQ_DEPTH].next;
		skb = q->qs[x].prev;
		len = qdisc_pkt_len(skb);
		__skb_unlink(skb, &q->qs[x]);
		kfree_skb(skb);
		sfq_dec(q, x);
		sch->q.qlen--;
		sch->qstats.drops++;
		sch->qstats.backlog -= len;
		return len;
	}

	if (d == 1) {
		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
		d = q->next[q->tail];
		q->next[q->tail] = q->next[d];
		q->allot[q->next[d]] += q->quantum;
		skb = q->qs[d].prev;
		len = qdisc_pkt_len(skb);
		__skb_unlink(skb, &q->qs[d]);
		kfree_skb(skb);
		sfq_dec(q, d);
		sch->q.qlen--;
		q->ht[q->hash[d]] = SFQ_DEPTH;
		sch->qstats.drops++;
		sch->qstats.backlog -= len;
		return len;
	}

	return 0;
}

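/*
 * Enqueue path: classify to a bucket, bind the bucket to a free slot
 * if it has none, tail-drop when that slot is already at q->limit, and
 * splice a newly active slot into the round-robin ring at q->tail.  If
 * the qdisc as a whole overflows, the longest slot pays via sfq_drop()
 * and NET_XMIT_CN is returned.
 */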
static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned int hash;
	sfq_index x;
	int uninitialized_var(ret);

	hash = sfq_classify(skb, sch, &ret);
	if (hash == 0) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}
	hash--;

	x = q->ht[hash];
	if (x == SFQ_DEPTH) {
		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
		q->hash[x] = hash;
	}

	/* If the selected queue has length q->limit, this means that
	 * all other queues are empty and that we do a simple tail drop,
	 * i.e. drop _this_ packet.
	 */
	if (q->qs[x].qlen >= q->limit)
		return qdisc_drop(skb, sch);

	sch->qstats.backlog += qdisc_pkt_len(skb);
	__skb_queue_tail(&q->qs[x], skb);
	sfq_inc(q, x);
	if (q->qs[x].qlen == 1) {		/* The flow is new */
		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
			q->tail = x;
			q->next[x] = x;
			q->allot[x] = q->quantum;
		} else {
			q->next[x] = q->next[q->tail];
			q->next[q->tail] = x;
			q->tail = x;
		}
	}
	if (++sch->q.qlen <= q->limit) {
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
		return 0;
	}

	sfq_drop(sch);
	return NET_XMIT_CN;
}

static struct sk_buff *
sfq_peek(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index a;

	/* No active slots */
	if (q->tail == SFQ_DEPTH)
		return NULL;

	a = q->next[q->tail];
	return skb_peek(&q->qs[a]);
}

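/*
 * Serve the slot at the head of the round-robin ring.  A slot that
 * drains is unlinked from both ht[] and the ring; a slot that exhausts
 * its byte allotment is rotated to the tail, and in either case the
 * next slot in the ring is topped up with one quantum.
 */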
static struct sk_buff *
sfq_dequeue(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	sfq_index a, old_a;

	/* No active slots */
	if (q->tail == SFQ_DEPTH)
		return NULL;

	a = old_a = q->next[q->tail];

	/* Grab packet */
	skb = __skb_dequeue(&q->qs[a]);
	sfq_dec(q, a);
	sch->q.qlen--;
	sch->qstats.backlog -= qdisc_pkt_len(skb);

	/* Is the slot empty? */
	if (q->qs[a].qlen == 0) {
		q->ht[q->hash[a]] = SFQ_DEPTH;
		a = q->next[a];
		if (a == old_a) {
			q->tail = SFQ_DEPTH;
			return skb;
		}
		q->next[q->tail] = a;
		q->allot[a] += q->quantum;
	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
		q->tail = a;
		a = q->next[a];
		q->allot[a] += q->quantum;
	}
	return skb;
}

static void
sfq_reset(struct Qdisc *sch)
{
	struct sk_buff *skb;

	while ((skb = sfq_dequeue(sch)) != NULL)
		kfree_skb(skb);
}

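/*
 * Timer callback: rekey the jhash seed so flows that happened to
 * collide into one bucket get a fresh chance to separate, then rearm
 * if perturbation is still enabled.
 */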
static void sfq_perturbation(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	struct sfq_sched_data *q = qdisc_priv(sch);

	q->perturbation = net_random();

	if (q->perturb_period)
		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
}

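/*
 * Apply netlink parameters: a zero quantum means "use the device MTU",
 * perturb_period arrives in seconds and is kept in jiffies, and limit
 * is clamped to SFQ_DEPTH - 1.  Packets beyond the new limit are
 * dropped under the tree lock.
 */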
static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tc_sfq_qopt *ctl = nla_data(opt);
	unsigned int qlen;

	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
		return -EINVAL;

	sch_tree_lock(sch);
	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
	q->perturb_period = ctl->perturb_period * HZ;
	if (ctl->limit)
		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);

	qlen = sch->q.qlen;
	while (sch->q.qlen > q->limit)
		sfq_drop(sch);
	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);

	del_timer(&q->perturb_timer);
	if (q->perturb_period) {
		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
		q->perturbation = net_random();
	}
	sch_tree_unlock(sch);
	return 0;
}

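/*
 * Initial state: every hash bucket is idle (ht[i] == SFQ_DEPTH), every
 * slot sits on the depth-0 free list via sfq_link(), and q->tail ==
 * SFQ_DEPTH marks the round-robin ring as empty.
 */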
static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	int i;

	q->perturb_timer.function = sfq_perturbation;
	q->perturb_timer.data = (unsigned long)sch;
	init_timer_deferrable(&q->perturb_timer);

	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
		q->ht[i] = SFQ_DEPTH;

	for (i = 0; i < SFQ_DEPTH; i++) {
		skb_queue_head_init(&q->qs[i]);
		q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
		q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
	}

	q->limit = SFQ_DEPTH - 1;
	q->max_depth = 0;
	q->tail = SFQ_DEPTH;
	if (opt == NULL) {
		q->quantum = psched_mtu(qdisc_dev(sch));
		q->perturb_period = 0;
		q->perturbation = net_random();
	} else {
		int err = sfq_change(sch, opt);
		if (err)
			return err;
	}

	for (i = 0; i < SFQ_DEPTH; i++)
		sfq_link(q, i);
	return 0;
}

static void sfq_destroy(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);

	tcf_destroy_chain(&q->filter_list);
	q->perturb_period = 0;
	del_timer_sync(&q->perturb_timer);
}

static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_sfq_qopt opt;

	opt.quantum = q->quantum;
	opt.perturb_period = q->perturb_period / HZ;

	opt.limit = q->limit;
	opt.divisor = SFQ_HASH_DIVISOR;
	opt.flows = q->limit;

	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
{
	return 0;
}

static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	struct sfq_sched_data *q = qdisc_priv(sch);

	if (cl)
		return NULL;
	return &q->filter_list;
}

static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	tcm->tcm_handle |= TC_H_MIN(cl);
	return 0;
}

static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
				struct gnet_dump *d)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index idx = q->ht[cl - 1];
	struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen };
	struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };

	if (gnet_stats_copy_queue(d, &qs) < 0)
		return -1;
	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}

static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
		if (q->ht[i] == SFQ_DEPTH ||
		    arg->count < arg->skip) {
			arg->count++;
			continue;
		}
		if (arg->fn(sch, i + 1, arg) < 0) {
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}

static const struct Qdisc_class_ops sfq_class_ops = {
	.get		=	sfq_get,
	.tcf_chain	=	sfq_find_tcf,
	.dump		=	sfq_dump_class,
	.dump_stats	=	sfq_dump_class_stats,
	.walk		=	sfq_walk,
};

static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
	.cl_ops		=	&sfq_class_ops,
	.id		=	"sfq",
	.priv_size	=	sizeof(struct sfq_sched_data),
	.enqueue	=	sfq_enqueue,
	.dequeue	=	sfq_dequeue,
	.peek		=	sfq_peek,
	.drop		=	sfq_drop,
	.init		=	sfq_init,
	.reset		=	sfq_reset,
	.destroy	=	sfq_destroy,
	.change		=	NULL,
	.dump		=	sfq_dump,
	.owner		=	THIS_MODULE,
};

static int __init sfq_module_init(void)
{
	return register_qdisc(&sfq_qdisc_ops);
}
static void __exit sfq_module_exit(void)
{
	unregister_qdisc(&sfq_qdisc_ops);
}
module_init(sfq_module_init)
module_exit(sfq_module_exit)
MODULE_LICENSE("GPL");
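
/*
 * Illustrative userspace usage via the iproute2 "tc" tool (option
 * spelling may vary between iproute2 versions):
 *
 *	tc qdisc add dev eth0 root sfq perturb 10
 *	tc qdisc show dev eth0
 */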