blob: 6a2f88fea6d8f8fbb1b791c53a74287fd7680727 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_sfq.c Stochastic Fairness Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 */
11
Linus Torvalds1da177e2005-04-16 15:20:36 -070012#include <linux/module.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070013#include <linux/types.h>
14#include <linux/kernel.h>
15#include <linux/jiffies.h>
16#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070017#include <linux/in.h>
18#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/init.h>
Patrick McHardy0ba48052007-07-02 22:49:07 -070020#include <linux/ipv6.h>
21#include <linux/skbuff.h>
Alexey Kuznetsov32740dd2007-09-30 17:51:33 -070022#include <linux/jhash.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090023#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <net/ip.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070025#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070026#include <net/pkt_sched.h>
27
28
29/* Stochastic Fairness Queuing algorithm.
30 =======================================
31
32 Source:
33 Paul E. McKenney "Stochastic Fairness Queuing",
34 IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
35
36 Paul E. McKenney "Stochastic Fairness Queuing",
37 "Interworking: Research and Experience", v.2, 1991, p.113-131.
38
39
40 See also:
41 M. Shreedhar and George Varghese "Efficient Fair
42 Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
43
44
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090045 This is not the thing that is usually called (W)FQ nowadays.
Linus Torvalds1da177e2005-04-16 15:20:36 -070046 It does not use any timestamp mechanism, but instead
47 processes queues in round-robin order.
48
49 ADVANTAGE:
50
51 - It is very cheap. Both CPU and memory requirements are minimal.
52
53 DRAWBACKS:
54
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090055 - "Stochastic" -> It is not 100% fair.
Linus Torvalds1da177e2005-04-16 15:20:36 -070056 When hash collisions occur, several flows are considered as one.
57
58 - "Round-robin" -> It introduces larger delays than virtual clock
59 based schemes, and should not be used for isolating interactive
60 traffic from non-interactive. It means, that this scheduler
61 should be used as leaf of CBQ or P3, which put interactive traffic
62 to higher priority band.
63
64 We still need true WFQ for top level CSZ, but using WFQ
65 for the best effort traffic is absolutely pointless:
66 SFQ is superior for this purpose.
67
68 IMPLEMENTATION:
69 This implementation limits maximal queue length to 128;
Eric Dumazeteda83e32010-12-20 12:54:58 +000070 maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024.
Linus Torvalds1da177e2005-04-16 15:20:36 -070071 The only goal of this restrictions was that all data
Eric Dumazeteda83e32010-12-20 12:54:58 +000072 fit into one 4K page on 32bit arches.
Linus Torvalds1da177e2005-04-16 15:20:36 -070073
74 It is easy to increase these values, but not in flight. */
75
#define SFQ_DEPTH 128 /* max number of packets per flow */
#define SFQ_SLOTS 128 /* max number of flows */
#define SFQ_EMPTY_SLOT 255 /* ht[] value meaning "no slot bound to this bucket" */
#define SFQ_HASH_DIVISOR 1024 /* number of hash buckets (power of two) */
80
/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values
 * (128 + 128 = 256, so unsigned char is exactly wide enough).
 */
typedef unsigned char sfq_index;
83
/*
 * We dont use pointers to save space.
 * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
 * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
 * are 'pointers' to dep[] array
 */
struct sfq_head
{
	sfq_index next; /* next entry in this depth chain */
	sfq_index prev; /* previous entry in this depth chain */
};
95
/* Per-flow state: a circular skb list (the slot itself is the sentinel)
 * plus round-robin and depth-chain bookkeeping.
 */
struct sfq_slot {
	struct sk_buff *skblist_next; /* head of the circular skb list */
	struct sk_buff *skblist_prev; /* tail of the circular skb list */
	sfq_index qlen; /* number of skbs in skblist */
	sfq_index next; /* next slot in sfq chain */
	struct sfq_head dep; /* anchor in dep[] chains */
	unsigned short hash; /* hash value (index in ht[]) */
	short allot; /* credit for this slot */
};
105
struct sfq_sched_data
{
/* Parameters */
	int perturb_period; /* hash re-key interval in jiffies (0 = disabled) */
	unsigned quantum; /* Allotment per round: MUST BE >= MTU */
	int limit; /* max packets per slot, and overall qlen limit */

/* Variables */
	struct tcf_proto *filter_list; /* optional classifier chain */
	struct timer_list perturb_timer; /* fires sfq_perturbation() */
	u32 perturbation; /* current jhash key */
	sfq_index cur_depth; /* depth of longest slot */

	struct sfq_slot *tail; /* current slot in round */
	sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */
	struct sfq_slot slots[SFQ_SLOTS];
	struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */
};
124
Eric Dumazeteda83e32010-12-20 12:54:58 +0000125/*
126 * sfq_head are either in a sfq_slot or in dep[] array
127 */
128static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
129{
130 if (val < SFQ_SLOTS)
131 return &q->slots[val].dep;
132 return &q->dep[val - SFQ_SLOTS];
133}
134
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
136{
Alexey Kuznetsov32740dd2007-09-30 17:51:33 -0700137 return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138}
139
/*
 * Compute the hash bucket for @skb: fold source/destination addresses,
 * the L4 protocol and (when reachable) the first 4 bytes of the L4
 * header with the current perturbation key.
 * Returns an index in [0 .. SFQ_HASH_DIVISOR - 1].
 */
static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
{
	u32 h, h2;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
	{
		const struct iphdr *iph;
		int poff;

		if (!pskb_network_may_pull(skb, sizeof(*iph)))
			goto err;
		iph = ip_hdr(skb);
		h = (__force u32)iph->daddr;
		h2 = (__force u32)iph->saddr ^ iph->protocol;
		/* non-first fragments carry no L4 header: skip port mixing */
		if (iph->frag_off & htons(IP_MF|IP_OFFSET))
			break;
		poff = proto_ports_offset(iph->protocol);
		if (poff >= 0 &&
		    pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
			/* reload: the pull may have reallocated skb data */
			iph = ip_hdr(skb);
			h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff);
		}
		break;
	}
	case htons(ETH_P_IPV6):
	{
		struct ipv6hdr *iph;
		int poff;

		if (!pskb_network_may_pull(skb, sizeof(*iph)))
			goto err;
		iph = ipv6_hdr(skb);
		h = (__force u32)iph->daddr.s6_addr32[3];
		h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
		poff = proto_ports_offset(iph->nexthdr);
		if (poff >= 0 &&
		    pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
			/* reload: the pull may have reallocated skb data */
			iph = ipv6_hdr(skb);
			h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff);
		}
		break;
	}
	default:
err:
		/* unknown protocol or truncated header: fall back to
		 * route/socket identity so the packet still hashes somewhere
		 */
		h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol;
		h2 = (unsigned long)skb->sk;
	}

	return sfq_fold_hash(q, h, h2);
}
191
/*
 * Map a packet to a bucket id in [1 .. SFQ_HASH_DIVISOR], or 0 when the
 * packet must not be enqueued (*qerr then carries the accounting code).
 */
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
				 int *qerr)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	int result;

	/* skb->priority may select a bucket directly (1-based minor id) */
	if (TC_H_MAJ(skb->priority) == sch->handle &&
	    TC_H_MIN(skb->priority) > 0 &&
	    TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
		return TC_H_MIN(skb->priority);

	/* no filters attached: plain hash (+1 so 0 stays the drop code) */
	if (!q->filter_list)
		return sfq_hash(q, skb) + 1;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tc_classify(skb, q->filter_list, &res);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through: stolen/queued also return 0 */
		case TC_ACT_SHOT:
			return 0;
		}
#endif
		if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
			return TC_H_MIN(res.classid);
	}
	return 0;
}
224
/*
 * x : slot number [0 .. SFQ_SLOTS - 1]
 *
 * Insert slot x at the head of the dep[] chain matching its current
 * qlen, so slots stay bucketed by queue depth.
 */
static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	int qlen = q->slots[x].qlen;

	/* the dep[] head for this depth has index qlen + SFQ_SLOTS */
	p = qlen + SFQ_SLOTS;
	n = q->dep[qlen].next;

	q->slots[x].dep.next = n;
	q->slots[x].dep.prev = p;

	q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */
	sfq_dep_head(q, n)->prev = x;
}
242
/* Detach slot x from its depth chain, leaving its old neighbours in n/p.
 * Callers always re-insert via sfq_link() after adjusting qlen.
 */
#define sfq_unlink(q, x, n, p) \
	n = q->slots[x].dep.next; \
	p = q->slots[x].dep.prev; \
	sfq_dep_head(q, p)->next = n; \
	sfq_dep_head(q, n)->prev = p
248
249
/*
 * One skb was removed from slot x: decrement its qlen, maintain
 * cur_depth, and move the slot to the chain of its new depth.
 */
static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	int d;

	sfq_unlink(q, x, n, p);

	/* post-decrement: d is the depth *before* removal */
	d = q->slots[x].qlen--;
	/* n == p means x was alone in its chain; if that chain was the
	 * deepest one, the maximum depth just shrank by one.
	 */
	if (n == p && q->cur_depth == d)
		q->cur_depth--;
	sfq_link(q, x);
}
262
263static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
264{
265 sfq_index p, n;
266 int d;
267
Eric Dumazeteda83e32010-12-20 12:54:58 +0000268 sfq_unlink(q, x, n, p);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269
Eric Dumazeteda83e32010-12-20 12:54:58 +0000270 d = ++q->slots[x].qlen;
271 if (q->cur_depth < d)
272 q->cur_depth = d;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273 sfq_link(q, x);
274}
275
Eric Dumazeteda83e32010-12-20 12:54:58 +0000276/* helper functions : might be changed when/if skb use a standard list_head */
277
/* remove one skb from tail of slot queue */
static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
{
	struct sk_buff *skb = slot->skblist_prev;

	slot->skblist_prev = skb->prev;
	/* the new tail must link forward to the slot sentinel */
	skb->prev->next = (struct sk_buff *)slot;
	skb->next = skb->prev = NULL;
	return skb;
}
288
289/* remove one skb from head of slot queue */
290static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
291{
292 struct sk_buff *skb = slot->skblist_next;
293
294 slot->skblist_next = skb->next;
295 skb->next = skb->prev = NULL;
296 return skb;
297}
298
299static inline void slot_queue_init(struct sfq_slot *slot)
300{
301 slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
302}
303
/* add skb to slot queue (tail add) */
static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
{
	/* splice skb in just before the sentinel, i.e. at the tail;
	 * order matters: skb's links are set before its neighbours'.
	 */
	skb->prev = slot->skblist_prev;
	skb->next = (struct sk_buff *)slot;
	slot->skblist_prev->next = skb;
	slot->skblist_prev = skb;
}
312
/* iterate over every skb queued in @slot (read-only walk, no removal) */
#define slot_queue_walk(slot, skb) \
	for (skb = slot->skblist_next; \
	     skb != (struct sk_buff *)slot; \
	     skb = skb->next)
317
/*
 * Drop one packet to make room: the tail packet of the deepest slot,
 * or — when every active slot holds exactly one packet — the packet of
 * the next slot in the round-robin ring (that slot is then unlinked).
 * Returns the dropped packet's length, or 0 if nothing was queued.
 */
static unsigned int sfq_drop(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index x, d = q->cur_depth;
	struct sk_buff *skb;
	unsigned int len;
	struct sfq_slot *slot;

	/* Queue is full! Find the longest slot and drop tail packet from it */
	if (d > 1) {
		x = q->dep[d].next;
		slot = &q->slots[x];
drop:
		skb = slot_dequeue_tail(slot);
		len = qdisc_pkt_len(skb);
		sfq_dec(q, x);
		kfree_skb(skb);
		sch->q.qlen--;
		sch->qstats.drops++;
		sch->qstats.backlog -= len;
		return len;
	}

	if (d == 1) {
		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
		/* the victim slot becomes empty, so unlink it from the
		 * ring and free its hash bucket before dropping
		 */
		x = q->tail->next;
		slot = &q->slots[x];
		q->tail->next = slot->next;
		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
		goto drop;
	}

	return 0;
}
352
353static int
Stephen Hemminger6f9e98f2008-01-20 17:20:56 -0800354sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355{
356 struct sfq_sched_data *q = qdisc_priv(sch);
Patrick McHardy7d2681a2008-01-31 18:36:52 -0800357 unsigned int hash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 sfq_index x;
Eric Dumazeteda83e32010-12-20 12:54:58 +0000359 struct sfq_slot *slot;
Jarek Poplawski7f3ff4f2008-12-21 20:14:48 -0800360 int uninitialized_var(ret);
Patrick McHardy7d2681a2008-01-31 18:36:52 -0800361
362 hash = sfq_classify(skb, sch, &ret);
363 if (hash == 0) {
Jarek Poplawskic27f3392008-08-04 22:39:11 -0700364 if (ret & __NET_XMIT_BYPASS)
Patrick McHardy7d2681a2008-01-31 18:36:52 -0800365 sch->qstats.drops++;
366 kfree_skb(skb);
367 return ret;
368 }
369 hash--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370
371 x = q->ht[hash];
Eric Dumazeteda83e32010-12-20 12:54:58 +0000372 slot = &q->slots[x];
373 if (x == SFQ_EMPTY_SLOT) {
374 x = q->dep[0].next; /* get a free slot */
375 q->ht[hash] = x;
376 slot = &q->slots[x];
377 slot->hash = hash;
378 slot_queue_init(slot);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 }
Stephen Hemminger6f9e98f2008-01-20 17:20:56 -0800380
Eric Dumazeteda83e32010-12-20 12:54:58 +0000381 /* If selected queue has length q->limit, do simple tail drop,
Alexey Kuznetsov32740dd2007-09-30 17:51:33 -0700382 * i.e. drop _this_ packet.
383 */
Eric Dumazeteda83e32010-12-20 12:54:58 +0000384 if (slot->qlen >= q->limit)
Alexey Kuznetsov32740dd2007-09-30 17:51:33 -0700385 return qdisc_drop(skb, sch);
386
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700387 sch->qstats.backlog += qdisc_pkt_len(skb);
Eric Dumazeteda83e32010-12-20 12:54:58 +0000388 slot_queue_add(slot, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389 sfq_inc(q, x);
Eric Dumazeteda83e32010-12-20 12:54:58 +0000390 if (slot->qlen == 1) { /* The flow is new */
391 if (q->tail == NULL) { /* It is the first flow */
392 slot->next = x;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393 } else {
Eric Dumazeteda83e32010-12-20 12:54:58 +0000394 slot->next = q->tail->next;
395 q->tail->next = x;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396 }
Eric Dumazeteda83e32010-12-20 12:54:58 +0000397 q->tail = slot;
398 slot->allot = q->quantum;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399 }
Alexey Kuznetsov5588b402007-09-19 10:42:03 -0700400 if (++sch->q.qlen <= q->limit) {
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700401 sch->bstats.bytes += qdisc_pkt_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402 sch->bstats.packets++;
Ben Greear9871e502010-08-10 01:45:40 -0700403 return NET_XMIT_SUCCESS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 }
405
406 sfq_drop(sch);
407 return NET_XMIT_CN;
408}
409
Patrick McHardy48a8f512008-10-31 00:44:18 -0700410static struct sk_buff *
411sfq_peek(struct Qdisc *sch)
412{
413 struct sfq_sched_data *q = qdisc_priv(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414
Patrick McHardy48a8f512008-10-31 00:44:18 -0700415 /* No active slots */
Eric Dumazeteda83e32010-12-20 12:54:58 +0000416 if (q->tail == NULL)
Patrick McHardy48a8f512008-10-31 00:44:18 -0700417 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418
Eric Dumazeteda83e32010-12-20 12:54:58 +0000419 return q->slots[q->tail->next].skblist_next;
Patrick McHardy48a8f512008-10-31 00:44:18 -0700420}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421
/*
 * Dequeue from the head of the slot at the front of the round-robin
 * ring. Empty slots are retired from the ring; a slot that exhausted
 * its byte allotment is moved to the ring tail and recharged.
 */
static struct sk_buff *
sfq_dequeue(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	sfq_index a, next_a;
	struct sfq_slot *slot;

	/* No active slots */
	if (q->tail == NULL)
		return NULL;

	a = q->tail->next;
	slot = &q->slots[a];
	skb = slot_dequeue_head(slot);
	sfq_dec(q, a);
	sch->q.qlen--;
	sch->qstats.backlog -= qdisc_pkt_len(skb);

	/* Is the slot empty? */
	if (slot->qlen == 0) {
		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
		next_a = slot->next;
		/* a ring of one slot just became empty: qdisc is idle */
		if (a == next_a) {
			q->tail = NULL; /* no more active slots */
			return skb;
		}
		q->tail->next = next_a;
	} else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
		/* allotment spent: rotate the slot to the ring tail and
		 * carry the (negative) remainder into the next round
		 */
		q->tail = slot;
		slot->allot += q->quantum;
	}
	return skb;
}
456
457static void
Stephen Hemminger6f9e98f2008-01-20 17:20:56 -0800458sfq_reset(struct Qdisc *sch)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459{
460 struct sk_buff *skb;
461
462 while ((skb = sfq_dequeue(sch)) != NULL)
463 kfree_skb(skb);
464}
465
466static void sfq_perturbation(unsigned long arg)
467{
Stephen Hemminger6f9e98f2008-01-20 17:20:56 -0800468 struct Qdisc *sch = (struct Qdisc *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700469 struct sfq_sched_data *q = qdisc_priv(sch);
470
Stephen Hemmingerd46f8dd2008-01-20 17:19:43 -0800471 q->perturbation = net_random();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472
Alexey Kuznetsov32740dd2007-09-30 17:51:33 -0700473 if (q->perturb_period)
474 mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475}
476
/*
 * Apply TCA_OPTIONS (struct tc_sfq_qopt): quantum, perturbation period
 * and packet limit. Excess packets are dropped under the tree lock so
 * qlen accounting stays consistent with the parent qdiscs.
 */
static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tc_sfq_qopt *ctl = nla_data(opt);
	unsigned int qlen;

	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
		return -EINVAL;

	sch_tree_lock(sch);
	/* quantum of 0 means "use the device MTU" */
	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
	q->perturb_period = ctl->perturb_period * HZ;
	/* limit is capped so a slot index of SFQ_DEPTH is never needed */
	if (ctl->limit)
		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);

	qlen = sch->q.qlen;
	while (sch->q.qlen > q->limit)
		sfq_drop(sch);
	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);

	/* restart (or stop) the perturbation timer with the new period */
	del_timer(&q->perturb_timer);
	if (q->perturb_period) {
		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
		q->perturbation = net_random();
	}
	sch_tree_unlock(sch);
	return 0;
}
505
/*
 * Initialize the qdisc: empty hash table, empty dep[] chains, defaults
 * (or user options via sfq_change()), then link every slot into the
 * depth-0 "free" chain.
 */
static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	int i;

	q->perturb_timer.function = sfq_perturbation;
	q->perturb_timer.data = (unsigned long)sch;
	init_timer_deferrable(&q->perturb_timer);

	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
		q->ht[i] = SFQ_EMPTY_SLOT;

	/* each dep[] head starts empty: both links point back at itself */
	for (i = 0; i < SFQ_DEPTH; i++) {
		q->dep[i].next = i + SFQ_SLOTS;
		q->dep[i].prev = i + SFQ_SLOTS;
	}

	q->limit = SFQ_DEPTH - 1;
	q->cur_depth = 0;
	q->tail = NULL;
	if (opt == NULL) {
		q->quantum = psched_mtu(qdisc_dev(sch));
		q->perturb_period = 0;
		q->perturbation = net_random();
	} else {
		int err = sfq_change(sch, opt);
		if (err)
			return err;
	}

	/* all slots have qlen 0: put them all on the free (depth-0) chain */
	for (i = 0; i < SFQ_SLOTS; i++)
		sfq_link(q, i);
	return 0;
}
540
static void sfq_destroy(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);

	tcf_destroy_chain(&q->filter_list);
	/* zero the period first so a concurrently running timer callback
	 * cannot rearm itself before del_timer_sync() completes
	 */
	q->perturb_period = 0;
	del_timer_sync(&q->perturb_timer);
}
549
/*
 * Dump configuration as a TCA_OPTIONS attribute. Note: NLA_PUT() hides
 * a goto to nla_put_failure on skb overflow, where the partial write is
 * trimmed back to @b.
 */
static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_sfq_qopt opt;

	opt.quantum = q->quantum;
	opt.perturb_period = q->perturb_period / HZ;

	opt.limit = q->limit;
	opt.divisor = SFQ_HASH_DIVISOR;
	opt.flows = q->limit;

	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
571
/* SFQ classes are virtual hash buckets: they carry no inner qdisc */
static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
{
	return NULL;
}
576
/* Classes are not refcounted objects: always hand back a null reference */
static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
{
	return 0;
}
581
/* Binding a filter to a class takes no reference: return a null cookie */
static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
			      u32 classid)
{
	return 0;
}
587
/* Nothing to release: sfq_get()/sfq_bind() never take a reference */
static void sfq_put(struct Qdisc *q, unsigned long cl)
{
}
591
Patrick McHardy7d2681a2008-01-31 18:36:52 -0800592static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
593{
594 struct sfq_sched_data *q = qdisc_priv(sch);
595
596 if (cl)
597 return NULL;
598 return &q->filter_list;
599}
600
/* Report a class handle whose minor number is the class id itself */
static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	tcm->tcm_handle |= TC_H_MIN(cl);
	return 0;
}
607
/*
 * Per-class statistics: @cl is a 1-based bucket id (see sfq_walk());
 * report qlen, byte backlog and allot of the slot bound to that bucket,
 * or all-zero stats when the bucket is currently empty.
 */
static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
				struct gnet_dump *d)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index idx = q->ht[cl - 1];
	struct gnet_stats_queue qs = { 0 };
	struct tc_sfq_xstats xstats = { 0 };
	struct sk_buff *skb;

	if (idx != SFQ_EMPTY_SLOT) {
		const struct sfq_slot *slot = &q->slots[idx];

		xstats.allot = slot->allot;
		qs.qlen = slot->qlen;
		slot_queue_walk(slot, skb)
			qs.backlog += qdisc_pkt_len(skb);
	}
	if (gnet_stats_copy_queue(d, &qs) < 0)
		return -1;
	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}
629
/* Walk all hash buckets, presenting non-empty ones as classes (id = i + 1) */
static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
		/* skip empty buckets and the first arg->skip entries */
		if (q->ht[i] == SFQ_EMPTY_SLOT ||
		    arg->count < arg->skip) {
			arg->count++;
			continue;
		}
		if (arg->fn(sch, i + 1, arg) < 0) {
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}
651
/* Class hooks: buckets are exposed as read-only pseudo-classes so that
 * filters can be attached and per-flow statistics dumped.
 */
static const struct Qdisc_class_ops sfq_class_ops = {
	.leaf = sfq_leaf,
	.get = sfq_get,
	.put = sfq_put,
	.tcf_chain = sfq_find_tcf,
	.bind_tcf = sfq_bind,
	.unbind_tcf = sfq_put,
	.dump = sfq_dump_class,
	.dump_stats = sfq_dump_class_stats,
	.walk = sfq_walk,
};
663
/* Qdisc operations registered under the "sfq" scheduler id */
static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
	.cl_ops = &sfq_class_ops,
	.id = "sfq",
	.priv_size = sizeof(struct sfq_sched_data),
	.enqueue = sfq_enqueue,
	.dequeue = sfq_dequeue,
	.peek = sfq_peek,
	.drop = sfq_drop,
	.init = sfq_init,
	.reset = sfq_reset,
	.destroy = sfq_destroy,
	.change = NULL,
	.dump = sfq_dump,
	.owner = THIS_MODULE,
};
679
/* Make the "sfq" qdisc available to tc at module load */
static int __init sfq_module_init(void)
{
	return register_qdisc(&sfq_qdisc_ops);
}
/* Remove the "sfq" qdisc registration at module unload */
static void __exit sfq_module_exit(void)
{
	unregister_qdisc(&sfq_qdisc_ops);
}
/* module plumbing: register/unregister the qdisc on load/unload */
module_init(sfq_module_init)
module_exit(sfq_module_exit)
MODULE_LICENSE("GPL");