/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *			HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *			found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *			helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *			code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *			created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *			spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *			fixed requeue routine
 *		and many others. thanks.
 *
 * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
 */
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/rbtree.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. A leaf is ALWAYS at level 0 and root
    classes are at level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
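
/* Worked example (editor's illustration, not from the original source;
   classids are arbitrary and TC_HTB_MAXDEPTH is assumed to be 8):

	1:1   root inner class   -> level 7 (TC_HTB_MAXDEPTH-1)
	1:10  inner child of 1:1 -> level 6 (one less than its parent)
	1:100 leaf under 1:10    -> level 0 (leaves are always level 0)

   The per-level arrays (row, wait_pq, near_ev_cache below) are indexed
   by exactly these level numbers. */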

#define HTB_HSIZE 16		/* classid hash size */
#define HTB_EWMAC 2		/* rate average over HTB_EWMAC*HTB_HSIZE sec */
#define HTB_RATECM 1		/* whether to use rate computer */
#define HTB_HYSTERESIS 1	/* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* used internally to keep the status of a single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class {
	/* general class parameters */
	u32 classid;
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_htb_xstats xstats;	/* our special stats */
	int refcnt;		/* usage count of this class */

#ifdef HTB_RATECM
	/* rate measurement counters */
	unsigned long rate_bytes, sum_bytes;
	unsigned long rate_packets, sum_packets;
#endif

	/* topology */
	int level;		/* our level (see above) */
	struct htb_class *parent;	/* parent class */
	struct list_head hlist;	/* classid hash list item */
	struct list_head sibling;	/* sibling list item */
	struct list_head children;	/* children list */

	union {
		struct htb_class_leaf {
			struct Qdisc *q;
			int prio;
			int aprio;
			int quantum;
			int deficit[TC_HTB_MAXDEPTH];
			struct list_head drop_list;
		} leaf;
		struct htb_class_inner {
			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
			/* When a class changes from state 1->2 and disconnects
			   from its parent's feed we lose the ptr value and have
			   to start from the first child again. Here we store
			   the classid of the last valid ptr (used when ptr is
			   NULL). */
			u32 last_ptr_id[TC_HTB_NUMPRIO];
		} inner;
	} un;
	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
	struct rb_node pq_node;	/* node for event queue */
	unsigned long pq_key;	/* the same type as jiffies global */

	int prio_activity;	/* for which prios are we active */
	enum htb_cmode cmode;	/* current mode of the class */

	/* class attached filters */
	struct tcf_proto *filter_list;
	int filter_cnt;

	int warned;		/* only one warning about non work conserving .. */

	/* token bucket parameters */
	struct qdisc_rate_table *rate;	/* rate table of the class itself */
	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
	long buffer, cbuffer;	/* token bucket depth/rate */
	psched_tdiff_t mbuffer;	/* max wait time */
	long tokens, ctokens;	/* current number of tokens */
	psched_time_t t_c;	/* checkpoint time */
};

/* TODO: maybe compute rate when size is too large .. or drop ? */
static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
		       int size)
{
	int slot = size >> rate->rate.cell_log;
	if (slot > 255) {
		cl->xstats.giants++;
		slot = 255;
	}
	return rate->data[slot];
}
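
/* Worked example (editor's illustration): with rate.cell_log == 3 the
   cell size is 8 bytes, so a 100 byte packet selects slot 100 >> 3 == 12
   and rate->data[12] yields the precomputed transmission time for all
   sizes 96..103 bytes; anything beyond slot 255 is clamped and counted
   in xstats.giants. */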

struct htb_sched {
	struct list_head root;	/* root classes list */
	struct list_head hash[HTB_HSIZE];	/* hashed by classid */
	struct list_head drops[TC_HTB_NUMPRIO];	/* active leaves (for drops) */

	/* self list - roots of self generating tree */
	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	int row_mask[TC_HTB_MAXDEPTH];
	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

	/* self wait list - roots of wait PQs per row */
	struct rb_root wait_pq[TC_HTB_MAXDEPTH];

	/* time of nearest event per level (row) */
	unsigned long near_ev_cache[TC_HTB_MAXDEPTH];

	/* cached value of jiffies in dequeue */
	unsigned long jiffies;

	/* whether we hit a non-work-conserving class during this dequeue;
	   we use this to disable the mindelay complaint in dequeue */
	int nwc_hit;

	int defcls;		/* class where unclassified flows go to */

	/* filters for qdisc itself */
	struct tcf_proto *filter_list;
	int filter_cnt;

	int rate2quantum;	/* quant = rate / rate2quantum */
	psched_time_t now;	/* cached dequeue time */
	struct timer_list timer;	/* send delay timer */
#ifdef HTB_RATECM
	struct timer_list rttim;	/* rate computer timer */
	int recmp_bucket;	/* which hash bucket to recompute next */
#endif

	/* non-shaped skbs; let them go directly through */
	struct sk_buff_head direct_queue;
	int direct_qlen;	/* max qlen of above */

	long direct_pkts;
};

/* compute hash of size HTB_HSIZE for given handle */
static inline int htb_hash(u32 h)
{
#if HTB_HSIZE != 16
#error "Declare new hash for your HTB_HSIZE"
#endif
	h ^= h >> 8;		/* stolen from cbq_hash */
	h ^= h >> 4;
	return h & 0xf;
}
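
/* Worked example (editor's illustration): for handle 0x00010005 (classid
   1:5) the folds give 0x00010005 ^ 0x00000100 = 0x00010105, then
   0x00010105 ^ 0x00001010 = 0x00011115, and 0x00011115 & 0xf = 5, so the
   class lands in hash bucket 5. */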

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct list_head *p;
	if (TC_H_MAJ(handle) != sch->handle)
		return NULL;

	list_for_each(p, q->hash + htb_hash(handle)) {
		struct htb_class *cl = list_entry(p, struct htb_class, hlist);
		if (cl->classid == handle)
			return cl;
	}
	return NULL;
}

/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly through. In all other cases a leaf class is
 * returned. We allow direct class selection by classid in skb->priority.
 * Then we examine filters in the qdisc and in inner nodes (if a higher
 * filter points to an inner node). If we end up with classid MAJOR:0 we
 * enqueue the skb into the special internal fifo (direct). These packets
 * then go directly through. If we still have no valid leaf we try to use
 * the MAJOR:default leaf. If that is still unsuccessful we finish and
 * return the direct queue.
 */
#define HTB_DIRECT (struct htb_class*)-1
static inline u32 htb_classid(struct htb_class *cl)
{
	return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
}

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow selecting the class by setting skb->priority to a valid
	   classid; note that nfmark can be used too by attaching an fw
	   filter with no rules in it */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
		return cl;

	*qerr = NET_XMIT_BYPASS;
	tcf = q->filter_list;
	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS;
		case TC_ACT_SHOT:
			return NULL;
		}
#elif defined(CONFIG_NET_CLS_POLICE)
		if (result == TC_POLICE_SHOT)
			return HTB_DIRECT;
#endif
		if ((cl = (void *)res.class) == NULL) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			if ((cl = htb_find(res.classid, sch)) == NULL)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got an inner class; apply inner filter chain */
		tcf = cl->filter_list;
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is safe bet */
	return cl;
}
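
/* Resulting lookup order, sketched by the editor for a qdisc with handle
   1: and default class 20 (classids are hypothetical): skb->priority set
   to 1:0 goes straight to the direct queue; priority set to an existing
   leaf such as 1:12 selects that leaf; otherwise the filter chains run,
   and an skb matching nothing falls back to leaf 1:20. */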

/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds the class to the list (actually tree) sorted by classid.
 * Make sure that the class is not already on such a list for the given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->classid > c->classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to the priority event queue to indicate that the
 * class will change its mode at time cl->pq_key (in jiffies). Make sure
 * that the class is not already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, long delay)
{
	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;

	cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
	if (cl->pq_key == q->jiffies)
		cl->pq_key++;

	/* update the nearest event cache */
	if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (time_after_eq(cl->pq_key, c->pq_key))
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past the last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to the row at the priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
	}
}

/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from the row at the priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;

	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		if (q->ptr[cl->level][prio] == cl->node + prio)
			htb_next_rb_node(q->ptr[cl->level] + prio);
		rb_erase(cl->node + prio, q->row[cl->level] + prio);
		if (!q->row[cl->level][prio].rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates an active class's feed chain
 *
 * The class is connected to its ancestors and/or the appropriate rows
 * for the priorities it participates in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.feed[prio].rb_node)
				/* parent already has its feed in use, so
				   reset the bit in mask as the parent is
				   already ok */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}
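
/* Editor's note on the bit walk above: ffz(~m) returns the index of the
   lowest set bit of m, so for mask == 0x5 (prios 0 and 2 active) the
   inner loop handles prio 0 first, clears bit 0, then handles prio 2. */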

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent the old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. The class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.ptr[prio] == cl->node + prio) {
				/* we are removing a child which is pointed
				   to from the parent feed - forget the
				   pointer but remember the classid */
				p->un.inner.last_ptr_id[prio] = cl->classid;
				p->un.inner.ptr[prio] = NULL;
			}

			rb_erase(cl->node + prio, p->un.inner.feed + prio);

			if (!p->un.inner.feed[prio].rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

#if HTB_HYSTERESIS
static inline long htb_lowater(const struct htb_class *cl)
{
	return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
}
static inline long htb_hiwater(const struct htb_class *cl)
{
	return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
}
#else
#define htb_lowater(cl)	(0)
#define htb_hiwater(cl)	(0)
#endif

/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to the time difference
 * from now to the time when cl will change its state.
 * It is also worth noting that the class mode doesn't change simply
 * at cl->{c,}tokens == 0; rather there can be hysteresis in the
 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, long *diff)
{
	long toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}
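
/* Worked example (editor's illustration, *diff == 0): a class currently
   in HTB_CAN_SEND with tokens == -10, buffer == 100 and ctokens > 0
   stays in HTB_CAN_SEND, because with hysteresis the hiwater threshold
   is -cl->buffer == -100; without hysteresis (hiwater 0) the same state
   would already flip the class to HTB_MAY_BORROW. */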

/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. The routine will update the feed list linkage, change
 * the mode and add the class to the wait event queue if appropriate.
 * The new mode should be different from the old one and cl->pq_key has
 * to be valid if changing to a mode other than HTB_CAN_SEND (see
 * htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns the (new) priority of the leaf and activates the feed
 * chain for that prio. It can safely be called on an already active leaf.
 * It also adds the leaf into the droplist.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
		htb_activate_prios(q, cl);
		list_add_tail(&cl->un.leaf.drop_list,
			      q->drops + cl->un.leaf.aprio);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that the leaf is active. In other words, this can't be
 * called with a non-active leaf. It also removes the class from the
 * drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	BUG_TRAP(cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
	list_del_init(&cl->un.leaf.drop_list);
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	int ret;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__skb_queue_tail(&q->direct_queue, skb);
			q->direct_pkts++;
		} else {
			kfree_skb(skb);
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
#endif
	} else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
		   NET_XMIT_SUCCESS) {
		sch->qstats.drops++;
		cl->qstats.drops++;
		return NET_XMIT_DROP;
	} else {
		cl->bstats.packets++;
		cl->bstats.bytes += skb->len;
		htb_activate(q, cl);
	}

	sch->q.qlen++;
	sch->bstats.packets++;
	sch->bstats.bytes += skb->len;
	return NET_XMIT_SUCCESS;
}

/* TODO: requeuing packet charges it to policers again !! */
static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int ret = NET_XMIT_SUCCESS;
	struct htb_class *cl = htb_classify(skb, sch, &ret);
	struct sk_buff *tskb;

	if (cl == HTB_DIRECT || !cl) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen && cl) {
			__skb_queue_head(&q->direct_queue, skb);
		} else {
			__skb_queue_head(&q->direct_queue, skb);
			tskb = __skb_dequeue_tail(&q->direct_queue);
			kfree_skb(tskb);
			sch->qstats.drops++;
			return NET_XMIT_CN;
		}
	} else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
		   NET_XMIT_SUCCESS) {
		sch->qstats.drops++;
		cl->qstats.drops++;
		return NET_XMIT_DROP;
	} else
		htb_activate(q, cl);

	sch->q.qlen++;
	sch->qstats.requeues++;
	return NET_XMIT_SUCCESS;
}

static void htb_timer(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	sch->flags &= ~TCQ_F_THROTTLED;
	wmb();
	netif_schedule(sch->dev);
}

#ifdef HTB_RATECM
#define RT_GEN(D,R) R += D - (R / HTB_EWMAC); D = 0
static void htb_rate_timer(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	struct htb_sched *q = qdisc_priv(sch);
	struct list_head *p;

	/* lock queue so that we can muck with it */
	spin_lock_bh(&sch->dev->queue_lock);

	q->rttim.expires = jiffies + HZ;
	add_timer(&q->rttim);

	/* scan and recompute one bucket at a time */
	if (++q->recmp_bucket >= HTB_HSIZE)
		q->recmp_bucket = 0;
	list_for_each(p, q->hash + q->recmp_bucket) {
		struct htb_class *cl = list_entry(p, struct htb_class, hlist);

		RT_GEN(cl->sum_bytes, cl->rate_bytes);
		RT_GEN(cl->sum_packets, cl->rate_packets);
	}
	spin_unlock_bh(&sch->dev->queue_lock);
}
#endif

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that a packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to the ceil leaky bucket
 * for the leaf and all ancestors and to the rate bucket for ancestors
 * at levels "level" and higher. It also handles a possible change of
 * mode resulting from the update. Note that the mode can also increase
 * here (MAY_BORROW to CAN_SEND) because we use a more precise clock
 * than the event queue here. In such a case we remove the class from
 * the event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, int bytes)
{
	long toks, diff;
	enum htb_cmode old_mode;

#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
	if (toks > cl->B) toks = cl->B; \
	toks -= L2T(cl, cl->R, bytes); \
	if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
	cl->T = toks

	while (cl) {
		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			HTB_ACCNT(tokens, buffer, rate);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		HTB_ACCNT(ctokens, cbuffer, ceil);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				rb_erase(&cl->pq_node, q->wait_pq + cl->level);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}
#ifdef HTB_RATECM
		/* update rate counters */
		cl->sum_bytes += bytes;
		cl->sum_packets++;
#endif

		/* update byte stats except for leaves which are already updated */
		if (cl->level) {
			cl->bstats.bytes += bytes;
			cl->bstats.packets++;
		}
		cl = cl->parent;
	}
}
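
/* Worked example of HTB_ACCNT (editor's illustration, arbitrary numbers):
   with diff == 50, cl->tokens == 80 and cl->buffer == 100, toks is first
   clamped to the bucket depth 100, then the packet's transmission time
   L2T(cl, cl->rate, bytes), say 30, is subtracted, leaving cl->tokens ==
   70; the -cl->mbuffer clamp bounds how far a class can go into debt. */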

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans the event queue for pending events and applies them. Returns the
 * number of jiffies to the next pending event (0 if there is no event in
 * the pq).
 * Note: applied are events which have cl->pq_key <= q->jiffies.
 */
static long htb_do_events(struct htb_sched *q, int level)
{
	int i;

	for (i = 0; i < 500; i++) {
		struct htb_class *cl;
		long diff;
		struct rb_node *p = q->wait_pq[level].rb_node;
		if (!p)
			return 0;
		while (p->rb_left)
			p = p->rb_left;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (time_after(cl->pq_key, q->jiffies)) {
			return cl->pq_key - q->jiffies;
		}
		rb_erase(p, q->wait_pq + level);
		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}
	if (net_ratelimit())
		printk(KERN_WARNING "htb: too many events !\n");
	return HZ / 10;
}

/* Returns class->node+prio from the id-tree where the class's id is >= id.
   NULL if no such one exists. */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
		    rb_entry(n, struct htb_class, node[prio]);
		if (id == cl->classid)
			return n;

		if (id > cl->classid) {
			n = n->rb_right;
		} else {
			r = n;
			n = n->rb_left;
		}
	}
	return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find the leaf where the current feed pointer points.
 */
static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
					 struct rb_node **pptr, u32 *pid)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_TRAP(tree->rb_node);
	sp->root = tree->rb_node;
	sp->pptr = pptr;
	sp->pid = pid;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but the id is valid - try to
			   recover the original or next ptr */
			*sp->pptr =
			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now, so remove this hint as
				   it can become out of date quickly */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				BUG_TRAP(*sp->pptr);
				if (!*sp->pptr)
					return NULL;
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			(++sp)->root = cl->un.inner.feed[prio].rb_node;
			sp->pptr = cl->un.inner.ptr + prio;
			sp->pid = cl->un.inner.last_ptr_id + prio;
		}
	}
	BUG_TRAP(0);
	return NULL;
}

/* dequeues packet at given priority and level; call only if
   you are sure that there is an active class at prio/level */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
					int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	do {
next:
		BUG_TRAP(cl);
		if (!cl)
			return NULL;

		/* class can be empty - it is unlikely but can be true if the
		   leaf qdisc drops packets in its enqueue routine or if
		   someone used the graft operation on the leaf since the
		   last dequeue; simply deactivate and skip such a class */
		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(q->row[level] + prio,
					       prio, q->ptr[level] + prio,
					       q->last_ptr_id[level] + prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
		if (likely(skb != NULL))
			break;
		if (!cl->warned) {
			printk(KERN_WARNING
			       "htb: class %X isn't work conserving ?!\n",
			       cl->classid);
			cl->warned = 1;
		}
		q->nwc_hit++;
		htb_next_rb_node((level ? cl->parent->un.inner.ptr :
				  q->ptr[0]) + prio);
		cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
			cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
			htb_next_rb_node((level ? cl->parent->un.inner.ptr :
					  q->ptr[0]) + prio);
		}
		/* this used to be after charge_class but this arrangement
		   gives us slightly better performance */
		if (!cl->un.leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb->len);
	}
	return skb;
}
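
/* Worked example of the DRR step above (editor's illustration, arbitrary
   numbers): a leaf with quantum == 1500 and deficit[level] == 200 that
   sends a 400 byte packet goes to deficit -200 < 0, so it receives
   another quantum (deficit 1300) and the round-robin pointer advances to
   the next leaf; smaller packets leave the pointer on the same leaf
   until its deficit is used up. */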

static void htb_delay_by(struct Qdisc *sch, long delay)
{
	struct htb_sched *q = qdisc_priv(sch);
	if (delay <= 0)
		delay = 1;
	if (unlikely(delay > 5 * HZ)) {
		if (net_ratelimit())
			printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
		delay = 5 * HZ;
	}
	/* why not use jiffies here? because expires can be in the past */
	mod_timer(&q->timer, q->jiffies + delay);
	sch->flags |= TCQ_F_THROTTLED;
	sch->qstats.overlimits++;
}

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb = NULL;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	long min_delay;

	q->jiffies = jiffies;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __skb_dequeue(&q->direct_queue);
	if (skb != NULL) {
		sch->flags &= ~TCQ_F_THROTTLED;
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	PSCHED_GET_TIME(q->now);

	min_delay = LONG_MAX;
	q->nwc_hit = 0;
	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		long delay;
		if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
			delay = htb_do_events(q, level);
			q->near_ev_cache[level] =
			    q->jiffies + (delay ? delay : HZ);
		} else
			delay = q->near_ev_cache[level] - q->jiffies;

		if (delay && min_delay > delay)
			min_delay = delay;
		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);
			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL)) {
				sch->q.qlen--;
				sch->flags &= ~TCQ_F_THROTTLED;
				goto fin;
			}
		}
	}
	htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay);
fin:
	return skb;
}

/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int prio;

	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
		struct list_head *p;
		list_for_each(p, q->drops + prio) {
			struct htb_class *cl = list_entry(p, struct htb_class,
							  un.leaf.drop_list);
			unsigned int len;
			if (cl->un.leaf.q->ops->drop &&
			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
				sch->q.qlen--;
				if (!cl->un.leaf.q->q.qlen)
					htb_deactivate(q, cl);
				return len;
			}
		}
	}
	return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int i;

	for (i = 0; i < HTB_HSIZE; i++) {
		struct list_head *p;
		list_for_each(p, q->hash + i) {
			struct htb_class *cl =
			    list_entry(p, struct htb_class, hlist);
			if (cl->level)
				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
			else {
				if (cl->un.leaf.q)
					qdisc_reset(cl->un.leaf.q);
				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;

		}
	}
	sch->flags &= ~TCQ_F_THROTTLED;
	del_timer(&q->timer);
	__skb_queue_purge(&q->direct_queue);
	sch->q.qlen = 0;
	memset(q->row, 0, sizeof(q->row));
	memset(q->row_mask, 0, sizeof(q->row_mask));
	memset(q->wait_pq, 0, sizeof(q->wait_pq));
	memset(q->ptr, 0, sizeof(q->ptr));
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);
}

static int htb_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct rtattr *tb[TCA_HTB_INIT];
	struct tc_htb_glob *gopt;
	int i;
	if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) ||
	    tb[TCA_HTB_INIT - 1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) {
		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
		return -EINVAL;
	}
	gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]);
	if (gopt->version != HTB_VER >> 16) {
		printk(KERN_ERR
		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
		return -EINVAL;
	}

	INIT_LIST_HEAD(&q->root);
	for (i = 0; i < HTB_HSIZE; i++)
		INIT_LIST_HEAD(q->hash + i);
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);

	init_timer(&q->timer);
	skb_queue_head_init(&q->direct_queue);

	q->direct_qlen = sch->dev->tx_queue_len;
	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
		q->direct_qlen = 2;
	q->timer.function = htb_timer;
	q->timer.data = (unsigned long)sch;

#ifdef HTB_RATECM
	init_timer(&q->rttim);
	q->rttim.function = htb_rate_timer;
	q->rttim.data = (unsigned long)sch;
	q->rttim.expires = jiffies + HZ;
	add_timer(&q->rttim);
#endif
	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	return 0;
}
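
/* Typical userspace setup that exercises this init path (editor's
   illustration; device name and rates are arbitrary):

	tc qdisc add dev eth0 root handle 1: htb default 20
	tc class add dev eth0 parent 1: classid 1:1 htb rate 1mbit
	tc class add dev eth0 parent 1:1 classid 1:20 htb rate 500kbit ceil 1mbit

   "default 20" becomes q->defcls above; unclassified traffic then falls
   back to leaf 1:20 in htb_classify(). */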

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct htb_sched *q = qdisc_priv(sch);
	unsigned char *b = skb->tail;
	struct rtattr *rta;
	struct tc_htb_glob gopt;
	spin_lock_bh(&sch->dev->queue_lock);
	gopt.direct_pkts = q->direct_pkts;

	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;
	rta = (struct rtattr *)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
	rta->rta_len = skb->tail - b;
	spin_unlock_bh(&sch->dev->queue_lock);
	return skb->len;
rtattr_failure:
	spin_unlock_bh(&sch->dev->queue_lock);
	skb_trim(skb, skb->tail - skb->data);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	unsigned char *b = skb->tail;
	struct rtattr *rta;
	struct tc_htb_opt opt;

	spin_lock_bh(&sch->dev->queue_lock);
	tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
	tcm->tcm_handle = cl->classid;
	if (!cl->level && cl->un.leaf.q)
		tcm->tcm_info = cl->un.leaf.q->handle;

	rta = (struct rtattr *)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);

	memset(&opt, 0, sizeof(opt));

	opt.rate = cl->rate->rate;
	opt.buffer = cl->buffer;
	opt.ceil = cl->ceil->rate;
	opt.cbuffer = cl->cbuffer;
	opt.quantum = cl->un.leaf.quantum;
	opt.prio = cl->un.leaf.prio;
	opt.level = cl->level;
	RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
	rta->rta_len = skb->tail - b;
	spin_unlock_bh(&sch->dev->queue_lock);
	return skb->len;
rtattr_failure:
	spin_unlock_bh(&sch->dev->queue_lock);
	skb_trim(skb, b - skb->data);
	return -1;
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;

#ifdef HTB_RATECM
	cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE);
	cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE);
#endif

	if (!cl->level && cl->un.leaf.q)
		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
	cl->xstats.tokens = cl->tokens;
	cl->xstats.ctokens = cl->ctokens;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl && !cl->level) {
		if (new == NULL && (new = qdisc_create_dflt(sch->dev,
							    &pfifo_qdisc_ops))
		    == NULL)
			return -ENOBUFS;
		sch_tree_lock(sch);
		if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
			if (cl->prio_activity)
				htb_deactivate(qdisc_priv(sch), cl);

			/* TODO: is it correct? Why doesn't CBQ do it? */
			sch->q.qlen -= (*old)->q.qlen;
			qdisc_reset(*old);
		}
		sch_tree_unlock(sch);
		return 0;
	}
	return -ENOENT;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return (cl && !cl->level) ? cl->un.leaf.q : NULL;
}

static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);
	if (cl)
		cl->refcnt++;
	return (unsigned long)cl;
}

static void htb_destroy_filters(struct tcf_proto **fl)
{
	struct tcf_proto *tp;

	while ((tp = *fl) != NULL) {
		*fl = tp->next;
		tcf_destroy(tp);
	}
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	struct htb_sched *q = qdisc_priv(sch);
	if (!cl->level) {
		BUG_TRAP(cl->un.leaf.q);
		sch->q.qlen -= cl->un.leaf.q->q.qlen;
		qdisc_destroy(cl->un.leaf.q);
	}
	qdisc_put_rtab(cl->rate);
	qdisc_put_rtab(cl->ceil);

	htb_destroy_filters(&cl->filter_list);

	while (!list_empty(&cl->children))
		htb_destroy_class(sch, list_entry(cl->children.next,
						  struct htb_class, sibling));

	/* note: this delete may happen twice (see htb_delete) */
	list_del(&cl->hlist);
	list_del(&cl->sibling);

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		rb_erase(&cl->pq_node, q->wait_pq + cl->level);

	kfree(cl);
}
1283
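/*
 * htb_destroy - tear down the entire qdisc: stop the timers, drop all
 * filters (which must happen before the classes go away), destroy the
 * class tree and purge the direct queue.
 */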
/* always called under BH & queue lock */
static void htb_destroy(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);

	del_timer_sync(&q->timer);
#ifdef HTB_RATECM
	del_timer_sync(&q->rttim);
#endif
	/* This line used to be after the htb_destroy_class call below,
	   and surprisingly it worked in 2.4. But it must precede it
	   because a filter needs its target class alive to be able to
	   call unbind_filter on it (without an Oops). */
	htb_destroy_filters(&q->filter_list);

	while (!list_empty(&q->root))
		htb_destroy_class(sch, list_entry(q->root.next,
						  struct htb_class, sibling));

	__skb_queue_purge(&q->direct_queue);
}

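/*
 * htb_delete - remove a single class. Refused while the class still has
 * children or bound filters; the final free happens in htb_destroy_class
 * once the last reference is dropped.
 */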
static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;

	// TODO: why don't we allow deleting a subtree? references? does the
	// tc subsystem guarantee us that in htb_destroy it holds no class
	// refs, so that we can remove children safely there?
	if (!list_empty(&cl->children) || cl->filter_cnt)
		return -EBUSY;

	sch_tree_lock(sch);

	/* delete from hash and active; remainder in destroy_class */
	list_del_init(&cl->hlist);
	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);

	sch_tree_unlock(sch);
	return 0;
}

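/* htb_put - drop a reference taken by htb_get; frees the class at zero */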
static void htb_put(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);
}

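/*
 * htb_change_class - create a new class or change an existing one.
 * Parses the TCA_HTB_* attributes, allocates and links a new class
 * (turning a leaf parent into an inner node if needed), or just updates
 * the rate, ceil and buffer parameters under the tree lock.
 *
 * From userspace this is reached through tc; an illustrative invocation
 * (device and numbers are examples only) would be:
 *
 *   tc class add dev eth0 parent 1: classid 1:10 htb rate 256kbit ceil 512kbit
 */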
static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct rtattr **tca,
			    unsigned long *arg)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct rtattr *opt = tca[TCA_OPTIONS - 1];
	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
	struct rtattr *tb[TCA_HTB_RTAB];
	struct tc_htb_opt *hopt;

	/* extract all subattrs from opt attr */
	if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) ||
	    tb[TCA_HTB_PARMS - 1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt))
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]);

	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]);
	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]);
	if (!rtab || !ctab)
		goto failure;

	if (!cl) {		/* new class */
		struct Qdisc *new_q;
		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle)
		    || htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			printk(KERN_ERR "htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
			goto failure;

		cl->refcnt = 1;
		INIT_LIST_HEAD(&cl->sibling);
		INIT_LIST_HEAD(&cl->hlist);
		INIT_LIST_HEAD(&cl->children);
		INIT_LIST_HEAD(&cl->un.leaf.drop_list);

		/* create the leaf qdisc early because it uses
		   kmalloc(GFP_KERNEL), which can't be used inside
		   sch_tree_lock -- thanks to Karlis Peisenieks */
		new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			/* turn parent into inner node */
			sch->q.qlen -= parent->un.leaf.q->q.qlen;
			qdisc_destroy(parent->un.leaf.q);
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				rb_erase(&parent->pq_node, q->wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
		}
		/* the leaf (us) needs an elementary qdisc */
		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;

		cl->classid = classid;
		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = hopt->buffer;
		cl->ctokens = hopt->cbuffer;
		cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60);	/* 1min */
		PSCHED_GET_TIME(cl->t_c);
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		list_add_tail(&cl->hlist, q->hash + htb_hash(classid));
		list_add_tail(&cl->sibling,
			      parent ? &parent->children : &q->root);
	} else
		sch_tree_lock(sch);

	/* there used to be a nasty bug here: we have to check that the
	   node is really a leaf before changing cl->un.leaf! */
	if (!cl->level) {
		cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
		if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is small. Consider r2q change.\n",
			       cl->classid);
			cl->un.leaf.quantum = 1000;
		}
		if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is big. Consider r2q change.\n",
			       cl->classid);
			cl->un.leaf.quantum = 200000;
		}
		if (hopt->quantum)
			cl->un.leaf.quantum = hopt->quantum;
		if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
	}

	cl->buffer = hopt->buffer;
	cl->cbuffer = hopt->cbuffer;
	if (cl->rate)
		qdisc_put_rtab(cl->rate);
	cl->rate = rtab;
	if (cl->ceil)
		qdisc_put_rtab(cl->ceil);
	cl->ceil = ctab;
	sch_tree_unlock(sch);

	*arg = (unsigned long)cl;
	return 0;

failure:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ctab)
		qdisc_put_rtab(ctab);
	return err;
}

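/* htb_find_tcf - return the filter list of a class, or the qdisc-level one */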
static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;

	return fl;
}

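/*
 * htb_bind_filter - called when a tc filter is attached with this class
 * as its target; just counts the binding so htb_delete can refuse to
 * remove a class that filters still point at.
 */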
static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_find(classid, sch);

	/*if (cl && !cl->level) return 0;
	   The line above used to be there to prevent attaching filters to
	   leaves. But at least the tc_index filter uses this just to get
	   the class for other reasons, so we have to allow it.
	   ----
	   19.6.2002 As Werner explained, it is OK - bind filter is just
	   another way to "lock" the class - unlike "get", this lock can
	   be broken by the class during destroy, IIUC.
	 */
	if (cl)
		cl->filter_cnt++;
	else
		q->filter_cnt++;
	return (unsigned long)cl;
}

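/* htb_unbind_filter - undo the accounting done in htb_bind_filter */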
static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl)
		cl->filter_cnt--;
	else
		q->filter_cnt--;
}

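/*
 * htb_walk - iterate over all classes in the hash table, invoking the
 * caller's callback for each one and honouring the skip/stop protocol
 * of struct qdisc_walker.
 */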
static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	int i;

	if (arg->stop)
		return;

	for (i = 0; i < HTB_HSIZE; i++) {
		struct list_head *p;
		list_for_each(p, q->hash + i) {
			struct htb_class *cl =
			    list_entry(p, struct htb_class, hlist);
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

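/* hook the class and qdisc operations above into the tc framework */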
static struct Qdisc_class_ops htb_class_ops = {
	.graft = htb_graft,
	.leaf = htb_leaf,
	.get = htb_get,
	.put = htb_put,
	.change = htb_change_class,
	.delete = htb_delete,
	.walk = htb_walk,
	.tcf_chain = htb_find_tcf,
	.bind_tcf = htb_bind_filter,
	.unbind_tcf = htb_unbind_filter,
	.dump = htb_dump_class,
	.dump_stats = htb_dump_class_stats,
};

static struct Qdisc_ops htb_qdisc_ops = {
	.next = NULL,
	.cl_ops = &htb_class_ops,
	.id = "htb",
	.priv_size = sizeof(struct htb_sched),
	.enqueue = htb_enqueue,
	.dequeue = htb_dequeue,
	.requeue = htb_requeue,
	.drop = htb_drop,
	.init = htb_init,
	.reset = htb_reset,
	.destroy = htb_destroy,
	.change = NULL /* htb_change */,
	.dump = htb_dump,
	.owner = THIS_MODULE,
};

static int __init htb_module_init(void)
{
	return register_qdisc(&htb_qdisc_ops);
}
static void __exit htb_module_exit(void)
{
	unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");