/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *		HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *		found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *		helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *		code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *		created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *		spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *		fixed requeue routine
 *		and many others. thanks.
 *
 * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
 */
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/rbtree.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. A leaf ALWAYS has level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
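
/* For illustration (example added, not in the original source): in a
   hierarchy root -> inner -> leaf, the leaf sits at level 0 and each
   ancestor one level higher, up to the root. Only leaves hold packet
   queues; interior classes exist to lend and borrow tokens. */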

#define HTB_HSIZE 16		/* classid hash size */
#define HTB_EWMAC 2		/* rate average over HTB_EWMAC*HTB_HSIZE sec */
#define HTB_RATECM 1		/* whether to use rate computer */
#define HTB_HYSTERESIS 1	/* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* used internally to keep status of single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class {
	/* general class parameters */
	u32 classid;
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_htb_xstats xstats;	/* our special stats */
	int refcnt;		/* usage count of this class */

#ifdef HTB_RATECM
	/* rate measurement counters */
	unsigned long rate_bytes, sum_bytes;
	unsigned long rate_packets, sum_packets;
#endif

	/* topology */
	int level;		/* our level (see above) */
	struct htb_class *parent;	/* parent class */
	struct hlist_node hlist;	/* classid hash list item */
	struct list_head sibling;	/* sibling list item */
	struct list_head children;	/* children list */

	union {
		struct htb_class_leaf {
			struct Qdisc *q;
			int prio;
			int aprio;
			int quantum;
			int deficit[TC_HTB_MAXDEPTH];
			struct list_head drop_list;
		} leaf;
		struct htb_class_inner {
			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
			/* When class changes from state 1->2 and disconnects
			   from parent's feed then we lose the ptr value and
			   start from the first child again. Here we store
			   classid of the last valid ptr (used when ptr is
			   NULL). */
			u32 last_ptr_id[TC_HTB_NUMPRIO];
		} inner;
	} un;
	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
	struct rb_node pq_node;	/* node for event queue */
	unsigned long pq_key;	/* the same type as jiffies global */

	int prio_activity;	/* for which prios are we active */
	enum htb_cmode cmode;	/* current mode of the class */

	/* class attached filters */
	struct tcf_proto *filter_list;
	int filter_cnt;

	int warned;		/* only one warning about non work conserving .. */

	/* token bucket parameters */
	struct qdisc_rate_table *rate;	/* rate table of the class itself */
	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
	long buffer, cbuffer;	/* token bucket depth/rate */
	psched_tdiff_t mbuffer;	/* max wait time */
	long tokens, ctokens;	/* current number of tokens */
	psched_time_t t_c;	/* checkpoint time */
};

/* TODO: maybe compute rate when size is too large .. or drop ? */
static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
		       int size)
{
	int slot = size >> rate->rate.cell_log;
	if (slot > 255) {
		cl->xstats.giants++;
		slot = 255;
	}
	return rate->data[slot];
}

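/* Worked example (illustrative numbers, not from the original source):
   with cell_log == 3, a 1000 byte packet maps to slot 1000 >> 3 == 125,
   and rate->data[125] holds the precomputed time needed to send a packet
   of that size at the class rate. Sizes past slot 255 are clamped and
   counted as giants. */
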
struct htb_sched {
	struct list_head root;	/* root classes list */
	struct hlist_head hash[HTB_HSIZE];	/* hashed by classid */
	struct list_head drops[TC_HTB_NUMPRIO];	/* active leaves (for drops) */

	/* self list - roots of self generating tree */
	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	int row_mask[TC_HTB_MAXDEPTH];
	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

	/* self wait list - roots of wait PQs per row */
	struct rb_root wait_pq[TC_HTB_MAXDEPTH];

	/* time of nearest event per level (row) */
	unsigned long near_ev_cache[TC_HTB_MAXDEPTH];

	/* cached value of jiffies in dequeue */
	unsigned long jiffies;

	/* whether we hit non-work conserving class during this dequeue; we
	   use this to disable mindelay complaint in dequeue */
	int nwc_hit;

	int defcls;		/* class where unclassified flows go to */

	/* filters for qdisc itself */
	struct tcf_proto *filter_list;
	int filter_cnt;

	int rate2quantum;	/* quant = rate / rate2quantum */
	psched_time_t now;	/* cached dequeue time */
	struct timer_list timer;	/* send delay timer */
#ifdef HTB_RATECM
	struct timer_list rttim;	/* rate computer timer */
	int recmp_bucket;	/* which hash bucket to recompute next */
#endif

	/* non shaped skbs; let them go directly thru */
	struct sk_buff_head direct_queue;
	int direct_qlen;	/* max qlen of above */

	long direct_pkts;
};

/* compute hash of size HTB_HSIZE for given handle */
static inline int htb_hash(u32 h)
{
#if HTB_HSIZE != 16
#error "Declare new hash for your HTB_HSIZE"
#endif
	h ^= h >> 8;	/* stolen from cbq_hash */
	h ^= h >> 4;
	return h & 0xf;
}

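/* Worked example (illustrative): for class 1:12 the handle is 0x10012;
   0x10012 ^ (0x10012 >> 8) == 0x10112, then 0x10112 ^ (0x10112 >> 4)
   == 0x11103, and 0x11103 & 0xf == 3, so the class lands in bucket 3. */
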
/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *p;
	struct htb_class *cl;

	if (TC_H_MAJ(handle) != sch->handle)
		return NULL;

	hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
		if (cl->classid == handle)
			return cl;
	}
	return NULL;
}

/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by classid in priority. Then we examine
 * filters in qdisc and in inner nodes (if higher filter points to the inner
 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 * internal fifo (direct). These packets then go directly thru. If we still
 * have no valid leaf we try to use MAJOR:default leaf. If even that is
 * unsuccessful we finish and return the direct queue.
 */
#define HTB_DIRECT (struct htb_class*)-1
static inline u32 htb_classid(struct htb_class *cl)
{
	return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
}

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow to select class by setting skb->priority to valid classid;
	   note that nfmark can be used too by attaching filter fw with no
	   rules in it */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
		return cl;

	*qerr = NET_XMIT_BYPASS;
	tcf = q->filter_list;
	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS;
		case TC_ACT_SHOT:
			return NULL;
		}
#elif defined(CONFIG_NET_CLS_POLICE)
		if (result == TC_POLICE_SHOT)
			return HTB_DIRECT;
#endif
		if ((cl = (void *)res.class) == NULL) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			if ((cl = htb_find(res.classid, sch)) == NULL)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = cl->filter_list;
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is safe bet */
	return cl;
}

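/* Classification walk-through (illustrative example, not from the source):
   with qdisc handle 1:0, an skb with skb->priority == 0x10000 goes to the
   direct queue, one with skb->priority == 0x10012 goes straight to leaf
   1:12 if that leaf exists, and an unclassified skb is tried against the
   MAJOR:defcls leaf before falling back to the direct queue. */
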
/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->classid > c->classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode in cl->pq_key microseconds. Make sure that class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, long delay)
{
	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;

	cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
	if (cl->pq_key == q->jiffies)
		cl->pq_key++;

	/* update the nearest event cache */
	if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (time_after_eq(cl->pq_key, c->pq_key))
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
	}
}

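/* Note (comment added): ffz(~mask) is the index of the lowest set bit in
   mask, e.g. for mask == 0x6 the loop above visits prio 1, then prio 2,
   clearing each bit as it goes. */
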
/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;

	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		if (q->ptr[cl->level][prio] == cl->node + prio)
			htb_next_rb_node(q->ptr[cl->level] + prio);
		rb_erase(cl->node + prio, q->row[cl->level] + prio);
		if (!q->row[cl->level][prio].rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating in. cl->cmode must be new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.feed[prio].rb_node)
				/* parent already has its feed in use, so
				   reset the bit in mask as parent is
				   already ok */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;
	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.ptr[prio] == cl->node + prio) {
				/* we are removing child which is pointed to
				   from parent feed - forget the pointer but
				   remember classid */
				p->un.inner.last_ptr_id[prio] = cl->classid;
				p->un.inner.ptr[prio] = NULL;
			}

			rb_erase(cl->node + prio, p->un.inner.feed + prio);

			if (!p->un.inner.feed[prio].rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;
	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

#if HTB_HYSTERESIS
static inline long htb_lowater(const struct htb_class *cl)
{
	return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
}
static inline long htb_hiwater(const struct htb_class *cl)
{
	return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
}
#else
#define htb_lowater(cl) (0)
#define htb_hiwater(cl) (0)
#endif

/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 * from now to time when cl will change its state.
 * Note also that class mode doesn't change simply at cl->{c,}tokens == 0;
 * rather there is hysteresis over the range 0 .. -cl->{c,}buffer. It is
 * meant to limit the number of mode transitions per time unit.
 * The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, long *diff)
{
	long toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}

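/* Worked example (illustrative numbers, comment added): a class in
   HTB_CAN_SEND with cbuffer == 5000 does not enter HTB_CANT_SEND until
   ctokens would drop below -5000, and stays in HTB_CAN_SEND until tokens
   drops below -buffer; with HTB_HYSTERESIS off both thresholds are simply
   0. On a downgrade *diff is set to -toks, the deficit the class must
   earn back before it changes state again. */
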
/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. Routine will update feed lists linkage, change mode
 * and add class to the wait event queue if appropriate. New mode should
 * be different from old one and cl->pq_key has to be valid if changing
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 * It also adds leaf into droplist.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
		htb_activate_prios(q, cl);
		list_add_tail(&cl->un.leaf.drop_list,
			      q->drops + cl->un.leaf.aprio);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In other words it can't be called
 * with a non-active leaf. It also removes class from the drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	BUG_TRAP(cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
	list_del_init(&cl->un.leaf.drop_list);
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	int ret;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__skb_queue_tail(&q->direct_queue, skb);
			q->direct_pkts++;
		} else {
			kfree_skb(skb);
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
#endif
	} else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
		   NET_XMIT_SUCCESS) {
		sch->qstats.drops++;
		cl->qstats.drops++;
		return NET_XMIT_DROP;
	} else {
		cl->bstats.packets++;
		cl->bstats.bytes += skb->len;
		htb_activate(q, cl);
	}

	sch->q.qlen++;
	sch->bstats.packets++;
	sch->bstats.bytes += skb->len;
	return NET_XMIT_SUCCESS;
}

/* TODO: requeuing packet charges it to policers again !! */
static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int ret = NET_XMIT_SUCCESS;
	struct htb_class *cl = htb_classify(skb, sch, &ret);
	struct sk_buff *tskb;

	if (cl == HTB_DIRECT || !cl) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen && cl) {
			__skb_queue_head(&q->direct_queue, skb);
		} else {
			__skb_queue_head(&q->direct_queue, skb);
			tskb = __skb_dequeue_tail(&q->direct_queue);
			kfree_skb(tskb);
			sch->qstats.drops++;
			return NET_XMIT_CN;
		}
	} else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
		   NET_XMIT_SUCCESS) {
		sch->qstats.drops++;
		cl->qstats.drops++;
		return NET_XMIT_DROP;
	} else
		htb_activate(q, cl);

	sch->q.qlen++;
	sch->qstats.requeues++;
	return NET_XMIT_SUCCESS;
}

static void htb_timer(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	sch->flags &= ~TCQ_F_THROTTLED;
	wmb();
	netif_schedule(sch->dev);
}

#ifdef HTB_RATECM
#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
static void htb_rate_timer(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *p;
	struct htb_class *cl;

	/* lock queue so that we can muck with it */
	spin_lock_bh(&sch->dev->queue_lock);

	q->rttim.expires = jiffies + HZ;
	add_timer(&q->rttim);

	/* scan and recompute one bucket at a time */
	if (++q->recmp_bucket >= HTB_HSIZE)
		q->recmp_bucket = 0;

	hlist_for_each_entry(cl, p, q->hash + q->recmp_bucket, hlist) {
		RT_GEN(cl->sum_bytes, cl->rate_bytes);
		RT_GEN(cl->sum_packets, cl->rate_packets);
	}
	spin_unlock_bh(&sch->dev->queue_lock);
}
#endif

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use a more precise clock than the event queue
 * here. In such a case we remove the class from the event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, int bytes)
{
	long toks, diff;
	enum htb_cmode old_mode;

#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
	if (toks > cl->B) toks = cl->B; \
	toks -= L2T(cl, cl->R, bytes); \
	if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
	cl->T = toks

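	/* Expanded for clarity (comment added): HTB_ACCNT(tokens, buffer,
	   rate) reads "toks = diff + cl->tokens; cap at cl->buffer;
	   subtract the packet's send time L2T(cl, cl->rate, bytes); floor
	   at 1 - cl->mbuffer; store back into cl->tokens" - i.e. one leaky
	   bucket update per (T, B, R) triple. */
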
	while (cl) {
		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			HTB_ACCNT(tokens, buffer, rate);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		HTB_ACCNT(ctokens, cbuffer, ceil);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				rb_erase(&cl->pq_node, q->wait_pq + cl->level);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}
#ifdef HTB_RATECM
		/* update rate counters */
		cl->sum_bytes += bytes;
		cl->sum_packets++;
#endif

		/* update byte stats except for leaves which are already updated */
		if (cl->level) {
			cl->bstats.bytes += bytes;
			cl->bstats.packets++;
		}
		cl = cl->parent;
	}
}

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns jiffies to
 * next pending event (0 for no event in pq).
 * Note: Applied are events whose cl->pq_key <= jiffies.
 */
static long htb_do_events(struct htb_sched *q, int level)
{
	int i;

	for (i = 0; i < 500; i++) {
		struct htb_class *cl;
		long diff;
		struct rb_node *p = q->wait_pq[level].rb_node;
		if (!p)
			return 0;
		while (p->rb_left)
			p = p->rb_left;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (time_after(cl->pq_key, q->jiffies)) {
			return cl->pq_key - q->jiffies;
		}
		rb_erase(p, q->wait_pq + level);
		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}
	if (net_ratelimit())
		printk(KERN_WARNING "htb: too many events !\n");
	return HZ / 10;
}

/* Returns class->node+prio from id-tree where the class's id is >= id.
   NULL if no such one exists. */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
		    rb_entry(n, struct htb_class, node[prio]);
		if (id == cl->classid)
			return n;

		if (id > cl->classid) {
			n = n->rb_right;
		} else {
			r = n;
			n = n->rb_left;
		}
	}
	return r;
}

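/* Illustrative (example added): searching a tree holding classids
   {1:10, 1:20, 1:30} for a remembered id of 1:15 returns the node of
   1:20, the smallest classid >= the hint, so the DRR scan resumes at
   the original class or, if it is gone, at its successor. */
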
/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find leaf where current feed pointer points to.
 */
static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
					 struct rb_node **pptr, u32 * pid)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_TRAP(tree->rb_node);
	sp->root = tree->rb_node;
	sp->pptr = pptr;
	sp->pid = pid;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to
			   recover the original or next ptr */
			*sp->pptr =
			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now; remove the hint as it
				   can become out of date quickly */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				BUG_TRAP(*sp->pptr);
				if (!*sp->pptr)
					return NULL;
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			(++sp)->root = cl->un.inner.feed[prio].rb_node;
			sp->pptr = cl->un.inner.ptr + prio;
			sp->pid = cl->un.inner.last_ptr_id + prio;
		}
	}
	BUG_TRAP(0);
	return NULL;
}

/* dequeues packet at given priority and level; call only if
   you are sure that there is active class at prio/level */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
					int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	do {
next:
		BUG_TRAP(cl);
		if (!cl)
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		   qdisc drops packets in enqueue routine or if someone used
		   graft operation on the leaf since last dequeue;
		   simply deactivate and skip such class */
		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(q->row[level] + prio,
					       prio, q->ptr[level] + prio,
					       q->last_ptr_id[level] + prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
		if (likely(skb != NULL))
			break;
		if (!cl->warned) {
			printk(KERN_WARNING
			       "htb: class %X isn't work conserving ?!\n",
			       cl->classid);
			cl->warned = 1;
		}
		q->nwc_hit++;
		htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
				  ptr[0]) + prio);
		cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
			cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
			htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
					  ptr[0]) + prio);
		}
		/* this used to be after charge_class but this constellation
		   gives us slightly better performance */
		if (!cl->un.leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb->len);
	}
	return skb;
}

static void htb_delay_by(struct Qdisc *sch, long delay)
{
	struct htb_sched *q = qdisc_priv(sch);
	if (delay <= 0)
		delay = 1;
	if (unlikely(delay > 5 * HZ)) {
		if (net_ratelimit())
			printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
		delay = 5 * HZ;
	}
	/* why don't we use jiffies here? because expires can be in the past */
	mod_timer(&q->timer, q->jiffies + delay);
	sch->flags |= TCQ_F_THROTTLED;
	sch->qstats.overlimits++;
}

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb = NULL;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	long min_delay;

	q->jiffies = jiffies;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __skb_dequeue(&q->direct_queue);
	if (skb != NULL) {
		sch->flags &= ~TCQ_F_THROTTLED;
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	PSCHED_GET_TIME(q->now);

	min_delay = LONG_MAX;
	q->nwc_hit = 0;
	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		long delay;
		if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
			delay = htb_do_events(q, level);
			q->near_ev_cache[level] =
			    q->jiffies + (delay ? delay : HZ);
		} else
			delay = q->near_ev_cache[level] - q->jiffies;

		if (delay && min_delay > delay)
			min_delay = delay;
		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);
			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL)) {
				sch->q.qlen--;
				sch->flags &= ~TCQ_F_THROTTLED;
				goto fin;
			}
		}
	}
	htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay);
fin:
	return skb;
}

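/* Dequeue order recap (comment added): the direct queue is served first,
   then levels from 0 (leaves) upwards, and within each level priorities
   from prio 0 upwards - so a class sending on its own tokens beats one
   that must borrow from higher up, and prio 0 beats prio 1 at the same
   level. */
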
/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int prio;

	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
		struct list_head *p;
		list_for_each(p, q->drops + prio) {
			struct htb_class *cl = list_entry(p, struct htb_class,
							  un.leaf.drop_list);
			unsigned int len;
			if (cl->un.leaf.q->ops->drop &&
			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
				sch->q.qlen--;
				if (!cl->un.leaf.q->q.qlen)
					htb_deactivate(q, cl);
				return len;
			}
		}
	}
	return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int i;

	for (i = 0; i < HTB_HSIZE; i++) {
		struct hlist_node *p;
		struct htb_class *cl;

		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
			if (cl->level)
				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
			else {
				if (cl->un.leaf.q)
					qdisc_reset(cl->un.leaf.q);
				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;
		}
	}
	sch->flags &= ~TCQ_F_THROTTLED;
	del_timer(&q->timer);
	__skb_queue_purge(&q->direct_queue);
	sch->q.qlen = 0;
	memset(q->row, 0, sizeof(q->row));
	memset(q->row_mask, 0, sizeof(q->row_mask));
	memset(q->wait_pq, 0, sizeof(q->wait_pq));
	memset(q->ptr, 0, sizeof(q->ptr));
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);
}

static int htb_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct rtattr *tb[TCA_HTB_INIT];
	struct tc_htb_glob *gopt;
	int i;
	if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) ||
	    tb[TCA_HTB_INIT - 1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) {
		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
		return -EINVAL;
	}
	gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]);
	if (gopt->version != HTB_VER >> 16) {
		printk(KERN_ERR
		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
		return -EINVAL;
	}

	INIT_LIST_HEAD(&q->root);
	for (i = 0; i < HTB_HSIZE; i++)
		INIT_HLIST_HEAD(q->hash + i);
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);

	init_timer(&q->timer);
	skb_queue_head_init(&q->direct_queue);

	q->direct_qlen = sch->dev->tx_queue_len;
	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
		q->direct_qlen = 2;
	q->timer.function = htb_timer;
	q->timer.data = (unsigned long)sch;

#ifdef HTB_RATECM
	init_timer(&q->rttim);
	q->rttim.function = htb_rate_timer;
	q->rttim.data = (unsigned long)sch;
	q->rttim.expires = jiffies + HZ;
	add_timer(&q->rttim);
#endif
	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	return 0;
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct htb_sched *q = qdisc_priv(sch);
	unsigned char *b = skb->tail;
	struct rtattr *rta;
	struct tc_htb_glob gopt;
	spin_lock_bh(&sch->dev->queue_lock);
	gopt.direct_pkts = q->direct_pkts;

	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;
	rta = (struct rtattr *)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
	rta->rta_len = skb->tail - b;
	spin_unlock_bh(&sch->dev->queue_lock);
	return skb->len;
rtattr_failure:
	spin_unlock_bh(&sch->dev->queue_lock);
	skb_trim(skb, skb->tail - skb->data);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	unsigned char *b = skb->tail;
	struct rtattr *rta;
	struct tc_htb_opt opt;

	spin_lock_bh(&sch->dev->queue_lock);
	tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
	tcm->tcm_handle = cl->classid;
	if (!cl->level && cl->un.leaf.q)
		tcm->tcm_info = cl->un.leaf.q->handle;

	rta = (struct rtattr *)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);

	memset(&opt, 0, sizeof(opt));

	opt.rate = cl->rate->rate;
	opt.buffer = cl->buffer;
	opt.ceil = cl->ceil->rate;
	opt.cbuffer = cl->cbuffer;
	opt.quantum = cl->un.leaf.quantum;
	opt.prio = cl->un.leaf.prio;
	opt.level = cl->level;
	RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
	rta->rta_len = skb->tail - b;
	spin_unlock_bh(&sch->dev->queue_lock);
	return skb->len;
rtattr_failure:
	spin_unlock_bh(&sch->dev->queue_lock);
	skb_trim(skb, b - skb->data);
	return -1;
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;

#ifdef HTB_RATECM
	cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE);
	cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE);
#endif

	if (!cl->level && cl->un.leaf.q)
		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
	cl->xstats.tokens = cl->tokens;
	cl->xstats.ctokens = cl->ctokens;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl && !cl->level) {
		if (new == NULL && (new = qdisc_create_dflt(sch->dev,
							    &pfifo_qdisc_ops))
		    == NULL)
			return -ENOBUFS;
		sch_tree_lock(sch);
		if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
			if (cl->prio_activity)
				htb_deactivate(qdisc_priv(sch), cl);

			/* TODO: is it correct? Why doesn't CBQ do it? */
			sch->q.qlen -= (*old)->q.qlen;
			qdisc_reset(*old);
		}
		sch_tree_unlock(sch);
		return 0;
	}
	return -ENOENT;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return (cl && !cl->level) ? cl->un.leaf.q : NULL;
}

static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);
	if (cl)
		cl->refcnt++;
	return (unsigned long)cl;
}

static void htb_destroy_filters(struct tcf_proto **fl)
{
	struct tcf_proto *tp;

	while ((tp = *fl) != NULL) {
		*fl = tp->next;
		tcf_destroy(tp);
	}
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	struct htb_sched *q = qdisc_priv(sch);
	if (!cl->level) {
		BUG_TRAP(cl->un.leaf.q);
		sch->q.qlen -= cl->un.leaf.q->q.qlen;
		qdisc_destroy(cl->un.leaf.q);
	}
	qdisc_put_rtab(cl->rate);
	qdisc_put_rtab(cl->ceil);

	htb_destroy_filters(&cl->filter_list);

	while (!list_empty(&cl->children))
		htb_destroy_class(sch, list_entry(cl->children.next,
						  struct htb_class, sibling));

	/* note: this delete may happen twice (see htb_delete) */
	if (!hlist_unhashed(&cl->hlist))
		hlist_del(&cl->hlist);
	list_del(&cl->sibling);

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		rb_erase(&cl->pq_node, q->wait_pq + cl->level);

	kfree(cl);
}

/* always called under BH & queue lock */
static void htb_destroy(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);

	del_timer_sync(&q->timer);
#ifdef HTB_RATECM
	del_timer_sync(&q->rttim);
#endif
	/* This line used to be after the htb_destroy_class call below,
	   and surprisingly it worked in 2.4.  But it must precede it,
	   because filters need their target class alive to be able to
	   call unbind_filter on it (without an oops). */
	htb_destroy_filters(&q->filter_list);

	while (!list_empty(&q->root))
		htb_destroy_class(sch, list_entry(q->root.next,
						  struct htb_class, sibling));

	__skb_queue_purge(&q->direct_queue);
}

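/* Delete one class at userspace's request.  Deletion is refcounted:
 * the class is unlinked here, but the memory is only freed by
 * htb_destroy_class() once the last reference (e.g. one taken via
 * htb_get()) has been dropped through htb_put().
 */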
static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;

	// TODO: why don't we allow deleting a subtree? references? does
	// the tc subsystem guarantee us that htb_destroy holds no class
	// refs, so that we can remove children safely there?
	if (!list_empty(&cl->children) || cl->filter_cnt)
		return -EBUSY;

	sch_tree_lock(sch);

	/* delete from hash and active; remainder in destroy_class */
	if (!hlist_unhashed(&cl->hlist))
		hlist_del(&cl->hlist);

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);

	sch_tree_unlock(sch);
	return 0;
}

static void htb_put(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);
}

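/* Create a new class or change an existing one.  A typical command
 * that lands here (illustrative values, standard iproute2 syntax):
 *
 *	tc class add dev eth0 parent 1: classid 1:10 \
 *		htb rate 1mbit ceil 2mbit burst 15k
 *
 * The rate and ceil parameters arrive as the rtab/ctab rate tables
 * extracted below.
 */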
static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct rtattr **tca,
			    unsigned long *arg)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct rtattr *opt = tca[TCA_OPTIONS - 1];
	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
	struct rtattr *tb[TCA_HTB_RTAB];
	struct tc_htb_opt *hopt;

	/* extract all subattrs from opt attr */
	if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) ||
	    tb[TCA_HTB_PARMS - 1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt))
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]);

	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]);
	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]);
	if (!rtab || !ctab)
		goto failure;

	if (!cl) {		/* new class */
		struct Qdisc *new_q;
		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle)
		    || htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			printk(KERN_ERR "htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
			goto failure;

		cl->refcnt = 1;
		INIT_LIST_HEAD(&cl->sibling);
		INIT_HLIST_NODE(&cl->hlist);
		INIT_LIST_HEAD(&cl->children);
		INIT_LIST_HEAD(&cl->un.leaf.drop_list);

		/* create the leaf qdisc early, because it uses
		   kmalloc(GFP_KERNEL) and so can't be called inside
		   sch_tree_lock() -- thanks to Karlis Peisenieks */
		new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			/* turn parent into inner node */
			sch->q.qlen -= parent->un.leaf.q->q.qlen;
			qdisc_destroy(parent->un.leaf.q);
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				rb_erase(&parent->pq_node, q->wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
		}
		/* leaf (we) needs elementary qdisc */
		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;

		cl->classid = classid;
		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = hopt->buffer;
		cl->ctokens = hopt->cbuffer;
		cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60);	/* 1min */
		PSCHED_GET_TIME(cl->t_c);
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
		list_add_tail(&cl->sibling,
			      parent ? &parent->children : &q->root);
	} else
		sch_tree_lock(sch);

	/* there used to be a nasty bug here: we have to check that the
	   node really is a leaf before changing cl->un.leaf! */
	if (!cl->level) {
		cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
		if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is small. Consider r2q change.\n",
			       cl->classid);
			cl->un.leaf.quantum = 1000;
		}
		if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is big. Consider r2q change.\n",
			       cl->classid);
			cl->un.leaf.quantum = 200000;
		}
		if (hopt->quantum)
			cl->un.leaf.quantum = hopt->quantum;
		if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
	}

	cl->buffer = hopt->buffer;
	cl->cbuffer = hopt->cbuffer;
	if (cl->rate)
		qdisc_put_rtab(cl->rate);
	cl->rate = rtab;
	if (cl->ceil)
		qdisc_put_rtab(cl->ceil);
	cl->ceil = ctab;
	sch_tree_unlock(sch);

	*arg = (unsigned long)cl;
	return 0;

failure:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ctab)
		qdisc_put_rtab(ctab);
	return err;
}

static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;

	return fl;
}

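/* Binding a filter to a class only bumps filter_cnt, but a nonzero
 * count is enough to pin the class: htb_delete() above refuses with
 * -EBUSY while filters still reference it.
 */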
static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_find(classid, sch);

	/*if (cl && !cl->level) return 0;
	   The line above used to be there to prevent attaching filters to
	   leaves.  But at least the tc_index filter uses this just to get
	   the class for other reasons, so we have to allow it.
	   ----
	   19.6.2002 As Werner explained, it is OK - bind_filter is just
	   another way to "lock" the class - unlike "get", this lock can
	   be broken by the class during destroy, IIUC.
	 */
	if (cl)
		cl->filter_cnt++;
	else
		q->filter_cnt++;
	return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl)
		cl->filter_cnt--;
	else
		q->filter_cnt--;
}

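/* Iterate over all classes for a dump.  The walker's skip/count pair
 * acts as a resumable cursor, so a dump that overflowed one netlink
 * message can pick up where the previous pass stopped.
 */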
static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	int i;

	if (arg->stop)
		return;

	for (i = 0; i < HTB_HSIZE; i++) {
		struct hlist_node *p;
		struct htb_class *cl;

		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

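/* Class-level operations table: each field wires one tc class
 * operation (graft, get/put, change, delete, walk, dump, filter
 * binding) to the handlers defined above.
 */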
static struct Qdisc_class_ops htb_class_ops = {
	.graft = htb_graft,
	.leaf = htb_leaf,
	.get = htb_get,
	.put = htb_put,
	.change = htb_change_class,
	.delete = htb_delete,
	.walk = htb_walk,
	.tcf_chain = htb_find_tcf,
	.bind_tcf = htb_bind_filter,
	.unbind_tcf = htb_unbind_filter,
	.dump = htb_dump_class,
	.dump_stats = htb_dump_class_stats,
};

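/* Qdisc-level operations table.  Note that .change is left NULL, so
 * the qdisc-wide parameters (e.g. r2q, default class) apparently can't
 * be modified after creation; per-class changes go through
 * htb_change_class() instead.
 */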
static struct Qdisc_ops htb_qdisc_ops = {
	.next = NULL,
	.cl_ops = &htb_class_ops,
	.id = "htb",
	.priv_size = sizeof(struct htb_sched),
	.enqueue = htb_enqueue,
	.dequeue = htb_dequeue,
	.requeue = htb_requeue,
	.drop = htb_drop,
	.init = htb_init,
	.reset = htb_reset,
	.destroy = htb_destroy,
	.change = NULL /* htb_change */,
	.dump = htb_dump,
	.owner = THIS_MODULE,
};

static int __init htb_module_init(void)
{
	return register_qdisc(&htb_qdisc_ops);
}
static void __exit htb_module_exit(void)
{
	unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");