blob: cf8e70392fe0938acb4b8b88fa8bc74dbeaed584 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/gen_estimator.c Simple rate estimator.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Changes:
12 * Jamal Hadi Salim - moved it to net/core and reshulfed
13 * names to make it usable in general net subsystem.
14 */
15
16#include <asm/uaccess.h>
17#include <asm/system.h>
Jiri Slaby1977f032007-10-18 23:40:25 -070018#include <linux/bitops.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/module.h>
20#include <linux/types.h>
21#include <linux/kernel.h>
22#include <linux/jiffies.h>
23#include <linux/string.h>
24#include <linux/mm.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/in.h>
28#include <linux/errno.h>
29#include <linux/interrupt.h>
30#include <linux/netdevice.h>
31#include <linux/skbuff.h>
32#include <linux/rtnetlink.h>
33#include <linux/init.h>
Jarek Poplawski4db0acf2008-11-24 15:48:05 -080034#include <linux/rbtree.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090035#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <net/sock.h>
37#include <net/gen_stats.h>
38
39/*
40 This code is NOT intended to be used for statistics collection,
41 its purpose is to provide a base for statistical multiplexing
42 for controlled load service.
43 If you need only statistics, run a user level daemon which
44 periodically reads byte counters.
45
46 Unfortunately, rate estimation is not a very easy task.
47 F.e. I did not find a simple way to estimate the current peak rate
48 and even failed to formulate the problem 8)8)
49
50 So I preferred not to built an estimator into the scheduler,
51 but run this task separately.
52 Ideally, it should be kernel thread(s), but for now it runs
53 from timers, which puts apparent top bounds on the number of rated
54 flows, has minimal overhead on small, but is enough
55 to handle controlled load service, sets of aggregates.
56
57 We measure rate over A=(1<<interval) seconds and evaluate EWMA:
58
59 avrate = avrate*(1-W) + rate*W
60
61 where W is chosen as negative power of 2: W = 2^(-ewma_log)
62
63 The resulting time constant is:
64
65 T = A/(-ln(1-W))
66
67
68 NOTES.
69
Eric Dumazet511e11e2009-05-18 19:26:37 -070070 * avbps is scaled by 2^5, avpps is scaled by 2^10.
71 * both values are reported as 32 bit unsigned values. bps can
72 overflow for fast links : max speed being 34360Mbit/sec
Linus Torvalds1da177e2005-04-16 15:20:36 -070073 * Minimal interval is HZ/4=250msec (it is the greatest common divisor
74 for HZ=100 and HZ=1024 8)), maximal interval
75 is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
76 are too expensive, longer ones can be implemented
77 at user level painlessly.
78 */
79
80#define EST_MAX_INTERVAL 5
81
82struct gen_estimator
83{
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -070084 struct list_head list;
Eric Dumazetc1a8f1f2009-08-16 09:36:49 +000085 struct gnet_stats_basic_packed *bstats;
Linus Torvalds1da177e2005-04-16 15:20:36 -070086 struct gnet_stats_rate_est *rate_est;
87 spinlock_t *stats_lock;
Linus Torvalds1da177e2005-04-16 15:20:36 -070088 int ewma_log;
89 u64 last_bytes;
Eric Dumazet511e11e2009-05-18 19:26:37 -070090 u64 avbps;
Linus Torvalds1da177e2005-04-16 15:20:36 -070091 u32 last_packets;
92 u32 avpps;
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -070093 struct rcu_head e_rcu;
Jarek Poplawski4db0acf2008-11-24 15:48:05 -080094 struct rb_node node;
Linus Torvalds1da177e2005-04-16 15:20:36 -070095};
96
97struct gen_estimator_head
98{
99 struct timer_list timer;
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700100 struct list_head list;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101};
102
103static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
104
David S. Millerdeb3abf2008-08-18 22:32:10 -0700105/* Protects against NULL dereference */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106static DEFINE_RWLOCK(est_lock);
107
Jarek Poplawski4db0acf2008-11-24 15:48:05 -0800108/* Protects against soft lockup during large deletion */
109static struct rb_root est_root = RB_ROOT;
110
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111static void est_timer(unsigned long arg)
112{
113 int idx = (int)arg;
114 struct gen_estimator *e;
115
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700116 rcu_read_lock();
117 list_for_each_entry_rcu(e, &elist[idx].list, list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118 u64 nbytes;
Eric Dumazet511e11e2009-05-18 19:26:37 -0700119 u64 brate;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120 u32 npackets;
121 u32 rate;
122
123 spin_lock(e->stats_lock);
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700124 read_lock(&est_lock);
125 if (e->bstats == NULL)
126 goto skip;
127
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128 nbytes = e->bstats->bytes;
129 npackets = e->bstats->packets;
Eric Dumazet511e11e2009-05-18 19:26:37 -0700130 brate = (nbytes - e->last_bytes)<<(7 - idx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131 e->last_bytes = nbytes;
Jarek Poplawskia1dcb662009-05-25 22:47:01 -0700132 e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133 e->rate_est->bps = (e->avbps+0xF)>>5;
134
135 rate = (npackets - e->last_packets)<<(12 - idx);
136 e->last_packets = npackets;
Jarek Poplawskia1dcb662009-05-25 22:47:01 -0700137 e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138 e->rate_est->pps = (e->avpps+0x1FF)>>10;
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700139skip:
140 read_unlock(&est_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 spin_unlock(e->stats_lock);
142 }
143
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700144 if (!list_empty(&elist[idx].list))
Eric Dumazet789675e2008-01-03 20:40:01 -0800145 mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700146 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147}
148
Jarek Poplawski4db0acf2008-11-24 15:48:05 -0800149static void gen_add_node(struct gen_estimator *est)
150{
151 struct rb_node **p = &est_root.rb_node, *parent = NULL;
152
153 while (*p) {
154 struct gen_estimator *e;
155
156 parent = *p;
157 e = rb_entry(parent, struct gen_estimator, node);
158
159 if (est->bstats > e->bstats)
160 p = &parent->rb_right;
161 else
162 p = &parent->rb_left;
163 }
164 rb_link_node(&est->node, parent, p);
165 rb_insert_color(&est->node, &est_root);
166}
167
Jarek Poplawski244e6c22008-11-26 15:24:32 -0800168static
Eric Dumazetc1a8f1f2009-08-16 09:36:49 +0000169struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats,
Jarek Poplawski244e6c22008-11-26 15:24:32 -0800170 const struct gnet_stats_rate_est *rate_est)
Jarek Poplawski4db0acf2008-11-24 15:48:05 -0800171{
172 struct rb_node *p = est_root.rb_node;
173
174 while (p) {
175 struct gen_estimator *e;
176
177 e = rb_entry(p, struct gen_estimator, node);
178
179 if (bstats > e->bstats)
180 p = p->rb_right;
181 else if (bstats < e->bstats || rate_est != e->rate_est)
182 p = p->rb_left;
183 else
184 return e;
185 }
186 return NULL;
187}
188
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189/**
190 * gen_new_estimator - create a new rate estimator
191 * @bstats: basic statistics
192 * @rate_est: rate estimator statistics
193 * @stats_lock: statistics lock
194 * @opt: rate estimator configuration TLV
195 *
196 * Creates a new rate estimator with &bstats as source and &rate_est
197 * as destination. A new timer with the interval specified in the
198 * configuration TLV is created. Upon each interval, the latest statistics
199 * will be read from &bstats and the estimated rate will be stored in
200 * &rate_est with the statistics lock grabed during this period.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900201 *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202 * Returns 0 on success or a negative error code.
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700203 *
204 * NOTE: Called under rtnl_mutex
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205 */
Eric Dumazetc1a8f1f2009-08-16 09:36:49 +0000206int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700207 struct gnet_stats_rate_est *rate_est,
208 spinlock_t *stats_lock,
Patrick McHardy1e904742008-01-22 22:11:17 -0800209 struct nlattr *opt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210{
211 struct gen_estimator *est;
Patrick McHardy1e904742008-01-22 22:11:17 -0800212 struct gnet_estimator *parm = nla_data(opt);
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700213 int idx;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214
Patrick McHardy1e904742008-01-22 22:11:17 -0800215 if (nla_len(opt) < sizeof(*parm))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 return -EINVAL;
217
218 if (parm->interval < -2 || parm->interval > 3)
219 return -EINVAL;
220
Andrew Morton77d04bd2006-04-07 14:52:59 -0700221 est = kzalloc(sizeof(*est), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222 if (est == NULL)
223 return -ENOBUFS;
224
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700225 idx = parm->interval + 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226 est->bstats = bstats;
227 est->rate_est = rate_est;
228 est->stats_lock = stats_lock;
229 est->ewma_log = parm->ewma_log;
230 est->last_bytes = bstats->bytes;
231 est->avbps = rate_est->bps<<5;
232 est->last_packets = bstats->packets;
233 est->avpps = rate_est->pps<<10;
234
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700235 if (!elist[idx].timer.function) {
236 INIT_LIST_HEAD(&elist[idx].list);
237 setup_timer(&elist[idx].timer, est_timer, idx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238 }
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700239
240 if (list_empty(&elist[idx].list))
Eric Dumazet789675e2008-01-03 20:40:01 -0800241 mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700242
243 list_add_rcu(&est->list, &elist[idx].list);
Jarek Poplawski4db0acf2008-11-24 15:48:05 -0800244 gen_add_node(est);
245
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 return 0;
247}
Stephen Hemmingerc1b56872008-11-25 21:14:06 -0800248EXPORT_SYMBOL(gen_new_estimator);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700250static void __gen_kill_estimator(struct rcu_head *head)
251{
252 struct gen_estimator *e = container_of(head,
253 struct gen_estimator, e_rcu);
254 kfree(e);
255}
256
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257/**
258 * gen_kill_estimator - remove a rate estimator
259 * @bstats: basic statistics
260 * @rate_est: rate estimator statistics
261 *
Jarek Poplawski4db0acf2008-11-24 15:48:05 -0800262 * Removes the rate estimator specified by &bstats and &rate_est.
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700263 *
David S. Millerdeb3abf2008-08-18 22:32:10 -0700264 * NOTE: Called under rtnl_mutex
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 */
Eric Dumazetc1a8f1f2009-08-16 09:36:49 +0000266void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
Jarek Poplawski4db0acf2008-11-24 15:48:05 -0800267 struct gnet_stats_rate_est *rate_est)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268{
Jarek Poplawski4db0acf2008-11-24 15:48:05 -0800269 struct gen_estimator *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700270
Jarek Poplawski4db0acf2008-11-24 15:48:05 -0800271 while ((e = gen_find_node(bstats, rate_est))) {
272 rb_erase(&e->node, &est_root);
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700273
Jarek Poplawski4db0acf2008-11-24 15:48:05 -0800274 write_lock_bh(&est_lock);
275 e->bstats = NULL;
276 write_unlock_bh(&est_lock);
Ranko Zivojnovic0929c2d2007-07-16 18:28:32 -0700277
Jarek Poplawski4db0acf2008-11-24 15:48:05 -0800278 list_del_rcu(&e->list);
279 call_rcu(&e->e_rcu, __gen_kill_estimator);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 }
281}
Stephen Hemmingerc1b56872008-11-25 21:14:06 -0800282EXPORT_SYMBOL(gen_kill_estimator);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283
284/**
Jarek Poplawski96750162008-01-21 02:36:02 -0800285 * gen_replace_estimator - replace rate estimator configuration
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286 * @bstats: basic statistics
287 * @rate_est: rate estimator statistics
288 * @stats_lock: statistics lock
289 * @opt: rate estimator configuration TLV
290 *
291 * Replaces the configuration of a rate estimator by calling
292 * gen_kill_estimator() and gen_new_estimator().
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900293 *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 * Returns 0 on success or a negative error code.
295 */
Eric Dumazetc1a8f1f2009-08-16 09:36:49 +0000296int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
Jarek Poplawski96750162008-01-21 02:36:02 -0800297 struct gnet_stats_rate_est *rate_est,
Patrick McHardy1e904742008-01-22 22:11:17 -0800298 spinlock_t *stats_lock, struct nlattr *opt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299{
Jarek Poplawski96750162008-01-21 02:36:02 -0800300 gen_kill_estimator(bstats, rate_est);
301 return gen_new_estimator(bstats, rate_est, stats_lock, opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303EXPORT_SYMBOL(gen_replace_estimator);
Stephen Hemmingerc1b56872008-11-25 21:14:06 -0800304
305/**
306 * gen_estimator_active - test if estimator is currently in use
Jarek Poplawski244e6c22008-11-26 15:24:32 -0800307 * @bstats: basic statistics
Stephen Hemmingerc1b56872008-11-25 21:14:06 -0800308 * @rate_est: rate estimator statistics
309 *
Jarek Poplawski244e6c22008-11-26 15:24:32 -0800310 * Returns true if estimator is active, and false if not.
Stephen Hemmingerc1b56872008-11-25 21:14:06 -0800311 */
Eric Dumazetc1a8f1f2009-08-16 09:36:49 +0000312bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
Jarek Poplawski244e6c22008-11-26 15:24:32 -0800313 const struct gnet_stats_rate_est *rate_est)
Stephen Hemmingerc1b56872008-11-25 21:14:06 -0800314{
Stephen Hemmingerc1b56872008-11-25 21:14:06 -0800315 ASSERT_RTNL();
316
Jarek Poplawski244e6c22008-11-26 15:24:32 -0800317 return gen_find_node(bstats, rate_est) != NULL;
Stephen Hemmingerc1b56872008-11-25 21:14:06 -0800318}
319EXPORT_SYMBOL(gen_estimator_active);