blob: b3751cfede2cbf2d39168004a2c1d063751720c2 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * ip_vs_est.c: simple rate estimator for IPVS
3 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
Hans Schillstrom29c20262011-01-03 14:44:54 +010011 * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
12 * Network name space (netns) aware.
13 * Global data moved to netns i.e struct netns_ipvs
14 * Affected data: est_list and est_lock.
15 * estimation_timer() runs with timer per netns.
 *              get_stats() does the per-cpu summing.
Linus Torvalds1da177e2005-04-16 15:20:36 -070017 */
Hannes Eder9aada7a2009-07-30 14:29:44 -070018
19#define KMSG_COMPONENT "IPVS"
20#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
21
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/kernel.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020023#include <linux/jiffies.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/types.h>
Adrian Bunk4ffd2e42006-01-05 12:14:43 -080025#include <linux/interrupt.h>
Pavel Emelyanov90754f82008-01-12 02:33:50 -080026#include <linux/sysctl.h>
Sven Wegener3a14a3132008-08-10 18:24:41 +000027#include <linux/list.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028
29#include <net/ip_vs.h>
30
31/*
  This code estimates rates over a short interval (such as 8
  seconds) for virtual services and real servers. To measure rates over
  a long interval, it is easy to implement a user-level daemon which
  periodically reads those statistical counters and computes the rate.
36
37 Currently, the measurement is activated by slow timer handler. Hope
38 this measurement will not introduce too much load.
39
40 We measure rate during the last 8 seconds every 2 seconds:
41
42 avgrate = avgrate*(1-W) + rate*W
43
44 where W = 2^(-2)
45
46 NOTES.
47
48 * The stored value for average bps is scaled by 2^5, so that maximal
49 rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
50
  * A lot of code is taken from net/sched/estimator.c
52 */
53
54
Hans Schillstromb17fc992011-01-03 14:44:56 +010055/*
56 * Make a summary from each cpu
57 */
58static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
59 struct ip_vs_cpu_stats *stats)
60{
61 int i;
62
63 for_each_possible_cpu(i) {
64 struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
65 unsigned int start;
66 __u64 inbytes, outbytes;
67 if (i) {
68 sum->conns += s->ustats.conns;
69 sum->inpkts += s->ustats.inpkts;
70 sum->outpkts += s->ustats.outpkts;
71 do {
Julian Anastasov4a569c02011-03-04 12:28:20 +020072 start = u64_stats_fetch_begin(&s->syncp);
Hans Schillstromb17fc992011-01-03 14:44:56 +010073 inbytes = s->ustats.inbytes;
74 outbytes = s->ustats.outbytes;
Julian Anastasov4a569c02011-03-04 12:28:20 +020075 } while (u64_stats_fetch_retry(&s->syncp, start));
Hans Schillstromb17fc992011-01-03 14:44:56 +010076 sum->inbytes += inbytes;
77 sum->outbytes += outbytes;
78 } else {
79 sum->conns = s->ustats.conns;
80 sum->inpkts = s->ustats.inpkts;
81 sum->outpkts = s->ustats.outpkts;
82 do {
Julian Anastasov4a569c02011-03-04 12:28:20 +020083 start = u64_stats_fetch_begin(&s->syncp);
Hans Schillstromb17fc992011-01-03 14:44:56 +010084 sum->inbytes = s->ustats.inbytes;
85 sum->outbytes = s->ustats.outbytes;
Julian Anastasov4a569c02011-03-04 12:28:20 +020086 } while (u64_stats_fetch_retry(&s->syncp, start));
Hans Schillstromb17fc992011-01-03 14:44:56 +010087 }
88 }
89}
90
91
Linus Torvalds1da177e2005-04-16 15:20:36 -070092static void estimation_timer(unsigned long arg)
93{
94 struct ip_vs_estimator *e;
95 struct ip_vs_stats *s;
96 u32 n_conns;
97 u32 n_inpkts, n_outpkts;
98 u64 n_inbytes, n_outbytes;
99 u32 rate;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100100 struct net *net = (struct net *)arg;
101 struct netns_ipvs *ipvs;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102
Hans Schillstrom29c20262011-01-03 14:44:54 +0100103 ipvs = net_ipvs(net);
104 spin_lock(&ipvs->est_lock);
105 list_for_each_entry(e, &ipvs->est_list, list) {
Sven Wegener3a14a3132008-08-10 18:24:41 +0000106 s = container_of(e, struct ip_vs_stats, est);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107
108 spin_lock(&s->lock);
Julian Anastasov2a0751a2011-03-04 12:20:35 +0200109 ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200110 n_conns = s->ustats.conns;
111 n_inpkts = s->ustats.inpkts;
112 n_outpkts = s->ustats.outpkts;
113 n_inbytes = s->ustats.inbytes;
114 n_outbytes = s->ustats.outbytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115
116 /* scaled by 2^10, but divided 2 seconds */
Hans Schillstrom29c20262011-01-03 14:44:54 +0100117 rate = (n_conns - e->last_conns) << 9;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118 e->last_conns = n_conns;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100119 e->cps += ((long)rate - (long)e->cps) >> 2;
120 s->ustats.cps = (e->cps + 0x1FF) >> 10;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121
Hans Schillstrom29c20262011-01-03 14:44:54 +0100122 rate = (n_inpkts - e->last_inpkts) << 9;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123 e->last_inpkts = n_inpkts;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100124 e->inpps += ((long)rate - (long)e->inpps) >> 2;
125 s->ustats.inpps = (e->inpps + 0x1FF) >> 10;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126
Hans Schillstrom29c20262011-01-03 14:44:54 +0100127 rate = (n_outpkts - e->last_outpkts) << 9;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128 e->last_outpkts = n_outpkts;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100129 e->outpps += ((long)rate - (long)e->outpps) >> 2;
130 s->ustats.outpps = (e->outpps + 0x1FF) >> 10;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131
Hans Schillstrom29c20262011-01-03 14:44:54 +0100132 rate = (n_inbytes - e->last_inbytes) << 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133 e->last_inbytes = n_inbytes;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100134 e->inbps += ((long)rate - (long)e->inbps) >> 2;
135 s->ustats.inbps = (e->inbps + 0xF) >> 5;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136
Hans Schillstrom29c20262011-01-03 14:44:54 +0100137 rate = (n_outbytes - e->last_outbytes) << 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138 e->last_outbytes = n_outbytes;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100139 e->outbps += ((long)rate - (long)e->outbps) >> 2;
140 s->ustats.outbps = (e->outbps + 0xF) >> 5;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 spin_unlock(&s->lock);
142 }
Hans Schillstrom29c20262011-01-03 14:44:54 +0100143 spin_unlock(&ipvs->est_lock);
144 mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145}
146
Hans Schillstrom29c20262011-01-03 14:44:54 +0100147void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148{
Hans Schillstrom29c20262011-01-03 14:44:54 +0100149 struct netns_ipvs *ipvs = net_ipvs(net);
Sven Wegener3a14a3132008-08-10 18:24:41 +0000150 struct ip_vs_estimator *est = &stats->est;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151
Sven Wegener3a14a3132008-08-10 18:24:41 +0000152 INIT_LIST_HEAD(&est->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200154 est->last_conns = stats->ustats.conns;
155 est->cps = stats->ustats.cps<<10;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200157 est->last_inpkts = stats->ustats.inpkts;
158 est->inpps = stats->ustats.inpps<<10;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200160 est->last_outpkts = stats->ustats.outpkts;
161 est->outpps = stats->ustats.outpps<<10;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200163 est->last_inbytes = stats->ustats.inbytes;
164 est->inbps = stats->ustats.inbps<<5;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200166 est->last_outbytes = stats->ustats.outbytes;
167 est->outbps = stats->ustats.outbps<<5;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168
Hans Schillstrom29c20262011-01-03 14:44:54 +0100169 spin_lock_bh(&ipvs->est_lock);
170 list_add(&est->list, &ipvs->est_list);
171 spin_unlock_bh(&ipvs->est_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172}
173
Hans Schillstrom29c20262011-01-03 14:44:54 +0100174void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175{
Hans Schillstrom29c20262011-01-03 14:44:54 +0100176 struct netns_ipvs *ipvs = net_ipvs(net);
Sven Wegener3a14a3132008-08-10 18:24:41 +0000177 struct ip_vs_estimator *est = &stats->est;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178
Hans Schillstrom29c20262011-01-03 14:44:54 +0100179 spin_lock_bh(&ipvs->est_lock);
Sven Wegener3a14a3132008-08-10 18:24:41 +0000180 list_del(&est->list);
Hans Schillstrom29c20262011-01-03 14:44:54 +0100181 spin_unlock_bh(&ipvs->est_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182}
183
184void ip_vs_zero_estimator(struct ip_vs_stats *stats)
185{
Sven Wegener3a14a3132008-08-10 18:24:41 +0000186 struct ip_vs_estimator *est = &stats->est;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187
Sven Wegener3a14a3132008-08-10 18:24:41 +0000188 /* set counters zero, caller must hold the stats->lock lock */
189 est->last_inbytes = 0;
190 est->last_outbytes = 0;
191 est->last_conns = 0;
192 est->last_inpkts = 0;
193 est->last_outpkts = 0;
194 est->cps = 0;
195 est->inpps = 0;
196 est->outpps = 0;
197 est->inbps = 0;
198 est->outbps = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199}
Sven Wegenera919cf42008-08-14 00:47:16 +0200200
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100201static int __net_init __ip_vs_estimator_init(struct net *net)
202{
Hans Schillstrom29c20262011-01-03 14:44:54 +0100203 struct netns_ipvs *ipvs = net_ipvs(net);
204
Hans Schillstrom29c20262011-01-03 14:44:54 +0100205 INIT_LIST_HEAD(&ipvs->est_list);
206 spin_lock_init(&ipvs->est_lock);
207 setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
208 mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100209 return 0;
210}
211
Hans Schillstrom29c20262011-01-03 14:44:54 +0100212static void __net_exit __ip_vs_estimator_exit(struct net *net)
213{
214 del_timer_sync(&net_ipvs(net)->est_timer);
215}
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100216static struct pernet_operations ip_vs_app_ops = {
217 .init = __ip_vs_estimator_init,
Hans Schillstrom29c20262011-01-03 14:44:54 +0100218 .exit = __ip_vs_estimator_exit,
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100219};
220
Sven Wegenera919cf42008-08-14 00:47:16 +0200221int __init ip_vs_estimator_init(void)
222{
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100223 int rv;
224
225 rv = register_pernet_subsys(&ip_vs_app_ops);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100226 return rv;
Sven Wegenera919cf42008-08-14 00:47:16 +0200227}
228
229void ip_vs_estimator_cleanup(void)
230{
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100231 unregister_pernet_subsys(&ip_vs_app_ops);
Sven Wegenera919cf42008-08-14 00:47:16 +0200232}