blob: 6bee6d0c73a52e93e1413162b4db971340fa3312 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * ip_vs_est.c: simple rate estimator for IPVS
3 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
Hans Schillstrom29c20262011-01-03 14:44:54 +010011 * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
12 * Network name space (netns) aware.
13 * Global data moved to netns i.e struct netns_ipvs
14 * Affected data: est_list and est_lock.
15 * estimation_timer() runs with timer per netns.
16 * get_stats() does the per-cpu summing.
Linus Torvalds1da177e2005-04-16 15:20:36 -070017 */
Hannes Eder9aada7a2009-07-30 14:29:44 -070018
19#define KMSG_COMPONENT "IPVS"
20#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
21
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/kernel.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020023#include <linux/jiffies.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/types.h>
Adrian Bunk4ffd2e42006-01-05 12:14:43 -080025#include <linux/interrupt.h>
Pavel Emelyanov90754f82008-01-12 02:33:50 -080026#include <linux/sysctl.h>
Sven Wegener3a14a3132008-08-10 18:24:41 +000027#include <linux/list.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028
29#include <net/ip_vs.h>
30
31/*
32 This code is to estimate rate in a shorter interval (such as 8
33 seconds) for virtual services and real servers. To measure the rate over a
34 long interval, it is easy to implement a user level daemon which
35 periodically reads those statistical counters and measures the rate.
36
37 Currently, the measurement is activated by slow timer handler. Hope
38 this measurement will not introduce too much load.
39
40 We measure rate during the last 8 seconds every 2 seconds:
41
42 avgrate = avgrate*(1-W) + rate*W
43
44 where W = 2^(-2)
45
46 NOTES.
47
48 * The stored value for average bps is scaled by 2^5, so that maximal
49 rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
50
51 * A lot of code is taken from net/sched/estimator.c
52 */
53
54
Hans Schillstromb17fc992011-01-03 14:44:56 +010055/*
56 * Make a summary from each cpu
57 */
58static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
Julian Anastasovb962abd2013-03-09 23:25:08 +020059 struct ip_vs_cpu_stats __percpu *stats)
Hans Schillstromb17fc992011-01-03 14:44:56 +010060{
61 int i;
62
63 for_each_possible_cpu(i) {
64 struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
65 unsigned int start;
66 __u64 inbytes, outbytes;
67 if (i) {
68 sum->conns += s->ustats.conns;
69 sum->inpkts += s->ustats.inpkts;
70 sum->outpkts += s->ustats.outpkts;
71 do {
Julian Anastasov4a569c02011-03-04 12:28:20 +020072 start = u64_stats_fetch_begin(&s->syncp);
Hans Schillstromb17fc992011-01-03 14:44:56 +010073 inbytes = s->ustats.inbytes;
74 outbytes = s->ustats.outbytes;
Julian Anastasov4a569c02011-03-04 12:28:20 +020075 } while (u64_stats_fetch_retry(&s->syncp, start));
Hans Schillstromb17fc992011-01-03 14:44:56 +010076 sum->inbytes += inbytes;
77 sum->outbytes += outbytes;
78 } else {
79 sum->conns = s->ustats.conns;
80 sum->inpkts = s->ustats.inpkts;
81 sum->outpkts = s->ustats.outpkts;
82 do {
Julian Anastasov4a569c02011-03-04 12:28:20 +020083 start = u64_stats_fetch_begin(&s->syncp);
Hans Schillstromb17fc992011-01-03 14:44:56 +010084 sum->inbytes = s->ustats.inbytes;
85 sum->outbytes = s->ustats.outbytes;
Julian Anastasov4a569c02011-03-04 12:28:20 +020086 } while (u64_stats_fetch_retry(&s->syncp, start));
Hans Schillstromb17fc992011-01-03 14:44:56 +010087 }
88 }
89}
90
91
Linus Torvalds1da177e2005-04-16 15:20:36 -070092static void estimation_timer(unsigned long arg)
93{
94 struct ip_vs_estimator *e;
95 struct ip_vs_stats *s;
96 u32 n_conns;
97 u32 n_inpkts, n_outpkts;
98 u64 n_inbytes, n_outbytes;
99 u32 rate;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100100 struct net *net = (struct net *)arg;
101 struct netns_ipvs *ipvs;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102
Hans Schillstrom29c20262011-01-03 14:44:54 +0100103 ipvs = net_ipvs(net);
104 spin_lock(&ipvs->est_lock);
105 list_for_each_entry(e, &ipvs->est_list, list) {
Sven Wegener3a14a3132008-08-10 18:24:41 +0000106 s = container_of(e, struct ip_vs_stats, est);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107
108 spin_lock(&s->lock);
Julian Anastasov2a0751a2011-03-04 12:20:35 +0200109 ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200110 n_conns = s->ustats.conns;
111 n_inpkts = s->ustats.inpkts;
112 n_outpkts = s->ustats.outpkts;
113 n_inbytes = s->ustats.inbytes;
114 n_outbytes = s->ustats.outbytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115
116 /* scaled by 2^10, but divided 2 seconds */
Hans Schillstrom29c20262011-01-03 14:44:54 +0100117 rate = (n_conns - e->last_conns) << 9;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118 e->last_conns = n_conns;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100119 e->cps += ((long)rate - (long)e->cps) >> 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120
Hans Schillstrom29c20262011-01-03 14:44:54 +0100121 rate = (n_inpkts - e->last_inpkts) << 9;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122 e->last_inpkts = n_inpkts;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100123 e->inpps += ((long)rate - (long)e->inpps) >> 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124
Hans Schillstrom29c20262011-01-03 14:44:54 +0100125 rate = (n_outpkts - e->last_outpkts) << 9;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126 e->last_outpkts = n_outpkts;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100127 e->outpps += ((long)rate - (long)e->outpps) >> 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128
Hans Schillstrom29c20262011-01-03 14:44:54 +0100129 rate = (n_inbytes - e->last_inbytes) << 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130 e->last_inbytes = n_inbytes;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100131 e->inbps += ((long)rate - (long)e->inbps) >> 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132
Hans Schillstrom29c20262011-01-03 14:44:54 +0100133 rate = (n_outbytes - e->last_outbytes) << 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 e->last_outbytes = n_outbytes;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100135 e->outbps += ((long)rate - (long)e->outbps) >> 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136 spin_unlock(&s->lock);
137 }
Hans Schillstrom29c20262011-01-03 14:44:54 +0100138 spin_unlock(&ipvs->est_lock);
139 mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140}
141
Julian Anastasov6ef757f2011-03-14 01:44:28 +0200142void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143{
Hans Schillstrom29c20262011-01-03 14:44:54 +0100144 struct netns_ipvs *ipvs = net_ipvs(net);
Sven Wegener3a14a3132008-08-10 18:24:41 +0000145 struct ip_vs_estimator *est = &stats->est;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146
Sven Wegener3a14a3132008-08-10 18:24:41 +0000147 INIT_LIST_HEAD(&est->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148
Hans Schillstrom29c20262011-01-03 14:44:54 +0100149 spin_lock_bh(&ipvs->est_lock);
150 list_add(&est->list, &ipvs->est_list);
151 spin_unlock_bh(&ipvs->est_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152}
153
Julian Anastasov6ef757f2011-03-14 01:44:28 +0200154void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155{
Hans Schillstrom29c20262011-01-03 14:44:54 +0100156 struct netns_ipvs *ipvs = net_ipvs(net);
Sven Wegener3a14a3132008-08-10 18:24:41 +0000157 struct ip_vs_estimator *est = &stats->est;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158
Hans Schillstrom29c20262011-01-03 14:44:54 +0100159 spin_lock_bh(&ipvs->est_lock);
Sven Wegener3a14a3132008-08-10 18:24:41 +0000160 list_del(&est->list);
Hans Schillstrom29c20262011-01-03 14:44:54 +0100161 spin_unlock_bh(&ipvs->est_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162}
163
164void ip_vs_zero_estimator(struct ip_vs_stats *stats)
165{
Sven Wegener3a14a3132008-08-10 18:24:41 +0000166 struct ip_vs_estimator *est = &stats->est;
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200167 struct ip_vs_stats_user *u = &stats->ustats;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200169 /* reset counters, caller must hold the stats->lock lock */
170 est->last_inbytes = u->inbytes;
171 est->last_outbytes = u->outbytes;
172 est->last_conns = u->conns;
173 est->last_inpkts = u->inpkts;
174 est->last_outpkts = u->outpkts;
Sven Wegener3a14a3132008-08-10 18:24:41 +0000175 est->cps = 0;
176 est->inpps = 0;
177 est->outpps = 0;
178 est->inbps = 0;
179 est->outbps = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180}
Sven Wegenera919cf42008-08-14 00:47:16 +0200181
Julian Anastasovea9f22c2011-03-14 01:41:54 +0200182/* Get decoded rates */
183void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
184 struct ip_vs_stats *stats)
185{
186 struct ip_vs_estimator *e = &stats->est;
187
188 dst->cps = (e->cps + 0x1FF) >> 10;
189 dst->inpps = (e->inpps + 0x1FF) >> 10;
190 dst->outpps = (e->outpps + 0x1FF) >> 10;
191 dst->inbps = (e->inbps + 0xF) >> 5;
192 dst->outbps = (e->outbps + 0xF) >> 5;
193}
194
Hans Schillstrom503cf152011-05-01 18:50:16 +0200195int __net_init ip_vs_estimator_net_init(struct net *net)
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100196{
Hans Schillstrom29c20262011-01-03 14:44:54 +0100197 struct netns_ipvs *ipvs = net_ipvs(net);
198
Hans Schillstrom29c20262011-01-03 14:44:54 +0100199 INIT_LIST_HEAD(&ipvs->est_list);
200 spin_lock_init(&ipvs->est_lock);
201 setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
202 mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100203 return 0;
204}
205
Hans Schillstrom503cf152011-05-01 18:50:16 +0200206void __net_exit ip_vs_estimator_net_cleanup(struct net *net)
Hans Schillstrom29c20262011-01-03 14:44:54 +0100207{
208 del_timer_sync(&net_ipvs(net)->est_timer);
209}