blob: d13616b138cdac6a5ac54bb60fffbb7c20f3652d [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * ip_vs_est.c: simple rate estimator for IPVS
3 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
 * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
 *              Network name space (netns) aware.
 *              Global data moved to netns, i.e. struct netns_ipvs.
 *              Affected data: est_list and est_lock.
 *              estimation_timer() runs with one timer per netns.
 *              get_stats() does the per-CPU summing.
Linus Torvalds1da177e2005-04-16 15:20:36 -070017 */
Hannes Eder9aada7a2009-07-30 14:29:44 -070018
19#define KMSG_COMPONENT "IPVS"
20#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
21
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/kernel.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020023#include <linux/jiffies.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/types.h>
Adrian Bunk4ffd2e42006-01-05 12:14:43 -080025#include <linux/interrupt.h>
Pavel Emelyanov90754f82008-01-12 02:33:50 -080026#include <linux/sysctl.h>
Sven Wegener3a14a3132008-08-10 18:24:41 +000027#include <linux/list.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028
29#include <net/ip_vs.h>
30
/*
  This code estimates rates over a short interval (such as 8 seconds)
  for virtual services and real servers. To measure rates over a long
  interval, it is easy to implement a user-level daemon that
  periodically reads the statistical counters and computes the rate.

  Currently, the measurement is driven by a slow timer handler. Hopefully
  this measurement will not introduce too much load.

  We measure the rate during the last 8 seconds, every 2 seconds:

    avgrate = avgrate*(1-W) + rate*W

    where W = 2^(-2)

  NOTES.

  * The stored value for average bps is scaled by 2^5, so that the maximal
    rate is ~2.15Gbits/s; average pps and cps are scaled by 2^10.

  * A lot of the code is taken from net/sched/estimator.c
 */
53
54
Hans Schillstromb17fc992011-01-03 14:44:56 +010055/*
56 * Make a summary from each cpu
57 */
58static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
59 struct ip_vs_cpu_stats *stats)
60{
61 int i;
62
63 for_each_possible_cpu(i) {
64 struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
65 unsigned int start;
66 __u64 inbytes, outbytes;
67 if (i) {
68 sum->conns += s->ustats.conns;
69 sum->inpkts += s->ustats.inpkts;
70 sum->outpkts += s->ustats.outpkts;
71 do {
72 start = u64_stats_fetch_begin_bh(&s->syncp);
73 inbytes = s->ustats.inbytes;
74 outbytes = s->ustats.outbytes;
75 } while (u64_stats_fetch_retry_bh(&s->syncp, start));
76 sum->inbytes += inbytes;
77 sum->outbytes += outbytes;
78 } else {
79 sum->conns = s->ustats.conns;
80 sum->inpkts = s->ustats.inpkts;
81 sum->outpkts = s->ustats.outpkts;
82 do {
83 start = u64_stats_fetch_begin_bh(&s->syncp);
84 sum->inbytes = s->ustats.inbytes;
85 sum->outbytes = s->ustats.outbytes;
86 } while (u64_stats_fetch_retry_bh(&s->syncp, start));
87 }
88 }
89}
90
91
Linus Torvalds1da177e2005-04-16 15:20:36 -070092static void estimation_timer(unsigned long arg)
93{
94 struct ip_vs_estimator *e;
95 struct ip_vs_stats *s;
96 u32 n_conns;
97 u32 n_inpkts, n_outpkts;
98 u64 n_inbytes, n_outbytes;
99 u32 rate;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100100 struct net *net = (struct net *)arg;
101 struct netns_ipvs *ipvs;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102
Hans Schillstrom29c20262011-01-03 14:44:54 +0100103 ipvs = net_ipvs(net);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100104 ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats);
Hans Schillstrom29c20262011-01-03 14:44:54 +0100105 spin_lock(&ipvs->est_lock);
106 list_for_each_entry(e, &ipvs->est_list, list) {
Sven Wegener3a14a3132008-08-10 18:24:41 +0000107 s = container_of(e, struct ip_vs_stats, est);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108
Hans Schillstromb17fc992011-01-03 14:44:56 +0100109 ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110 spin_lock(&s->lock);
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200111 n_conns = s->ustats.conns;
112 n_inpkts = s->ustats.inpkts;
113 n_outpkts = s->ustats.outpkts;
114 n_inbytes = s->ustats.inbytes;
115 n_outbytes = s->ustats.outbytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
117 /* scaled by 2^10, but divided 2 seconds */
Hans Schillstrom29c20262011-01-03 14:44:54 +0100118 rate = (n_conns - e->last_conns) << 9;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119 e->last_conns = n_conns;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100120 e->cps += ((long)rate - (long)e->cps) >> 2;
121 s->ustats.cps = (e->cps + 0x1FF) >> 10;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122
Hans Schillstrom29c20262011-01-03 14:44:54 +0100123 rate = (n_inpkts - e->last_inpkts) << 9;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124 e->last_inpkts = n_inpkts;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100125 e->inpps += ((long)rate - (long)e->inpps) >> 2;
126 s->ustats.inpps = (e->inpps + 0x1FF) >> 10;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127
Hans Schillstrom29c20262011-01-03 14:44:54 +0100128 rate = (n_outpkts - e->last_outpkts) << 9;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129 e->last_outpkts = n_outpkts;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100130 e->outpps += ((long)rate - (long)e->outpps) >> 2;
131 s->ustats.outpps = (e->outpps + 0x1FF) >> 10;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132
Hans Schillstrom29c20262011-01-03 14:44:54 +0100133 rate = (n_inbytes - e->last_inbytes) << 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 e->last_inbytes = n_inbytes;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100135 e->inbps += ((long)rate - (long)e->inbps) >> 2;
136 s->ustats.inbps = (e->inbps + 0xF) >> 5;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137
Hans Schillstrom29c20262011-01-03 14:44:54 +0100138 rate = (n_outbytes - e->last_outbytes) << 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 e->last_outbytes = n_outbytes;
Hans Schillstrom29c20262011-01-03 14:44:54 +0100140 e->outbps += ((long)rate - (long)e->outbps) >> 2;
141 s->ustats.outbps = (e->outbps + 0xF) >> 5;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142 spin_unlock(&s->lock);
143 }
Hans Schillstrom29c20262011-01-03 14:44:54 +0100144 spin_unlock(&ipvs->est_lock);
145 mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146}
147
Hans Schillstrom29c20262011-01-03 14:44:54 +0100148void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149{
Hans Schillstrom29c20262011-01-03 14:44:54 +0100150 struct netns_ipvs *ipvs = net_ipvs(net);
Sven Wegener3a14a3132008-08-10 18:24:41 +0000151 struct ip_vs_estimator *est = &stats->est;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152
Sven Wegener3a14a3132008-08-10 18:24:41 +0000153 INIT_LIST_HEAD(&est->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700154
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200155 est->last_conns = stats->ustats.conns;
156 est->cps = stats->ustats.cps<<10;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200158 est->last_inpkts = stats->ustats.inpkts;
159 est->inpps = stats->ustats.inpps<<10;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200161 est->last_outpkts = stats->ustats.outpkts;
162 est->outpps = stats->ustats.outpps<<10;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200164 est->last_inbytes = stats->ustats.inbytes;
165 est->inbps = stats->ustats.inbps<<5;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200167 est->last_outbytes = stats->ustats.outbytes;
168 est->outbps = stats->ustats.outbps<<5;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169
Hans Schillstrom29c20262011-01-03 14:44:54 +0100170 spin_lock_bh(&ipvs->est_lock);
171 list_add(&est->list, &ipvs->est_list);
172 spin_unlock_bh(&ipvs->est_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173}
174
Hans Schillstrom29c20262011-01-03 14:44:54 +0100175void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176{
Hans Schillstrom29c20262011-01-03 14:44:54 +0100177 struct netns_ipvs *ipvs = net_ipvs(net);
Sven Wegener3a14a3132008-08-10 18:24:41 +0000178 struct ip_vs_estimator *est = &stats->est;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700179
Hans Schillstrom29c20262011-01-03 14:44:54 +0100180 spin_lock_bh(&ipvs->est_lock);
Sven Wegener3a14a3132008-08-10 18:24:41 +0000181 list_del(&est->list);
Hans Schillstrom29c20262011-01-03 14:44:54 +0100182 spin_unlock_bh(&ipvs->est_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183}
184
185void ip_vs_zero_estimator(struct ip_vs_stats *stats)
186{
Sven Wegener3a14a3132008-08-10 18:24:41 +0000187 struct ip_vs_estimator *est = &stats->est;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188
Sven Wegener3a14a3132008-08-10 18:24:41 +0000189 /* set counters zero, caller must hold the stats->lock lock */
190 est->last_inbytes = 0;
191 est->last_outbytes = 0;
192 est->last_conns = 0;
193 est->last_inpkts = 0;
194 est->last_outpkts = 0;
195 est->cps = 0;
196 est->inpps = 0;
197 est->outpps = 0;
198 est->inbps = 0;
199 est->outbps = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200}
Sven Wegenera919cf42008-08-14 00:47:16 +0200201
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100202static int __net_init __ip_vs_estimator_init(struct net *net)
203{
Hans Schillstrom29c20262011-01-03 14:44:54 +0100204 struct netns_ipvs *ipvs = net_ipvs(net);
205
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100206 if (!net_eq(net, &init_net)) /* netns not enabled yet */
207 return -EPERM;
208
Hans Schillstrom29c20262011-01-03 14:44:54 +0100209 INIT_LIST_HEAD(&ipvs->est_list);
210 spin_lock_init(&ipvs->est_lock);
211 setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
212 mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100213 return 0;
214}
215
Hans Schillstrom29c20262011-01-03 14:44:54 +0100216static void __net_exit __ip_vs_estimator_exit(struct net *net)
217{
218 del_timer_sync(&net_ipvs(net)->est_timer);
219}
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100220static struct pernet_operations ip_vs_app_ops = {
221 .init = __ip_vs_estimator_init,
Hans Schillstrom29c20262011-01-03 14:44:54 +0100222 .exit = __ip_vs_estimator_exit,
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100223};
224
Sven Wegenera919cf42008-08-14 00:47:16 +0200225int __init ip_vs_estimator_init(void)
226{
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100227 int rv;
228
229 rv = register_pernet_subsys(&ip_vs_app_ops);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100230 return rv;
Sven Wegenera919cf42008-08-14 00:47:16 +0200231}
232
233void ip_vs_estimator_cleanup(void)
234{
Hans Schillstrom61b1ab42011-01-03 14:44:42 +0100235 unregister_pernet_subsys(&ip_vs_app_ops);
Sven Wegenera919cf42008-08-14 00:47:16 +0200236}