blob: a60b20fa142e721e1805963c24d8abb00565dc62 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
Hannes Eder9aada7a2009-07-30 14:29:44 -070021#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/seq_file.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090034#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080038#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020040#include <net/net_namespace.h>
Hans Schillstrom93304192011-01-03 14:44:51 +010041#include <linux/nsproxy.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <net/ip.h>
Vince Busam09571c72008-09-02 15:55:52 +020043#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020047#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020049#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
/* Mutex serialising the IPVS sockopt handlers; [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* Reader/writer lock protecting the virtual service tables. */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
Linus Torvalds1da177e2005-04-16 15:20:36 -070061/* sysctl variables */
Linus Torvalds1da177e2005-04-16 15:20:36 -070062
63#ifdef CONFIG_IP_VS_DEBUG
/* Current debug level; file-scope statics start out as zero. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	const int level = sysctl_ip_vs_debug_level;

	return level;
}
70#endif
71
Vince Busam09571c72008-09-02 15:55:52 +020072#ifdef CONFIG_IP_VS_IPV6
73/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
Hans Schillstrom4a984802011-01-03 14:45:02 +010074static int __ip_vs_addr_is_local_v6(struct net *net,
75 const struct in6_addr *addr)
Vince Busam09571c72008-09-02 15:55:52 +020076{
77 struct rt6_info *rt;
David S. Miller4c9483b2011-03-12 16:22:43 -050078 struct flowi6 fl6 = {
79 .daddr = *addr,
Vince Busam09571c72008-09-02 15:55:52 +020080 };
81
David S. Miller4c9483b2011-03-12 16:22:43 -050082 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
Vince Busam09571c72008-09-02 15:55:52 +020083 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
David S. Miller4c9483b2011-03-12 16:22:43 -050084 return 1;
Vince Busam09571c72008-09-02 15:55:52 +020085
86 return 0;
87}
88#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -070089/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -070090 * update_defense_level is called from keventd and from sysctl,
91 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -070092 */
Hans Schillstrom93304192011-01-03 14:44:51 +010093static void update_defense_level(struct netns_ipvs *ipvs)
Linus Torvalds1da177e2005-04-16 15:20:36 -070094{
95 struct sysinfo i;
96 static int old_secure_tcp = 0;
97 int availmem;
98 int nomem;
99 int to_change = -1;
100
101 /* we only count free and buffered memory (in pages) */
102 si_meminfo(&i);
103 availmem = i.freeram + i.bufferram;
104 /* however in linux 2.5 the i.bufferram is total page cache size,
105 we need adjust it */
106 /* si_swapinfo(&i); */
107 /* availmem = availmem - (i.totalswap - i.freeswap); */
108
Hans Schillstroma0840e22011-01-03 14:44:58 +0100109 nomem = (availmem < ipvs->sysctl_amemthresh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700111 local_bh_disable();
112
Linus Torvalds1da177e2005-04-16 15:20:36 -0700113 /* drop_entry */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100114 spin_lock(&ipvs->dropentry_lock);
115 switch (ipvs->sysctl_drop_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100117 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118 break;
119 case 1:
120 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100121 atomic_set(&ipvs->dropentry, 1);
122 ipvs->sysctl_drop_entry = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100124 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125 }
126 break;
127 case 2:
128 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100129 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100131 atomic_set(&ipvs->dropentry, 0);
132 ipvs->sysctl_drop_entry = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133 };
134 break;
135 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100136 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137 break;
138 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100139 spin_unlock(&ipvs->dropentry_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140
141 /* drop_packet */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100142 spin_lock(&ipvs->droppacket_lock);
143 switch (ipvs->sysctl_drop_packet) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100145 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 break;
147 case 1:
148 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100149 ipvs->drop_rate = ipvs->drop_counter
150 = ipvs->sysctl_amemthresh /
151 (ipvs->sysctl_amemthresh-availmem);
152 ipvs->sysctl_drop_packet = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100154 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155 }
156 break;
157 case 2:
158 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100159 ipvs->drop_rate = ipvs->drop_counter
160 = ipvs->sysctl_amemthresh /
161 (ipvs->sysctl_amemthresh-availmem);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100163 ipvs->drop_rate = 0;
164 ipvs->sysctl_drop_packet = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165 }
166 break;
167 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100168 ipvs->drop_rate = ipvs->sysctl_am_droprate;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 break;
170 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100171 spin_unlock(&ipvs->droppacket_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172
173 /* secure_tcp */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100174 spin_lock(&ipvs->securetcp_lock);
175 switch (ipvs->sysctl_secure_tcp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176 case 0:
177 if (old_secure_tcp >= 2)
178 to_change = 0;
179 break;
180 case 1:
181 if (nomem) {
182 if (old_secure_tcp < 2)
183 to_change = 1;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100184 ipvs->sysctl_secure_tcp = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185 } else {
186 if (old_secure_tcp >= 2)
187 to_change = 0;
188 }
189 break;
190 case 2:
191 if (nomem) {
192 if (old_secure_tcp < 2)
193 to_change = 1;
194 } else {
195 if (old_secure_tcp >= 2)
196 to_change = 0;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100197 ipvs->sysctl_secure_tcp = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198 }
199 break;
200 case 3:
201 if (old_secure_tcp < 2)
202 to_change = 1;
203 break;
204 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100205 old_secure_tcp = ipvs->sysctl_secure_tcp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 if (to_change >= 0)
Hans Schillstrom93304192011-01-03 14:44:51 +0100207 ip_vs_protocol_timeout_change(ipvs,
Hans Schillstroma0840e22011-01-03 14:44:58 +0100208 ipvs->sysctl_secure_tcp > 1);
209 spin_unlock(&ipvs->securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700210
211 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212}
213
214
215/*
216 * Timer for checking the defense
217 */
218#define DEFENSE_TIMER_PERIOD 1*HZ
Linus Torvalds1da177e2005-04-16 15:20:36 -0700219
David Howellsc4028952006-11-22 14:57:56 +0000220static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221{
Hans Schillstromf6340ee2011-01-03 14:44:59 +0100222 struct netns_ipvs *ipvs =
223 container_of(work, struct netns_ipvs, defense_work.work);
Hans Schillstrom93304192011-01-03 14:44:51 +0100224
225 update_defense_level(ipvs);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100226 if (atomic_read(&ipvs->dropentry))
Hans Schillstromf6340ee2011-01-03 14:44:59 +0100227 ip_vs_random_dropentry(ipvs->net);
228 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229}
230
231int
232ip_vs_use_count_inc(void)
233{
234 return try_module_get(THIS_MODULE);
235}
236
237void
238ip_vs_use_count_dec(void)
239{
240 module_put(THIS_MODULE);
241}
242
243
244/*
245 * Hash table: for virtual service lookups
246 */
247#define IP_VS_SVC_TAB_BITS 8
248#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
249#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
250
251/* the service table hashed by <protocol, addr, port> */
252static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
253/* the service table hashed by fwmark */
254static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
255
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256
257/*
258 * Returns hash value for virtual service
259 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100260static inline unsigned
261ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
262 const union nf_inet_addr *addr, __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263{
264 register unsigned porth = ntohs(port);
Julius Volzb18610d2008-09-02 15:55:37 +0200265 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266
Julius Volzb18610d2008-09-02 15:55:37 +0200267#ifdef CONFIG_IP_VS_IPV6
268 if (af == AF_INET6)
269 addr_fold = addr->ip6[0]^addr->ip6[1]^
270 addr->ip6[2]^addr->ip6[3];
271#endif
Hans Schillstromfc723252011-01-03 14:44:43 +0100272 addr_fold ^= ((size_t)net>>8);
Julius Volzb18610d2008-09-02 15:55:37 +0200273
274 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 & IP_VS_SVC_TAB_MASK;
276}
277
278/*
279 * Returns hash value of fwmark for virtual service lookup
280 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100281static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282{
Hans Schillstromfc723252011-01-03 14:44:43 +0100283 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284}
285
286/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100287 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288 * or in the ip_vs_svc_fwm_table by fwmark.
289 * Should be called with locked tables.
290 */
291static int ip_vs_svc_hash(struct ip_vs_service *svc)
292{
293 unsigned hash;
294
295 if (svc->flags & IP_VS_SVC_F_HASHED) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000296 pr_err("%s(): request for already hashed, called from %pF\n",
297 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298 return 0;
299 }
300
301 if (svc->fwmark == 0) {
302 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100303 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100305 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
306 &svc->addr, svc->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
308 } else {
309 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100310 * Hash it by fwmark in svc_fwm_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100312 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
314 }
315
316 svc->flags |= IP_VS_SVC_F_HASHED;
317 /* increase its refcnt because it is referenced by the svc table */
318 atomic_inc(&svc->refcnt);
319 return 1;
320}
321
322
323/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100324 * Unhashes a service from svc_table / svc_fwm_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 * Should be called with locked tables.
326 */
327static int ip_vs_svc_unhash(struct ip_vs_service *svc)
328{
329 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000330 pr_err("%s(): request for unhash flagged, called from %pF\n",
331 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332 return 0;
333 }
334
335 if (svc->fwmark == 0) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100336 /* Remove it from the svc_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 list_del(&svc->s_list);
338 } else {
Hans Schillstromfc723252011-01-03 14:44:43 +0100339 /* Remove it from the svc_fwm_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340 list_del(&svc->f_list);
341 }
342
343 svc->flags &= ~IP_VS_SVC_F_HASHED;
344 atomic_dec(&svc->refcnt);
345 return 1;
346}
347
348
349/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100350 * Get service by {netns, proto,addr,port} in the service table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351 */
Julius Volzb18610d2008-09-02 15:55:37 +0200352static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100353__ip_vs_service_find(struct net *net, int af, __u16 protocol,
354 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355{
356 unsigned hash;
357 struct ip_vs_service *svc;
358
359 /* Check for "full" addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100360 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361
362 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
Julius Volzb18610d2008-09-02 15:55:37 +0200363 if ((svc->af == af)
364 && ip_vs_addr_equal(af, &svc->addr, vaddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 && (svc->port == vport)
Hans Schillstromfc723252011-01-03 14:44:43 +0100366 && (svc->protocol == protocol)
367 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 return svc;
370 }
371 }
372
373 return NULL;
374}
375
376
377/*
378 * Get service by {fwmark} in the service table.
379 */
Julius Volzb18610d2008-09-02 15:55:37 +0200380static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100381__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382{
383 unsigned hash;
384 struct ip_vs_service *svc;
385
386 /* Check for fwmark addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100387 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700388
389 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100390 if (svc->fwmark == fwmark && svc->af == af
391 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393 return svc;
394 }
395 }
396
397 return NULL;
398}
399
400struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100401ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
Julius Volz3c2e0502008-09-02 15:55:38 +0200402 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403{
404 struct ip_vs_service *svc;
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100405 struct netns_ipvs *ipvs = net_ipvs(net);
Julius Volz3c2e0502008-09-02 15:55:38 +0200406
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 read_lock(&__ip_vs_svc_lock);
408
409 /*
410 * Check the table hashed by fwmark first
411 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100412 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
413 if (fwmark && svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 goto out;
415
416 /*
417 * Check the table hashed by <protocol,addr,port>
418 * for "full" addressed entries
419 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100420 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421
422 if (svc == NULL
423 && protocol == IPPROTO_TCP
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100424 && atomic_read(&ipvs->ftpsvc_counter)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
426 /*
427 * Check if ftp service entry exists, the packet
428 * might belong to FTP data connections.
429 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100430 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431 }
432
433 if (svc == NULL
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100434 && atomic_read(&ipvs->nullsvc_counter)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 /*
436 * Check if the catch-all port (port zero) exists
437 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100438 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 }
440
441 out:
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200442 if (svc)
443 atomic_inc(&svc->usecnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444 read_unlock(&__ip_vs_svc_lock);
445
Julius Volz3c2e0502008-09-02 15:55:38 +0200446 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
447 fwmark, ip_vs_proto_name(protocol),
448 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
449 svc ? "hit" : "not hit");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450
451 return svc;
452}
453
454
455static inline void
456__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
457{
458 atomic_inc(&svc->refcnt);
459 dest->svc = svc;
460}
461
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200462static void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463__ip_vs_unbind_svc(struct ip_vs_dest *dest)
464{
465 struct ip_vs_service *svc = dest->svc;
466
467 dest->svc = NULL;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200468 if (atomic_dec_and_test(&svc->refcnt)) {
469 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
470 svc->fwmark,
471 IP_VS_DBG_ADDR(svc->af, &svc->addr),
472 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +0100473 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200475 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476}
477
478
479/*
480 * Returns hash value for real service
481 */
Julius Volz7937df12008-09-02 15:55:48 +0200482static inline unsigned ip_vs_rs_hashkey(int af,
483 const union nf_inet_addr *addr,
484 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485{
486 register unsigned porth = ntohs(port);
Julius Volz7937df12008-09-02 15:55:48 +0200487 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488
Julius Volz7937df12008-09-02 15:55:48 +0200489#ifdef CONFIG_IP_VS_IPV6
490 if (af == AF_INET6)
491 addr_fold = addr->ip6[0]^addr->ip6[1]^
492 addr->ip6[2]^addr->ip6[3];
493#endif
494
495 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496 & IP_VS_RTAB_MASK;
497}
498
499/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100500 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501 * should be called with locked tables.
502 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100503static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504{
505 unsigned hash;
506
507 if (!list_empty(&dest->d_list)) {
508 return 0;
509 }
510
511 /*
512 * Hash by proto,addr,port,
513 * which are the parameters of the real service.
514 */
Julius Volz7937df12008-09-02 15:55:48 +0200515 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
516
Hans Schillstromfc723252011-01-03 14:44:43 +0100517 list_add(&dest->d_list, &ipvs->rs_table[hash]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518
519 return 1;
520}
521
522/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100523 * UNhashes ip_vs_dest from rs_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524 * should be called with locked tables.
525 */
526static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
527{
528 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100529 * Remove it from the rs_table table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530 */
531 if (!list_empty(&dest->d_list)) {
532 list_del(&dest->d_list);
533 INIT_LIST_HEAD(&dest->d_list);
534 }
535
536 return 1;
537}
538
539/*
540 * Lookup real service by <proto,addr,port> in the real service table.
541 */
542struct ip_vs_dest *
Hans Schillstromfc723252011-01-03 14:44:43 +0100543ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
Julius Volz7937df12008-09-02 15:55:48 +0200544 const union nf_inet_addr *daddr,
545 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546{
Hans Schillstromfc723252011-01-03 14:44:43 +0100547 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 unsigned hash;
549 struct ip_vs_dest *dest;
550
551 /*
552 * Check for "full" addressed entries
553 * Return the first found entry
554 */
Julius Volz7937df12008-09-02 15:55:48 +0200555 hash = ip_vs_rs_hashkey(af, daddr, dport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556
Hans Schillstroma0840e22011-01-03 14:44:58 +0100557 read_lock(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100558 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200559 if ((dest->af == af)
560 && ip_vs_addr_equal(af, &dest->addr, daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561 && (dest->port == dport)
562 && ((dest->protocol == protocol) ||
563 dest->vfwmark)) {
564 /* HIT */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100565 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 return dest;
567 }
568 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100569 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570
571 return NULL;
572}
573
574/*
575 * Lookup destination by {addr,port} in the given service
576 */
577static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200578ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
579 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580{
581 struct ip_vs_dest *dest;
582
583 /*
584 * Find the destination for the given service
585 */
586 list_for_each_entry(dest, &svc->destinations, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200587 if ((dest->af == svc->af)
588 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
589 && (dest->port == dport)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590 /* HIT */
591 return dest;
592 }
593 }
594
595 return NULL;
596}
597
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800598/*
599 * Find destination by {daddr,dport,vaddr,protocol}
600 * Cretaed to be used in ip_vs_process_message() in
601 * the backup synchronization daemon. It finds the
602 * destination to be bound to the received connection
603 * on the backup.
604 *
605 * ip_vs_lookup_real_service() looked promissing, but
606 * seems not working as expected.
607 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100608struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
609 const union nf_inet_addr *daddr,
Julius Volz7937df12008-09-02 15:55:48 +0200610 __be16 dport,
611 const union nf_inet_addr *vaddr,
Hans Schillstrom0e051e62010-11-19 14:25:07 +0100612 __be16 vport, __u16 protocol, __u32 fwmark)
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800613{
614 struct ip_vs_dest *dest;
615 struct ip_vs_service *svc;
616
Hans Schillstromfc723252011-01-03 14:44:43 +0100617 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800618 if (!svc)
619 return NULL;
620 dest = ip_vs_lookup_dest(svc, daddr, dport);
621 if (dest)
622 atomic_inc(&dest->refcnt);
623 ip_vs_service_put(svc);
624 return dest;
625}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626
627/*
628 * Lookup dest by {svc,addr,port} in the destination trash.
629 * The destination trash is used to hold the destinations that are removed
630 * from the service table but are still referenced by some conn entries.
631 * The reason to add the destination trash is when the dest is temporary
632 * down (either by administrator or by monitor program), the dest can be
633 * picked back from the trash, the remaining connections to the dest can
634 * continue, and the counting information of the dest is also useful for
635 * scheduling.
636 */
637static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200638ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
639 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640{
641 struct ip_vs_dest *dest, *nxt;
Hans Schillstromf2431e62011-01-03 14:45:00 +0100642 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643
644 /*
645 * Find the destination in trash
646 */
Hans Schillstromf2431e62011-01-03 14:45:00 +0100647 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200648 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
649 "dest->refcnt=%d\n",
650 dest->vfwmark,
651 IP_VS_DBG_ADDR(svc->af, &dest->addr),
652 ntohs(dest->port),
653 atomic_read(&dest->refcnt));
654 if (dest->af == svc->af &&
655 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656 dest->port == dport &&
657 dest->vfwmark == svc->fwmark &&
658 dest->protocol == svc->protocol &&
659 (svc->fwmark ||
Julius Volz7937df12008-09-02 15:55:48 +0200660 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661 dest->vport == svc->port))) {
662 /* HIT */
663 return dest;
664 }
665
666 /*
667 * Try to purge the destination from trash if not referenced
668 */
669 if (atomic_read(&dest->refcnt) == 1) {
Julius Volz7937df12008-09-02 15:55:48 +0200670 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
671 "from trash\n",
672 dest->vfwmark,
673 IP_VS_DBG_ADDR(svc->af, &dest->addr),
674 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675 list_del(&dest->n_list);
676 ip_vs_dst_reset(dest);
677 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100678 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 kfree(dest);
680 }
681 }
682
683 return NULL;
684}
685
686
687/*
688 * Clean up all the destinations in the trash
689 * Called by the ip_vs_control_cleanup()
690 *
691 * When the ip_vs_control_clearup is activated by ipvs module exit,
692 * the service tables must have been flushed and all the connections
693 * are expired, and the refcnt of each destination in the trash must
694 * be 1, so we simply release them here.
695 */
Hans Schillstromf2431e62011-01-03 14:45:00 +0100696static void ip_vs_trash_cleanup(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697{
698 struct ip_vs_dest *dest, *nxt;
Hans Schillstromf2431e62011-01-03 14:45:00 +0100699 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700
Hans Schillstromf2431e62011-01-03 14:45:00 +0100701 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 list_del(&dest->n_list);
703 ip_vs_dst_reset(dest);
704 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100705 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706 kfree(dest);
707 }
708}
709
710
711static void
712ip_vs_zero_stats(struct ip_vs_stats *stats)
713{
714 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000715
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200716 memset(&stats->ustats, 0, sizeof(stats->ustats));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000718
Sven Wegener3a14a3132008-08-10 18:24:41 +0000719 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720}
721
722/*
723 * Update a destination in the given service
724 */
725static void
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200726__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
727 struct ip_vs_dest_user_kern *udest, int add)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728{
Hans Schillstromfc723252011-01-03 14:44:43 +0100729 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 int conn_flags;
731
732 /* set the weight and the flags */
733 atomic_set(&dest->weight, udest->weight);
Julian Anastasov35757922010-09-17 14:18:16 +0200734 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
735 conn_flags |= IP_VS_CONN_F_INACTIVE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
Julian Anastasov35757922010-09-17 14:18:16 +0200738 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
740 } else {
741 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100742 * Put the real service in rs_table if not present.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743 * For now only for NAT!
744 */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100745 write_lock_bh(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100746 ip_vs_rs_hash(ipvs, dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100747 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748 }
749 atomic_set(&dest->conn_flags, conn_flags);
750
751 /* bind the service */
752 if (!dest->svc) {
753 __ip_vs_bind_svc(dest, svc);
754 } else {
755 if (dest->svc != svc) {
756 __ip_vs_unbind_svc(dest);
757 ip_vs_zero_stats(&dest->stats);
758 __ip_vs_bind_svc(dest, svc);
759 }
760 }
761
762 /* set the dest status flags */
763 dest->flags |= IP_VS_DEST_F_AVAILABLE;
764
765 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
766 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
767 dest->u_threshold = udest->u_threshold;
768 dest->l_threshold = udest->l_threshold;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200769
Julian Anastasovff75f402011-02-22 10:40:25 +0200770 spin_lock_bh(&dest->dst_lock);
Julian Anastasovfc604762010-10-17 16:38:15 +0300771 ip_vs_dst_reset(dest);
Julian Anastasovff75f402011-02-22 10:40:25 +0200772 spin_unlock_bh(&dest->dst_lock);
Julian Anastasovfc604762010-10-17 16:38:15 +0300773
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200774 if (add)
Hans Schillstrom29c20262011-01-03 14:44:54 +0100775 ip_vs_new_estimator(svc->net, &dest->stats);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200776
777 write_lock_bh(&__ip_vs_svc_lock);
778
779 /* Wait until all other svc users go away */
780 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
781
782 if (add) {
783 list_add(&dest->n_list, &svc->destinations);
784 svc->num_dests++;
785 }
786
787 /* call the update_service, because server weight may be changed */
788 if (svc->scheduler->update_service)
789 svc->scheduler->update_service(svc);
790
791 write_unlock_bh(&__ip_vs_svc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792}
793
794
795/*
796 * Create a destination for the given service
797 */
798static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200799ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800 struct ip_vs_dest **dest_p)
801{
802 struct ip_vs_dest *dest;
803 unsigned atype;
804
805 EnterFunction(2);
806
Vince Busam09571c72008-09-02 15:55:52 +0200807#ifdef CONFIG_IP_VS_IPV6
808 if (svc->af == AF_INET6) {
809 atype = ipv6_addr_type(&udest->addr.in6);
Sven Wegener3bfb92f2008-09-05 16:53:49 +0200810 if ((!(atype & IPV6_ADDR_UNICAST) ||
811 atype & IPV6_ADDR_LINKLOCAL) &&
Hans Schillstrom4a984802011-01-03 14:45:02 +0100812 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
Vince Busam09571c72008-09-02 15:55:52 +0200813 return -EINVAL;
814 } else
815#endif
816 {
Hans Schillstrom4a984802011-01-03 14:45:02 +0100817 atype = inet_addr_type(svc->net, udest->addr.ip);
Vince Busam09571c72008-09-02 15:55:52 +0200818 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
819 return -EINVAL;
820 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821
Simon Hormandee06e42010-08-26 02:54:31 +0000822 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000824 pr_err("%s(): no memory.\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825 return -ENOMEM;
826 }
Hans Schillstromb17fc992011-01-03 14:44:56 +0100827 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
828 if (!dest->stats.cpustats) {
829 pr_err("%s() alloc_percpu failed\n", __func__);
830 goto err_alloc;
831 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832
Julius Volzc860c6b2008-09-02 15:55:36 +0200833 dest->af = svc->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 dest->protocol = svc->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +0200835 dest->vaddr = svc->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836 dest->vport = svc->port;
837 dest->vfwmark = svc->fwmark;
Julius Volzc860c6b2008-09-02 15:55:36 +0200838 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 dest->port = udest->port;
840
841 atomic_set(&dest->activeconns, 0);
842 atomic_set(&dest->inactconns, 0);
843 atomic_set(&dest->persistconns, 0);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200844 atomic_set(&dest->refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845
846 INIT_LIST_HEAD(&dest->d_list);
847 spin_lock_init(&dest->dst_lock);
848 spin_lock_init(&dest->stats.lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200849 __ip_vs_update_dest(svc, dest, udest, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850
851 *dest_p = dest;
852
853 LeaveFunction(2);
854 return 0;
Hans Schillstromb17fc992011-01-03 14:44:56 +0100855
856err_alloc:
857 kfree(dest);
858 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859}
860
861
862/*
863 * Add a destination into an existing service
864 */
865static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200866ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867{
868 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200869 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700870 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871 int ret;
872
873 EnterFunction(2);
874
875 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000876 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877 return -ERANGE;
878 }
879
880 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000881 pr_err("%s(): lower threshold is higher than upper threshold\n",
882 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883 return -ERANGE;
884 }
885
Julius Volzc860c6b2008-09-02 15:55:36 +0200886 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
887
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888 /*
889 * Check if the dest already exists in the list
890 */
Julius Volz7937df12008-09-02 15:55:48 +0200891 dest = ip_vs_lookup_dest(svc, &daddr, dport);
892
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 if (dest != NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000894 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 return -EEXIST;
896 }
897
898 /*
899 * Check if the dest already exists in the trash and
900 * is from the same service
901 */
Julius Volz7937df12008-09-02 15:55:48 +0200902 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
903
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 if (dest != NULL) {
Julius Volzcfc78c52008-09-02 15:55:53 +0200905 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
906 "dest->refcnt=%d, service %u/%s:%u\n",
907 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
908 atomic_read(&dest->refcnt),
909 dest->vfwmark,
910 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
911 ntohs(dest->vport));
912
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 /*
914 * Get the destination from the trash
915 */
916 list_del(&dest->n_list);
917
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200918 __ip_vs_update_dest(svc, dest, udest, 1);
919 ret = 0;
920 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921 /*
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200922 * Allocate and initialize the dest structure
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200924 ret = ip_vs_new_dest(svc, udest, &dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926 LeaveFunction(2);
927
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200928 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929}
930
931
932/*
933 * Edit a destination in the given service
934 */
935static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200936ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937{
938 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200939 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700940 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941
942 EnterFunction(2);
943
944 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000945 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 return -ERANGE;
947 }
948
949 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000950 pr_err("%s(): lower threshold is higher than upper threshold\n",
951 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 return -ERANGE;
953 }
954
Julius Volzc860c6b2008-09-02 15:55:36 +0200955 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
956
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 /*
958 * Lookup the destination list
959 */
Julius Volz7937df12008-09-02 15:55:48 +0200960 dest = ip_vs_lookup_dest(svc, &daddr, dport);
961
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000963 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 return -ENOENT;
965 }
966
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200967 __ip_vs_update_dest(svc, dest, udest, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 LeaveFunction(2);
969
970 return 0;
971}
972
973
974/*
975 * Delete a destination (must be already unlinked from the service)
976 */
Hans Schillstrom29c20262011-01-03 14:44:54 +0100977static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978{
Hans Schillstroma0840e22011-01-03 14:44:58 +0100979 struct netns_ipvs *ipvs = net_ipvs(net);
980
Hans Schillstrom29c20262011-01-03 14:44:54 +0100981 ip_vs_kill_estimator(net, &dest->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982
983 /*
984 * Remove it from the d-linked list with the real services.
985 */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100986 write_lock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 ip_vs_rs_unhash(dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100988 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989
990 /*
991 * Decrease the refcnt of the dest, and free the dest
992 * if nobody refers to it (refcnt=0). Otherwise, throw
993 * the destination into the trash.
994 */
995 if (atomic_dec_and_test(&dest->refcnt)) {
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200996 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
997 dest->vfwmark,
998 IP_VS_DBG_ADDR(dest->af, &dest->addr),
999 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 ip_vs_dst_reset(dest);
1001 /* simply decrease svc->refcnt here, let the caller check
1002 and release the service if nobody refers to it.
1003 Only user context can release destination and service,
1004 and only one user context can update virtual service at a
1005 time, so the operation here is OK */
1006 atomic_dec(&dest->svc->refcnt);
Hans Schillstromb17fc992011-01-03 14:44:56 +01001007 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008 kfree(dest);
1009 } else {
Julius Volzcfc78c52008-09-02 15:55:53 +02001010 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1011 "dest->refcnt=%d\n",
1012 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1013 ntohs(dest->port),
1014 atomic_read(&dest->refcnt));
Hans Schillstromf2431e62011-01-03 14:45:00 +01001015 list_add(&dest->n_list, &ipvs->dest_trash);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 atomic_inc(&dest->refcnt);
1017 }
1018}
1019
1020
1021/*
1022 * Unlink a destination from the given service
1023 */
1024static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1025 struct ip_vs_dest *dest,
1026 int svcupd)
1027{
1028 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1029
1030 /*
1031 * Remove it from the d-linked destination list.
1032 */
1033 list_del(&dest->n_list);
1034 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001035
1036 /*
1037 * Call the update_service function of its scheduler
1038 */
1039 if (svcupd && svc->scheduler->update_service)
1040 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041}
1042
1043
1044/*
1045 * Delete a destination server in the given service
1046 */
1047static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001048ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049{
1050 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -07001051 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052
1053 EnterFunction(2);
1054
Julius Volz7937df12008-09-02 15:55:48 +02001055 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
Julius Volzc860c6b2008-09-02 15:55:36 +02001056
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001058 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059 return -ENOENT;
1060 }
1061
1062 write_lock_bh(&__ip_vs_svc_lock);
1063
1064 /*
1065 * Wait until all other svc users go away.
1066 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001067 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068
1069 /*
1070 * Unlink dest from the service
1071 */
1072 __ip_vs_unlink_dest(svc, dest, 1);
1073
1074 write_unlock_bh(&__ip_vs_svc_lock);
1075
1076 /*
1077 * Delete the destination
1078 */
Hans Schillstroma0840e22011-01-03 14:44:58 +01001079 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080
1081 LeaveFunction(2);
1082
1083 return 0;
1084}
1085
1086
1087/*
1088 * Add a service into the service hash table
1089 */
1090static int
Hans Schillstromfc723252011-01-03 14:44:43 +01001091ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
Julius Volzc860c6b2008-09-02 15:55:36 +02001092 struct ip_vs_service **svc_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093{
1094 int ret = 0;
1095 struct ip_vs_scheduler *sched = NULL;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001096 struct ip_vs_pe *pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097 struct ip_vs_service *svc = NULL;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001098 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099
1100 /* increase the module use count */
1101 ip_vs_use_count_inc();
1102
1103 /* Lookup the scheduler by 'u->sched_name' */
1104 sched = ip_vs_scheduler_get(u->sched_name);
1105 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001106 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107 ret = -ENOENT;
Simon Horman6e08bfb2010-08-22 21:37:52 +09001108 goto out_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109 }
1110
Simon Horman0d1e71b2010-08-22 21:37:54 +09001111 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001112 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001113 if (pe == NULL) {
1114 pr_info("persistence engine module ip_vs_pe_%s "
1115 "not found\n", u->pe_name);
1116 ret = -ENOENT;
1117 goto out_err;
1118 }
1119 }
1120
Julius Volzf94fd042008-09-02 15:55:55 +02001121#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001122 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1123 ret = -EINVAL;
1124 goto out_err;
Julius Volzf94fd042008-09-02 15:55:55 +02001125 }
1126#endif
1127
Simon Hormandee06e42010-08-26 02:54:31 +00001128 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 if (svc == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001130 IP_VS_DBG(1, "%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 ret = -ENOMEM;
1132 goto out_err;
1133 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001134 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1135 if (!svc->stats.cpustats) {
1136 pr_err("%s() alloc_percpu failed\n", __func__);
1137 goto out_err;
1138 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139
1140 /* I'm the first user of the service */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001141 atomic_set(&svc->usecnt, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142 atomic_set(&svc->refcnt, 0);
1143
Julius Volzc860c6b2008-09-02 15:55:36 +02001144 svc->af = u->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 svc->protocol = u->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +02001146 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 svc->port = u->port;
1148 svc->fwmark = u->fwmark;
1149 svc->flags = u->flags;
1150 svc->timeout = u->timeout * HZ;
1151 svc->netmask = u->netmask;
Hans Schillstromfc723252011-01-03 14:44:43 +01001152 svc->net = net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153
1154 INIT_LIST_HEAD(&svc->destinations);
1155 rwlock_init(&svc->sched_lock);
1156 spin_lock_init(&svc->stats.lock);
1157
1158 /* Bind the scheduler */
1159 ret = ip_vs_bind_scheduler(svc, sched);
1160 if (ret)
1161 goto out_err;
1162 sched = NULL;
1163
Simon Horman0d1e71b2010-08-22 21:37:54 +09001164 /* Bind the ct retriever */
1165 ip_vs_bind_pe(svc, pe);
1166 pe = NULL;
1167
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168 /* Update the virtual service counters */
1169 if (svc->port == FTPPORT)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001170 atomic_inc(&ipvs->ftpsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171 else if (svc->port == 0)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001172 atomic_inc(&ipvs->nullsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173
Hans Schillstrom29c20262011-01-03 14:44:54 +01001174 ip_vs_new_estimator(net, &svc->stats);
Julius Volzf94fd042008-09-02 15:55:55 +02001175
1176 /* Count only IPv4 services for old get/setsockopt interface */
1177 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001178 ipvs->num_services++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179
1180 /* Hash the service into the service table */
1181 write_lock_bh(&__ip_vs_svc_lock);
1182 ip_vs_svc_hash(svc);
1183 write_unlock_bh(&__ip_vs_svc_lock);
1184
1185 *svc_p = svc;
1186 return 0;
1187
Hans Schillstromb17fc992011-01-03 14:44:56 +01001188
Simon Horman6e08bfb2010-08-22 21:37:52 +09001189 out_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190 if (svc != NULL) {
Simon Horman2fabf352010-08-22 21:37:52 +09001191 ip_vs_unbind_scheduler(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192 if (svc->inc) {
1193 local_bh_disable();
1194 ip_vs_app_inc_put(svc->inc);
1195 local_bh_enable();
1196 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001197 if (svc->stats.cpustats)
1198 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199 kfree(svc);
1200 }
1201 ip_vs_scheduler_put(sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001202 ip_vs_pe_put(pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 /* decrease the module use count */
1205 ip_vs_use_count_dec();
1206
1207 return ret;
1208}
1209
1210
1211/*
1212 * Edit a service and bind it with a new scheduler
1213 */
1214static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001215ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216{
1217 struct ip_vs_scheduler *sched, *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001218 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219 int ret = 0;
1220
1221 /*
1222 * Lookup the scheduler, by 'u->sched_name'
1223 */
1224 sched = ip_vs_scheduler_get(u->sched_name);
1225 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001226 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227 return -ENOENT;
1228 }
1229 old_sched = sched;
1230
Simon Horman0d1e71b2010-08-22 21:37:54 +09001231 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001232 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001233 if (pe == NULL) {
1234 pr_info("persistence engine module ip_vs_pe_%s "
1235 "not found\n", u->pe_name);
1236 ret = -ENOENT;
1237 goto out;
1238 }
1239 old_pe = pe;
1240 }
1241
Julius Volzf94fd042008-09-02 15:55:55 +02001242#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001243 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1244 ret = -EINVAL;
1245 goto out;
Julius Volzf94fd042008-09-02 15:55:55 +02001246 }
1247#endif
1248
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249 write_lock_bh(&__ip_vs_svc_lock);
1250
1251 /*
1252 * Wait until all other svc users go away.
1253 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001254 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255
1256 /*
1257 * Set the flags and timeout value
1258 */
1259 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1260 svc->timeout = u->timeout * HZ;
1261 svc->netmask = u->netmask;
1262
1263 old_sched = svc->scheduler;
1264 if (sched != old_sched) {
1265 /*
1266 * Unbind the old scheduler
1267 */
1268 if ((ret = ip_vs_unbind_scheduler(svc))) {
1269 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001270 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001271 }
1272
1273 /*
1274 * Bind the new scheduler
1275 */
1276 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1277 /*
1278 * If ip_vs_bind_scheduler fails, restore the old
1279 * scheduler.
1280 * The main reason of failure is out of memory.
1281 *
1282 * The question is if the old scheduler can be
1283 * restored all the time. TODO: if it cannot be
1284 * restored some time, we must delete the service,
1285 * otherwise the system may crash.
1286 */
1287 ip_vs_bind_scheduler(svc, old_sched);
1288 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001289 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 }
1291 }
1292
Simon Horman0d1e71b2010-08-22 21:37:54 +09001293 old_pe = svc->pe;
1294 if (pe != old_pe) {
1295 ip_vs_unbind_pe(svc);
1296 ip_vs_bind_pe(svc, pe);
1297 }
1298
Simon Horman9e691ed2008-09-17 10:10:41 +10001299 out_unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300 write_unlock_bh(&__ip_vs_svc_lock);
Simon Horman9e691ed2008-09-17 10:10:41 +10001301 out:
Simon Horman6e08bfb2010-08-22 21:37:52 +09001302 ip_vs_scheduler_put(old_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001303 ip_vs_pe_put(old_pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304 return ret;
1305}
1306
1307
1308/*
1309 * Delete a service from the service list
1310 * - The service must be unlinked, unlocked and not referenced!
1311 * - We are called under _bh lock
1312 */
1313static void __ip_vs_del_service(struct ip_vs_service *svc)
1314{
1315 struct ip_vs_dest *dest, *nxt;
1316 struct ip_vs_scheduler *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001317 struct ip_vs_pe *old_pe;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001318 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001319
1320 pr_info("%s: enter\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001321
Julius Volzf94fd042008-09-02 15:55:55 +02001322 /* Count only IPv4 services for old get/setsockopt interface */
1323 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001324 ipvs->num_services--;
Julius Volzf94fd042008-09-02 15:55:55 +02001325
Hans Schillstrom29c20262011-01-03 14:44:54 +01001326 ip_vs_kill_estimator(svc->net, &svc->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001327
1328 /* Unbind scheduler */
1329 old_sched = svc->scheduler;
1330 ip_vs_unbind_scheduler(svc);
Simon Horman6e08bfb2010-08-22 21:37:52 +09001331 ip_vs_scheduler_put(old_sched);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332
Simon Horman0d1e71b2010-08-22 21:37:54 +09001333 /* Unbind persistence engine */
1334 old_pe = svc->pe;
1335 ip_vs_unbind_pe(svc);
1336 ip_vs_pe_put(old_pe);
1337
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338 /* Unbind app inc */
1339 if (svc->inc) {
1340 ip_vs_app_inc_put(svc->inc);
1341 svc->inc = NULL;
1342 }
1343
1344 /*
1345 * Unlink the whole destination list
1346 */
1347 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1348 __ip_vs_unlink_dest(svc, dest, 0);
Hans Schillstrom29c20262011-01-03 14:44:54 +01001349 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350 }
1351
1352 /*
1353 * Update the virtual service counters
1354 */
1355 if (svc->port == FTPPORT)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001356 atomic_dec(&ipvs->ftpsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 else if (svc->port == 0)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001358 atomic_dec(&ipvs->nullsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359
1360 /*
1361 * Free the service if nobody refers to it
1362 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001363 if (atomic_read(&svc->refcnt) == 0) {
1364 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1365 svc->fwmark,
1366 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1367 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +01001368 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001370 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371
1372 /* decrease the module use count */
1373 ip_vs_use_count_dec();
1374}
1375
1376/*
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001377 * Unlink a service from list and try to delete it if its refcnt reached 0
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001379static void ip_vs_unlink_service(struct ip_vs_service *svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381 /*
1382 * Unhash it from the service table
1383 */
1384 write_lock_bh(&__ip_vs_svc_lock);
1385
1386 ip_vs_svc_unhash(svc);
1387
1388 /*
1389 * Wait until all the svc users go away.
1390 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001391 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392
1393 __ip_vs_del_service(svc);
1394
1395 write_unlock_bh(&__ip_vs_svc_lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001396}
1397
1398/*
1399 * Delete a service from the service list
1400 */
1401static int ip_vs_del_service(struct ip_vs_service *svc)
1402{
1403 if (svc == NULL)
1404 return -EEXIST;
1405 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001406
1407 return 0;
1408}
1409
1410
1411/*
1412 * Flush all the virtual services
1413 */
Hans Schillstromfc723252011-01-03 14:44:43 +01001414static int ip_vs_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415{
1416 int idx;
1417 struct ip_vs_service *svc, *nxt;
1418
1419 /*
Hans Schillstromfc723252011-01-03 14:44:43 +01001420 * Flush the service table hashed by <netns,protocol,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421 */
1422 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001423 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1424 s_list) {
1425 if (net_eq(svc->net, net))
1426 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427 }
1428 }
1429
1430 /*
1431 * Flush the service table hashed by fwmark
1432 */
1433 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1434 list_for_each_entry_safe(svc, nxt,
1435 &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001436 if (net_eq(svc->net, net))
1437 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438 }
1439 }
1440
1441 return 0;
1442}
1443
1444
1445/*
1446 * Zero counters in a service or all services
1447 */
1448static int ip_vs_zero_service(struct ip_vs_service *svc)
1449{
1450 struct ip_vs_dest *dest;
1451
1452 write_lock_bh(&__ip_vs_svc_lock);
1453 list_for_each_entry(dest, &svc->destinations, n_list) {
1454 ip_vs_zero_stats(&dest->stats);
1455 }
1456 ip_vs_zero_stats(&svc->stats);
1457 write_unlock_bh(&__ip_vs_svc_lock);
1458 return 0;
1459}
1460
Hans Schillstromfc723252011-01-03 14:44:43 +01001461static int ip_vs_zero_all(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462{
1463 int idx;
1464 struct ip_vs_service *svc;
1465
1466 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1467 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001468 if (net_eq(svc->net, net))
1469 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470 }
1471 }
1472
1473 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1474 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001475 if (net_eq(svc->net, net))
1476 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477 }
1478 }
1479
Hans Schillstromb17fc992011-01-03 14:44:56 +01001480 ip_vs_zero_stats(net_ipvs(net)->tot_stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481 return 0;
1482}
1483
1484
1485static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001486proc_do_defense_mode(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487 void __user *buffer, size_t *lenp, loff_t *ppos)
1488{
Hans Schillstrom93304192011-01-03 14:44:51 +01001489 struct net *net = current->nsproxy->net_ns;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001490 int *valp = table->data;
1491 int val = *valp;
1492 int rc;
1493
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001494 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495 if (write && (*valp != val)) {
1496 if ((*valp < 0) || (*valp > 3)) {
1497 /* Restore the correct value */
1498 *valp = val;
1499 } else {
Hans Schillstrom93304192011-01-03 14:44:51 +01001500 update_defense_level(net_ipvs(net));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 }
1502 }
1503 return rc;
1504}
1505
1506
1507static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001508proc_do_sync_threshold(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001509 void __user *buffer, size_t *lenp, loff_t *ppos)
1510{
1511 int *valp = table->data;
1512 int val[2];
1513 int rc;
1514
1515 /* backup the value first */
1516 memcpy(val, valp, sizeof(val));
1517
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001518 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1520 /* Restore the correct value */
1521 memcpy(valp, val, sizeof(val));
1522 }
1523 return rc;
1524}
1525
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001526static int
1527proc_do_sync_mode(ctl_table *table, int write,
1528 void __user *buffer, size_t *lenp, loff_t *ppos)
1529{
1530 int *valp = table->data;
1531 int val = *valp;
1532 int rc;
1533
1534 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1535 if (write && (*valp != val)) {
1536 if ((*valp < 0) || (*valp > 1)) {
1537 /* Restore the correct value */
1538 *valp = val;
1539 } else {
Hans Schillstromf1313152011-01-03 14:44:55 +01001540 struct net *net = current->nsproxy->net_ns;
1541 ip_vs_sync_switch_mode(net, val);
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001542 }
1543 }
1544 return rc;
1545}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001546
/*
 *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *	Do not change the order of the entries or insert new ones
 *	without keeping them aligned with the netns init in
 *	__ip_vs_control_init()
 */
1552
1553static struct ctl_table vs_vars[] = {
1554 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001555 .procname = "amemthresh",
Hans Schillstroma0840e22011-01-03 14:44:58 +01001556 .maxlen = sizeof(int),
1557 .mode = 0644,
1558 .proc_handler = proc_dointvec,
1559 },
1560 {
1561 .procname = "am_droprate",
1562 .maxlen = sizeof(int),
1563 .mode = 0644,
1564 .proc_handler = proc_dointvec,
1565 },
1566 {
1567 .procname = "drop_entry",
1568 .maxlen = sizeof(int),
1569 .mode = 0644,
1570 .proc_handler = proc_do_defense_mode,
1571 },
1572 {
1573 .procname = "drop_packet",
1574 .maxlen = sizeof(int),
1575 .mode = 0644,
1576 .proc_handler = proc_do_defense_mode,
1577 },
1578#ifdef CONFIG_IP_VS_NFCT
1579 {
1580 .procname = "conntrack",
1581 .maxlen = sizeof(int),
1582 .mode = 0644,
1583 .proc_handler = &proc_dointvec,
1584 },
1585#endif
1586 {
1587 .procname = "secure_tcp",
1588 .maxlen = sizeof(int),
1589 .mode = 0644,
1590 .proc_handler = proc_do_defense_mode,
1591 },
1592 {
1593 .procname = "snat_reroute",
1594 .maxlen = sizeof(int),
1595 .mode = 0644,
1596 .proc_handler = &proc_dointvec,
1597 },
1598 {
1599 .procname = "sync_version",
1600 .maxlen = sizeof(int),
1601 .mode = 0644,
1602 .proc_handler = &proc_do_sync_mode,
1603 },
1604 {
1605 .procname = "cache_bypass",
1606 .maxlen = sizeof(int),
1607 .mode = 0644,
1608 .proc_handler = proc_dointvec,
1609 },
1610 {
1611 .procname = "expire_nodest_conn",
1612 .maxlen = sizeof(int),
1613 .mode = 0644,
1614 .proc_handler = proc_dointvec,
1615 },
1616 {
1617 .procname = "expire_quiescent_template",
1618 .maxlen = sizeof(int),
1619 .mode = 0644,
1620 .proc_handler = proc_dointvec,
1621 },
1622 {
1623 .procname = "sync_threshold",
1624 .maxlen =
1625 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1626 .mode = 0644,
1627 .proc_handler = proc_do_sync_threshold,
1628 },
1629 {
1630 .procname = "nat_icmp_send",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631 .maxlen = sizeof(int),
1632 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001633 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634 },
1635#ifdef CONFIG_IP_VS_DEBUG
1636 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001637 .procname = "debug_level",
1638 .data = &sysctl_ip_vs_debug_level,
1639 .maxlen = sizeof(int),
1640 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001641 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642 },
1643#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644#if 0
1645 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001646 .procname = "timeout_established",
1647 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1648 .maxlen = sizeof(int),
1649 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001650 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651 },
1652 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653 .procname = "timeout_synsent",
1654 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1655 .maxlen = sizeof(int),
1656 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001657 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658 },
1659 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660 .procname = "timeout_synrecv",
1661 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1662 .maxlen = sizeof(int),
1663 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001664 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665 },
1666 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667 .procname = "timeout_finwait",
1668 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1669 .maxlen = sizeof(int),
1670 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001671 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672 },
1673 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674 .procname = "timeout_timewait",
1675 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1676 .maxlen = sizeof(int),
1677 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001678 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 },
1680 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 .procname = "timeout_close",
1682 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1683 .maxlen = sizeof(int),
1684 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001685 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 },
1687 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 .procname = "timeout_closewait",
1689 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1690 .maxlen = sizeof(int),
1691 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001692 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693 },
1694 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 .procname = "timeout_lastack",
1696 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1697 .maxlen = sizeof(int),
1698 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001699 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 },
1701 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 .procname = "timeout_listen",
1703 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1704 .maxlen = sizeof(int),
1705 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001706 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707 },
1708 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 .procname = "timeout_synack",
1710 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1711 .maxlen = sizeof(int),
1712 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001713 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714 },
1715 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716 .procname = "timeout_udp",
1717 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1718 .maxlen = sizeof(int),
1719 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001720 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721 },
1722 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 .procname = "timeout_icmp",
1724 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1725 .maxlen = sizeof(int),
1726 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001727 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728 },
1729#endif
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001730 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731};
1732
Sven Wegener5587da52008-08-10 18:24:40 +00001733const struct ctl_path net_vs_ctl_path[] = {
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001734 { .procname = "net", },
1735 { .procname = "ipv4", },
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001736 { .procname = "vs", },
1737 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001739EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741#ifdef CONFIG_PROC_FS
1742
1743struct ip_vs_iter {
Hans Schillstromfc723252011-01-03 14:44:43 +01001744 struct seq_net_private p; /* Do not move this, netns depends upon it*/
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745 struct list_head *table;
1746 int bucket;
1747};
1748
1749/*
1750 * Write the contents of the VS rule table to a PROCfs file.
1751 * (It is kept just for backward compatibility)
1752 */
1753static inline const char *ip_vs_fwd_name(unsigned flags)
1754{
1755 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1756 case IP_VS_CONN_F_LOCALNODE:
1757 return "Local";
1758 case IP_VS_CONN_F_TUNNEL:
1759 return "Tunnel";
1760 case IP_VS_CONN_F_DROUTE:
1761 return "Route";
1762 default:
1763 return "Masq";
1764 }
1765}
1766
1767
1768/* Get the Nth entry in the two lists */
1769static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1770{
Hans Schillstromfc723252011-01-03 14:44:43 +01001771 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772 struct ip_vs_iter *iter = seq->private;
1773 int idx;
1774 struct ip_vs_service *svc;
1775
1776 /* look in hash by protocol */
1777 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1778 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001779 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780 iter->table = ip_vs_svc_table;
1781 iter->bucket = idx;
1782 return svc;
1783 }
1784 }
1785 }
1786
1787 /* keep looking in fwmark */
1788 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1789 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001790 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001791 iter->table = ip_vs_svc_fwm_table;
1792 iter->bucket = idx;
1793 return svc;
1794 }
1795 }
1796 }
1797
1798 return NULL;
1799}
1800
1801static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
Simon Horman563e94f2008-09-17 10:10:42 +10001802__acquires(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001803{
1804
1805 read_lock_bh(&__ip_vs_svc_lock);
1806 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1807}
1808
1809
1810static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1811{
1812 struct list_head *e;
1813 struct ip_vs_iter *iter;
1814 struct ip_vs_service *svc;
1815
1816 ++*pos;
1817 if (v == SEQ_START_TOKEN)
1818 return ip_vs_info_array(seq,0);
1819
1820 svc = v;
1821 iter = seq->private;
1822
1823 if (iter->table == ip_vs_svc_table) {
1824 /* next service in table hashed by protocol */
1825 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1826 return list_entry(e, struct ip_vs_service, s_list);
1827
1828
1829 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1830 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1831 s_list) {
1832 return svc;
1833 }
1834 }
1835
1836 iter->table = ip_vs_svc_fwm_table;
1837 iter->bucket = -1;
1838 goto scan_fwmark;
1839 }
1840
1841 /* next service in hashed by fwmark */
1842 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1843 return list_entry(e, struct ip_vs_service, f_list);
1844
1845 scan_fwmark:
1846 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1847 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1848 f_list)
1849 return svc;
1850 }
1851
1852 return NULL;
1853}
1854
1855static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
Simon Horman563e94f2008-09-17 10:10:42 +10001856__releases(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857{
1858 read_unlock_bh(&__ip_vs_svc_lock);
1859}
1860
1861
1862static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1863{
1864 if (v == SEQ_START_TOKEN) {
1865 seq_printf(seq,
1866 "IP Virtual Server version %d.%d.%d (size=%d)\n",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01001867 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868 seq_puts(seq,
1869 "Prot LocalAddress:Port Scheduler Flags\n");
1870 seq_puts(seq,
1871 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1872 } else {
1873 const struct ip_vs_service *svc = v;
1874 const struct ip_vs_iter *iter = seq->private;
1875 const struct ip_vs_dest *dest;
1876
Vince Busam667a5f12008-09-02 15:55:49 +02001877 if (iter->table == ip_vs_svc_table) {
1878#ifdef CONFIG_IP_VS_IPV6
1879 if (svc->af == AF_INET6)
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001880 seq_printf(seq, "%s [%pI6]:%04X %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001881 ip_vs_proto_name(svc->protocol),
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001882 &svc->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001883 ntohs(svc->port),
1884 svc->scheduler->name);
1885 else
1886#endif
Nick Chalk26ec0372010-06-22 08:07:01 +02001887 seq_printf(seq, "%s %08X:%04X %s %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001888 ip_vs_proto_name(svc->protocol),
1889 ntohl(svc->addr.ip),
1890 ntohs(svc->port),
Nick Chalk26ec0372010-06-22 08:07:01 +02001891 svc->scheduler->name,
1892 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001893 } else {
Nick Chalk26ec0372010-06-22 08:07:01 +02001894 seq_printf(seq, "FWM %08X %s %s",
1895 svc->fwmark, svc->scheduler->name,
1896 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001897 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898
1899 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1900 seq_printf(seq, "persistent %d %08X\n",
1901 svc->timeout,
1902 ntohl(svc->netmask));
1903 else
1904 seq_putc(seq, '\n');
1905
1906 list_for_each_entry(dest, &svc->destinations, n_list) {
Vince Busam667a5f12008-09-02 15:55:49 +02001907#ifdef CONFIG_IP_VS_IPV6
1908 if (dest->af == AF_INET6)
1909 seq_printf(seq,
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001910 " -> [%pI6]:%04X"
Vince Busam667a5f12008-09-02 15:55:49 +02001911 " %-7s %-6d %-10d %-10d\n",
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001912 &dest->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001913 ntohs(dest->port),
1914 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1915 atomic_read(&dest->weight),
1916 atomic_read(&dest->activeconns),
1917 atomic_read(&dest->inactconns));
1918 else
1919#endif
1920 seq_printf(seq,
1921 " -> %08X:%04X "
1922 "%-7s %-6d %-10d %-10d\n",
1923 ntohl(dest->addr.ip),
1924 ntohs(dest->port),
1925 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1926 atomic_read(&dest->weight),
1927 atomic_read(&dest->activeconns),
1928 atomic_read(&dest->inactconns));
1929
Linus Torvalds1da177e2005-04-16 15:20:36 -07001930 }
1931 }
1932 return 0;
1933}
1934
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001935static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001936 .start = ip_vs_info_seq_start,
1937 .next = ip_vs_info_seq_next,
1938 .stop = ip_vs_info_seq_stop,
1939 .show = ip_vs_info_seq_show,
1940};
1941
1942static int ip_vs_info_open(struct inode *inode, struct file *file)
1943{
Hans Schillstromfc723252011-01-03 14:44:43 +01001944 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001945 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001946}
1947
Arjan van de Ven9a321442007-02-12 00:55:35 -08001948static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001949 .owner = THIS_MODULE,
1950 .open = ip_vs_info_open,
1951 .read = seq_read,
1952 .llseek = seq_lseek,
1953 .release = seq_release_private,
1954};
1955
1956#endif
1957
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958#ifdef CONFIG_PROC_FS
1959static int ip_vs_stats_show(struct seq_file *seq, void *v)
1960{
Hans Schillstromb17fc992011-01-03 14:44:56 +01001961 struct net *net = seq_file_single_net(seq);
1962 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963
1964/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1965 seq_puts(seq,
1966 " Total Incoming Outgoing Incoming Outgoing\n");
1967 seq_printf(seq,
1968 " Conns Packets Packets Bytes Bytes\n");
1969
Hans Schillstromb17fc992011-01-03 14:44:56 +01001970 spin_lock_bh(&tot_stats->lock);
1971 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
1972 tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
1973 (unsigned long long) tot_stats->ustats.inbytes,
1974 (unsigned long long) tot_stats->ustats.outbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001975
1976/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1977 seq_puts(seq,
1978 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1979 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
Hans Schillstromb17fc992011-01-03 14:44:56 +01001980 tot_stats->ustats.cps,
1981 tot_stats->ustats.inpps,
1982 tot_stats->ustats.outpps,
1983 tot_stats->ustats.inbps,
1984 tot_stats->ustats.outbps);
1985 spin_unlock_bh(&tot_stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001986
1987 return 0;
1988}
1989
1990static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1991{
Hans Schillstromfc723252011-01-03 14:44:43 +01001992 return single_open_net(inode, file, ip_vs_stats_show);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993}
1994
Arjan van de Ven9a321442007-02-12 00:55:35 -08001995static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001996 .owner = THIS_MODULE,
1997 .open = ip_vs_stats_seq_open,
1998 .read = seq_read,
1999 .llseek = seq_lseek,
2000 .release = single_release,
2001};
2002
Hans Schillstromb17fc992011-01-03 14:44:56 +01002003static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2004{
2005 struct net *net = seq_file_single_net(seq);
2006 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
2007 int i;
2008
2009/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2010 seq_puts(seq,
2011 " Total Incoming Outgoing Incoming Outgoing\n");
2012 seq_printf(seq,
2013 "CPU Conns Packets Packets Bytes Bytes\n");
2014
2015 for_each_possible_cpu(i) {
2016 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
2017 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2018 i, u->ustats.conns, u->ustats.inpkts,
2019 u->ustats.outpkts, (__u64)u->ustats.inbytes,
2020 (__u64)u->ustats.outbytes);
2021 }
2022
2023 spin_lock_bh(&tot_stats->lock);
2024 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2025 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2026 tot_stats->ustats.outpkts,
2027 (unsigned long long) tot_stats->ustats.inbytes,
2028 (unsigned long long) tot_stats->ustats.outbytes);
2029
2030/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2031 seq_puts(seq,
2032 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2033 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2034 tot_stats->ustats.cps,
2035 tot_stats->ustats.inpps,
2036 tot_stats->ustats.outpps,
2037 tot_stats->ustats.inbps,
2038 tot_stats->ustats.outbps);
2039 spin_unlock_bh(&tot_stats->lock);
2040
2041 return 0;
2042}
2043
2044static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2045{
2046 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2047}
2048
2049static const struct file_operations ip_vs_stats_percpu_fops = {
2050 .owner = THIS_MODULE,
2051 .open = ip_vs_stats_percpu_seq_open,
2052 .read = seq_read,
2053 .llseek = seq_lseek,
2054 .release = single_release,
2055};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056#endif
2057
2058/*
2059 * Set timeout values for tcp tcpfin udp in the timeout_table.
2060 */
Hans Schillstrom93304192011-01-03 14:44:51 +01002061static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062{
Changli Gao091bb342011-01-21 18:02:13 +08002063#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
Hans Schillstrom93304192011-01-03 14:44:51 +01002064 struct ip_vs_proto_data *pd;
Changli Gao091bb342011-01-21 18:02:13 +08002065#endif
Hans Schillstrom93304192011-01-03 14:44:51 +01002066
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2068 u->tcp_timeout,
2069 u->tcp_fin_timeout,
2070 u->udp_timeout);
2071
2072#ifdef CONFIG_IP_VS_PROTO_TCP
2073 if (u->tcp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002074 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2075 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076 = u->tcp_timeout * HZ;
2077 }
2078
2079 if (u->tcp_fin_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002080 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2081 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082 = u->tcp_fin_timeout * HZ;
2083 }
2084#endif
2085
2086#ifdef CONFIG_IP_VS_PROTO_UDP
2087 if (u->udp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002088 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2089 pd->timeout_table[IP_VS_UDP_S_NORMAL]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090 = u->udp_timeout * HZ;
2091 }
2092#endif
2093 return 0;
2094}
2095
2096
2097#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2098#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2099#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2100 sizeof(struct ip_vs_dest_user))
2101#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2102#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2103#define MAX_ARG_LEN SVCDEST_ARG_LEN
2104
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002105static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2107 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2108 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2109 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2110 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2111 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2112 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2113 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2114 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2115 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2116 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2117};
2118
Julius Volzc860c6b2008-09-02 15:55:36 +02002119static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2120 struct ip_vs_service_user *usvc_compat)
2121{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002122 memset(usvc, 0, sizeof(*usvc));
2123
Julius Volzc860c6b2008-09-02 15:55:36 +02002124 usvc->af = AF_INET;
2125 usvc->protocol = usvc_compat->protocol;
2126 usvc->addr.ip = usvc_compat->addr;
2127 usvc->port = usvc_compat->port;
2128 usvc->fwmark = usvc_compat->fwmark;
2129
2130 /* Deep copy of sched_name is not needed here */
2131 usvc->sched_name = usvc_compat->sched_name;
2132
2133 usvc->flags = usvc_compat->flags;
2134 usvc->timeout = usvc_compat->timeout;
2135 usvc->netmask = usvc_compat->netmask;
2136}
2137
2138static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2139 struct ip_vs_dest_user *udest_compat)
2140{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002141 memset(udest, 0, sizeof(*udest));
2142
Julius Volzc860c6b2008-09-02 15:55:36 +02002143 udest->addr.ip = udest_compat->addr;
2144 udest->port = udest_compat->port;
2145 udest->conn_flags = udest_compat->conn_flags;
2146 udest->weight = udest_compat->weight;
2147 udest->u_threshold = udest_compat->u_threshold;
2148 udest->l_threshold = udest_compat->l_threshold;
2149}
2150
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151static int
2152do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2153{
Hans Schillstromfc723252011-01-03 14:44:43 +01002154 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 int ret;
2156 unsigned char arg[MAX_ARG_LEN];
Julius Volzc860c6b2008-09-02 15:55:36 +02002157 struct ip_vs_service_user *usvc_compat;
2158 struct ip_vs_service_user_kern usvc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002159 struct ip_vs_service *svc;
Julius Volzc860c6b2008-09-02 15:55:36 +02002160 struct ip_vs_dest_user *udest_compat;
2161 struct ip_vs_dest_user_kern udest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162
2163 if (!capable(CAP_NET_ADMIN))
2164 return -EPERM;
2165
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002166 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2167 return -EINVAL;
2168 if (len < 0 || len > MAX_ARG_LEN)
2169 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002170 if (len != set_arglen[SET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002171 pr_err("set_ctl: len %u != %u\n",
2172 len, set_arglen[SET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173 return -EINVAL;
2174 }
2175
2176 if (copy_from_user(arg, user, len) != 0)
2177 return -EFAULT;
2178
2179 /* increase the module use count */
2180 ip_vs_use_count_inc();
2181
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002182 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002183 ret = -ERESTARTSYS;
2184 goto out_dec;
2185 }
2186
2187 if (cmd == IP_VS_SO_SET_FLUSH) {
2188 /* Flush the virtual service */
Hans Schillstromfc723252011-01-03 14:44:43 +01002189 ret = ip_vs_flush(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190 goto out_unlock;
2191 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2192 /* Set timeout values for (tcp tcpfin udp) */
Hans Schillstrom93304192011-01-03 14:44:51 +01002193 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194 goto out_unlock;
2195 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2196 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002197 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2198 dm->syncid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199 goto out_unlock;
2200 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2201 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002202 ret = stop_sync_thread(net, dm->state);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203 goto out_unlock;
2204 }
2205
Julius Volzc860c6b2008-09-02 15:55:36 +02002206 usvc_compat = (struct ip_vs_service_user *)arg;
2207 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2208
2209 /* We only use the new structs internally, so copy userspace compat
2210 * structs to extended internal versions */
2211 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2212 ip_vs_copy_udest_compat(&udest, udest_compat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002213
2214 if (cmd == IP_VS_SO_SET_ZERO) {
2215 /* if no service address is set, zero counters in all */
Julius Volzc860c6b2008-09-02 15:55:36 +02002216 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002217 ret = ip_vs_zero_all(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218 goto out_unlock;
2219 }
2220 }
2221
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +01002222 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2223 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2224 usvc.protocol != IPPROTO_SCTP) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002225 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2226 usvc.protocol, &usvc.addr.ip,
2227 ntohs(usvc.port), usvc.sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228 ret = -EFAULT;
2229 goto out_unlock;
2230 }
2231
2232 /* Lookup the exact service by <protocol, addr, port> or fwmark */
Julius Volzc860c6b2008-09-02 15:55:36 +02002233 if (usvc.fwmark == 0)
Hans Schillstromfc723252011-01-03 14:44:43 +01002234 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002235 &usvc.addr, usvc.port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002236 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002237 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238
2239 if (cmd != IP_VS_SO_SET_ADD
Julius Volzc860c6b2008-09-02 15:55:36 +02002240 && (svc == NULL || svc->protocol != usvc.protocol)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002241 ret = -ESRCH;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002242 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002243 }
2244
2245 switch (cmd) {
2246 case IP_VS_SO_SET_ADD:
2247 if (svc != NULL)
2248 ret = -EEXIST;
2249 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002250 ret = ip_vs_add_service(net, &usvc, &svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002251 break;
2252 case IP_VS_SO_SET_EDIT:
Julius Volzc860c6b2008-09-02 15:55:36 +02002253 ret = ip_vs_edit_service(svc, &usvc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254 break;
2255 case IP_VS_SO_SET_DEL:
2256 ret = ip_vs_del_service(svc);
2257 if (!ret)
2258 goto out_unlock;
2259 break;
2260 case IP_VS_SO_SET_ZERO:
2261 ret = ip_vs_zero_service(svc);
2262 break;
2263 case IP_VS_SO_SET_ADDDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002264 ret = ip_vs_add_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265 break;
2266 case IP_VS_SO_SET_EDITDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002267 ret = ip_vs_edit_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 break;
2269 case IP_VS_SO_SET_DELDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002270 ret = ip_vs_del_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002271 break;
2272 default:
2273 ret = -EINVAL;
2274 }
2275
Linus Torvalds1da177e2005-04-16 15:20:36 -07002276 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002277 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278 out_dec:
2279 /* decrease the module use count */
2280 ip_vs_use_count_dec();
2281
2282 return ret;
2283}
2284
2285
2286static void
2287ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2288{
2289 spin_lock_bh(&src->lock);
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002290 memcpy(dst, &src->ustats, sizeof(*dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291 spin_unlock_bh(&src->lock);
2292}
2293
2294static void
2295ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2296{
2297 dst->protocol = src->protocol;
Julius Volze7ade462008-09-02 15:55:33 +02002298 dst->addr = src->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002299 dst->port = src->port;
2300 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002301 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302 dst->flags = src->flags;
2303 dst->timeout = src->timeout / HZ;
2304 dst->netmask = src->netmask;
2305 dst->num_dests = src->num_dests;
2306 ip_vs_copy_stats(&dst->stats, &src->stats);
2307}
2308
2309static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002310__ip_vs_get_service_entries(struct net *net,
2311 const struct ip_vs_get_services *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312 struct ip_vs_get_services __user *uptr)
2313{
2314 int idx, count=0;
2315 struct ip_vs_service *svc;
2316 struct ip_vs_service_entry entry;
2317 int ret = 0;
2318
2319 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2320 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002321 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002322 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002323 continue;
2324
Linus Torvalds1da177e2005-04-16 15:20:36 -07002325 if (count >= get->num_services)
2326 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002327 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328 ip_vs_copy_service(&entry, svc);
2329 if (copy_to_user(&uptr->entrytable[count],
2330 &entry, sizeof(entry))) {
2331 ret = -EFAULT;
2332 goto out;
2333 }
2334 count++;
2335 }
2336 }
2337
2338 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2339 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002340 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002341 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002342 continue;
2343
Linus Torvalds1da177e2005-04-16 15:20:36 -07002344 if (count >= get->num_services)
2345 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002346 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347 ip_vs_copy_service(&entry, svc);
2348 if (copy_to_user(&uptr->entrytable[count],
2349 &entry, sizeof(entry))) {
2350 ret = -EFAULT;
2351 goto out;
2352 }
2353 count++;
2354 }
2355 }
2356 out:
2357 return ret;
2358}
2359
2360static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002361__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002362 struct ip_vs_get_dests __user *uptr)
2363{
2364 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002365 union nf_inet_addr addr = { .ip = get->addr };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366 int ret = 0;
2367
2368 if (get->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002369 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002371 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002372 get->port);
Julius Volzb18610d2008-09-02 15:55:37 +02002373
Linus Torvalds1da177e2005-04-16 15:20:36 -07002374 if (svc) {
2375 int count = 0;
2376 struct ip_vs_dest *dest;
2377 struct ip_vs_dest_entry entry;
2378
2379 list_for_each_entry(dest, &svc->destinations, n_list) {
2380 if (count >= get->num_dests)
2381 break;
2382
Julius Volze7ade462008-09-02 15:55:33 +02002383 entry.addr = dest->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002384 entry.port = dest->port;
2385 entry.conn_flags = atomic_read(&dest->conn_flags);
2386 entry.weight = atomic_read(&dest->weight);
2387 entry.u_threshold = dest->u_threshold;
2388 entry.l_threshold = dest->l_threshold;
2389 entry.activeconns = atomic_read(&dest->activeconns);
2390 entry.inactconns = atomic_read(&dest->inactconns);
2391 entry.persistconns = atomic_read(&dest->persistconns);
2392 ip_vs_copy_stats(&entry.stats, &dest->stats);
2393 if (copy_to_user(&uptr->entrytable[count],
2394 &entry, sizeof(entry))) {
2395 ret = -EFAULT;
2396 break;
2397 }
2398 count++;
2399 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400 } else
2401 ret = -ESRCH;
2402 return ret;
2403}
2404
2405static inline void
Hans Schillstrom93304192011-01-03 14:44:51 +01002406__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002407{
Changli Gao091bb342011-01-21 18:02:13 +08002408#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
Hans Schillstrom93304192011-01-03 14:44:51 +01002409 struct ip_vs_proto_data *pd;
Changli Gao091bb342011-01-21 18:02:13 +08002410#endif
Hans Schillstrom93304192011-01-03 14:44:51 +01002411
Linus Torvalds1da177e2005-04-16 15:20:36 -07002412#ifdef CONFIG_IP_VS_PROTO_TCP
Hans Schillstrom93304192011-01-03 14:44:51 +01002413 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2414 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2415 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416#endif
2417#ifdef CONFIG_IP_VS_PROTO_UDP
Hans Schillstrom93304192011-01-03 14:44:51 +01002418 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002419 u->udp_timeout =
Hans Schillstrom93304192011-01-03 14:44:51 +01002420 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002421#endif
2422}
2423
2424
2425#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2426#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2427#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2428#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2429#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2430#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2431#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2432
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002433static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2435 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2436 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2437 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2438 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2439 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2440 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2441};
2442
2443static int
2444do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2445{
2446 unsigned char arg[128];
2447 int ret = 0;
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002448 unsigned int copylen;
Hans Schillstromfc723252011-01-03 14:44:43 +01002449 struct net *net = sock_net(sk);
Hans Schillstromf1313152011-01-03 14:44:55 +01002450 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002451
Hans Schillstromfc723252011-01-03 14:44:43 +01002452 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002453 if (!capable(CAP_NET_ADMIN))
2454 return -EPERM;
2455
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002456 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2457 return -EINVAL;
2458
Linus Torvalds1da177e2005-04-16 15:20:36 -07002459 if (*len < get_arglen[GET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002460 pr_err("get_ctl: len %u < %u\n",
2461 *len, get_arglen[GET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462 return -EINVAL;
2463 }
2464
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002465 copylen = get_arglen[GET_CMDID(cmd)];
2466 if (copylen > 128)
2467 return -EINVAL;
2468
2469 if (copy_from_user(arg, user, copylen) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470 return -EFAULT;
2471
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002472 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002473 return -ERESTARTSYS;
2474
2475 switch (cmd) {
2476 case IP_VS_SO_GET_VERSION:
2477 {
2478 char buf[64];
2479
2480 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002481 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002482 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2483 ret = -EFAULT;
2484 goto out;
2485 }
2486 *len = strlen(buf)+1;
2487 }
2488 break;
2489
2490 case IP_VS_SO_GET_INFO:
2491 {
2492 struct ip_vs_getinfo info;
2493 info.version = IP_VS_VERSION_CODE;
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002494 info.size = ip_vs_conn_tab_size;
Hans Schillstroma0840e22011-01-03 14:44:58 +01002495 info.num_services = ipvs->num_services;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002496 if (copy_to_user(user, &info, sizeof(info)) != 0)
2497 ret = -EFAULT;
2498 }
2499 break;
2500
2501 case IP_VS_SO_GET_SERVICES:
2502 {
2503 struct ip_vs_get_services *get;
2504 int size;
2505
2506 get = (struct ip_vs_get_services *)arg;
2507 size = sizeof(*get) +
2508 sizeof(struct ip_vs_service_entry) * get->num_services;
2509 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002510 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511 ret = -EINVAL;
2512 goto out;
2513 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002514 ret = __ip_vs_get_service_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002515 }
2516 break;
2517
2518 case IP_VS_SO_GET_SERVICE:
2519 {
2520 struct ip_vs_service_entry *entry;
2521 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002522 union nf_inet_addr addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002523
2524 entry = (struct ip_vs_service_entry *)arg;
Julius Volzb18610d2008-09-02 15:55:37 +02002525 addr.ip = entry->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002526 if (entry->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002527 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002528 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002529 svc = __ip_vs_service_find(net, AF_INET,
2530 entry->protocol, &addr,
2531 entry->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002532 if (svc) {
2533 ip_vs_copy_service(entry, svc);
2534 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2535 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536 } else
2537 ret = -ESRCH;
2538 }
2539 break;
2540
2541 case IP_VS_SO_GET_DESTS:
2542 {
2543 struct ip_vs_get_dests *get;
2544 int size;
2545
2546 get = (struct ip_vs_get_dests *)arg;
2547 size = sizeof(*get) +
2548 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2549 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002550 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551 ret = -EINVAL;
2552 goto out;
2553 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002554 ret = __ip_vs_get_dest_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002555 }
2556 break;
2557
2558 case IP_VS_SO_GET_TIMEOUT:
2559 {
2560 struct ip_vs_timeout_user t;
2561
Hans Schillstrom93304192011-01-03 14:44:51 +01002562 __ip_vs_get_timeouts(net, &t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002563 if (copy_to_user(user, &t, sizeof(t)) != 0)
2564 ret = -EFAULT;
2565 }
2566 break;
2567
2568 case IP_VS_SO_GET_DAEMON:
2569 {
2570 struct ip_vs_daemon_user d[2];
2571
2572 memset(&d, 0, sizeof(d));
Hans Schillstromf1313152011-01-03 14:44:55 +01002573 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002574 d[0].state = IP_VS_STATE_MASTER;
Hans Schillstromf1313152011-01-03 14:44:55 +01002575 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2576 sizeof(d[0].mcast_ifn));
2577 d[0].syncid = ipvs->master_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002578 }
Hans Schillstromf1313152011-01-03 14:44:55 +01002579 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002580 d[1].state = IP_VS_STATE_BACKUP;
Hans Schillstromf1313152011-01-03 14:44:55 +01002581 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2582 sizeof(d[1].mcast_ifn));
2583 d[1].syncid = ipvs->backup_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002584 }
2585 if (copy_to_user(user, &d, sizeof(d)) != 0)
2586 ret = -EFAULT;
2587 }
2588 break;
2589
2590 default:
2591 ret = -EINVAL;
2592 }
2593
2594 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002595 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002596 return ret;
2597}
2598
2599
2600static struct nf_sockopt_ops ip_vs_sockopts = {
2601 .pf = PF_INET,
2602 .set_optmin = IP_VS_BASE_CTL,
2603 .set_optmax = IP_VS_SO_SET_MAX+1,
2604 .set = do_ip_vs_set_ctl,
2605 .get_optmin = IP_VS_BASE_CTL,
2606 .get_optmax = IP_VS_SO_GET_MAX+1,
2607 .get = do_ip_vs_get_ctl,
Neil Horman16fcec32007-09-11 11:28:26 +02002608 .owner = THIS_MODULE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002609};
2610
Julius Volz9a812192008-08-14 14:08:44 +02002611/*
2612 * Generic Netlink interface
2613 */
2614
2615/* IPVS genetlink family */
2616static struct genl_family ip_vs_genl_family = {
2617 .id = GENL_ID_GENERATE,
2618 .hdrsize = 0,
2619 .name = IPVS_GENL_NAME,
2620 .version = IPVS_GENL_VERSION,
2621 .maxattr = IPVS_CMD_MAX,
Hans Schillstromc6d2d442011-01-03 14:45:03 +01002622 .netnsok = true, /* Make ipvsadm to work on netns */
Julius Volz9a812192008-08-14 14:08:44 +02002623};
2624
2625/* Policy used for first-level command attributes */
2626static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2627 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2628 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2629 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2630 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2631 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2632 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2633};
2634
2635/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2636static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2637 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2638 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2639 .len = IP_VS_IFNAME_MAXLEN },
2640 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2641};
2642
2643/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2644static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2645 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2646 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2647 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2648 .len = sizeof(union nf_inet_addr) },
2649 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2650 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2651 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2652 .len = IP_VS_SCHEDNAME_MAXLEN },
Simon Horman0d1e71b2010-08-22 21:37:54 +09002653 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2654 .len = IP_VS_PENAME_MAXLEN },
Julius Volz9a812192008-08-14 14:08:44 +02002655 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2656 .len = sizeof(struct ip_vs_flags) },
2657 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2658 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2659 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2660};
2661
2662/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2663static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2664 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2665 .len = sizeof(union nf_inet_addr) },
2666 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2667 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2668 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2669 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2670 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2671 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2672 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2673 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2674 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2675};
2676
2677static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2678 struct ip_vs_stats *stats)
2679{
2680 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2681 if (!nl_stats)
2682 return -EMSGSIZE;
2683
2684 spin_lock_bh(&stats->lock);
2685
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002686 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2687 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2688 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2689 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2690 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2691 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2692 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2693 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2694 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2695 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
Julius Volz9a812192008-08-14 14:08:44 +02002696
2697 spin_unlock_bh(&stats->lock);
2698
2699 nla_nest_end(skb, nl_stats);
2700
2701 return 0;
2702
2703nla_put_failure:
2704 spin_unlock_bh(&stats->lock);
2705 nla_nest_cancel(skb, nl_stats);
2706 return -EMSGSIZE;
2707}
2708
2709static int ip_vs_genl_fill_service(struct sk_buff *skb,
2710 struct ip_vs_service *svc)
2711{
2712 struct nlattr *nl_service;
2713 struct ip_vs_flags flags = { .flags = svc->flags,
2714 .mask = ~0 };
2715
2716 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2717 if (!nl_service)
2718 return -EMSGSIZE;
2719
Julius Volzf94fd042008-09-02 15:55:55 +02002720 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
Julius Volz9a812192008-08-14 14:08:44 +02002721
2722 if (svc->fwmark) {
2723 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2724 } else {
2725 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2726 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2727 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2728 }
2729
2730 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002731 if (svc->pe)
2732 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
Julius Volz9a812192008-08-14 14:08:44 +02002733 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2734 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2735 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2736
2737 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2738 goto nla_put_failure;
2739
2740 nla_nest_end(skb, nl_service);
2741
2742 return 0;
2743
2744nla_put_failure:
2745 nla_nest_cancel(skb, nl_service);
2746 return -EMSGSIZE;
2747}
2748
2749static int ip_vs_genl_dump_service(struct sk_buff *skb,
2750 struct ip_vs_service *svc,
2751 struct netlink_callback *cb)
2752{
2753 void *hdr;
2754
2755 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2756 &ip_vs_genl_family, NLM_F_MULTI,
2757 IPVS_CMD_NEW_SERVICE);
2758 if (!hdr)
2759 return -EMSGSIZE;
2760
2761 if (ip_vs_genl_fill_service(skb, svc) < 0)
2762 goto nla_put_failure;
2763
2764 return genlmsg_end(skb, hdr);
2765
2766nla_put_failure:
2767 genlmsg_cancel(skb, hdr);
2768 return -EMSGSIZE;
2769}
2770
2771static int ip_vs_genl_dump_services(struct sk_buff *skb,
2772 struct netlink_callback *cb)
2773{
2774 int idx = 0, i;
2775 int start = cb->args[0];
2776 struct ip_vs_service *svc;
Hans Schillstromfc723252011-01-03 14:44:43 +01002777 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002778
2779 mutex_lock(&__ip_vs_mutex);
2780 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2781 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002782 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002783 continue;
2784 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2785 idx--;
2786 goto nla_put_failure;
2787 }
2788 }
2789 }
2790
2791 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2792 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002793 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002794 continue;
2795 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2796 idx--;
2797 goto nla_put_failure;
2798 }
2799 }
2800 }
2801
2802nla_put_failure:
2803 mutex_unlock(&__ip_vs_mutex);
2804 cb->args[0] = idx;
2805
2806 return skb->len;
2807}
2808
Hans Schillstromfc723252011-01-03 14:44:43 +01002809static int ip_vs_genl_parse_service(struct net *net,
2810 struct ip_vs_service_user_kern *usvc,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002811 struct nlattr *nla, int full_entry,
2812 struct ip_vs_service **ret_svc)
Julius Volz9a812192008-08-14 14:08:44 +02002813{
2814 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2815 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002816 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002817
2818 /* Parse mandatory identifying service fields first */
2819 if (nla == NULL ||
2820 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2821 return -EINVAL;
2822
2823 nla_af = attrs[IPVS_SVC_ATTR_AF];
2824 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2825 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2826 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2827 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2828
2829 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2830 return -EINVAL;
2831
Simon Horman258c8892009-12-15 17:01:25 +01002832 memset(usvc, 0, sizeof(*usvc));
2833
Julius Volzc860c6b2008-09-02 15:55:36 +02002834 usvc->af = nla_get_u16(nla_af);
Julius Volzf94fd042008-09-02 15:55:55 +02002835#ifdef CONFIG_IP_VS_IPV6
2836 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2837#else
2838 if (usvc->af != AF_INET)
2839#endif
Julius Volz9a812192008-08-14 14:08:44 +02002840 return -EAFNOSUPPORT;
2841
2842 if (nla_fwmark) {
2843 usvc->protocol = IPPROTO_TCP;
2844 usvc->fwmark = nla_get_u32(nla_fwmark);
2845 } else {
2846 usvc->protocol = nla_get_u16(nla_protocol);
2847 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2848 usvc->port = nla_get_u16(nla_port);
2849 usvc->fwmark = 0;
2850 }
2851
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002852 if (usvc->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002853 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002854 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002855 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002856 &usvc->addr, usvc->port);
2857 *ret_svc = svc;
2858
Julius Volz9a812192008-08-14 14:08:44 +02002859 /* If a full entry was requested, check for the additional fields */
2860 if (full_entry) {
Simon Horman0d1e71b2010-08-22 21:37:54 +09002861 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
Julius Volz9a812192008-08-14 14:08:44 +02002862 *nla_netmask;
2863 struct ip_vs_flags flags;
Julius Volz9a812192008-08-14 14:08:44 +02002864
2865 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
Simon Horman0d1e71b2010-08-22 21:37:54 +09002866 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
Julius Volz9a812192008-08-14 14:08:44 +02002867 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2868 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2869 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2870
2871 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2872 return -EINVAL;
2873
2874 nla_memcpy(&flags, nla_flags, sizeof(flags));
2875
2876 /* prefill flags from service if it already exists */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002877 if (svc)
Julius Volz9a812192008-08-14 14:08:44 +02002878 usvc->flags = svc->flags;
Julius Volz9a812192008-08-14 14:08:44 +02002879
2880 /* set new flags from userland */
2881 usvc->flags = (usvc->flags & ~flags.mask) |
2882 (flags.flags & flags.mask);
Julius Volzc860c6b2008-09-02 15:55:36 +02002883 usvc->sched_name = nla_data(nla_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002884 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
Julius Volz9a812192008-08-14 14:08:44 +02002885 usvc->timeout = nla_get_u32(nla_timeout);
2886 usvc->netmask = nla_get_u32(nla_netmask);
2887 }
2888
2889 return 0;
2890}
2891
Hans Schillstromfc723252011-01-03 14:44:43 +01002892static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2893 struct nlattr *nla)
Julius Volz9a812192008-08-14 14:08:44 +02002894{
Julius Volzc860c6b2008-09-02 15:55:36 +02002895 struct ip_vs_service_user_kern usvc;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002896 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002897 int ret;
2898
Hans Schillstromfc723252011-01-03 14:44:43 +01002899 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002900 return ret ? ERR_PTR(ret) : svc;
Julius Volz9a812192008-08-14 14:08:44 +02002901}
2902
2903static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2904{
2905 struct nlattr *nl_dest;
2906
2907 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2908 if (!nl_dest)
2909 return -EMSGSIZE;
2910
2911 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2912 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2913
2914 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2915 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2916 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2917 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2918 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2919 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2920 atomic_read(&dest->activeconns));
2921 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2922 atomic_read(&dest->inactconns));
2923 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2924 atomic_read(&dest->persistconns));
2925
2926 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2927 goto nla_put_failure;
2928
2929 nla_nest_end(skb, nl_dest);
2930
2931 return 0;
2932
2933nla_put_failure:
2934 nla_nest_cancel(skb, nl_dest);
2935 return -EMSGSIZE;
2936}
2937
2938static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2939 struct netlink_callback *cb)
2940{
2941 void *hdr;
2942
2943 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2944 &ip_vs_genl_family, NLM_F_MULTI,
2945 IPVS_CMD_NEW_DEST);
2946 if (!hdr)
2947 return -EMSGSIZE;
2948
2949 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2950 goto nla_put_failure;
2951
2952 return genlmsg_end(skb, hdr);
2953
2954nla_put_failure:
2955 genlmsg_cancel(skb, hdr);
2956 return -EMSGSIZE;
2957}
2958
2959static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2960 struct netlink_callback *cb)
2961{
2962 int idx = 0;
2963 int start = cb->args[0];
2964 struct ip_vs_service *svc;
2965 struct ip_vs_dest *dest;
2966 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
Hans Schillstroma0840e22011-01-03 14:44:58 +01002967 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002968
2969 mutex_lock(&__ip_vs_mutex);
2970
2971 /* Try to find the service for which to dump destinations */
2972 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2973 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2974 goto out_err;
2975
Hans Schillstroma0840e22011-01-03 14:44:58 +01002976
Hans Schillstromfc723252011-01-03 14:44:43 +01002977 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02002978 if (IS_ERR(svc) || svc == NULL)
2979 goto out_err;
2980
2981 /* Dump the destinations */
2982 list_for_each_entry(dest, &svc->destinations, n_list) {
2983 if (++idx <= start)
2984 continue;
2985 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2986 idx--;
2987 goto nla_put_failure;
2988 }
2989 }
2990
2991nla_put_failure:
2992 cb->args[0] = idx;
Julius Volz9a812192008-08-14 14:08:44 +02002993
2994out_err:
2995 mutex_unlock(&__ip_vs_mutex);
2996
2997 return skb->len;
2998}
2999
Julius Volzc860c6b2008-09-02 15:55:36 +02003000static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
Julius Volz9a812192008-08-14 14:08:44 +02003001 struct nlattr *nla, int full_entry)
3002{
3003 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3004 struct nlattr *nla_addr, *nla_port;
3005
3006 /* Parse mandatory identifying destination fields first */
3007 if (nla == NULL ||
3008 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3009 return -EINVAL;
3010
3011 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3012 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3013
3014 if (!(nla_addr && nla_port))
3015 return -EINVAL;
3016
Simon Horman258c8892009-12-15 17:01:25 +01003017 memset(udest, 0, sizeof(*udest));
3018
Julius Volz9a812192008-08-14 14:08:44 +02003019 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3020 udest->port = nla_get_u16(nla_port);
3021
3022 /* If a full entry was requested, check for the additional fields */
3023 if (full_entry) {
3024 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3025 *nla_l_thresh;
3026
3027 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3028 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3029 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3030 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3031
3032 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3033 return -EINVAL;
3034
3035 udest->conn_flags = nla_get_u32(nla_fwd)
3036 & IP_VS_CONN_F_FWD_MASK;
3037 udest->weight = nla_get_u32(nla_weight);
3038 udest->u_threshold = nla_get_u32(nla_u_thresh);
3039 udest->l_threshold = nla_get_u32(nla_l_thresh);
3040 }
3041
3042 return 0;
3043}
3044
3045static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3046 const char *mcast_ifn, __be32 syncid)
3047{
3048 struct nlattr *nl_daemon;
3049
3050 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3051 if (!nl_daemon)
3052 return -EMSGSIZE;
3053
3054 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3055 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3056 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3057
3058 nla_nest_end(skb, nl_daemon);
3059
3060 return 0;
3061
3062nla_put_failure:
3063 nla_nest_cancel(skb, nl_daemon);
3064 return -EMSGSIZE;
3065}
3066
3067static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3068 const char *mcast_ifn, __be32 syncid,
3069 struct netlink_callback *cb)
3070{
3071 void *hdr;
3072 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3073 &ip_vs_genl_family, NLM_F_MULTI,
3074 IPVS_CMD_NEW_DAEMON);
3075 if (!hdr)
3076 return -EMSGSIZE;
3077
3078 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3079 goto nla_put_failure;
3080
3081 return genlmsg_end(skb, hdr);
3082
3083nla_put_failure:
3084 genlmsg_cancel(skb, hdr);
3085 return -EMSGSIZE;
3086}
3087
3088static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3089 struct netlink_callback *cb)
3090{
Hans Schillstromf1313152011-01-03 14:44:55 +01003091 struct net *net = skb_net(skb);
3092 struct netns_ipvs *ipvs = net_ipvs(net);
3093
Julius Volz9a812192008-08-14 14:08:44 +02003094 mutex_lock(&__ip_vs_mutex);
Hans Schillstromf1313152011-01-03 14:44:55 +01003095 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
Julius Volz9a812192008-08-14 14:08:44 +02003096 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
Hans Schillstromf1313152011-01-03 14:44:55 +01003097 ipvs->master_mcast_ifn,
3098 ipvs->master_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003099 goto nla_put_failure;
3100
3101 cb->args[0] = 1;
3102 }
3103
Hans Schillstromf1313152011-01-03 14:44:55 +01003104 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
Julius Volz9a812192008-08-14 14:08:44 +02003105 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
Hans Schillstromf1313152011-01-03 14:44:55 +01003106 ipvs->backup_mcast_ifn,
3107 ipvs->backup_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003108 goto nla_put_failure;
3109
3110 cb->args[1] = 1;
3111 }
3112
3113nla_put_failure:
3114 mutex_unlock(&__ip_vs_mutex);
3115
3116 return skb->len;
3117}
3118
Hans Schillstromf1313152011-01-03 14:44:55 +01003119static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003120{
3121 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3122 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3123 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3124 return -EINVAL;
3125
Hans Schillstromf1313152011-01-03 14:44:55 +01003126 return start_sync_thread(net,
3127 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
Julius Volz9a812192008-08-14 14:08:44 +02003128 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3129 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3130}
3131
Hans Schillstromf1313152011-01-03 14:44:55 +01003132static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003133{
3134 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3135 return -EINVAL;
3136
Hans Schillstromf1313152011-01-03 14:44:55 +01003137 return stop_sync_thread(net,
3138 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
Julius Volz9a812192008-08-14 14:08:44 +02003139}
3140
Hans Schillstrom93304192011-01-03 14:44:51 +01003141static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003142{
3143 struct ip_vs_timeout_user t;
3144
Hans Schillstrom93304192011-01-03 14:44:51 +01003145 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003146
3147 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3148 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3149
3150 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3151 t.tcp_fin_timeout =
3152 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3153
3154 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3155 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3156
Hans Schillstrom93304192011-01-03 14:44:51 +01003157 return ip_vs_set_timeout(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003158}
3159
3160static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3161{
3162 struct ip_vs_service *svc = NULL;
Julius Volzc860c6b2008-09-02 15:55:36 +02003163 struct ip_vs_service_user_kern usvc;
3164 struct ip_vs_dest_user_kern udest;
Julius Volz9a812192008-08-14 14:08:44 +02003165 int ret = 0, cmd;
3166 int need_full_svc = 0, need_full_dest = 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003167 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003168 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003169
Hans Schillstromfc723252011-01-03 14:44:43 +01003170 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003171 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003172 cmd = info->genlhdr->cmd;
3173
3174 mutex_lock(&__ip_vs_mutex);
3175
3176 if (cmd == IPVS_CMD_FLUSH) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003177 ret = ip_vs_flush(net);
Julius Volz9a812192008-08-14 14:08:44 +02003178 goto out;
3179 } else if (cmd == IPVS_CMD_SET_CONFIG) {
Hans Schillstrom93304192011-01-03 14:44:51 +01003180 ret = ip_vs_genl_set_config(net, info->attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003181 goto out;
3182 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3183 cmd == IPVS_CMD_DEL_DAEMON) {
3184
3185 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3186
3187 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3188 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3189 info->attrs[IPVS_CMD_ATTR_DAEMON],
3190 ip_vs_daemon_policy)) {
3191 ret = -EINVAL;
3192 goto out;
3193 }
3194
3195 if (cmd == IPVS_CMD_NEW_DAEMON)
Hans Schillstromf1313152011-01-03 14:44:55 +01003196 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003197 else
Hans Schillstromf1313152011-01-03 14:44:55 +01003198 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003199 goto out;
3200 } else if (cmd == IPVS_CMD_ZERO &&
3201 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003202 ret = ip_vs_zero_all(net);
Julius Volz9a812192008-08-14 14:08:44 +02003203 goto out;
3204 }
3205
3206 /* All following commands require a service argument, so check if we
3207 * received a valid one. We need a full service specification when
3208 * adding / editing a service. Only identifying members otherwise. */
3209 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3210 need_full_svc = 1;
3211
Hans Schillstromfc723252011-01-03 14:44:43 +01003212 ret = ip_vs_genl_parse_service(net, &usvc,
Julius Volz9a812192008-08-14 14:08:44 +02003213 info->attrs[IPVS_CMD_ATTR_SERVICE],
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003214 need_full_svc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003215 if (ret)
3216 goto out;
3217
Julius Volz9a812192008-08-14 14:08:44 +02003218 /* Unless we're adding a new service, the service must already exist */
3219 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3220 ret = -ESRCH;
3221 goto out;
3222 }
3223
3224 /* Destination commands require a valid destination argument. For
3225 * adding / editing a destination, we need a full destination
3226 * specification. */
3227 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3228 cmd == IPVS_CMD_DEL_DEST) {
3229 if (cmd != IPVS_CMD_DEL_DEST)
3230 need_full_dest = 1;
3231
3232 ret = ip_vs_genl_parse_dest(&udest,
3233 info->attrs[IPVS_CMD_ATTR_DEST],
3234 need_full_dest);
3235 if (ret)
3236 goto out;
3237 }
3238
3239 switch (cmd) {
3240 case IPVS_CMD_NEW_SERVICE:
3241 if (svc == NULL)
Hans Schillstromfc723252011-01-03 14:44:43 +01003242 ret = ip_vs_add_service(net, &usvc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003243 else
3244 ret = -EEXIST;
3245 break;
3246 case IPVS_CMD_SET_SERVICE:
3247 ret = ip_vs_edit_service(svc, &usvc);
3248 break;
3249 case IPVS_CMD_DEL_SERVICE:
3250 ret = ip_vs_del_service(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003251 /* do not use svc, it can be freed */
Julius Volz9a812192008-08-14 14:08:44 +02003252 break;
3253 case IPVS_CMD_NEW_DEST:
3254 ret = ip_vs_add_dest(svc, &udest);
3255 break;
3256 case IPVS_CMD_SET_DEST:
3257 ret = ip_vs_edit_dest(svc, &udest);
3258 break;
3259 case IPVS_CMD_DEL_DEST:
3260 ret = ip_vs_del_dest(svc, &udest);
3261 break;
3262 case IPVS_CMD_ZERO:
3263 ret = ip_vs_zero_service(svc);
3264 break;
3265 default:
3266 ret = -EINVAL;
3267 }
3268
3269out:
Julius Volz9a812192008-08-14 14:08:44 +02003270 mutex_unlock(&__ip_vs_mutex);
3271
3272 return ret;
3273}
3274
3275static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3276{
3277 struct sk_buff *msg;
3278 void *reply;
3279 int ret, cmd, reply_cmd;
Hans Schillstromfc723252011-01-03 14:44:43 +01003280 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003281 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003282
Hans Schillstromfc723252011-01-03 14:44:43 +01003283 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003284 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003285 cmd = info->genlhdr->cmd;
3286
3287 if (cmd == IPVS_CMD_GET_SERVICE)
3288 reply_cmd = IPVS_CMD_NEW_SERVICE;
3289 else if (cmd == IPVS_CMD_GET_INFO)
3290 reply_cmd = IPVS_CMD_SET_INFO;
3291 else if (cmd == IPVS_CMD_GET_CONFIG)
3292 reply_cmd = IPVS_CMD_SET_CONFIG;
3293 else {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003294 pr_err("unknown Generic Netlink command\n");
Julius Volz9a812192008-08-14 14:08:44 +02003295 return -EINVAL;
3296 }
3297
3298 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3299 if (!msg)
3300 return -ENOMEM;
3301
3302 mutex_lock(&__ip_vs_mutex);
3303
3304 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3305 if (reply == NULL)
3306 goto nla_put_failure;
3307
3308 switch (cmd) {
3309 case IPVS_CMD_GET_SERVICE:
3310 {
3311 struct ip_vs_service *svc;
3312
Hans Schillstromfc723252011-01-03 14:44:43 +01003313 svc = ip_vs_genl_find_service(net,
3314 info->attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003315 if (IS_ERR(svc)) {
3316 ret = PTR_ERR(svc);
3317 goto out_err;
3318 } else if (svc) {
3319 ret = ip_vs_genl_fill_service(msg, svc);
Julius Volz9a812192008-08-14 14:08:44 +02003320 if (ret)
3321 goto nla_put_failure;
3322 } else {
3323 ret = -ESRCH;
3324 goto out_err;
3325 }
3326
3327 break;
3328 }
3329
3330 case IPVS_CMD_GET_CONFIG:
3331 {
3332 struct ip_vs_timeout_user t;
3333
Hans Schillstrom93304192011-01-03 14:44:51 +01003334 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003335#ifdef CONFIG_IP_VS_PROTO_TCP
3336 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3337 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3338 t.tcp_fin_timeout);
3339#endif
3340#ifdef CONFIG_IP_VS_PROTO_UDP
3341 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3342#endif
3343
3344 break;
3345 }
3346
3347 case IPVS_CMD_GET_INFO:
3348 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3349 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01003350 ip_vs_conn_tab_size);
Julius Volz9a812192008-08-14 14:08:44 +02003351 break;
3352 }
3353
3354 genlmsg_end(msg, reply);
Johannes Berg134e6372009-07-10 09:51:34 +00003355 ret = genlmsg_reply(msg, info);
Julius Volz9a812192008-08-14 14:08:44 +02003356 goto out;
3357
3358nla_put_failure:
Hannes Eder1e3e2382009-08-02 11:05:41 +00003359 pr_err("not enough space in Netlink message\n");
Julius Volz9a812192008-08-14 14:08:44 +02003360 ret = -EMSGSIZE;
3361
3362out_err:
3363 nlmsg_free(msg);
3364out:
3365 mutex_unlock(&__ip_vs_mutex);
3366
3367 return ret;
3368}
3369
3370
3371static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3372 {
3373 .cmd = IPVS_CMD_NEW_SERVICE,
3374 .flags = GENL_ADMIN_PERM,
3375 .policy = ip_vs_cmd_policy,
3376 .doit = ip_vs_genl_set_cmd,
3377 },
3378 {
3379 .cmd = IPVS_CMD_SET_SERVICE,
3380 .flags = GENL_ADMIN_PERM,
3381 .policy = ip_vs_cmd_policy,
3382 .doit = ip_vs_genl_set_cmd,
3383 },
3384 {
3385 .cmd = IPVS_CMD_DEL_SERVICE,
3386 .flags = GENL_ADMIN_PERM,
3387 .policy = ip_vs_cmd_policy,
3388 .doit = ip_vs_genl_set_cmd,
3389 },
3390 {
3391 .cmd = IPVS_CMD_GET_SERVICE,
3392 .flags = GENL_ADMIN_PERM,
3393 .doit = ip_vs_genl_get_cmd,
3394 .dumpit = ip_vs_genl_dump_services,
3395 .policy = ip_vs_cmd_policy,
3396 },
3397 {
3398 .cmd = IPVS_CMD_NEW_DEST,
3399 .flags = GENL_ADMIN_PERM,
3400 .policy = ip_vs_cmd_policy,
3401 .doit = ip_vs_genl_set_cmd,
3402 },
3403 {
3404 .cmd = IPVS_CMD_SET_DEST,
3405 .flags = GENL_ADMIN_PERM,
3406 .policy = ip_vs_cmd_policy,
3407 .doit = ip_vs_genl_set_cmd,
3408 },
3409 {
3410 .cmd = IPVS_CMD_DEL_DEST,
3411 .flags = GENL_ADMIN_PERM,
3412 .policy = ip_vs_cmd_policy,
3413 .doit = ip_vs_genl_set_cmd,
3414 },
3415 {
3416 .cmd = IPVS_CMD_GET_DEST,
3417 .flags = GENL_ADMIN_PERM,
3418 .policy = ip_vs_cmd_policy,
3419 .dumpit = ip_vs_genl_dump_dests,
3420 },
3421 {
3422 .cmd = IPVS_CMD_NEW_DAEMON,
3423 .flags = GENL_ADMIN_PERM,
3424 .policy = ip_vs_cmd_policy,
3425 .doit = ip_vs_genl_set_cmd,
3426 },
3427 {
3428 .cmd = IPVS_CMD_DEL_DAEMON,
3429 .flags = GENL_ADMIN_PERM,
3430 .policy = ip_vs_cmd_policy,
3431 .doit = ip_vs_genl_set_cmd,
3432 },
3433 {
3434 .cmd = IPVS_CMD_GET_DAEMON,
3435 .flags = GENL_ADMIN_PERM,
3436 .dumpit = ip_vs_genl_dump_daemons,
3437 },
3438 {
3439 .cmd = IPVS_CMD_SET_CONFIG,
3440 .flags = GENL_ADMIN_PERM,
3441 .policy = ip_vs_cmd_policy,
3442 .doit = ip_vs_genl_set_cmd,
3443 },
3444 {
3445 .cmd = IPVS_CMD_GET_CONFIG,
3446 .flags = GENL_ADMIN_PERM,
3447 .doit = ip_vs_genl_get_cmd,
3448 },
3449 {
3450 .cmd = IPVS_CMD_GET_INFO,
3451 .flags = GENL_ADMIN_PERM,
3452 .doit = ip_vs_genl_get_cmd,
3453 },
3454 {
3455 .cmd = IPVS_CMD_ZERO,
3456 .flags = GENL_ADMIN_PERM,
3457 .policy = ip_vs_cmd_policy,
3458 .doit = ip_vs_genl_set_cmd,
3459 },
3460 {
3461 .cmd = IPVS_CMD_FLUSH,
3462 .flags = GENL_ADMIN_PERM,
3463 .doit = ip_vs_genl_set_cmd,
3464 },
3465};
3466
3467static int __init ip_vs_genl_register(void)
3468{
Michał Mirosław8f698d52009-05-21 10:34:05 +00003469 return genl_register_family_with_ops(&ip_vs_genl_family,
3470 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
Julius Volz9a812192008-08-14 14:08:44 +02003471}
3472
3473static void ip_vs_genl_unregister(void)
3474{
3475 genl_unregister_family(&ip_vs_genl_family);
3476}
3477
3478/* End of Generic Netlink interface definitions */
3479
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003480/*
3481 * per netns intit/exit func.
3482 */
3483int __net_init __ip_vs_control_init(struct net *net)
3484{
Hans Schillstromfc723252011-01-03 14:44:43 +01003485 int idx;
3486 struct netns_ipvs *ipvs = net_ipvs(net);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003487 struct ctl_table *tbl;
Hans Schillstromfc723252011-01-03 14:44:43 +01003488
Hans Schillstroma0840e22011-01-03 14:44:58 +01003489 atomic_set(&ipvs->dropentry, 0);
3490 spin_lock_init(&ipvs->dropentry_lock);
3491 spin_lock_init(&ipvs->droppacket_lock);
3492 spin_lock_init(&ipvs->securetcp_lock);
3493 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3494
3495 /* Initialize rs_table */
3496 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3497 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3498
Hans Schillstromf2431e62011-01-03 14:45:00 +01003499 INIT_LIST_HEAD(&ipvs->dest_trash);
Hans Schillstrom763f8d02011-01-03 14:45:01 +01003500 atomic_set(&ipvs->ftpsvc_counter, 0);
3501 atomic_set(&ipvs->nullsvc_counter, 0);
Hans Schillstromf2431e62011-01-03 14:45:00 +01003502
Hans Schillstromb17fc992011-01-03 14:44:56 +01003503 /* procfs stats */
3504 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3505 if (ipvs->tot_stats == NULL) {
3506 pr_err("%s(): no memory.\n", __func__);
3507 return -ENOMEM;
3508 }
3509 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3510 if (!ipvs->cpustats) {
3511 pr_err("%s() alloc_percpu failed\n", __func__);
3512 goto err_alloc;
3513 }
3514 spin_lock_init(&ipvs->tot_stats->lock);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003515
3516 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3517 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003518 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3519 &ip_vs_stats_percpu_fops);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003520
3521 if (!net_eq(net, &init_net)) {
3522 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3523 if (tbl == NULL)
3524 goto err_dup;
3525 } else
3526 tbl = vs_vars;
3527 /* Initialize sysctl defaults */
3528 idx = 0;
3529 ipvs->sysctl_amemthresh = 1024;
3530 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3531 ipvs->sysctl_am_droprate = 10;
3532 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3533 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3534 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3535#ifdef CONFIG_IP_VS_NFCT
3536 tbl[idx++].data = &ipvs->sysctl_conntrack;
3537#endif
3538 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3539 ipvs->sysctl_snat_reroute = 1;
3540 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3541 ipvs->sysctl_sync_ver = 1;
3542 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3543 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3544 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3545 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3546 ipvs->sysctl_sync_threshold[0] = 3;
3547 ipvs->sysctl_sync_threshold[1] = 50;
3548 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3549 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3550 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3551
3552
Simon Horman04439292011-02-01 18:29:04 +01003553#ifdef CONFIG_SYSCTL
Hans Schillstroma0840e22011-01-03 14:44:58 +01003554 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
Hans Schillstrom07924702011-01-24 15:14:41 +01003555 tbl);
Simon Horman04439292011-02-01 18:29:04 +01003556 if (ipvs->sysctl_hdr == NULL) {
3557 if (!net_eq(net, &init_net))
3558 kfree(tbl);
3559 goto err_dup;
3560 }
3561#endif
Hans Schillstromb17fc992011-01-03 14:44:56 +01003562 ip_vs_new_estimator(net, ipvs->tot_stats);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003563 ipvs->sysctl_tbl = tbl;
Hans Schillstromf6340ee2011-01-03 14:44:59 +01003564 /* Schedule defense work */
3565 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3566 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003567 return 0;
3568
Hans Schillstroma0840e22011-01-03 14:44:58 +01003569err_dup:
Hans Schillstromb17fc992011-01-03 14:44:56 +01003570 free_percpu(ipvs->cpustats);
3571err_alloc:
3572 kfree(ipvs->tot_stats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003573 return -ENOMEM;
3574}
3575
3576static void __net_exit __ip_vs_control_cleanup(struct net *net)
3577{
Hans Schillstromb17fc992011-01-03 14:44:56 +01003578 struct netns_ipvs *ipvs = net_ipvs(net);
3579
Hans Schillstromf2431e62011-01-03 14:45:00 +01003580 ip_vs_trash_cleanup(net);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003581 ip_vs_kill_estimator(net, ipvs->tot_stats);
Hans Schillstromf2431e62011-01-03 14:45:00 +01003582 cancel_delayed_work_sync(&ipvs->defense_work);
3583 cancel_work_sync(&ipvs->defense_work.work);
Simon Horman04439292011-02-01 18:29:04 +01003584#ifdef CONFIG_SYSCTL
Hans Schillstroma0840e22011-01-03 14:44:58 +01003585 unregister_net_sysctl_table(ipvs->sysctl_hdr);
Simon Horman04439292011-02-01 18:29:04 +01003586#endif
Hans Schillstromb17fc992011-01-03 14:44:56 +01003587 proc_net_remove(net, "ip_vs_stats_percpu");
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003588 proc_net_remove(net, "ip_vs_stats");
3589 proc_net_remove(net, "ip_vs");
Hans Schillstromb17fc992011-01-03 14:44:56 +01003590 free_percpu(ipvs->cpustats);
3591 kfree(ipvs->tot_stats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003592}
3593
3594static struct pernet_operations ipvs_control_ops = {
3595 .init = __ip_vs_control_init,
3596 .exit = __ip_vs_control_cleanup,
3597};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003598
Sven Wegener048cf482008-08-10 18:24:35 +00003599int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003600{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003601 int idx;
Hans Schillstromfc723252011-01-03 14:44:43 +01003602 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003603
3604 EnterFunction(2);
3605
Hans Schillstromfc723252011-01-03 14:44:43 +01003606 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003607 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3608 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3609 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3610 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003611
3612 ret = register_pernet_subsys(&ipvs_control_ops);
3613 if (ret) {
3614 pr_err("cannot register namespace.\n");
3615 goto err;
Eduardo Blancod86bef72010-10-19 10:26:47 +01003616 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003617
3618 smp_wmb(); /* Do we really need it now ? */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003619
Linus Torvalds1da177e2005-04-16 15:20:36 -07003620 ret = nf_register_sockopt(&ip_vs_sockopts);
3621 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003622 pr_err("cannot register sockopt.\n");
Hans Schillstromfc723252011-01-03 14:44:43 +01003623 goto err_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003624 }
3625
Julius Volz9a812192008-08-14 14:08:44 +02003626 ret = ip_vs_genl_register();
3627 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003628 pr_err("cannot register Generic Netlink interface.\n");
Julius Volz9a812192008-08-14 14:08:44 +02003629 nf_unregister_sockopt(&ip_vs_sockopts);
Hans Schillstromfc723252011-01-03 14:44:43 +01003630 goto err_net;
Julius Volz9a812192008-08-14 14:08:44 +02003631 }
3632
Linus Torvalds1da177e2005-04-16 15:20:36 -07003633 LeaveFunction(2);
3634 return 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003635
3636err_net:
3637 unregister_pernet_subsys(&ipvs_control_ops);
3638err:
3639 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003640}
3641
3642
3643void ip_vs_control_cleanup(void)
3644{
3645 EnterFunction(2);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003646 unregister_pernet_subsys(&ipvs_control_ops);
Julius Volz9a812192008-08-14 14:08:44 +02003647 ip_vs_genl_unregister();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003648 nf_unregister_sockopt(&ip_vs_sockopts);
3649 LeaveFunction(2);
3650}