blob: 6a963d44df48ebc5ddda19aa40007495d872b7ab [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
Hannes Eder9aada7a2009-07-30 14:29:44 -070021#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/seq_file.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090034#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080038#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020040#include <net/net_namespace.h>
Hans Schillstrom93304192011-01-03 14:44:51 +010041#include <linux/nsproxy.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <net/ip.h>
Vince Busam09571c72008-09-02 15:55:52 +020043#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020047#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020049#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
Ingo Molnar14cc3e22006-03-26 01:37:14 -080056static DEFINE_MUTEX(__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58/* lock for service table */
59static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
Linus Torvalds1da177e2005-04-16 15:20:36 -070061/* sysctl variables */
Linus Torvalds1da177e2005-04-16 15:20:36 -070062
63#ifdef CONFIG_IP_VS_DEBUG
/* Current IPVS debug verbosity; statics start out as zero. */
static int sysctl_ip_vs_debug_level;

/*
 *	Accessor for the IPVS debug level.
 *	Returns the current value of sysctl_ip_vs_debug_level.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
70#endif
71
Vince Busam09571c72008-09-02 15:55:52 +020072#ifdef CONFIG_IP_VS_IPV6
73/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
74static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
75{
76 struct rt6_info *rt;
77 struct flowi fl = {
78 .oif = 0,
Changli Gao58116622010-11-12 18:43:55 +000079 .fl6_dst = *addr,
80 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
Vince Busam09571c72008-09-02 15:55:52 +020081 };
82
83 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
84 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
85 return 1;
86
87 return 0;
88}
89#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -070090/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -070091 * update_defense_level is called from keventd and from sysctl,
92 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -070093 */
Hans Schillstrom93304192011-01-03 14:44:51 +010094static void update_defense_level(struct netns_ipvs *ipvs)
Linus Torvalds1da177e2005-04-16 15:20:36 -070095{
96 struct sysinfo i;
97 static int old_secure_tcp = 0;
98 int availmem;
99 int nomem;
100 int to_change = -1;
101
102 /* we only count free and buffered memory (in pages) */
103 si_meminfo(&i);
104 availmem = i.freeram + i.bufferram;
105 /* however in linux 2.5 the i.bufferram is total page cache size,
106 we need adjust it */
107 /* si_swapinfo(&i); */
108 /* availmem = availmem - (i.totalswap - i.freeswap); */
109
Hans Schillstroma0840e22011-01-03 14:44:58 +0100110 nomem = (availmem < ipvs->sysctl_amemthresh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700112 local_bh_disable();
113
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114 /* drop_entry */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100115 spin_lock(&ipvs->dropentry_lock);
116 switch (ipvs->sysctl_drop_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100118 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119 break;
120 case 1:
121 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100122 atomic_set(&ipvs->dropentry, 1);
123 ipvs->sysctl_drop_entry = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100125 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126 }
127 break;
128 case 2:
129 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100130 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100132 atomic_set(&ipvs->dropentry, 0);
133 ipvs->sysctl_drop_entry = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 };
135 break;
136 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100137 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138 break;
139 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100140 spin_unlock(&ipvs->dropentry_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141
142 /* drop_packet */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100143 spin_lock(&ipvs->droppacket_lock);
144 switch (ipvs->sysctl_drop_packet) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100146 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 break;
148 case 1:
149 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100150 ipvs->drop_rate = ipvs->drop_counter
151 = ipvs->sysctl_amemthresh /
152 (ipvs->sysctl_amemthresh-availmem);
153 ipvs->sysctl_drop_packet = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700154 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100155 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156 }
157 break;
158 case 2:
159 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100160 ipvs->drop_rate = ipvs->drop_counter
161 = ipvs->sysctl_amemthresh /
162 (ipvs->sysctl_amemthresh-availmem);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100164 ipvs->drop_rate = 0;
165 ipvs->sysctl_drop_packet = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 }
167 break;
168 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100169 ipvs->drop_rate = ipvs->sysctl_am_droprate;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700170 break;
171 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100172 spin_unlock(&ipvs->droppacket_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173
174 /* secure_tcp */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100175 spin_lock(&ipvs->securetcp_lock);
176 switch (ipvs->sysctl_secure_tcp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 case 0:
178 if (old_secure_tcp >= 2)
179 to_change = 0;
180 break;
181 case 1:
182 if (nomem) {
183 if (old_secure_tcp < 2)
184 to_change = 1;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100185 ipvs->sysctl_secure_tcp = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186 } else {
187 if (old_secure_tcp >= 2)
188 to_change = 0;
189 }
190 break;
191 case 2:
192 if (nomem) {
193 if (old_secure_tcp < 2)
194 to_change = 1;
195 } else {
196 if (old_secure_tcp >= 2)
197 to_change = 0;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100198 ipvs->sysctl_secure_tcp = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 }
200 break;
201 case 3:
202 if (old_secure_tcp < 2)
203 to_change = 1;
204 break;
205 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100206 old_secure_tcp = ipvs->sysctl_secure_tcp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 if (to_change >= 0)
Hans Schillstrom93304192011-01-03 14:44:51 +0100208 ip_vs_protocol_timeout_change(ipvs,
Hans Schillstroma0840e22011-01-03 14:44:58 +0100209 ipvs->sysctl_secure_tcp > 1);
210 spin_unlock(&ipvs->securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700211
212 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213}
214
215
216/*
217 * Timer for checking the defense
218 */
219#define DEFENSE_TIMER_PERIOD 1*HZ
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220
David Howellsc4028952006-11-22 14:57:56 +0000221static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222{
Hans Schillstromf6340ee2011-01-03 14:44:59 +0100223 struct netns_ipvs *ipvs =
224 container_of(work, struct netns_ipvs, defense_work.work);
Hans Schillstrom93304192011-01-03 14:44:51 +0100225
226 update_defense_level(ipvs);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100227 if (atomic_read(&ipvs->dropentry))
Hans Schillstromf6340ee2011-01-03 14:44:59 +0100228 ip_vs_random_dropentry(ipvs->net);
229 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230}
231
232int
233ip_vs_use_count_inc(void)
234{
235 return try_module_get(THIS_MODULE);
236}
237
238void
239ip_vs_use_count_dec(void)
240{
241 module_put(THIS_MODULE);
242}
243
244
245/*
246 * Hash table: for virtual service lookups
247 */
248#define IP_VS_SVC_TAB_BITS 8
249#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
250#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
251
252/* the service table hashed by <protocol, addr, port> */
253static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
254/* the service table hashed by fwmark */
255static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
256
257/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 * Trash for destinations
259 */
260static LIST_HEAD(ip_vs_dest_trash);
261
262/*
263 * FTP & NULL virtual service counters
264 */
265static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
266static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
267
268
269/*
270 * Returns hash value for virtual service
271 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100272static inline unsigned
273ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
274 const union nf_inet_addr *addr, __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275{
276 register unsigned porth = ntohs(port);
Julius Volzb18610d2008-09-02 15:55:37 +0200277 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278
Julius Volzb18610d2008-09-02 15:55:37 +0200279#ifdef CONFIG_IP_VS_IPV6
280 if (af == AF_INET6)
281 addr_fold = addr->ip6[0]^addr->ip6[1]^
282 addr->ip6[2]^addr->ip6[3];
283#endif
Hans Schillstromfc723252011-01-03 14:44:43 +0100284 addr_fold ^= ((size_t)net>>8);
Julius Volzb18610d2008-09-02 15:55:37 +0200285
286 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 & IP_VS_SVC_TAB_MASK;
288}
289
290/*
291 * Returns hash value of fwmark for virtual service lookup
292 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100293static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294{
Hans Schillstromfc723252011-01-03 14:44:43 +0100295 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296}
297
298/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100299 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300 * or in the ip_vs_svc_fwm_table by fwmark.
301 * Should be called with locked tables.
302 */
303static int ip_vs_svc_hash(struct ip_vs_service *svc)
304{
305 unsigned hash;
306
307 if (svc->flags & IP_VS_SVC_F_HASHED) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000308 pr_err("%s(): request for already hashed, called from %pF\n",
309 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310 return 0;
311 }
312
313 if (svc->fwmark == 0) {
314 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100315 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100317 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
318 &svc->addr, svc->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
320 } else {
321 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100322 * Hash it by fwmark in svc_fwm_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100324 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
326 }
327
328 svc->flags |= IP_VS_SVC_F_HASHED;
329 /* increase its refcnt because it is referenced by the svc table */
330 atomic_inc(&svc->refcnt);
331 return 1;
332}
333
334
335/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100336 * Unhashes a service from svc_table / svc_fwm_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 * Should be called with locked tables.
338 */
339static int ip_vs_svc_unhash(struct ip_vs_service *svc)
340{
341 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000342 pr_err("%s(): request for unhash flagged, called from %pF\n",
343 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344 return 0;
345 }
346
347 if (svc->fwmark == 0) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100348 /* Remove it from the svc_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 list_del(&svc->s_list);
350 } else {
Hans Schillstromfc723252011-01-03 14:44:43 +0100351 /* Remove it from the svc_fwm_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352 list_del(&svc->f_list);
353 }
354
355 svc->flags &= ~IP_VS_SVC_F_HASHED;
356 atomic_dec(&svc->refcnt);
357 return 1;
358}
359
360
361/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100362 * Get service by {netns, proto,addr,port} in the service table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 */
Julius Volzb18610d2008-09-02 15:55:37 +0200364static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100365__ip_vs_service_find(struct net *net, int af, __u16 protocol,
366 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367{
368 unsigned hash;
369 struct ip_vs_service *svc;
370
371 /* Check for "full" addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100372 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373
374 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
Julius Volzb18610d2008-09-02 15:55:37 +0200375 if ((svc->af == af)
376 && ip_vs_addr_equal(af, &svc->addr, vaddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 && (svc->port == vport)
Hans Schillstromfc723252011-01-03 14:44:43 +0100378 && (svc->protocol == protocol)
379 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381 return svc;
382 }
383 }
384
385 return NULL;
386}
387
388
389/*
390 * Get service by {fwmark} in the service table.
391 */
Julius Volzb18610d2008-09-02 15:55:37 +0200392static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100393__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394{
395 unsigned hash;
396 struct ip_vs_service *svc;
397
398 /* Check for fwmark addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100399 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400
401 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100402 if (svc->fwmark == fwmark && svc->af == af
403 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405 return svc;
406 }
407 }
408
409 return NULL;
410}
411
412struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100413ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
Julius Volz3c2e0502008-09-02 15:55:38 +0200414 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415{
416 struct ip_vs_service *svc;
Julius Volz3c2e0502008-09-02 15:55:38 +0200417
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 read_lock(&__ip_vs_svc_lock);
419
420 /*
421 * Check the table hashed by fwmark first
422 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100423 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
424 if (fwmark && svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 goto out;
426
427 /*
428 * Check the table hashed by <protocol,addr,port>
429 * for "full" addressed entries
430 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100431 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432
433 if (svc == NULL
434 && protocol == IPPROTO_TCP
435 && atomic_read(&ip_vs_ftpsvc_counter)
436 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
437 /*
438 * Check if ftp service entry exists, the packet
439 * might belong to FTP data connections.
440 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100441 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 }
443
444 if (svc == NULL
445 && atomic_read(&ip_vs_nullsvc_counter)) {
446 /*
447 * Check if the catch-all port (port zero) exists
448 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100449 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 }
451
452 out:
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200453 if (svc)
454 atomic_inc(&svc->usecnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455 read_unlock(&__ip_vs_svc_lock);
456
Julius Volz3c2e0502008-09-02 15:55:38 +0200457 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
458 fwmark, ip_vs_proto_name(protocol),
459 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
460 svc ? "hit" : "not hit");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461
462 return svc;
463}
464
465
466static inline void
467__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
468{
469 atomic_inc(&svc->refcnt);
470 dest->svc = svc;
471}
472
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200473static void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474__ip_vs_unbind_svc(struct ip_vs_dest *dest)
475{
476 struct ip_vs_service *svc = dest->svc;
477
478 dest->svc = NULL;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200479 if (atomic_dec_and_test(&svc->refcnt)) {
480 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
481 svc->fwmark,
482 IP_VS_DBG_ADDR(svc->af, &svc->addr),
483 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +0100484 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200486 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487}
488
489
490/*
491 * Returns hash value for real service
492 */
Julius Volz7937df12008-09-02 15:55:48 +0200493static inline unsigned ip_vs_rs_hashkey(int af,
494 const union nf_inet_addr *addr,
495 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496{
497 register unsigned porth = ntohs(port);
Julius Volz7937df12008-09-02 15:55:48 +0200498 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499
Julius Volz7937df12008-09-02 15:55:48 +0200500#ifdef CONFIG_IP_VS_IPV6
501 if (af == AF_INET6)
502 addr_fold = addr->ip6[0]^addr->ip6[1]^
503 addr->ip6[2]^addr->ip6[3];
504#endif
505
506 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507 & IP_VS_RTAB_MASK;
508}
509
510/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100511 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512 * should be called with locked tables.
513 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100514static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515{
516 unsigned hash;
517
518 if (!list_empty(&dest->d_list)) {
519 return 0;
520 }
521
522 /*
523 * Hash by proto,addr,port,
524 * which are the parameters of the real service.
525 */
Julius Volz7937df12008-09-02 15:55:48 +0200526 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
527
Hans Schillstromfc723252011-01-03 14:44:43 +0100528 list_add(&dest->d_list, &ipvs->rs_table[hash]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529
530 return 1;
531}
532
533/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100534 * UNhashes ip_vs_dest from rs_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535 * should be called with locked tables.
536 */
537static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
538{
539 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100540 * Remove it from the rs_table table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541 */
542 if (!list_empty(&dest->d_list)) {
543 list_del(&dest->d_list);
544 INIT_LIST_HEAD(&dest->d_list);
545 }
546
547 return 1;
548}
549
550/*
551 * Lookup real service by <proto,addr,port> in the real service table.
552 */
553struct ip_vs_dest *
Hans Schillstromfc723252011-01-03 14:44:43 +0100554ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
Julius Volz7937df12008-09-02 15:55:48 +0200555 const union nf_inet_addr *daddr,
556 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557{
Hans Schillstromfc723252011-01-03 14:44:43 +0100558 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559 unsigned hash;
560 struct ip_vs_dest *dest;
561
562 /*
563 * Check for "full" addressed entries
564 * Return the first found entry
565 */
Julius Volz7937df12008-09-02 15:55:48 +0200566 hash = ip_vs_rs_hashkey(af, daddr, dport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567
Hans Schillstroma0840e22011-01-03 14:44:58 +0100568 read_lock(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100569 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200570 if ((dest->af == af)
571 && ip_vs_addr_equal(af, &dest->addr, daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572 && (dest->port == dport)
573 && ((dest->protocol == protocol) ||
574 dest->vfwmark)) {
575 /* HIT */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100576 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 return dest;
578 }
579 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100580 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581
582 return NULL;
583}
584
585/*
586 * Lookup destination by {addr,port} in the given service
587 */
588static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200589ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
590 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591{
592 struct ip_vs_dest *dest;
593
594 /*
595 * Find the destination for the given service
596 */
597 list_for_each_entry(dest, &svc->destinations, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200598 if ((dest->af == svc->af)
599 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
600 && (dest->port == dport)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 /* HIT */
602 return dest;
603 }
604 }
605
606 return NULL;
607}
608
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800609/*
610 * Find destination by {daddr,dport,vaddr,protocol}
611 * Cretaed to be used in ip_vs_process_message() in
612 * the backup synchronization daemon. It finds the
613 * destination to be bound to the received connection
614 * on the backup.
615 *
616 * ip_vs_lookup_real_service() looked promissing, but
617 * seems not working as expected.
618 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100619struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
620 const union nf_inet_addr *daddr,
Julius Volz7937df12008-09-02 15:55:48 +0200621 __be16 dport,
622 const union nf_inet_addr *vaddr,
Hans Schillstrom0e051e62010-11-19 14:25:07 +0100623 __be16 vport, __u16 protocol, __u32 fwmark)
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800624{
625 struct ip_vs_dest *dest;
626 struct ip_vs_service *svc;
627
Hans Schillstromfc723252011-01-03 14:44:43 +0100628 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800629 if (!svc)
630 return NULL;
631 dest = ip_vs_lookup_dest(svc, daddr, dport);
632 if (dest)
633 atomic_inc(&dest->refcnt);
634 ip_vs_service_put(svc);
635 return dest;
636}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637
638/*
639 * Lookup dest by {svc,addr,port} in the destination trash.
640 * The destination trash is used to hold the destinations that are removed
641 * from the service table but are still referenced by some conn entries.
642 * The reason to add the destination trash is when the dest is temporary
643 * down (either by administrator or by monitor program), the dest can be
644 * picked back from the trash, the remaining connections to the dest can
645 * continue, and the counting information of the dest is also useful for
646 * scheduling.
647 */
648static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200649ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
650 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651{
652 struct ip_vs_dest *dest, *nxt;
653
654 /*
655 * Find the destination in trash
656 */
657 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200658 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
659 "dest->refcnt=%d\n",
660 dest->vfwmark,
661 IP_VS_DBG_ADDR(svc->af, &dest->addr),
662 ntohs(dest->port),
663 atomic_read(&dest->refcnt));
664 if (dest->af == svc->af &&
665 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666 dest->port == dport &&
667 dest->vfwmark == svc->fwmark &&
668 dest->protocol == svc->protocol &&
669 (svc->fwmark ||
Julius Volz7937df12008-09-02 15:55:48 +0200670 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671 dest->vport == svc->port))) {
672 /* HIT */
673 return dest;
674 }
675
676 /*
677 * Try to purge the destination from trash if not referenced
678 */
679 if (atomic_read(&dest->refcnt) == 1) {
Julius Volz7937df12008-09-02 15:55:48 +0200680 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
681 "from trash\n",
682 dest->vfwmark,
683 IP_VS_DBG_ADDR(svc->af, &dest->addr),
684 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 list_del(&dest->n_list);
686 ip_vs_dst_reset(dest);
687 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100688 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689 kfree(dest);
690 }
691 }
692
693 return NULL;
694}
695
696
697/*
698 * Clean up all the destinations in the trash
699 * Called by the ip_vs_control_cleanup()
700 *
701 * When the ip_vs_control_clearup is activated by ipvs module exit,
702 * the service tables must have been flushed and all the connections
703 * are expired, and the refcnt of each destination in the trash must
704 * be 1, so we simply release them here.
705 */
706static void ip_vs_trash_cleanup(void)
707{
708 struct ip_vs_dest *dest, *nxt;
709
710 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
711 list_del(&dest->n_list);
712 ip_vs_dst_reset(dest);
713 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100714 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715 kfree(dest);
716 }
717}
718
719
720static void
721ip_vs_zero_stats(struct ip_vs_stats *stats)
722{
723 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000724
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200725 memset(&stats->ustats, 0, sizeof(stats->ustats));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000727
Sven Wegener3a14a3132008-08-10 18:24:41 +0000728 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729}
730
731/*
732 * Update a destination in the given service
733 */
734static void
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200735__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
736 struct ip_vs_dest_user_kern *udest, int add)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737{
Hans Schillstromfc723252011-01-03 14:44:43 +0100738 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 int conn_flags;
740
741 /* set the weight and the flags */
742 atomic_set(&dest->weight, udest->weight);
Julian Anastasov35757922010-09-17 14:18:16 +0200743 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
744 conn_flags |= IP_VS_CONN_F_INACTIVE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
Julian Anastasov35757922010-09-17 14:18:16 +0200747 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
749 } else {
750 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100751 * Put the real service in rs_table if not present.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 * For now only for NAT!
753 */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100754 write_lock_bh(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100755 ip_vs_rs_hash(ipvs, dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100756 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757 }
758 atomic_set(&dest->conn_flags, conn_flags);
759
760 /* bind the service */
761 if (!dest->svc) {
762 __ip_vs_bind_svc(dest, svc);
763 } else {
764 if (dest->svc != svc) {
765 __ip_vs_unbind_svc(dest);
766 ip_vs_zero_stats(&dest->stats);
767 __ip_vs_bind_svc(dest, svc);
768 }
769 }
770
771 /* set the dest status flags */
772 dest->flags |= IP_VS_DEST_F_AVAILABLE;
773
774 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
775 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
776 dest->u_threshold = udest->u_threshold;
777 dest->l_threshold = udest->l_threshold;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200778
Julian Anastasovfc604762010-10-17 16:38:15 +0300779 spin_lock(&dest->dst_lock);
780 ip_vs_dst_reset(dest);
781 spin_unlock(&dest->dst_lock);
782
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200783 if (add)
Hans Schillstrom29c20262011-01-03 14:44:54 +0100784 ip_vs_new_estimator(svc->net, &dest->stats);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200785
786 write_lock_bh(&__ip_vs_svc_lock);
787
788 /* Wait until all other svc users go away */
789 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
790
791 if (add) {
792 list_add(&dest->n_list, &svc->destinations);
793 svc->num_dests++;
794 }
795
796 /* call the update_service, because server weight may be changed */
797 if (svc->scheduler->update_service)
798 svc->scheduler->update_service(svc);
799
800 write_unlock_bh(&__ip_vs_svc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801}
802
803
804/*
805 * Create a destination for the given service
806 */
807static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200808ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 struct ip_vs_dest **dest_p)
810{
811 struct ip_vs_dest *dest;
812 unsigned atype;
813
814 EnterFunction(2);
815
Vince Busam09571c72008-09-02 15:55:52 +0200816#ifdef CONFIG_IP_VS_IPV6
817 if (svc->af == AF_INET6) {
818 atype = ipv6_addr_type(&udest->addr.in6);
Sven Wegener3bfb92f2008-09-05 16:53:49 +0200819 if ((!(atype & IPV6_ADDR_UNICAST) ||
820 atype & IPV6_ADDR_LINKLOCAL) &&
Vince Busam09571c72008-09-02 15:55:52 +0200821 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
822 return -EINVAL;
823 } else
824#endif
825 {
826 atype = inet_addr_type(&init_net, udest->addr.ip);
827 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
828 return -EINVAL;
829 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830
Simon Hormandee06e42010-08-26 02:54:31 +0000831 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000833 pr_err("%s(): no memory.\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 return -ENOMEM;
835 }
Hans Schillstromb17fc992011-01-03 14:44:56 +0100836 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
837 if (!dest->stats.cpustats) {
838 pr_err("%s() alloc_percpu failed\n", __func__);
839 goto err_alloc;
840 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841
Julius Volzc860c6b2008-09-02 15:55:36 +0200842 dest->af = svc->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 dest->protocol = svc->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +0200844 dest->vaddr = svc->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845 dest->vport = svc->port;
846 dest->vfwmark = svc->fwmark;
Julius Volzc860c6b2008-09-02 15:55:36 +0200847 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 dest->port = udest->port;
849
850 atomic_set(&dest->activeconns, 0);
851 atomic_set(&dest->inactconns, 0);
852 atomic_set(&dest->persistconns, 0);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200853 atomic_set(&dest->refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854
855 INIT_LIST_HEAD(&dest->d_list);
856 spin_lock_init(&dest->dst_lock);
857 spin_lock_init(&dest->stats.lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200858 __ip_vs_update_dest(svc, dest, udest, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859
860 *dest_p = dest;
861
862 LeaveFunction(2);
863 return 0;
Hans Schillstromb17fc992011-01-03 14:44:56 +0100864
865err_alloc:
866 kfree(dest);
867 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868}
869
870
871/*
872 * Add a destination into an existing service
873 */
874static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200875ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876{
877 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200878 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700879 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880 int ret;
881
882 EnterFunction(2);
883
884 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000885 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 return -ERANGE;
887 }
888
889 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000890 pr_err("%s(): lower threshold is higher than upper threshold\n",
891 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700892 return -ERANGE;
893 }
894
Julius Volzc860c6b2008-09-02 15:55:36 +0200895 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
896
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897 /*
898 * Check if the dest already exists in the list
899 */
Julius Volz7937df12008-09-02 15:55:48 +0200900 dest = ip_vs_lookup_dest(svc, &daddr, dport);
901
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902 if (dest != NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000903 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 return -EEXIST;
905 }
906
907 /*
908 * Check if the dest already exists in the trash and
909 * is from the same service
910 */
Julius Volz7937df12008-09-02 15:55:48 +0200911 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
912
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 if (dest != NULL) {
Julius Volzcfc78c52008-09-02 15:55:53 +0200914 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
915 "dest->refcnt=%d, service %u/%s:%u\n",
916 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
917 atomic_read(&dest->refcnt),
918 dest->vfwmark,
919 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
920 ntohs(dest->vport));
921
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922 /*
923 * Get the destination from the trash
924 */
925 list_del(&dest->n_list);
926
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200927 __ip_vs_update_dest(svc, dest, udest, 1);
928 ret = 0;
929 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 /*
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200931 * Allocate and initialize the dest structure
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200933 ret = ip_vs_new_dest(svc, udest, &dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935 LeaveFunction(2);
936
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200937 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700938}
939
940
941/*
942 * Edit a destination in the given service
943 */
944static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200945ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946{
947 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200948 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700949 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950
951 EnterFunction(2);
952
953 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000954 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 return -ERANGE;
956 }
957
958 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000959 pr_err("%s(): lower threshold is higher than upper threshold\n",
960 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961 return -ERANGE;
962 }
963
Julius Volzc860c6b2008-09-02 15:55:36 +0200964 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
965
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 /*
967 * Lookup the destination list
968 */
Julius Volz7937df12008-09-02 15:55:48 +0200969 dest = ip_vs_lookup_dest(svc, &daddr, dport);
970
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000972 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700973 return -ENOENT;
974 }
975
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200976 __ip_vs_update_dest(svc, dest, udest, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977 LeaveFunction(2);
978
979 return 0;
980}
981
982
983/*
984 * Delete a destination (must be already unlinked from the service)
985 */
Hans Schillstrom29c20262011-01-03 14:44:54 +0100986static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987{
Hans Schillstroma0840e22011-01-03 14:44:58 +0100988 struct netns_ipvs *ipvs = net_ipvs(net);
989
Hans Schillstrom29c20262011-01-03 14:44:54 +0100990 ip_vs_kill_estimator(net, &dest->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991
992 /*
993 * Remove it from the d-linked list with the real services.
994 */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100995 write_lock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996 ip_vs_rs_unhash(dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100997 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998
999 /*
1000 * Decrease the refcnt of the dest, and free the dest
1001 * if nobody refers to it (refcnt=0). Otherwise, throw
1002 * the destination into the trash.
1003 */
1004 if (atomic_dec_and_test(&dest->refcnt)) {
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001005 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1006 dest->vfwmark,
1007 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1008 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009 ip_vs_dst_reset(dest);
1010 /* simply decrease svc->refcnt here, let the caller check
1011 and release the service if nobody refers to it.
1012 Only user context can release destination and service,
1013 and only one user context can update virtual service at a
1014 time, so the operation here is OK */
1015 atomic_dec(&dest->svc->refcnt);
Hans Schillstromb17fc992011-01-03 14:44:56 +01001016 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 kfree(dest);
1018 } else {
Julius Volzcfc78c52008-09-02 15:55:53 +02001019 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1020 "dest->refcnt=%d\n",
1021 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1022 ntohs(dest->port),
1023 atomic_read(&dest->refcnt));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 list_add(&dest->n_list, &ip_vs_dest_trash);
1025 atomic_inc(&dest->refcnt);
1026 }
1027}
1028
1029
1030/*
1031 * Unlink a destination from the given service
1032 */
1033static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1034 struct ip_vs_dest *dest,
1035 int svcupd)
1036{
1037 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1038
1039 /*
1040 * Remove it from the d-linked destination list.
1041 */
1042 list_del(&dest->n_list);
1043 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001044
1045 /*
1046 * Call the update_service function of its scheduler
1047 */
1048 if (svcupd && svc->scheduler->update_service)
1049 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050}
1051
1052
1053/*
1054 * Delete a destination server in the given service
1055 */
1056static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001057ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058{
1059 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -07001060 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061
1062 EnterFunction(2);
1063
Julius Volz7937df12008-09-02 15:55:48 +02001064 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
Julius Volzc860c6b2008-09-02 15:55:36 +02001065
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001067 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 return -ENOENT;
1069 }
1070
1071 write_lock_bh(&__ip_vs_svc_lock);
1072
1073 /*
1074 * Wait until all other svc users go away.
1075 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001076 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077
1078 /*
1079 * Unlink dest from the service
1080 */
1081 __ip_vs_unlink_dest(svc, dest, 1);
1082
1083 write_unlock_bh(&__ip_vs_svc_lock);
1084
1085 /*
1086 * Delete the destination
1087 */
Hans Schillstroma0840e22011-01-03 14:44:58 +01001088 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089
1090 LeaveFunction(2);
1091
1092 return 0;
1093}
1094
1095
1096/*
1097 * Add a service into the service hash table
1098 */
1099static int
Hans Schillstromfc723252011-01-03 14:44:43 +01001100ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
Julius Volzc860c6b2008-09-02 15:55:36 +02001101 struct ip_vs_service **svc_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102{
1103 int ret = 0;
1104 struct ip_vs_scheduler *sched = NULL;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001105 struct ip_vs_pe *pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 struct ip_vs_service *svc = NULL;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001107 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001108
1109 /* increase the module use count */
1110 ip_vs_use_count_inc();
1111
1112 /* Lookup the scheduler by 'u->sched_name' */
1113 sched = ip_vs_scheduler_get(u->sched_name);
1114 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001115 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001116 ret = -ENOENT;
Simon Horman6e08bfb2010-08-22 21:37:52 +09001117 goto out_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 }
1119
Simon Horman0d1e71b2010-08-22 21:37:54 +09001120 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001121 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001122 if (pe == NULL) {
1123 pr_info("persistence engine module ip_vs_pe_%s "
1124 "not found\n", u->pe_name);
1125 ret = -ENOENT;
1126 goto out_err;
1127 }
1128 }
1129
Julius Volzf94fd042008-09-02 15:55:55 +02001130#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001131 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1132 ret = -EINVAL;
1133 goto out_err;
Julius Volzf94fd042008-09-02 15:55:55 +02001134 }
1135#endif
1136
Simon Hormandee06e42010-08-26 02:54:31 +00001137 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 if (svc == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001139 IP_VS_DBG(1, "%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 ret = -ENOMEM;
1141 goto out_err;
1142 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001143 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1144 if (!svc->stats.cpustats) {
1145 pr_err("%s() alloc_percpu failed\n", __func__);
1146 goto out_err;
1147 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148
1149 /* I'm the first user of the service */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001150 atomic_set(&svc->usecnt, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151 atomic_set(&svc->refcnt, 0);
1152
Julius Volzc860c6b2008-09-02 15:55:36 +02001153 svc->af = u->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154 svc->protocol = u->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +02001155 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156 svc->port = u->port;
1157 svc->fwmark = u->fwmark;
1158 svc->flags = u->flags;
1159 svc->timeout = u->timeout * HZ;
1160 svc->netmask = u->netmask;
Hans Schillstromfc723252011-01-03 14:44:43 +01001161 svc->net = net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162
1163 INIT_LIST_HEAD(&svc->destinations);
1164 rwlock_init(&svc->sched_lock);
1165 spin_lock_init(&svc->stats.lock);
1166
1167 /* Bind the scheduler */
1168 ret = ip_vs_bind_scheduler(svc, sched);
1169 if (ret)
1170 goto out_err;
1171 sched = NULL;
1172
Simon Horman0d1e71b2010-08-22 21:37:54 +09001173 /* Bind the ct retriever */
1174 ip_vs_bind_pe(svc, pe);
1175 pe = NULL;
1176
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 /* Update the virtual service counters */
1178 if (svc->port == FTPPORT)
1179 atomic_inc(&ip_vs_ftpsvc_counter);
1180 else if (svc->port == 0)
1181 atomic_inc(&ip_vs_nullsvc_counter);
1182
Hans Schillstrom29c20262011-01-03 14:44:54 +01001183 ip_vs_new_estimator(net, &svc->stats);
Julius Volzf94fd042008-09-02 15:55:55 +02001184
1185 /* Count only IPv4 services for old get/setsockopt interface */
1186 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001187 ipvs->num_services++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188
1189 /* Hash the service into the service table */
1190 write_lock_bh(&__ip_vs_svc_lock);
1191 ip_vs_svc_hash(svc);
1192 write_unlock_bh(&__ip_vs_svc_lock);
1193
1194 *svc_p = svc;
1195 return 0;
1196
Hans Schillstromb17fc992011-01-03 14:44:56 +01001197
Simon Horman6e08bfb2010-08-22 21:37:52 +09001198 out_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199 if (svc != NULL) {
Simon Horman2fabf352010-08-22 21:37:52 +09001200 ip_vs_unbind_scheduler(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 if (svc->inc) {
1202 local_bh_disable();
1203 ip_vs_app_inc_put(svc->inc);
1204 local_bh_enable();
1205 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001206 if (svc->stats.cpustats)
1207 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208 kfree(svc);
1209 }
1210 ip_vs_scheduler_put(sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001211 ip_vs_pe_put(pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213 /* decrease the module use count */
1214 ip_vs_use_count_dec();
1215
1216 return ret;
1217}
1218
1219
1220/*
1221 * Edit a service and bind it with a new scheduler
1222 */
1223static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001224ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225{
1226 struct ip_vs_scheduler *sched, *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001227 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228 int ret = 0;
1229
1230 /*
1231 * Lookup the scheduler, by 'u->sched_name'
1232 */
1233 sched = ip_vs_scheduler_get(u->sched_name);
1234 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001235 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236 return -ENOENT;
1237 }
1238 old_sched = sched;
1239
Simon Horman0d1e71b2010-08-22 21:37:54 +09001240 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001241 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001242 if (pe == NULL) {
1243 pr_info("persistence engine module ip_vs_pe_%s "
1244 "not found\n", u->pe_name);
1245 ret = -ENOENT;
1246 goto out;
1247 }
1248 old_pe = pe;
1249 }
1250
Julius Volzf94fd042008-09-02 15:55:55 +02001251#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001252 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1253 ret = -EINVAL;
1254 goto out;
Julius Volzf94fd042008-09-02 15:55:55 +02001255 }
1256#endif
1257
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258 write_lock_bh(&__ip_vs_svc_lock);
1259
1260 /*
1261 * Wait until all other svc users go away.
1262 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001263 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264
1265 /*
1266 * Set the flags and timeout value
1267 */
1268 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1269 svc->timeout = u->timeout * HZ;
1270 svc->netmask = u->netmask;
1271
1272 old_sched = svc->scheduler;
1273 if (sched != old_sched) {
1274 /*
1275 * Unbind the old scheduler
1276 */
1277 if ((ret = ip_vs_unbind_scheduler(svc))) {
1278 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001279 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280 }
1281
1282 /*
1283 * Bind the new scheduler
1284 */
1285 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1286 /*
1287 * If ip_vs_bind_scheduler fails, restore the old
1288 * scheduler.
1289 * The main reason of failure is out of memory.
1290 *
1291 * The question is if the old scheduler can be
1292 * restored all the time. TODO: if it cannot be
1293 * restored some time, we must delete the service,
1294 * otherwise the system may crash.
1295 */
1296 ip_vs_bind_scheduler(svc, old_sched);
1297 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001298 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299 }
1300 }
1301
Simon Horman0d1e71b2010-08-22 21:37:54 +09001302 old_pe = svc->pe;
1303 if (pe != old_pe) {
1304 ip_vs_unbind_pe(svc);
1305 ip_vs_bind_pe(svc, pe);
1306 }
1307
Simon Horman9e691ed2008-09-17 10:10:41 +10001308 out_unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 write_unlock_bh(&__ip_vs_svc_lock);
Simon Horman9e691ed2008-09-17 10:10:41 +10001310 out:
Simon Horman6e08bfb2010-08-22 21:37:52 +09001311 ip_vs_scheduler_put(old_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001312 ip_vs_pe_put(old_pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 return ret;
1314}
1315
1316
1317/*
1318 * Delete a service from the service list
1319 * - The service must be unlinked, unlocked and not referenced!
1320 * - We are called under _bh lock
1321 */
1322static void __ip_vs_del_service(struct ip_vs_service *svc)
1323{
1324 struct ip_vs_dest *dest, *nxt;
1325 struct ip_vs_scheduler *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001326 struct ip_vs_pe *old_pe;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001327 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001328
1329 pr_info("%s: enter\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330
Julius Volzf94fd042008-09-02 15:55:55 +02001331 /* Count only IPv4 services for old get/setsockopt interface */
1332 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001333 ipvs->num_services--;
Julius Volzf94fd042008-09-02 15:55:55 +02001334
Hans Schillstrom29c20262011-01-03 14:44:54 +01001335 ip_vs_kill_estimator(svc->net, &svc->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336
1337 /* Unbind scheduler */
1338 old_sched = svc->scheduler;
1339 ip_vs_unbind_scheduler(svc);
Simon Horman6e08bfb2010-08-22 21:37:52 +09001340 ip_vs_scheduler_put(old_sched);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341
Simon Horman0d1e71b2010-08-22 21:37:54 +09001342 /* Unbind persistence engine */
1343 old_pe = svc->pe;
1344 ip_vs_unbind_pe(svc);
1345 ip_vs_pe_put(old_pe);
1346
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347 /* Unbind app inc */
1348 if (svc->inc) {
1349 ip_vs_app_inc_put(svc->inc);
1350 svc->inc = NULL;
1351 }
1352
1353 /*
1354 * Unlink the whole destination list
1355 */
1356 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1357 __ip_vs_unlink_dest(svc, dest, 0);
Hans Schillstrom29c20262011-01-03 14:44:54 +01001358 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 }
1360
1361 /*
1362 * Update the virtual service counters
1363 */
1364 if (svc->port == FTPPORT)
1365 atomic_dec(&ip_vs_ftpsvc_counter);
1366 else if (svc->port == 0)
1367 atomic_dec(&ip_vs_nullsvc_counter);
1368
1369 /*
1370 * Free the service if nobody refers to it
1371 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001372 if (atomic_read(&svc->refcnt) == 0) {
1373 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1374 svc->fwmark,
1375 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1376 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +01001377 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001379 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380
1381 /* decrease the module use count */
1382 ip_vs_use_count_dec();
1383}
1384
1385/*
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001386 * Unlink a service from list and try to delete it if its refcnt reached 0
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001388static void ip_vs_unlink_service(struct ip_vs_service *svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001389{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390 /*
1391 * Unhash it from the service table
1392 */
1393 write_lock_bh(&__ip_vs_svc_lock);
1394
1395 ip_vs_svc_unhash(svc);
1396
1397 /*
1398 * Wait until all the svc users go away.
1399 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001400 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001401
1402 __ip_vs_del_service(svc);
1403
1404 write_unlock_bh(&__ip_vs_svc_lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001405}
1406
1407/*
1408 * Delete a service from the service list
1409 */
1410static int ip_vs_del_service(struct ip_vs_service *svc)
1411{
1412 if (svc == NULL)
1413 return -EEXIST;
1414 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415
1416 return 0;
1417}
1418
1419
1420/*
1421 * Flush all the virtual services
1422 */
Hans Schillstromfc723252011-01-03 14:44:43 +01001423static int ip_vs_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424{
1425 int idx;
1426 struct ip_vs_service *svc, *nxt;
1427
1428 /*
Hans Schillstromfc723252011-01-03 14:44:43 +01001429 * Flush the service table hashed by <netns,protocol,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430 */
1431 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001432 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1433 s_list) {
1434 if (net_eq(svc->net, net))
1435 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001436 }
1437 }
1438
1439 /*
1440 * Flush the service table hashed by fwmark
1441 */
1442 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1443 list_for_each_entry_safe(svc, nxt,
1444 &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001445 if (net_eq(svc->net, net))
1446 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447 }
1448 }
1449
1450 return 0;
1451}
1452
1453
1454/*
1455 * Zero counters in a service or all services
1456 */
1457static int ip_vs_zero_service(struct ip_vs_service *svc)
1458{
1459 struct ip_vs_dest *dest;
1460
1461 write_lock_bh(&__ip_vs_svc_lock);
1462 list_for_each_entry(dest, &svc->destinations, n_list) {
1463 ip_vs_zero_stats(&dest->stats);
1464 }
1465 ip_vs_zero_stats(&svc->stats);
1466 write_unlock_bh(&__ip_vs_svc_lock);
1467 return 0;
1468}
1469
Hans Schillstromfc723252011-01-03 14:44:43 +01001470static int ip_vs_zero_all(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471{
1472 int idx;
1473 struct ip_vs_service *svc;
1474
1475 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1476 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001477 if (net_eq(svc->net, net))
1478 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 }
1480 }
1481
1482 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1483 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001484 if (net_eq(svc->net, net))
1485 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001486 }
1487 }
1488
Hans Schillstromb17fc992011-01-03 14:44:56 +01001489 ip_vs_zero_stats(net_ipvs(net)->tot_stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001490 return 0;
1491}
1492
1493
1494static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001495proc_do_defense_mode(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001496 void __user *buffer, size_t *lenp, loff_t *ppos)
1497{
Hans Schillstrom93304192011-01-03 14:44:51 +01001498 struct net *net = current->nsproxy->net_ns;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499 int *valp = table->data;
1500 int val = *valp;
1501 int rc;
1502
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001503 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504 if (write && (*valp != val)) {
1505 if ((*valp < 0) || (*valp > 3)) {
1506 /* Restore the correct value */
1507 *valp = val;
1508 } else {
Hans Schillstrom93304192011-01-03 14:44:51 +01001509 update_defense_level(net_ipvs(net));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510 }
1511 }
1512 return rc;
1513}
1514
1515
1516static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001517proc_do_sync_threshold(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518 void __user *buffer, size_t *lenp, loff_t *ppos)
1519{
1520 int *valp = table->data;
1521 int val[2];
1522 int rc;
1523
1524 /* backup the value first */
1525 memcpy(val, valp, sizeof(val));
1526
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001527 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1529 /* Restore the correct value */
1530 memcpy(valp, val, sizeof(val));
1531 }
1532 return rc;
1533}
1534
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001535static int
1536proc_do_sync_mode(ctl_table *table, int write,
1537 void __user *buffer, size_t *lenp, loff_t *ppos)
1538{
1539 int *valp = table->data;
1540 int val = *valp;
1541 int rc;
1542
1543 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1544 if (write && (*valp != val)) {
1545 if ((*valp < 0) || (*valp > 1)) {
1546 /* Restore the correct value */
1547 *valp = val;
1548 } else {
Hans Schillstromf1313152011-01-03 14:44:55 +01001549 struct net *net = current->nsproxy->net_ns;
1550 ip_vs_sync_switch_mode(net, val);
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001551 }
1552 }
1553 return rc;
1554}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001555
/*
 *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *	Do not reorder or insert entries without making the matching
 *	change to the netns init in __ip_vs_control_init().
 */
1561
1562static struct ctl_table vs_vars[] = {
1563 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564 .procname = "amemthresh",
Hans Schillstroma0840e22011-01-03 14:44:58 +01001565 .maxlen = sizeof(int),
1566 .mode = 0644,
1567 .proc_handler = proc_dointvec,
1568 },
1569 {
1570 .procname = "am_droprate",
1571 .maxlen = sizeof(int),
1572 .mode = 0644,
1573 .proc_handler = proc_dointvec,
1574 },
1575 {
1576 .procname = "drop_entry",
1577 .maxlen = sizeof(int),
1578 .mode = 0644,
1579 .proc_handler = proc_do_defense_mode,
1580 },
1581 {
1582 .procname = "drop_packet",
1583 .maxlen = sizeof(int),
1584 .mode = 0644,
1585 .proc_handler = proc_do_defense_mode,
1586 },
1587#ifdef CONFIG_IP_VS_NFCT
1588 {
1589 .procname = "conntrack",
1590 .maxlen = sizeof(int),
1591 .mode = 0644,
1592 .proc_handler = &proc_dointvec,
1593 },
1594#endif
1595 {
1596 .procname = "secure_tcp",
1597 .maxlen = sizeof(int),
1598 .mode = 0644,
1599 .proc_handler = proc_do_defense_mode,
1600 },
1601 {
1602 .procname = "snat_reroute",
1603 .maxlen = sizeof(int),
1604 .mode = 0644,
1605 .proc_handler = &proc_dointvec,
1606 },
1607 {
1608 .procname = "sync_version",
1609 .maxlen = sizeof(int),
1610 .mode = 0644,
1611 .proc_handler = &proc_do_sync_mode,
1612 },
1613 {
1614 .procname = "cache_bypass",
1615 .maxlen = sizeof(int),
1616 .mode = 0644,
1617 .proc_handler = proc_dointvec,
1618 },
1619 {
1620 .procname = "expire_nodest_conn",
1621 .maxlen = sizeof(int),
1622 .mode = 0644,
1623 .proc_handler = proc_dointvec,
1624 },
1625 {
1626 .procname = "expire_quiescent_template",
1627 .maxlen = sizeof(int),
1628 .mode = 0644,
1629 .proc_handler = proc_dointvec,
1630 },
1631 {
1632 .procname = "sync_threshold",
1633 .maxlen =
1634 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1635 .mode = 0644,
1636 .proc_handler = proc_do_sync_threshold,
1637 },
1638 {
1639 .procname = "nat_icmp_send",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001640 .maxlen = sizeof(int),
1641 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001642 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643 },
1644#ifdef CONFIG_IP_VS_DEBUG
1645 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001646 .procname = "debug_level",
1647 .data = &sysctl_ip_vs_debug_level,
1648 .maxlen = sizeof(int),
1649 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001650 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651 },
1652#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653#if 0
1654 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655 .procname = "timeout_established",
1656 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1657 .maxlen = sizeof(int),
1658 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001659 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660 },
1661 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662 .procname = "timeout_synsent",
1663 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1664 .maxlen = sizeof(int),
1665 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001666 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667 },
1668 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 .procname = "timeout_synrecv",
1670 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1671 .maxlen = sizeof(int),
1672 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001673 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674 },
1675 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 .procname = "timeout_finwait",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1678 .maxlen = sizeof(int),
1679 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001680 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 },
1682 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 .procname = "timeout_timewait",
1684 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1685 .maxlen = sizeof(int),
1686 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001687 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 },
1689 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690 .procname = "timeout_close",
1691 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1692 .maxlen = sizeof(int),
1693 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001694 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 },
1696 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 .procname = "timeout_closewait",
1698 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1699 .maxlen = sizeof(int),
1700 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001701 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 },
1703 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704 .procname = "timeout_lastack",
1705 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1706 .maxlen = sizeof(int),
1707 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001708 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 },
1710 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 .procname = "timeout_listen",
1712 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1713 .maxlen = sizeof(int),
1714 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001715 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716 },
1717 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718 .procname = "timeout_synack",
1719 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1720 .maxlen = sizeof(int),
1721 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001722 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 },
1724 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 .procname = "timeout_udp",
1726 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1727 .maxlen = sizeof(int),
1728 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001729 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730 },
1731 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732 .procname = "timeout_icmp",
1733 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1734 .maxlen = sizeof(int),
1735 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001736 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737 },
1738#endif
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001739 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740};
1741
Sven Wegener5587da52008-08-10 18:24:40 +00001742const struct ctl_path net_vs_ctl_path[] = {
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001743 { .procname = "net", },
1744 { .procname = "ipv4", },
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001745 { .procname = "vs", },
1746 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001748EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750#ifdef CONFIG_PROC_FS
1751
1752struct ip_vs_iter {
Hans Schillstromfc723252011-01-03 14:44:43 +01001753 struct seq_net_private p; /* Do not move this, netns depends upon it*/
Linus Torvalds1da177e2005-04-16 15:20:36 -07001754 struct list_head *table;
1755 int bucket;
1756};
1757
1758/*
1759 * Write the contents of the VS rule table to a PROCfs file.
1760 * (It is kept just for backward compatibility)
1761 */
1762static inline const char *ip_vs_fwd_name(unsigned flags)
1763{
1764 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1765 case IP_VS_CONN_F_LOCALNODE:
1766 return "Local";
1767 case IP_VS_CONN_F_TUNNEL:
1768 return "Tunnel";
1769 case IP_VS_CONN_F_DROUTE:
1770 return "Route";
1771 default:
1772 return "Masq";
1773 }
1774}
1775
1776
1777/* Get the Nth entry in the two lists */
1778static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1779{
Hans Schillstromfc723252011-01-03 14:44:43 +01001780 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781 struct ip_vs_iter *iter = seq->private;
1782 int idx;
1783 struct ip_vs_service *svc;
1784
1785 /* look in hash by protocol */
1786 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1787 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001788 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789 iter->table = ip_vs_svc_table;
1790 iter->bucket = idx;
1791 return svc;
1792 }
1793 }
1794 }
1795
1796 /* keep looking in fwmark */
1797 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1798 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001799 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800 iter->table = ip_vs_svc_fwm_table;
1801 iter->bucket = idx;
1802 return svc;
1803 }
1804 }
1805 }
1806
1807 return NULL;
1808}
1809
1810static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
Simon Horman563e94f2008-09-17 10:10:42 +10001811__acquires(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001812{
1813
1814 read_lock_bh(&__ip_vs_svc_lock);
1815 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1816}
1817
1818
1819static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1820{
1821 struct list_head *e;
1822 struct ip_vs_iter *iter;
1823 struct ip_vs_service *svc;
1824
1825 ++*pos;
1826 if (v == SEQ_START_TOKEN)
1827 return ip_vs_info_array(seq,0);
1828
1829 svc = v;
1830 iter = seq->private;
1831
1832 if (iter->table == ip_vs_svc_table) {
1833 /* next service in table hashed by protocol */
1834 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1835 return list_entry(e, struct ip_vs_service, s_list);
1836
1837
1838 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1839 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1840 s_list) {
1841 return svc;
1842 }
1843 }
1844
1845 iter->table = ip_vs_svc_fwm_table;
1846 iter->bucket = -1;
1847 goto scan_fwmark;
1848 }
1849
1850 /* next service in hashed by fwmark */
1851 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1852 return list_entry(e, struct ip_vs_service, f_list);
1853
1854 scan_fwmark:
1855 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1856 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1857 f_list)
1858 return svc;
1859 }
1860
1861 return NULL;
1862}
1863
1864static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
Simon Horman563e94f2008-09-17 10:10:42 +10001865__releases(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866{
1867 read_unlock_bh(&__ip_vs_svc_lock);
1868}
1869
1870
1871static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1872{
1873 if (v == SEQ_START_TOKEN) {
1874 seq_printf(seq,
1875 "IP Virtual Server version %d.%d.%d (size=%d)\n",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01001876 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001877 seq_puts(seq,
1878 "Prot LocalAddress:Port Scheduler Flags\n");
1879 seq_puts(seq,
1880 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1881 } else {
1882 const struct ip_vs_service *svc = v;
1883 const struct ip_vs_iter *iter = seq->private;
1884 const struct ip_vs_dest *dest;
1885
Vince Busam667a5f12008-09-02 15:55:49 +02001886 if (iter->table == ip_vs_svc_table) {
1887#ifdef CONFIG_IP_VS_IPV6
1888 if (svc->af == AF_INET6)
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001889 seq_printf(seq, "%s [%pI6]:%04X %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001890 ip_vs_proto_name(svc->protocol),
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001891 &svc->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001892 ntohs(svc->port),
1893 svc->scheduler->name);
1894 else
1895#endif
Nick Chalk26ec0372010-06-22 08:07:01 +02001896 seq_printf(seq, "%s %08X:%04X %s %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001897 ip_vs_proto_name(svc->protocol),
1898 ntohl(svc->addr.ip),
1899 ntohs(svc->port),
Nick Chalk26ec0372010-06-22 08:07:01 +02001900 svc->scheduler->name,
1901 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001902 } else {
Nick Chalk26ec0372010-06-22 08:07:01 +02001903 seq_printf(seq, "FWM %08X %s %s",
1904 svc->fwmark, svc->scheduler->name,
1905 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001906 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001907
1908 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1909 seq_printf(seq, "persistent %d %08X\n",
1910 svc->timeout,
1911 ntohl(svc->netmask));
1912 else
1913 seq_putc(seq, '\n');
1914
1915 list_for_each_entry(dest, &svc->destinations, n_list) {
Vince Busam667a5f12008-09-02 15:55:49 +02001916#ifdef CONFIG_IP_VS_IPV6
1917 if (dest->af == AF_INET6)
1918 seq_printf(seq,
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001919 " -> [%pI6]:%04X"
Vince Busam667a5f12008-09-02 15:55:49 +02001920 " %-7s %-6d %-10d %-10d\n",
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001921 &dest->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001922 ntohs(dest->port),
1923 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1924 atomic_read(&dest->weight),
1925 atomic_read(&dest->activeconns),
1926 atomic_read(&dest->inactconns));
1927 else
1928#endif
1929 seq_printf(seq,
1930 " -> %08X:%04X "
1931 "%-7s %-6d %-10d %-10d\n",
1932 ntohl(dest->addr.ip),
1933 ntohs(dest->port),
1934 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1935 atomic_read(&dest->weight),
1936 atomic_read(&dest->activeconns),
1937 atomic_read(&dest->inactconns));
1938
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939 }
1940 }
1941 return 0;
1942}
1943
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001944static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945 .start = ip_vs_info_seq_start,
1946 .next = ip_vs_info_seq_next,
1947 .stop = ip_vs_info_seq_stop,
1948 .show = ip_vs_info_seq_show,
1949};
1950
1951static int ip_vs_info_open(struct inode *inode, struct file *file)
1952{
Hans Schillstromfc723252011-01-03 14:44:43 +01001953 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001954 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955}
1956
Arjan van de Ven9a321442007-02-12 00:55:35 -08001957static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958 .owner = THIS_MODULE,
1959 .open = ip_vs_info_open,
1960 .read = seq_read,
1961 .llseek = seq_lseek,
1962 .release = seq_release_private,
1963};
1964
1965#endif
1966
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967#ifdef CONFIG_PROC_FS
1968static int ip_vs_stats_show(struct seq_file *seq, void *v)
1969{
Hans Schillstromb17fc992011-01-03 14:44:56 +01001970 struct net *net = seq_file_single_net(seq);
1971 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972
1973/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1974 seq_puts(seq,
1975 " Total Incoming Outgoing Incoming Outgoing\n");
1976 seq_printf(seq,
1977 " Conns Packets Packets Bytes Bytes\n");
1978
Hans Schillstromb17fc992011-01-03 14:44:56 +01001979 spin_lock_bh(&tot_stats->lock);
1980 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
1981 tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
1982 (unsigned long long) tot_stats->ustats.inbytes,
1983 (unsigned long long) tot_stats->ustats.outbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001984
1985/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1986 seq_puts(seq,
1987 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1988 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
Hans Schillstromb17fc992011-01-03 14:44:56 +01001989 tot_stats->ustats.cps,
1990 tot_stats->ustats.inpps,
1991 tot_stats->ustats.outpps,
1992 tot_stats->ustats.inbps,
1993 tot_stats->ustats.outbps);
1994 spin_unlock_bh(&tot_stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001995
1996 return 0;
1997}
1998
1999static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2000{
Hans Schillstromfc723252011-01-03 14:44:43 +01002001 return single_open_net(inode, file, ip_vs_stats_show);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002002}
2003
Arjan van de Ven9a321442007-02-12 00:55:35 -08002004static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002005 .owner = THIS_MODULE,
2006 .open = ip_vs_stats_seq_open,
2007 .read = seq_read,
2008 .llseek = seq_lseek,
2009 .release = single_release,
2010};
2011
Hans Schillstromb17fc992011-01-03 14:44:56 +01002012static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2013{
2014 struct net *net = seq_file_single_net(seq);
2015 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
2016 int i;
2017
2018/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2019 seq_puts(seq,
2020 " Total Incoming Outgoing Incoming Outgoing\n");
2021 seq_printf(seq,
2022 "CPU Conns Packets Packets Bytes Bytes\n");
2023
2024 for_each_possible_cpu(i) {
2025 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
2026 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2027 i, u->ustats.conns, u->ustats.inpkts,
2028 u->ustats.outpkts, (__u64)u->ustats.inbytes,
2029 (__u64)u->ustats.outbytes);
2030 }
2031
2032 spin_lock_bh(&tot_stats->lock);
2033 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2034 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2035 tot_stats->ustats.outpkts,
2036 (unsigned long long) tot_stats->ustats.inbytes,
2037 (unsigned long long) tot_stats->ustats.outbytes);
2038
2039/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2040 seq_puts(seq,
2041 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2042 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2043 tot_stats->ustats.cps,
2044 tot_stats->ustats.inpps,
2045 tot_stats->ustats.outpps,
2046 tot_stats->ustats.inbps,
2047 tot_stats->ustats.outbps);
2048 spin_unlock_bh(&tot_stats->lock);
2049
2050 return 0;
2051}
2052
2053static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2054{
2055 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2056}
2057
2058static const struct file_operations ip_vs_stats_percpu_fops = {
2059 .owner = THIS_MODULE,
2060 .open = ip_vs_stats_percpu_seq_open,
2061 .read = seq_read,
2062 .llseek = seq_lseek,
2063 .release = single_release,
2064};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002065#endif
2066
2067/*
2068 * Set timeout values for tcp tcpfin udp in the timeout_table.
2069 */
Hans Schillstrom93304192011-01-03 14:44:51 +01002070static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002071{
Hans Schillstrom93304192011-01-03 14:44:51 +01002072 struct ip_vs_proto_data *pd;
2073
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2075 u->tcp_timeout,
2076 u->tcp_fin_timeout,
2077 u->udp_timeout);
2078
2079#ifdef CONFIG_IP_VS_PROTO_TCP
2080 if (u->tcp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002081 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2082 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083 = u->tcp_timeout * HZ;
2084 }
2085
2086 if (u->tcp_fin_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002087 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2088 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002089 = u->tcp_fin_timeout * HZ;
2090 }
2091#endif
2092
2093#ifdef CONFIG_IP_VS_PROTO_UDP
2094 if (u->udp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002095 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2096 pd->timeout_table[IP_VS_UDP_S_NORMAL]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097 = u->udp_timeout * HZ;
2098 }
2099#endif
2100 return 0;
2101}
2102
2103
2104#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2105#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2106#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2107 sizeof(struct ip_vs_dest_user))
2108#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2109#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2110#define MAX_ARG_LEN SVCDEST_ARG_LEN
2111
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002112static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002113 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2114 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2115 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2116 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2117 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2118 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2119 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2120 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2121 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2122 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2123 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2124};
2125
Julius Volzc860c6b2008-09-02 15:55:36 +02002126static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2127 struct ip_vs_service_user *usvc_compat)
2128{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002129 memset(usvc, 0, sizeof(*usvc));
2130
Julius Volzc860c6b2008-09-02 15:55:36 +02002131 usvc->af = AF_INET;
2132 usvc->protocol = usvc_compat->protocol;
2133 usvc->addr.ip = usvc_compat->addr;
2134 usvc->port = usvc_compat->port;
2135 usvc->fwmark = usvc_compat->fwmark;
2136
2137 /* Deep copy of sched_name is not needed here */
2138 usvc->sched_name = usvc_compat->sched_name;
2139
2140 usvc->flags = usvc_compat->flags;
2141 usvc->timeout = usvc_compat->timeout;
2142 usvc->netmask = usvc_compat->netmask;
2143}
2144
2145static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2146 struct ip_vs_dest_user *udest_compat)
2147{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002148 memset(udest, 0, sizeof(*udest));
2149
Julius Volzc860c6b2008-09-02 15:55:36 +02002150 udest->addr.ip = udest_compat->addr;
2151 udest->port = udest_compat->port;
2152 udest->conn_flags = udest_compat->conn_flags;
2153 udest->weight = udest_compat->weight;
2154 udest->u_threshold = udest_compat->u_threshold;
2155 udest->l_threshold = udest_compat->l_threshold;
2156}
2157
Linus Torvalds1da177e2005-04-16 15:20:36 -07002158static int
2159do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2160{
Hans Schillstromfc723252011-01-03 14:44:43 +01002161 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162 int ret;
2163 unsigned char arg[MAX_ARG_LEN];
Julius Volzc860c6b2008-09-02 15:55:36 +02002164 struct ip_vs_service_user *usvc_compat;
2165 struct ip_vs_service_user_kern usvc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002166 struct ip_vs_service *svc;
Julius Volzc860c6b2008-09-02 15:55:36 +02002167 struct ip_vs_dest_user *udest_compat;
2168 struct ip_vs_dest_user_kern udest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002169
2170 if (!capable(CAP_NET_ADMIN))
2171 return -EPERM;
2172
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002173 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2174 return -EINVAL;
2175 if (len < 0 || len > MAX_ARG_LEN)
2176 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002177 if (len != set_arglen[SET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002178 pr_err("set_ctl: len %u != %u\n",
2179 len, set_arglen[SET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180 return -EINVAL;
2181 }
2182
2183 if (copy_from_user(arg, user, len) != 0)
2184 return -EFAULT;
2185
2186 /* increase the module use count */
2187 ip_vs_use_count_inc();
2188
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002189 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190 ret = -ERESTARTSYS;
2191 goto out_dec;
2192 }
2193
2194 if (cmd == IP_VS_SO_SET_FLUSH) {
2195 /* Flush the virtual service */
Hans Schillstromfc723252011-01-03 14:44:43 +01002196 ret = ip_vs_flush(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002197 goto out_unlock;
2198 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2199 /* Set timeout values for (tcp tcpfin udp) */
Hans Schillstrom93304192011-01-03 14:44:51 +01002200 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002201 goto out_unlock;
2202 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2203 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002204 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2205 dm->syncid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206 goto out_unlock;
2207 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2208 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002209 ret = stop_sync_thread(net, dm->state);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210 goto out_unlock;
2211 }
2212
Julius Volzc860c6b2008-09-02 15:55:36 +02002213 usvc_compat = (struct ip_vs_service_user *)arg;
2214 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2215
2216 /* We only use the new structs internally, so copy userspace compat
2217 * structs to extended internal versions */
2218 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2219 ip_vs_copy_udest_compat(&udest, udest_compat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002220
2221 if (cmd == IP_VS_SO_SET_ZERO) {
2222 /* if no service address is set, zero counters in all */
Julius Volzc860c6b2008-09-02 15:55:36 +02002223 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002224 ret = ip_vs_zero_all(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225 goto out_unlock;
2226 }
2227 }
2228
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +01002229 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2230 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2231 usvc.protocol != IPPROTO_SCTP) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002232 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2233 usvc.protocol, &usvc.addr.ip,
2234 ntohs(usvc.port), usvc.sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002235 ret = -EFAULT;
2236 goto out_unlock;
2237 }
2238
2239 /* Lookup the exact service by <protocol, addr, port> or fwmark */
Julius Volzc860c6b2008-09-02 15:55:36 +02002240 if (usvc.fwmark == 0)
Hans Schillstromfc723252011-01-03 14:44:43 +01002241 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002242 &usvc.addr, usvc.port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002243 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002244 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245
2246 if (cmd != IP_VS_SO_SET_ADD
Julius Volzc860c6b2008-09-02 15:55:36 +02002247 && (svc == NULL || svc->protocol != usvc.protocol)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248 ret = -ESRCH;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002249 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002250 }
2251
2252 switch (cmd) {
2253 case IP_VS_SO_SET_ADD:
2254 if (svc != NULL)
2255 ret = -EEXIST;
2256 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002257 ret = ip_vs_add_service(net, &usvc, &svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002258 break;
2259 case IP_VS_SO_SET_EDIT:
Julius Volzc860c6b2008-09-02 15:55:36 +02002260 ret = ip_vs_edit_service(svc, &usvc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261 break;
2262 case IP_VS_SO_SET_DEL:
2263 ret = ip_vs_del_service(svc);
2264 if (!ret)
2265 goto out_unlock;
2266 break;
2267 case IP_VS_SO_SET_ZERO:
2268 ret = ip_vs_zero_service(svc);
2269 break;
2270 case IP_VS_SO_SET_ADDDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002271 ret = ip_vs_add_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002272 break;
2273 case IP_VS_SO_SET_EDITDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002274 ret = ip_vs_edit_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002275 break;
2276 case IP_VS_SO_SET_DELDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002277 ret = ip_vs_del_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278 break;
2279 default:
2280 ret = -EINVAL;
2281 }
2282
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002284 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285 out_dec:
2286 /* decrease the module use count */
2287 ip_vs_use_count_dec();
2288
2289 return ret;
2290}
2291
2292
2293static void
2294ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2295{
2296 spin_lock_bh(&src->lock);
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002297 memcpy(dst, &src->ustats, sizeof(*dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298 spin_unlock_bh(&src->lock);
2299}
2300
2301static void
2302ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2303{
2304 dst->protocol = src->protocol;
Julius Volze7ade462008-09-02 15:55:33 +02002305 dst->addr = src->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306 dst->port = src->port;
2307 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002308 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002309 dst->flags = src->flags;
2310 dst->timeout = src->timeout / HZ;
2311 dst->netmask = src->netmask;
2312 dst->num_dests = src->num_dests;
2313 ip_vs_copy_stats(&dst->stats, &src->stats);
2314}
2315
2316static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002317__ip_vs_get_service_entries(struct net *net,
2318 const struct ip_vs_get_services *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002319 struct ip_vs_get_services __user *uptr)
2320{
2321 int idx, count=0;
2322 struct ip_vs_service *svc;
2323 struct ip_vs_service_entry entry;
2324 int ret = 0;
2325
2326 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2327 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002328 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002329 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002330 continue;
2331
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332 if (count >= get->num_services)
2333 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002334 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002335 ip_vs_copy_service(&entry, svc);
2336 if (copy_to_user(&uptr->entrytable[count],
2337 &entry, sizeof(entry))) {
2338 ret = -EFAULT;
2339 goto out;
2340 }
2341 count++;
2342 }
2343 }
2344
2345 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2346 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002347 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002348 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002349 continue;
2350
Linus Torvalds1da177e2005-04-16 15:20:36 -07002351 if (count >= get->num_services)
2352 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002353 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002354 ip_vs_copy_service(&entry, svc);
2355 if (copy_to_user(&uptr->entrytable[count],
2356 &entry, sizeof(entry))) {
2357 ret = -EFAULT;
2358 goto out;
2359 }
2360 count++;
2361 }
2362 }
2363 out:
2364 return ret;
2365}
2366
2367static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002368__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002369 struct ip_vs_get_dests __user *uptr)
2370{
2371 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002372 union nf_inet_addr addr = { .ip = get->addr };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002373 int ret = 0;
2374
2375 if (get->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002376 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002377 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002378 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002379 get->port);
Julius Volzb18610d2008-09-02 15:55:37 +02002380
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381 if (svc) {
2382 int count = 0;
2383 struct ip_vs_dest *dest;
2384 struct ip_vs_dest_entry entry;
2385
2386 list_for_each_entry(dest, &svc->destinations, n_list) {
2387 if (count >= get->num_dests)
2388 break;
2389
Julius Volze7ade462008-09-02 15:55:33 +02002390 entry.addr = dest->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002391 entry.port = dest->port;
2392 entry.conn_flags = atomic_read(&dest->conn_flags);
2393 entry.weight = atomic_read(&dest->weight);
2394 entry.u_threshold = dest->u_threshold;
2395 entry.l_threshold = dest->l_threshold;
2396 entry.activeconns = atomic_read(&dest->activeconns);
2397 entry.inactconns = atomic_read(&dest->inactconns);
2398 entry.persistconns = atomic_read(&dest->persistconns);
2399 ip_vs_copy_stats(&entry.stats, &dest->stats);
2400 if (copy_to_user(&uptr->entrytable[count],
2401 &entry, sizeof(entry))) {
2402 ret = -EFAULT;
2403 break;
2404 }
2405 count++;
2406 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002407 } else
2408 ret = -ESRCH;
2409 return ret;
2410}
2411
2412static inline void
Hans Schillstrom93304192011-01-03 14:44:51 +01002413__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002414{
Hans Schillstrom93304192011-01-03 14:44:51 +01002415 struct ip_vs_proto_data *pd;
2416
Linus Torvalds1da177e2005-04-16 15:20:36 -07002417#ifdef CONFIG_IP_VS_PROTO_TCP
Hans Schillstrom93304192011-01-03 14:44:51 +01002418 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2419 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2420 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002421#endif
2422#ifdef CONFIG_IP_VS_PROTO_UDP
Hans Schillstrom93304192011-01-03 14:44:51 +01002423 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002424 u->udp_timeout =
Hans Schillstrom93304192011-01-03 14:44:51 +01002425 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002426#endif
2427}
2428
2429
/*
 * Helpers for the getsockopt argument-length table.
 *
 * GET_CMDID() turns a command number into an offset from IP_VS_BASE_CTL.
 * The argument is parenthesised so that expression arguments keep their
 * intended precedence.  The *_ARG_LEN macros give the size of the fixed
 * request header each command expects.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2437
/*
 * Length of the fixed request header for each getsockopt command, indexed
 * by GET_CMDID(cmd).  do_ip_vs_get_ctl() rejects user buffers shorter than
 * the entry for the command and copies exactly that many bytes of the
 * request into its on-stack argument buffer.  Indices without an explicit
 * initializer are zero.
 */
static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
	[GET_CMDID(IP_VS_SO_GET_VERSION)]	= 64,
	[GET_CMDID(IP_VS_SO_GET_INFO)]		= GET_INFO_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICES)]	= GET_SERVICES_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICE)]	= GET_SERVICE_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DESTS)]		= GET_DESTS_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_TIMEOUT)]	= GET_TIMEOUT_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DAEMON)]	= GET_DAEMON_ARG_LEN,
};
2447
2448static int
2449do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2450{
2451 unsigned char arg[128];
2452 int ret = 0;
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002453 unsigned int copylen;
Hans Schillstromfc723252011-01-03 14:44:43 +01002454 struct net *net = sock_net(sk);
Hans Schillstromf1313152011-01-03 14:44:55 +01002455 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002456
Hans Schillstromfc723252011-01-03 14:44:43 +01002457 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002458 if (!capable(CAP_NET_ADMIN))
2459 return -EPERM;
2460
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002461 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2462 return -EINVAL;
2463
Linus Torvalds1da177e2005-04-16 15:20:36 -07002464 if (*len < get_arglen[GET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002465 pr_err("get_ctl: len %u < %u\n",
2466 *len, get_arglen[GET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002467 return -EINVAL;
2468 }
2469
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002470 copylen = get_arglen[GET_CMDID(cmd)];
2471 if (copylen > 128)
2472 return -EINVAL;
2473
2474 if (copy_from_user(arg, user, copylen) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002475 return -EFAULT;
2476
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002477 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478 return -ERESTARTSYS;
2479
2480 switch (cmd) {
2481 case IP_VS_SO_GET_VERSION:
2482 {
2483 char buf[64];
2484
2485 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002486 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002487 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2488 ret = -EFAULT;
2489 goto out;
2490 }
2491 *len = strlen(buf)+1;
2492 }
2493 break;
2494
2495 case IP_VS_SO_GET_INFO:
2496 {
2497 struct ip_vs_getinfo info;
2498 info.version = IP_VS_VERSION_CODE;
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002499 info.size = ip_vs_conn_tab_size;
Hans Schillstroma0840e22011-01-03 14:44:58 +01002500 info.num_services = ipvs->num_services;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002501 if (copy_to_user(user, &info, sizeof(info)) != 0)
2502 ret = -EFAULT;
2503 }
2504 break;
2505
2506 case IP_VS_SO_GET_SERVICES:
2507 {
2508 struct ip_vs_get_services *get;
2509 int size;
2510
2511 get = (struct ip_vs_get_services *)arg;
2512 size = sizeof(*get) +
2513 sizeof(struct ip_vs_service_entry) * get->num_services;
2514 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002515 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002516 ret = -EINVAL;
2517 goto out;
2518 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002519 ret = __ip_vs_get_service_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002520 }
2521 break;
2522
2523 case IP_VS_SO_GET_SERVICE:
2524 {
2525 struct ip_vs_service_entry *entry;
2526 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002527 union nf_inet_addr addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002528
2529 entry = (struct ip_vs_service_entry *)arg;
Julius Volzb18610d2008-09-02 15:55:37 +02002530 addr.ip = entry->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002531 if (entry->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002532 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002533 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002534 svc = __ip_vs_service_find(net, AF_INET,
2535 entry->protocol, &addr,
2536 entry->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002537 if (svc) {
2538 ip_vs_copy_service(entry, svc);
2539 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2540 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002541 } else
2542 ret = -ESRCH;
2543 }
2544 break;
2545
2546 case IP_VS_SO_GET_DESTS:
2547 {
2548 struct ip_vs_get_dests *get;
2549 int size;
2550
2551 get = (struct ip_vs_get_dests *)arg;
2552 size = sizeof(*get) +
2553 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2554 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002555 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556 ret = -EINVAL;
2557 goto out;
2558 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002559 ret = __ip_vs_get_dest_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560 }
2561 break;
2562
2563 case IP_VS_SO_GET_TIMEOUT:
2564 {
2565 struct ip_vs_timeout_user t;
2566
Hans Schillstrom93304192011-01-03 14:44:51 +01002567 __ip_vs_get_timeouts(net, &t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002568 if (copy_to_user(user, &t, sizeof(t)) != 0)
2569 ret = -EFAULT;
2570 }
2571 break;
2572
2573 case IP_VS_SO_GET_DAEMON:
2574 {
2575 struct ip_vs_daemon_user d[2];
2576
2577 memset(&d, 0, sizeof(d));
Hans Schillstromf1313152011-01-03 14:44:55 +01002578 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002579 d[0].state = IP_VS_STATE_MASTER;
Hans Schillstromf1313152011-01-03 14:44:55 +01002580 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2581 sizeof(d[0].mcast_ifn));
2582 d[0].syncid = ipvs->master_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002583 }
Hans Schillstromf1313152011-01-03 14:44:55 +01002584 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585 d[1].state = IP_VS_STATE_BACKUP;
Hans Schillstromf1313152011-01-03 14:44:55 +01002586 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2587 sizeof(d[1].mcast_ifn));
2588 d[1].syncid = ipvs->backup_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002589 }
2590 if (copy_to_user(user, &d, sizeof(d)) != 0)
2591 ret = -EFAULT;
2592 }
2593 break;
2594
2595 default:
2596 ret = -EINVAL;
2597 }
2598
2599 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002600 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002601 return ret;
2602}
2603
2604
/*
 * Socket option registration for the PF_INET IPVS control interface:
 * set commands are dispatched to do_ip_vs_set_ctl() and get commands to
 * do_ip_vs_get_ctl().  Both ranges start at IP_VS_BASE_CTL.
 * NOTE(review): the "+1" on the upper bounds suggests optmax is treated as
 * exclusive by the sockopt core -- confirm against nf_sockopt.
 */
static struct nf_sockopt_ops ip_vs_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IP_VS_BASE_CTL,
	.set_optmax	= IP_VS_SO_SET_MAX+1,
	.set		= do_ip_vs_set_ctl,
	.get_optmin	= IP_VS_BASE_CTL,
	.get_optmax	= IP_VS_SO_GET_MAX+1,
	.get		= do_ip_vs_get_ctl,
	.owner		= THIS_MODULE,
};
2615
Julius Volz9a812192008-08-14 14:08:44 +02002616/*
2617 * Generic Netlink interface
2618 */
2619
2620/* IPVS genetlink family */
2621static struct genl_family ip_vs_genl_family = {
2622 .id = GENL_ID_GENERATE,
2623 .hdrsize = 0,
2624 .name = IPVS_GENL_NAME,
2625 .version = IPVS_GENL_VERSION,
2626 .maxattr = IPVS_CMD_MAX,
2627};
2628
/*
 * Policy used for first-level command attributes.  Service, destination
 * and daemon descriptions are nested attribute sets (parsed with the
 * nested policies declared in this file); the three timeouts are plain
 * 32-bit values.
 */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
};
2638
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON:
 * daemon state, multicast interface name (NUL-terminated string bounded
 * by IP_VS_IFNAME_MAXLEN) and sync id.
 */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_IFNAME_MAXLEN },
	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
};
2646
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE.
 * The address and flags are carried as binary blobs bounded by the size
 * of union nf_inet_addr and struct ip_vs_flags respectively; scheduler
 * and persistence-engine names are NUL-terminated strings.
 */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_SCHEDNAME_MAXLEN },
	[IPVS_SVC_ATTR_PE_NAME]		= { .type = NLA_NUL_STRING,
					    .len = IP_VS_PENAME_MAXLEN },
	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
					    .len = sizeof(struct ip_vs_flags) },
	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
};
2665
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST.
 * The address is a binary blob bounded by sizeof(union nf_inet_addr);
 * the statistics are a nested attribute set as emitted by
 * ip_vs_genl_fill_stats().
 */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
};
2680
2681static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2682 struct ip_vs_stats *stats)
2683{
2684 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2685 if (!nl_stats)
2686 return -EMSGSIZE;
2687
2688 spin_lock_bh(&stats->lock);
2689
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002690 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2691 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2692 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2693 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2694 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2695 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2696 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2697 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2698 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2699 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
Julius Volz9a812192008-08-14 14:08:44 +02002700
2701 spin_unlock_bh(&stats->lock);
2702
2703 nla_nest_end(skb, nl_stats);
2704
2705 return 0;
2706
2707nla_put_failure:
2708 spin_unlock_bh(&stats->lock);
2709 nla_nest_cancel(skb, nl_stats);
2710 return -EMSGSIZE;
2711}
2712
2713static int ip_vs_genl_fill_service(struct sk_buff *skb,
2714 struct ip_vs_service *svc)
2715{
2716 struct nlattr *nl_service;
2717 struct ip_vs_flags flags = { .flags = svc->flags,
2718 .mask = ~0 };
2719
2720 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2721 if (!nl_service)
2722 return -EMSGSIZE;
2723
Julius Volzf94fd042008-09-02 15:55:55 +02002724 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
Julius Volz9a812192008-08-14 14:08:44 +02002725
2726 if (svc->fwmark) {
2727 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2728 } else {
2729 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2730 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2731 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2732 }
2733
2734 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002735 if (svc->pe)
2736 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
Julius Volz9a812192008-08-14 14:08:44 +02002737 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2738 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2739 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2740
2741 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2742 goto nla_put_failure;
2743
2744 nla_nest_end(skb, nl_service);
2745
2746 return 0;
2747
2748nla_put_failure:
2749 nla_nest_cancel(skb, nl_service);
2750 return -EMSGSIZE;
2751}
2752
2753static int ip_vs_genl_dump_service(struct sk_buff *skb,
2754 struct ip_vs_service *svc,
2755 struct netlink_callback *cb)
2756{
2757 void *hdr;
2758
2759 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2760 &ip_vs_genl_family, NLM_F_MULTI,
2761 IPVS_CMD_NEW_SERVICE);
2762 if (!hdr)
2763 return -EMSGSIZE;
2764
2765 if (ip_vs_genl_fill_service(skb, svc) < 0)
2766 goto nla_put_failure;
2767
2768 return genlmsg_end(skb, hdr);
2769
2770nla_put_failure:
2771 genlmsg_cancel(skb, hdr);
2772 return -EMSGSIZE;
2773}
2774
2775static int ip_vs_genl_dump_services(struct sk_buff *skb,
2776 struct netlink_callback *cb)
2777{
2778 int idx = 0, i;
2779 int start = cb->args[0];
2780 struct ip_vs_service *svc;
Hans Schillstromfc723252011-01-03 14:44:43 +01002781 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002782
2783 mutex_lock(&__ip_vs_mutex);
2784 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2785 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002786 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002787 continue;
2788 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2789 idx--;
2790 goto nla_put_failure;
2791 }
2792 }
2793 }
2794
2795 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2796 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002797 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002798 continue;
2799 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2800 idx--;
2801 goto nla_put_failure;
2802 }
2803 }
2804 }
2805
2806nla_put_failure:
2807 mutex_unlock(&__ip_vs_mutex);
2808 cb->args[0] = idx;
2809
2810 return skb->len;
2811}
2812
Hans Schillstromfc723252011-01-03 14:44:43 +01002813static int ip_vs_genl_parse_service(struct net *net,
2814 struct ip_vs_service_user_kern *usvc,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002815 struct nlattr *nla, int full_entry,
2816 struct ip_vs_service **ret_svc)
Julius Volz9a812192008-08-14 14:08:44 +02002817{
2818 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2819 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002820 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002821
2822 /* Parse mandatory identifying service fields first */
2823 if (nla == NULL ||
2824 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2825 return -EINVAL;
2826
2827 nla_af = attrs[IPVS_SVC_ATTR_AF];
2828 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2829 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2830 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2831 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2832
2833 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2834 return -EINVAL;
2835
Simon Horman258c8892009-12-15 17:01:25 +01002836 memset(usvc, 0, sizeof(*usvc));
2837
Julius Volzc860c6b2008-09-02 15:55:36 +02002838 usvc->af = nla_get_u16(nla_af);
Julius Volzf94fd042008-09-02 15:55:55 +02002839#ifdef CONFIG_IP_VS_IPV6
2840 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2841#else
2842 if (usvc->af != AF_INET)
2843#endif
Julius Volz9a812192008-08-14 14:08:44 +02002844 return -EAFNOSUPPORT;
2845
2846 if (nla_fwmark) {
2847 usvc->protocol = IPPROTO_TCP;
2848 usvc->fwmark = nla_get_u32(nla_fwmark);
2849 } else {
2850 usvc->protocol = nla_get_u16(nla_protocol);
2851 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2852 usvc->port = nla_get_u16(nla_port);
2853 usvc->fwmark = 0;
2854 }
2855
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002856 if (usvc->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002857 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002858 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002859 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002860 &usvc->addr, usvc->port);
2861 *ret_svc = svc;
2862
Julius Volz9a812192008-08-14 14:08:44 +02002863 /* If a full entry was requested, check for the additional fields */
2864 if (full_entry) {
Simon Horman0d1e71b2010-08-22 21:37:54 +09002865 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
Julius Volz9a812192008-08-14 14:08:44 +02002866 *nla_netmask;
2867 struct ip_vs_flags flags;
Julius Volz9a812192008-08-14 14:08:44 +02002868
2869 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
Simon Horman0d1e71b2010-08-22 21:37:54 +09002870 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
Julius Volz9a812192008-08-14 14:08:44 +02002871 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2872 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2873 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2874
2875 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2876 return -EINVAL;
2877
2878 nla_memcpy(&flags, nla_flags, sizeof(flags));
2879
2880 /* prefill flags from service if it already exists */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002881 if (svc)
Julius Volz9a812192008-08-14 14:08:44 +02002882 usvc->flags = svc->flags;
Julius Volz9a812192008-08-14 14:08:44 +02002883
2884 /* set new flags from userland */
2885 usvc->flags = (usvc->flags & ~flags.mask) |
2886 (flags.flags & flags.mask);
Julius Volzc860c6b2008-09-02 15:55:36 +02002887 usvc->sched_name = nla_data(nla_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002888 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
Julius Volz9a812192008-08-14 14:08:44 +02002889 usvc->timeout = nla_get_u32(nla_timeout);
2890 usvc->netmask = nla_get_u32(nla_netmask);
2891 }
2892
2893 return 0;
2894}
2895
Hans Schillstromfc723252011-01-03 14:44:43 +01002896static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2897 struct nlattr *nla)
Julius Volz9a812192008-08-14 14:08:44 +02002898{
Julius Volzc860c6b2008-09-02 15:55:36 +02002899 struct ip_vs_service_user_kern usvc;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002900 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002901 int ret;
2902
Hans Schillstromfc723252011-01-03 14:44:43 +01002903 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002904 return ret ? ERR_PTR(ret) : svc;
Julius Volz9a812192008-08-14 14:08:44 +02002905}
2906
2907static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2908{
2909 struct nlattr *nl_dest;
2910
2911 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2912 if (!nl_dest)
2913 return -EMSGSIZE;
2914
2915 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2916 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2917
2918 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2919 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2920 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2921 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2922 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2923 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2924 atomic_read(&dest->activeconns));
2925 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2926 atomic_read(&dest->inactconns));
2927 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2928 atomic_read(&dest->persistconns));
2929
2930 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2931 goto nla_put_failure;
2932
2933 nla_nest_end(skb, nl_dest);
2934
2935 return 0;
2936
2937nla_put_failure:
2938 nla_nest_cancel(skb, nl_dest);
2939 return -EMSGSIZE;
2940}
2941
2942static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2943 struct netlink_callback *cb)
2944{
2945 void *hdr;
2946
2947 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2948 &ip_vs_genl_family, NLM_F_MULTI,
2949 IPVS_CMD_NEW_DEST);
2950 if (!hdr)
2951 return -EMSGSIZE;
2952
2953 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2954 goto nla_put_failure;
2955
2956 return genlmsg_end(skb, hdr);
2957
2958nla_put_failure:
2959 genlmsg_cancel(skb, hdr);
2960 return -EMSGSIZE;
2961}
2962
2963static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2964 struct netlink_callback *cb)
2965{
2966 int idx = 0;
2967 int start = cb->args[0];
2968 struct ip_vs_service *svc;
2969 struct ip_vs_dest *dest;
2970 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
Hans Schillstroma0840e22011-01-03 14:44:58 +01002971 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002972
2973 mutex_lock(&__ip_vs_mutex);
2974
2975 /* Try to find the service for which to dump destinations */
2976 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2977 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2978 goto out_err;
2979
Hans Schillstroma0840e22011-01-03 14:44:58 +01002980
Hans Schillstromfc723252011-01-03 14:44:43 +01002981 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02002982 if (IS_ERR(svc) || svc == NULL)
2983 goto out_err;
2984
2985 /* Dump the destinations */
2986 list_for_each_entry(dest, &svc->destinations, n_list) {
2987 if (++idx <= start)
2988 continue;
2989 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2990 idx--;
2991 goto nla_put_failure;
2992 }
2993 }
2994
2995nla_put_failure:
2996 cb->args[0] = idx;
Julius Volz9a812192008-08-14 14:08:44 +02002997
2998out_err:
2999 mutex_unlock(&__ip_vs_mutex);
3000
3001 return skb->len;
3002}
3003
Julius Volzc860c6b2008-09-02 15:55:36 +02003004static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
Julius Volz9a812192008-08-14 14:08:44 +02003005 struct nlattr *nla, int full_entry)
3006{
3007 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3008 struct nlattr *nla_addr, *nla_port;
3009
3010 /* Parse mandatory identifying destination fields first */
3011 if (nla == NULL ||
3012 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3013 return -EINVAL;
3014
3015 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3016 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3017
3018 if (!(nla_addr && nla_port))
3019 return -EINVAL;
3020
Simon Horman258c8892009-12-15 17:01:25 +01003021 memset(udest, 0, sizeof(*udest));
3022
Julius Volz9a812192008-08-14 14:08:44 +02003023 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3024 udest->port = nla_get_u16(nla_port);
3025
3026 /* If a full entry was requested, check for the additional fields */
3027 if (full_entry) {
3028 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3029 *nla_l_thresh;
3030
3031 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3032 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3033 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3034 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3035
3036 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3037 return -EINVAL;
3038
3039 udest->conn_flags = nla_get_u32(nla_fwd)
3040 & IP_VS_CONN_F_FWD_MASK;
3041 udest->weight = nla_get_u32(nla_weight);
3042 udest->u_threshold = nla_get_u32(nla_u_thresh);
3043 udest->l_threshold = nla_get_u32(nla_l_thresh);
3044 }
3045
3046 return 0;
3047}
3048
3049static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3050 const char *mcast_ifn, __be32 syncid)
3051{
3052 struct nlattr *nl_daemon;
3053
3054 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3055 if (!nl_daemon)
3056 return -EMSGSIZE;
3057
3058 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3059 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3060 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3061
3062 nla_nest_end(skb, nl_daemon);
3063
3064 return 0;
3065
3066nla_put_failure:
3067 nla_nest_cancel(skb, nl_daemon);
3068 return -EMSGSIZE;
3069}
3070
3071static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3072 const char *mcast_ifn, __be32 syncid,
3073 struct netlink_callback *cb)
3074{
3075 void *hdr;
3076 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3077 &ip_vs_genl_family, NLM_F_MULTI,
3078 IPVS_CMD_NEW_DAEMON);
3079 if (!hdr)
3080 return -EMSGSIZE;
3081
3082 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3083 goto nla_put_failure;
3084
3085 return genlmsg_end(skb, hdr);
3086
3087nla_put_failure:
3088 genlmsg_cancel(skb, hdr);
3089 return -EMSGSIZE;
3090}
3091
3092static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3093 struct netlink_callback *cb)
3094{
Hans Schillstromf1313152011-01-03 14:44:55 +01003095 struct net *net = skb_net(skb);
3096 struct netns_ipvs *ipvs = net_ipvs(net);
3097
Julius Volz9a812192008-08-14 14:08:44 +02003098 mutex_lock(&__ip_vs_mutex);
Hans Schillstromf1313152011-01-03 14:44:55 +01003099 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
Julius Volz9a812192008-08-14 14:08:44 +02003100 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
Hans Schillstromf1313152011-01-03 14:44:55 +01003101 ipvs->master_mcast_ifn,
3102 ipvs->master_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003103 goto nla_put_failure;
3104
3105 cb->args[0] = 1;
3106 }
3107
Hans Schillstromf1313152011-01-03 14:44:55 +01003108 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
Julius Volz9a812192008-08-14 14:08:44 +02003109 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
Hans Schillstromf1313152011-01-03 14:44:55 +01003110 ipvs->backup_mcast_ifn,
3111 ipvs->backup_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003112 goto nla_put_failure;
3113
3114 cb->args[1] = 1;
3115 }
3116
3117nla_put_failure:
3118 mutex_unlock(&__ip_vs_mutex);
3119
3120 return skb->len;
3121}
3122
Hans Schillstromf1313152011-01-03 14:44:55 +01003123static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003124{
3125 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3126 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3127 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3128 return -EINVAL;
3129
Hans Schillstromf1313152011-01-03 14:44:55 +01003130 return start_sync_thread(net,
3131 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
Julius Volz9a812192008-08-14 14:08:44 +02003132 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3133 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3134}
3135
Hans Schillstromf1313152011-01-03 14:44:55 +01003136static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003137{
3138 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3139 return -EINVAL;
3140
Hans Schillstromf1313152011-01-03 14:44:55 +01003141 return stop_sync_thread(net,
3142 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
Julius Volz9a812192008-08-14 14:08:44 +02003143}
3144
Hans Schillstrom93304192011-01-03 14:44:51 +01003145static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003146{
3147 struct ip_vs_timeout_user t;
3148
Hans Schillstrom93304192011-01-03 14:44:51 +01003149 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003150
3151 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3152 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3153
3154 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3155 t.tcp_fin_timeout =
3156 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3157
3158 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3159 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3160
Hans Schillstrom93304192011-01-03 14:44:51 +01003161 return ip_vs_set_timeout(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003162}
3163
3164static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3165{
3166 struct ip_vs_service *svc = NULL;
Julius Volzc860c6b2008-09-02 15:55:36 +02003167 struct ip_vs_service_user_kern usvc;
3168 struct ip_vs_dest_user_kern udest;
Julius Volz9a812192008-08-14 14:08:44 +02003169 int ret = 0, cmd;
3170 int need_full_svc = 0, need_full_dest = 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003171 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003172 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003173
Hans Schillstromfc723252011-01-03 14:44:43 +01003174 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003175 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003176 cmd = info->genlhdr->cmd;
3177
3178 mutex_lock(&__ip_vs_mutex);
3179
3180 if (cmd == IPVS_CMD_FLUSH) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003181 ret = ip_vs_flush(net);
Julius Volz9a812192008-08-14 14:08:44 +02003182 goto out;
3183 } else if (cmd == IPVS_CMD_SET_CONFIG) {
Hans Schillstrom93304192011-01-03 14:44:51 +01003184 ret = ip_vs_genl_set_config(net, info->attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003185 goto out;
3186 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3187 cmd == IPVS_CMD_DEL_DAEMON) {
3188
3189 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3190
3191 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3192 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3193 info->attrs[IPVS_CMD_ATTR_DAEMON],
3194 ip_vs_daemon_policy)) {
3195 ret = -EINVAL;
3196 goto out;
3197 }
3198
3199 if (cmd == IPVS_CMD_NEW_DAEMON)
Hans Schillstromf1313152011-01-03 14:44:55 +01003200 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003201 else
Hans Schillstromf1313152011-01-03 14:44:55 +01003202 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003203 goto out;
3204 } else if (cmd == IPVS_CMD_ZERO &&
3205 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003206 ret = ip_vs_zero_all(net);
Julius Volz9a812192008-08-14 14:08:44 +02003207 goto out;
3208 }
3209
3210 /* All following commands require a service argument, so check if we
3211 * received a valid one. We need a full service specification when
3212 * adding / editing a service. Only identifying members otherwise. */
3213 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3214 need_full_svc = 1;
3215
Hans Schillstromfc723252011-01-03 14:44:43 +01003216 ret = ip_vs_genl_parse_service(net, &usvc,
Julius Volz9a812192008-08-14 14:08:44 +02003217 info->attrs[IPVS_CMD_ATTR_SERVICE],
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003218 need_full_svc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003219 if (ret)
3220 goto out;
3221
Julius Volz9a812192008-08-14 14:08:44 +02003222 /* Unless we're adding a new service, the service must already exist */
3223 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3224 ret = -ESRCH;
3225 goto out;
3226 }
3227
3228 /* Destination commands require a valid destination argument. For
3229 * adding / editing a destination, we need a full destination
3230 * specification. */
3231 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3232 cmd == IPVS_CMD_DEL_DEST) {
3233 if (cmd != IPVS_CMD_DEL_DEST)
3234 need_full_dest = 1;
3235
3236 ret = ip_vs_genl_parse_dest(&udest,
3237 info->attrs[IPVS_CMD_ATTR_DEST],
3238 need_full_dest);
3239 if (ret)
3240 goto out;
3241 }
3242
3243 switch (cmd) {
3244 case IPVS_CMD_NEW_SERVICE:
3245 if (svc == NULL)
Hans Schillstromfc723252011-01-03 14:44:43 +01003246 ret = ip_vs_add_service(net, &usvc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003247 else
3248 ret = -EEXIST;
3249 break;
3250 case IPVS_CMD_SET_SERVICE:
3251 ret = ip_vs_edit_service(svc, &usvc);
3252 break;
3253 case IPVS_CMD_DEL_SERVICE:
3254 ret = ip_vs_del_service(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003255 /* do not use svc, it can be freed */
Julius Volz9a812192008-08-14 14:08:44 +02003256 break;
3257 case IPVS_CMD_NEW_DEST:
3258 ret = ip_vs_add_dest(svc, &udest);
3259 break;
3260 case IPVS_CMD_SET_DEST:
3261 ret = ip_vs_edit_dest(svc, &udest);
3262 break;
3263 case IPVS_CMD_DEL_DEST:
3264 ret = ip_vs_del_dest(svc, &udest);
3265 break;
3266 case IPVS_CMD_ZERO:
3267 ret = ip_vs_zero_service(svc);
3268 break;
3269 default:
3270 ret = -EINVAL;
3271 }
3272
3273out:
Julius Volz9a812192008-08-14 14:08:44 +02003274 mutex_unlock(&__ip_vs_mutex);
3275
3276 return ret;
3277}
3278
3279static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3280{
3281 struct sk_buff *msg;
3282 void *reply;
3283 int ret, cmd, reply_cmd;
Hans Schillstromfc723252011-01-03 14:44:43 +01003284 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003285 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003286
Hans Schillstromfc723252011-01-03 14:44:43 +01003287 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003288 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003289 cmd = info->genlhdr->cmd;
3290
3291 if (cmd == IPVS_CMD_GET_SERVICE)
3292 reply_cmd = IPVS_CMD_NEW_SERVICE;
3293 else if (cmd == IPVS_CMD_GET_INFO)
3294 reply_cmd = IPVS_CMD_SET_INFO;
3295 else if (cmd == IPVS_CMD_GET_CONFIG)
3296 reply_cmd = IPVS_CMD_SET_CONFIG;
3297 else {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003298 pr_err("unknown Generic Netlink command\n");
Julius Volz9a812192008-08-14 14:08:44 +02003299 return -EINVAL;
3300 }
3301
3302 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3303 if (!msg)
3304 return -ENOMEM;
3305
3306 mutex_lock(&__ip_vs_mutex);
3307
3308 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3309 if (reply == NULL)
3310 goto nla_put_failure;
3311
3312 switch (cmd) {
3313 case IPVS_CMD_GET_SERVICE:
3314 {
3315 struct ip_vs_service *svc;
3316
Hans Schillstromfc723252011-01-03 14:44:43 +01003317 svc = ip_vs_genl_find_service(net,
3318 info->attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003319 if (IS_ERR(svc)) {
3320 ret = PTR_ERR(svc);
3321 goto out_err;
3322 } else if (svc) {
3323 ret = ip_vs_genl_fill_service(msg, svc);
Julius Volz9a812192008-08-14 14:08:44 +02003324 if (ret)
3325 goto nla_put_failure;
3326 } else {
3327 ret = -ESRCH;
3328 goto out_err;
3329 }
3330
3331 break;
3332 }
3333
3334 case IPVS_CMD_GET_CONFIG:
3335 {
3336 struct ip_vs_timeout_user t;
3337
Hans Schillstrom93304192011-01-03 14:44:51 +01003338 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003339#ifdef CONFIG_IP_VS_PROTO_TCP
3340 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3341 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3342 t.tcp_fin_timeout);
3343#endif
3344#ifdef CONFIG_IP_VS_PROTO_UDP
3345 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3346#endif
3347
3348 break;
3349 }
3350
3351 case IPVS_CMD_GET_INFO:
3352 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3353 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01003354 ip_vs_conn_tab_size);
Julius Volz9a812192008-08-14 14:08:44 +02003355 break;
3356 }
3357
3358 genlmsg_end(msg, reply);
Johannes Berg134e6372009-07-10 09:51:34 +00003359 ret = genlmsg_reply(msg, info);
Julius Volz9a812192008-08-14 14:08:44 +02003360 goto out;
3361
3362nla_put_failure:
Hannes Eder1e3e2382009-08-02 11:05:41 +00003363 pr_err("not enough space in Netlink message\n");
Julius Volz9a812192008-08-14 14:08:44 +02003364 ret = -EMSGSIZE;
3365
3366out_err:
3367 nlmsg_free(msg);
3368out:
3369 mutex_unlock(&__ip_vs_mutex);
3370
3371 return ret;
3372}
3373
3374
3375static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3376 {
3377 .cmd = IPVS_CMD_NEW_SERVICE,
3378 .flags = GENL_ADMIN_PERM,
3379 .policy = ip_vs_cmd_policy,
3380 .doit = ip_vs_genl_set_cmd,
3381 },
3382 {
3383 .cmd = IPVS_CMD_SET_SERVICE,
3384 .flags = GENL_ADMIN_PERM,
3385 .policy = ip_vs_cmd_policy,
3386 .doit = ip_vs_genl_set_cmd,
3387 },
3388 {
3389 .cmd = IPVS_CMD_DEL_SERVICE,
3390 .flags = GENL_ADMIN_PERM,
3391 .policy = ip_vs_cmd_policy,
3392 .doit = ip_vs_genl_set_cmd,
3393 },
3394 {
3395 .cmd = IPVS_CMD_GET_SERVICE,
3396 .flags = GENL_ADMIN_PERM,
3397 .doit = ip_vs_genl_get_cmd,
3398 .dumpit = ip_vs_genl_dump_services,
3399 .policy = ip_vs_cmd_policy,
3400 },
3401 {
3402 .cmd = IPVS_CMD_NEW_DEST,
3403 .flags = GENL_ADMIN_PERM,
3404 .policy = ip_vs_cmd_policy,
3405 .doit = ip_vs_genl_set_cmd,
3406 },
3407 {
3408 .cmd = IPVS_CMD_SET_DEST,
3409 .flags = GENL_ADMIN_PERM,
3410 .policy = ip_vs_cmd_policy,
3411 .doit = ip_vs_genl_set_cmd,
3412 },
3413 {
3414 .cmd = IPVS_CMD_DEL_DEST,
3415 .flags = GENL_ADMIN_PERM,
3416 .policy = ip_vs_cmd_policy,
3417 .doit = ip_vs_genl_set_cmd,
3418 },
3419 {
3420 .cmd = IPVS_CMD_GET_DEST,
3421 .flags = GENL_ADMIN_PERM,
3422 .policy = ip_vs_cmd_policy,
3423 .dumpit = ip_vs_genl_dump_dests,
3424 },
3425 {
3426 .cmd = IPVS_CMD_NEW_DAEMON,
3427 .flags = GENL_ADMIN_PERM,
3428 .policy = ip_vs_cmd_policy,
3429 .doit = ip_vs_genl_set_cmd,
3430 },
3431 {
3432 .cmd = IPVS_CMD_DEL_DAEMON,
3433 .flags = GENL_ADMIN_PERM,
3434 .policy = ip_vs_cmd_policy,
3435 .doit = ip_vs_genl_set_cmd,
3436 },
3437 {
3438 .cmd = IPVS_CMD_GET_DAEMON,
3439 .flags = GENL_ADMIN_PERM,
3440 .dumpit = ip_vs_genl_dump_daemons,
3441 },
3442 {
3443 .cmd = IPVS_CMD_SET_CONFIG,
3444 .flags = GENL_ADMIN_PERM,
3445 .policy = ip_vs_cmd_policy,
3446 .doit = ip_vs_genl_set_cmd,
3447 },
3448 {
3449 .cmd = IPVS_CMD_GET_CONFIG,
3450 .flags = GENL_ADMIN_PERM,
3451 .doit = ip_vs_genl_get_cmd,
3452 },
3453 {
3454 .cmd = IPVS_CMD_GET_INFO,
3455 .flags = GENL_ADMIN_PERM,
3456 .doit = ip_vs_genl_get_cmd,
3457 },
3458 {
3459 .cmd = IPVS_CMD_ZERO,
3460 .flags = GENL_ADMIN_PERM,
3461 .policy = ip_vs_cmd_policy,
3462 .doit = ip_vs_genl_set_cmd,
3463 },
3464 {
3465 .cmd = IPVS_CMD_FLUSH,
3466 .flags = GENL_ADMIN_PERM,
3467 .doit = ip_vs_genl_set_cmd,
3468 },
3469};
3470
3471static int __init ip_vs_genl_register(void)
3472{
Michał Mirosław8f698d52009-05-21 10:34:05 +00003473 return genl_register_family_with_ops(&ip_vs_genl_family,
3474 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
Julius Volz9a812192008-08-14 14:08:44 +02003475}
3476
3477static void ip_vs_genl_unregister(void)
3478{
3479 genl_unregister_family(&ip_vs_genl_family);
3480}
3481
3482/* End of Generic Netlink interface definitions */
3483
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003484/*
3485 * per netns intit/exit func.
3486 */
3487int __net_init __ip_vs_control_init(struct net *net)
3488{
Hans Schillstromfc723252011-01-03 14:44:43 +01003489 int idx;
3490 struct netns_ipvs *ipvs = net_ipvs(net);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003491 struct ctl_table *tbl;
Hans Schillstromfc723252011-01-03 14:44:43 +01003492
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003493 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3494 return -EPERM;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003495
3496 atomic_set(&ipvs->dropentry, 0);
3497 spin_lock_init(&ipvs->dropentry_lock);
3498 spin_lock_init(&ipvs->droppacket_lock);
3499 spin_lock_init(&ipvs->securetcp_lock);
3500 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3501
3502 /* Initialize rs_table */
3503 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3504 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3505
Hans Schillstromb17fc992011-01-03 14:44:56 +01003506 /* procfs stats */
3507 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3508 if (ipvs->tot_stats == NULL) {
3509 pr_err("%s(): no memory.\n", __func__);
3510 return -ENOMEM;
3511 }
3512 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3513 if (!ipvs->cpustats) {
3514 pr_err("%s() alloc_percpu failed\n", __func__);
3515 goto err_alloc;
3516 }
3517 spin_lock_init(&ipvs->tot_stats->lock);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003518
Hans Schillstromfc723252011-01-03 14:44:43 +01003519 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3520 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3521
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003522 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3523 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003524 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3525 &ip_vs_stats_percpu_fops);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003526
3527 if (!net_eq(net, &init_net)) {
3528 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3529 if (tbl == NULL)
3530 goto err_dup;
3531 } else
3532 tbl = vs_vars;
3533 /* Initialize sysctl defaults */
3534 idx = 0;
3535 ipvs->sysctl_amemthresh = 1024;
3536 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3537 ipvs->sysctl_am_droprate = 10;
3538 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3539 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3540 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3541#ifdef CONFIG_IP_VS_NFCT
3542 tbl[idx++].data = &ipvs->sysctl_conntrack;
3543#endif
3544 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3545 ipvs->sysctl_snat_reroute = 1;
3546 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3547 ipvs->sysctl_sync_ver = 1;
3548 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3549 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3550 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3551 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3552 ipvs->sysctl_sync_threshold[0] = 3;
3553 ipvs->sysctl_sync_threshold[1] = 50;
3554 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3555 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3556 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3557
3558
3559 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003560 vs_vars);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003561 if (ipvs->sysctl_hdr == NULL)
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003562 goto err_reg;
Hans Schillstromb17fc992011-01-03 14:44:56 +01003563 ip_vs_new_estimator(net, ipvs->tot_stats);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003564 ipvs->sysctl_tbl = tbl;
Hans Schillstromf6340ee2011-01-03 14:44:59 +01003565 /* Schedule defense work */
3566 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3567 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003568 return 0;
3569
3570err_reg:
Hans Schillstroma0840e22011-01-03 14:44:58 +01003571 if (!net_eq(net, &init_net))
3572 kfree(tbl);
3573err_dup:
Hans Schillstromb17fc992011-01-03 14:44:56 +01003574 free_percpu(ipvs->cpustats);
3575err_alloc:
3576 kfree(ipvs->tot_stats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003577 return -ENOMEM;
3578}
3579
3580static void __net_exit __ip_vs_control_cleanup(struct net *net)
3581{
Hans Schillstromb17fc992011-01-03 14:44:56 +01003582 struct netns_ipvs *ipvs = net_ipvs(net);
3583
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003584 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3585 return;
3586
Hans Schillstromb17fc992011-01-03 14:44:56 +01003587 ip_vs_kill_estimator(net, ipvs->tot_stats);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003588 unregister_net_sysctl_table(ipvs->sysctl_hdr);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003589 proc_net_remove(net, "ip_vs_stats_percpu");
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003590 proc_net_remove(net, "ip_vs_stats");
3591 proc_net_remove(net, "ip_vs");
Hans Schillstromf6340ee2011-01-03 14:44:59 +01003592 cancel_delayed_work_sync(&ipvs->defense_work);
3593 cancel_work_sync(&ipvs->defense_work.work);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003594 free_percpu(ipvs->cpustats);
3595 kfree(ipvs->tot_stats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003596}
3597
3598static struct pernet_operations ipvs_control_ops = {
3599 .init = __ip_vs_control_init,
3600 .exit = __ip_vs_control_cleanup,
3601};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003602
Sven Wegener048cf482008-08-10 18:24:35 +00003603int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003604{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003605 int idx;
Hans Schillstromfc723252011-01-03 14:44:43 +01003606 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003607
3608 EnterFunction(2);
3609
Hans Schillstromfc723252011-01-03 14:44:43 +01003610 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003611 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3612 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3613 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3614 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003615
3616 ret = register_pernet_subsys(&ipvs_control_ops);
3617 if (ret) {
3618 pr_err("cannot register namespace.\n");
3619 goto err;
Eduardo Blancod86bef72010-10-19 10:26:47 +01003620 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003621
3622 smp_wmb(); /* Do we really need it now ? */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003623
Linus Torvalds1da177e2005-04-16 15:20:36 -07003624 ret = nf_register_sockopt(&ip_vs_sockopts);
3625 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003626 pr_err("cannot register sockopt.\n");
Hans Schillstromfc723252011-01-03 14:44:43 +01003627 goto err_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003628 }
3629
Julius Volz9a812192008-08-14 14:08:44 +02003630 ret = ip_vs_genl_register();
3631 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003632 pr_err("cannot register Generic Netlink interface.\n");
Julius Volz9a812192008-08-14 14:08:44 +02003633 nf_unregister_sockopt(&ip_vs_sockopts);
Hans Schillstromfc723252011-01-03 14:44:43 +01003634 goto err_net;
Julius Volz9a812192008-08-14 14:08:44 +02003635 }
3636
Linus Torvalds1da177e2005-04-16 15:20:36 -07003637 LeaveFunction(2);
3638 return 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003639
3640err_net:
3641 unregister_pernet_subsys(&ipvs_control_ops);
3642err:
3643 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003644}
3645
3646
3647void ip_vs_control_cleanup(void)
3648{
3649 EnterFunction(2);
3650 ip_vs_trash_cleanup();
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003651 unregister_pernet_subsys(&ipvs_control_ops);
Julius Volz9a812192008-08-14 14:08:44 +02003652 ip_vs_genl_unregister();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003653 nf_unregister_sockopt(&ip_vs_sockopts);
3654 LeaveFunction(2);
3655}