blob: ae47090bf45fe1fdfbf99192950aacb951cb1af1 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
Hannes Eder9aada7a2009-07-30 14:29:44 -070021#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/seq_file.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090034#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080038#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020040#include <net/net_namespace.h>
Hans Schillstrom93304192011-01-03 14:44:51 +010041#include <linux/nsproxy.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <net/ip.h>
Vince Busam09571c72008-09-02 15:55:52 +020043#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020047#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020049#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
/* Mutex for IPVS sockopts; it may be slept on because [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* Reader/writer lock for the service tables. */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
Linus Torvalds1da177e2005-04-16 15:20:36 -070061/* sysctl variables */
Linus Torvalds1da177e2005-04-16 15:20:36 -070062
63#ifdef CONFIG_IP_VS_DEBUG
/* Debug level; a file-scope static starts out as zero without an initializer. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
70#endif
71
Vince Busam09571c72008-09-02 15:55:52 +020072#ifdef CONFIG_IP_VS_IPV6
73/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
Hans Schillstrom4a984802011-01-03 14:45:02 +010074static int __ip_vs_addr_is_local_v6(struct net *net,
75 const struct in6_addr *addr)
Vince Busam09571c72008-09-02 15:55:52 +020076{
77 struct rt6_info *rt;
David S. Miller4c9483b2011-03-12 16:22:43 -050078 struct flowi6 fl6 = {
79 .daddr = *addr,
Vince Busam09571c72008-09-02 15:55:52 +020080 };
81
David S. Miller4c9483b2011-03-12 16:22:43 -050082 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
Vince Busam09571c72008-09-02 15:55:52 +020083 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
David S. Miller4c9483b2011-03-12 16:22:43 -050084 return 1;
Vince Busam09571c72008-09-02 15:55:52 +020085
86 return 0;
87}
88#endif
Simon Horman14e40542011-02-04 18:33:02 +090089
90#ifdef CONFIG_SYSCTL
Linus Torvalds1da177e2005-04-16 15:20:36 -070091/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -070092 * update_defense_level is called from keventd and from sysctl,
93 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -070094 */
Hans Schillstrom93304192011-01-03 14:44:51 +010095static void update_defense_level(struct netns_ipvs *ipvs)
Linus Torvalds1da177e2005-04-16 15:20:36 -070096{
97 struct sysinfo i;
98 static int old_secure_tcp = 0;
99 int availmem;
100 int nomem;
101 int to_change = -1;
102
103 /* we only count free and buffered memory (in pages) */
104 si_meminfo(&i);
105 availmem = i.freeram + i.bufferram;
106 /* however in linux 2.5 the i.bufferram is total page cache size,
107 we need adjust it */
108 /* si_swapinfo(&i); */
109 /* availmem = availmem - (i.totalswap - i.freeswap); */
110
Hans Schillstroma0840e22011-01-03 14:44:58 +0100111 nomem = (availmem < ipvs->sysctl_amemthresh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700113 local_bh_disable();
114
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115 /* drop_entry */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100116 spin_lock(&ipvs->dropentry_lock);
117 switch (ipvs->sysctl_drop_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100119 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120 break;
121 case 1:
122 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100123 atomic_set(&ipvs->dropentry, 1);
124 ipvs->sysctl_drop_entry = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100126 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127 }
128 break;
129 case 2:
130 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100131 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100133 atomic_set(&ipvs->dropentry, 0);
134 ipvs->sysctl_drop_entry = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 };
136 break;
137 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100138 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 break;
140 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100141 spin_unlock(&ipvs->dropentry_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142
143 /* drop_packet */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100144 spin_lock(&ipvs->droppacket_lock);
145 switch (ipvs->sysctl_drop_packet) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100147 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148 break;
149 case 1:
150 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100151 ipvs->drop_rate = ipvs->drop_counter
152 = ipvs->sysctl_amemthresh /
153 (ipvs->sysctl_amemthresh-availmem);
154 ipvs->sysctl_drop_packet = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100156 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157 }
158 break;
159 case 2:
160 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100161 ipvs->drop_rate = ipvs->drop_counter
162 = ipvs->sysctl_amemthresh /
163 (ipvs->sysctl_amemthresh-availmem);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100165 ipvs->drop_rate = 0;
166 ipvs->sysctl_drop_packet = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 }
168 break;
169 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100170 ipvs->drop_rate = ipvs->sysctl_am_droprate;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171 break;
172 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100173 spin_unlock(&ipvs->droppacket_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174
175 /* secure_tcp */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100176 spin_lock(&ipvs->securetcp_lock);
177 switch (ipvs->sysctl_secure_tcp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 case 0:
179 if (old_secure_tcp >= 2)
180 to_change = 0;
181 break;
182 case 1:
183 if (nomem) {
184 if (old_secure_tcp < 2)
185 to_change = 1;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100186 ipvs->sysctl_secure_tcp = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 } else {
188 if (old_secure_tcp >= 2)
189 to_change = 0;
190 }
191 break;
192 case 2:
193 if (nomem) {
194 if (old_secure_tcp < 2)
195 to_change = 1;
196 } else {
197 if (old_secure_tcp >= 2)
198 to_change = 0;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100199 ipvs->sysctl_secure_tcp = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 }
201 break;
202 case 3:
203 if (old_secure_tcp < 2)
204 to_change = 1;
205 break;
206 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100207 old_secure_tcp = ipvs->sysctl_secure_tcp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 if (to_change >= 0)
Hans Schillstrom93304192011-01-03 14:44:51 +0100209 ip_vs_protocol_timeout_change(ipvs,
Hans Schillstroma0840e22011-01-03 14:44:58 +0100210 ipvs->sysctl_secure_tcp > 1);
211 spin_unlock(&ipvs->securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700212
213 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214}
215
216
/*
 *	Timer for checking the defense
 *
 *	Delay used when (re)scheduling the defense work.  The expansion is
 *	parenthesized so that it stays a single operand inside any
 *	surrounding expression.
 */
#define DEFENSE_TIMER_PERIOD	(1*HZ)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221
David Howellsc4028952006-11-22 14:57:56 +0000222static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223{
Hans Schillstromf6340ee2011-01-03 14:44:59 +0100224 struct netns_ipvs *ipvs =
225 container_of(work, struct netns_ipvs, defense_work.work);
Hans Schillstrom93304192011-01-03 14:44:51 +0100226
227 update_defense_level(ipvs);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100228 if (atomic_read(&ipvs->dropentry))
Hans Schillstromf6340ee2011-01-03 14:44:59 +0100229 ip_vs_random_dropentry(ipvs->net);
230 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231}
Simon Horman14e40542011-02-04 18:33:02 +0900232#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233
234int
235ip_vs_use_count_inc(void)
236{
237 return try_module_get(THIS_MODULE);
238}
239
240void
241ip_vs_use_count_dec(void)
242{
243 module_put(THIS_MODULE);
244}
245
246
/*
 *	Hash table: for virtual service lookups
 *
 *	The table has 2^IP_VS_SVC_TAB_BITS buckets; the mask reduces a
 *	hash value to a valid bucket index.
 */
#define IP_VS_SVC_TAB_BITS	8
#define IP_VS_SVC_TAB_SIZE	(1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK	(IP_VS_SVC_TAB_SIZE - 1)
253
254/* the service table hashed by <protocol, addr, port> */
255static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
256/* the service table hashed by fwmark */
257static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
258
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259
260/*
261 * Returns hash value for virtual service
262 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100263static inline unsigned
264ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
265 const union nf_inet_addr *addr, __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266{
267 register unsigned porth = ntohs(port);
Julius Volzb18610d2008-09-02 15:55:37 +0200268 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269
Julius Volzb18610d2008-09-02 15:55:37 +0200270#ifdef CONFIG_IP_VS_IPV6
271 if (af == AF_INET6)
272 addr_fold = addr->ip6[0]^addr->ip6[1]^
273 addr->ip6[2]^addr->ip6[3];
274#endif
Hans Schillstromfc723252011-01-03 14:44:43 +0100275 addr_fold ^= ((size_t)net>>8);
Julius Volzb18610d2008-09-02 15:55:37 +0200276
277 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 & IP_VS_SVC_TAB_MASK;
279}
280
281/*
282 * Returns hash value of fwmark for virtual service lookup
283 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100284static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285{
Hans Schillstromfc723252011-01-03 14:44:43 +0100286 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287}
288
289/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100290 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 * or in the ip_vs_svc_fwm_table by fwmark.
292 * Should be called with locked tables.
293 */
294static int ip_vs_svc_hash(struct ip_vs_service *svc)
295{
296 unsigned hash;
297
298 if (svc->flags & IP_VS_SVC_F_HASHED) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000299 pr_err("%s(): request for already hashed, called from %pF\n",
300 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700301 return 0;
302 }
303
304 if (svc->fwmark == 0) {
305 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100306 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100308 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
309 &svc->addr, svc->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
311 } else {
312 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100313 * Hash it by fwmark in svc_fwm_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100315 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
317 }
318
319 svc->flags |= IP_VS_SVC_F_HASHED;
320 /* increase its refcnt because it is referenced by the svc table */
321 atomic_inc(&svc->refcnt);
322 return 1;
323}
324
325
326/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100327 * Unhashes a service from svc_table / svc_fwm_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328 * Should be called with locked tables.
329 */
330static int ip_vs_svc_unhash(struct ip_vs_service *svc)
331{
332 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000333 pr_err("%s(): request for unhash flagged, called from %pF\n",
334 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335 return 0;
336 }
337
338 if (svc->fwmark == 0) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100339 /* Remove it from the svc_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340 list_del(&svc->s_list);
341 } else {
Hans Schillstromfc723252011-01-03 14:44:43 +0100342 /* Remove it from the svc_fwm_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343 list_del(&svc->f_list);
344 }
345
346 svc->flags &= ~IP_VS_SVC_F_HASHED;
347 atomic_dec(&svc->refcnt);
348 return 1;
349}
350
351
352/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100353 * Get service by {netns, proto,addr,port} in the service table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 */
Julius Volzb18610d2008-09-02 15:55:37 +0200355static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100356__ip_vs_service_find(struct net *net, int af, __u16 protocol,
357 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358{
359 unsigned hash;
360 struct ip_vs_service *svc;
361
362 /* Check for "full" addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100363 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364
365 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
Julius Volzb18610d2008-09-02 15:55:37 +0200366 if ((svc->af == af)
367 && ip_vs_addr_equal(af, &svc->addr, vaddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 && (svc->port == vport)
Hans Schillstromfc723252011-01-03 14:44:43 +0100369 && (svc->protocol == protocol)
370 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372 return svc;
373 }
374 }
375
376 return NULL;
377}
378
379
380/*
381 * Get service by {fwmark} in the service table.
382 */
Julius Volzb18610d2008-09-02 15:55:37 +0200383static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100384__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385{
386 unsigned hash;
387 struct ip_vs_service *svc;
388
389 /* Check for fwmark addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100390 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391
392 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100393 if (svc->fwmark == fwmark && svc->af == af
394 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396 return svc;
397 }
398 }
399
400 return NULL;
401}
402
403struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100404ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
Julius Volz3c2e0502008-09-02 15:55:38 +0200405 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406{
407 struct ip_vs_service *svc;
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100408 struct netns_ipvs *ipvs = net_ipvs(net);
Julius Volz3c2e0502008-09-02 15:55:38 +0200409
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410 read_lock(&__ip_vs_svc_lock);
411
412 /*
413 * Check the table hashed by fwmark first
414 */
Julian Anastasov097fc762011-03-04 12:26:17 +0200415 if (fwmark) {
416 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
417 if (svc)
418 goto out;
419 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420
421 /*
422 * Check the table hashed by <protocol,addr,port>
423 * for "full" addressed entries
424 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100425 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426
427 if (svc == NULL
428 && protocol == IPPROTO_TCP
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100429 && atomic_read(&ipvs->ftpsvc_counter)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
431 /*
432 * Check if ftp service entry exists, the packet
433 * might belong to FTP data connections.
434 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100435 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 }
437
438 if (svc == NULL
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100439 && atomic_read(&ipvs->nullsvc_counter)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440 /*
441 * Check if the catch-all port (port zero) exists
442 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100443 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444 }
445
446 out:
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200447 if (svc)
448 atomic_inc(&svc->usecnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449 read_unlock(&__ip_vs_svc_lock);
450
Julius Volz3c2e0502008-09-02 15:55:38 +0200451 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
452 fwmark, ip_vs_proto_name(protocol),
453 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
454 svc ? "hit" : "not hit");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455
456 return svc;
457}
458
459
460static inline void
461__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
462{
463 atomic_inc(&svc->refcnt);
464 dest->svc = svc;
465}
466
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200467static void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468__ip_vs_unbind_svc(struct ip_vs_dest *dest)
469{
470 struct ip_vs_service *svc = dest->svc;
471
472 dest->svc = NULL;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200473 if (atomic_dec_and_test(&svc->refcnt)) {
474 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
475 svc->fwmark,
476 IP_VS_DBG_ADDR(svc->af, &svc->addr),
477 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +0100478 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200480 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481}
482
483
484/*
485 * Returns hash value for real service
486 */
Julius Volz7937df12008-09-02 15:55:48 +0200487static inline unsigned ip_vs_rs_hashkey(int af,
488 const union nf_inet_addr *addr,
489 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490{
491 register unsigned porth = ntohs(port);
Julius Volz7937df12008-09-02 15:55:48 +0200492 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493
Julius Volz7937df12008-09-02 15:55:48 +0200494#ifdef CONFIG_IP_VS_IPV6
495 if (af == AF_INET6)
496 addr_fold = addr->ip6[0]^addr->ip6[1]^
497 addr->ip6[2]^addr->ip6[3];
498#endif
499
500 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501 & IP_VS_RTAB_MASK;
502}
503
504/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100505 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 * should be called with locked tables.
507 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100508static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509{
510 unsigned hash;
511
512 if (!list_empty(&dest->d_list)) {
513 return 0;
514 }
515
516 /*
517 * Hash by proto,addr,port,
518 * which are the parameters of the real service.
519 */
Julius Volz7937df12008-09-02 15:55:48 +0200520 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
521
Hans Schillstromfc723252011-01-03 14:44:43 +0100522 list_add(&dest->d_list, &ipvs->rs_table[hash]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523
524 return 1;
525}
526
527/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100528 * UNhashes ip_vs_dest from rs_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529 * should be called with locked tables.
530 */
531static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
532{
533 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100534 * Remove it from the rs_table table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535 */
536 if (!list_empty(&dest->d_list)) {
537 list_del(&dest->d_list);
538 INIT_LIST_HEAD(&dest->d_list);
539 }
540
541 return 1;
542}
543
544/*
545 * Lookup real service by <proto,addr,port> in the real service table.
546 */
547struct ip_vs_dest *
Hans Schillstromfc723252011-01-03 14:44:43 +0100548ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
Julius Volz7937df12008-09-02 15:55:48 +0200549 const union nf_inet_addr *daddr,
550 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551{
Hans Schillstromfc723252011-01-03 14:44:43 +0100552 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553 unsigned hash;
554 struct ip_vs_dest *dest;
555
556 /*
557 * Check for "full" addressed entries
558 * Return the first found entry
559 */
Julius Volz7937df12008-09-02 15:55:48 +0200560 hash = ip_vs_rs_hashkey(af, daddr, dport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561
Hans Schillstroma0840e22011-01-03 14:44:58 +0100562 read_lock(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100563 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200564 if ((dest->af == af)
565 && ip_vs_addr_equal(af, &dest->addr, daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 && (dest->port == dport)
567 && ((dest->protocol == protocol) ||
568 dest->vfwmark)) {
569 /* HIT */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100570 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571 return dest;
572 }
573 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100574 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575
576 return NULL;
577}
578
579/*
580 * Lookup destination by {addr,port} in the given service
581 */
582static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200583ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
584 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585{
586 struct ip_vs_dest *dest;
587
588 /*
589 * Find the destination for the given service
590 */
591 list_for_each_entry(dest, &svc->destinations, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200592 if ((dest->af == svc->af)
593 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
594 && (dest->port == dport)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 /* HIT */
596 return dest;
597 }
598 }
599
600 return NULL;
601}
602
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800603/*
604 * Find destination by {daddr,dport,vaddr,protocol}
605 * Cretaed to be used in ip_vs_process_message() in
606 * the backup synchronization daemon. It finds the
607 * destination to be bound to the received connection
608 * on the backup.
609 *
610 * ip_vs_lookup_real_service() looked promissing, but
611 * seems not working as expected.
612 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100613struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
614 const union nf_inet_addr *daddr,
Julius Volz7937df12008-09-02 15:55:48 +0200615 __be16 dport,
616 const union nf_inet_addr *vaddr,
Hans Schillstrom0e051e62010-11-19 14:25:07 +0100617 __be16 vport, __u16 protocol, __u32 fwmark)
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800618{
619 struct ip_vs_dest *dest;
620 struct ip_vs_service *svc;
621
Hans Schillstromfc723252011-01-03 14:44:43 +0100622 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800623 if (!svc)
624 return NULL;
625 dest = ip_vs_lookup_dest(svc, daddr, dport);
626 if (dest)
627 atomic_inc(&dest->refcnt);
628 ip_vs_service_put(svc);
629 return dest;
630}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631
632/*
633 * Lookup dest by {svc,addr,port} in the destination trash.
634 * The destination trash is used to hold the destinations that are removed
635 * from the service table but are still referenced by some conn entries.
636 * The reason to add the destination trash is when the dest is temporary
637 * down (either by administrator or by monitor program), the dest can be
638 * picked back from the trash, the remaining connections to the dest can
639 * continue, and the counting information of the dest is also useful for
640 * scheduling.
641 */
642static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200643ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
644 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645{
646 struct ip_vs_dest *dest, *nxt;
Hans Schillstromf2431e62011-01-03 14:45:00 +0100647 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648
649 /*
650 * Find the destination in trash
651 */
Hans Schillstromf2431e62011-01-03 14:45:00 +0100652 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200653 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
654 "dest->refcnt=%d\n",
655 dest->vfwmark,
656 IP_VS_DBG_ADDR(svc->af, &dest->addr),
657 ntohs(dest->port),
658 atomic_read(&dest->refcnt));
659 if (dest->af == svc->af &&
660 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661 dest->port == dport &&
662 dest->vfwmark == svc->fwmark &&
663 dest->protocol == svc->protocol &&
664 (svc->fwmark ||
Julius Volz7937df12008-09-02 15:55:48 +0200665 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666 dest->vport == svc->port))) {
667 /* HIT */
668 return dest;
669 }
670
671 /*
672 * Try to purge the destination from trash if not referenced
673 */
674 if (atomic_read(&dest->refcnt) == 1) {
Julius Volz7937df12008-09-02 15:55:48 +0200675 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
676 "from trash\n",
677 dest->vfwmark,
678 IP_VS_DBG_ADDR(svc->af, &dest->addr),
679 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680 list_del(&dest->n_list);
681 ip_vs_dst_reset(dest);
682 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100683 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684 kfree(dest);
685 }
686 }
687
688 return NULL;
689}
690
691
692/*
693 * Clean up all the destinations in the trash
694 * Called by the ip_vs_control_cleanup()
695 *
696 * When the ip_vs_control_clearup is activated by ipvs module exit,
697 * the service tables must have been flushed and all the connections
698 * are expired, and the refcnt of each destination in the trash must
699 * be 1, so we simply release them here.
700 */
Hans Schillstromf2431e62011-01-03 14:45:00 +0100701static void ip_vs_trash_cleanup(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702{
703 struct ip_vs_dest *dest, *nxt;
Hans Schillstromf2431e62011-01-03 14:45:00 +0100704 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705
Hans Schillstromf2431e62011-01-03 14:45:00 +0100706 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 list_del(&dest->n_list);
708 ip_vs_dst_reset(dest);
709 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100710 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700711 kfree(dest);
712 }
713}
714
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200715static void
716ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
717{
718#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200719
720 spin_lock_bh(&src->lock);
721
722 IP_VS_SHOW_STATS_COUNTER(conns);
723 IP_VS_SHOW_STATS_COUNTER(inpkts);
724 IP_VS_SHOW_STATS_COUNTER(outpkts);
725 IP_VS_SHOW_STATS_COUNTER(inbytes);
726 IP_VS_SHOW_STATS_COUNTER(outbytes);
727
Julian Anastasovea9f22c2011-03-14 01:41:54 +0200728 ip_vs_read_estimator(dst, src);
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200729
730 spin_unlock_bh(&src->lock);
731}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732
733static void
734ip_vs_zero_stats(struct ip_vs_stats *stats)
735{
736 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000737
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200738 /* get current counters as zero point, rates are zeroed */
739
740#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200741
742 IP_VS_ZERO_STATS_COUNTER(conns);
743 IP_VS_ZERO_STATS_COUNTER(inpkts);
744 IP_VS_ZERO_STATS_COUNTER(outpkts);
745 IP_VS_ZERO_STATS_COUNTER(inbytes);
746 IP_VS_ZERO_STATS_COUNTER(outbytes);
747
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000749
Sven Wegener3a14a3132008-08-10 18:24:41 +0000750 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751}
752
753/*
754 * Update a destination in the given service
755 */
756static void
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200757__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
758 struct ip_vs_dest_user_kern *udest, int add)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759{
Hans Schillstromfc723252011-01-03 14:44:43 +0100760 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 int conn_flags;
762
763 /* set the weight and the flags */
764 atomic_set(&dest->weight, udest->weight);
Julian Anastasov35757922010-09-17 14:18:16 +0200765 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
766 conn_flags |= IP_VS_CONN_F_INACTIVE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700767
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
Julian Anastasov35757922010-09-17 14:18:16 +0200769 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
771 } else {
772 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100773 * Put the real service in rs_table if not present.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774 * For now only for NAT!
775 */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100776 write_lock_bh(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100777 ip_vs_rs_hash(ipvs, dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100778 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779 }
780 atomic_set(&dest->conn_flags, conn_flags);
781
782 /* bind the service */
783 if (!dest->svc) {
784 __ip_vs_bind_svc(dest, svc);
785 } else {
786 if (dest->svc != svc) {
787 __ip_vs_unbind_svc(dest);
788 ip_vs_zero_stats(&dest->stats);
789 __ip_vs_bind_svc(dest, svc);
790 }
791 }
792
793 /* set the dest status flags */
794 dest->flags |= IP_VS_DEST_F_AVAILABLE;
795
796 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
797 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
798 dest->u_threshold = udest->u_threshold;
799 dest->l_threshold = udest->l_threshold;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200800
Julian Anastasovff75f402011-02-22 10:40:25 +0200801 spin_lock_bh(&dest->dst_lock);
Julian Anastasovfc604762010-10-17 16:38:15 +0300802 ip_vs_dst_reset(dest);
Julian Anastasovff75f402011-02-22 10:40:25 +0200803 spin_unlock_bh(&dest->dst_lock);
Julian Anastasovfc604762010-10-17 16:38:15 +0300804
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200805 if (add)
Julian Anastasov6ef757f2011-03-14 01:44:28 +0200806 ip_vs_start_estimator(svc->net, &dest->stats);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200807
808 write_lock_bh(&__ip_vs_svc_lock);
809
810 /* Wait until all other svc users go away */
811 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
812
813 if (add) {
814 list_add(&dest->n_list, &svc->destinations);
815 svc->num_dests++;
816 }
817
818 /* call the update_service, because server weight may be changed */
819 if (svc->scheduler->update_service)
820 svc->scheduler->update_service(svc);
821
822 write_unlock_bh(&__ip_vs_svc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823}
824
825
826/*
827 * Create a destination for the given service
828 */
829static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200830ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831 struct ip_vs_dest **dest_p)
832{
833 struct ip_vs_dest *dest;
834 unsigned atype;
835
836 EnterFunction(2);
837
Vince Busam09571c72008-09-02 15:55:52 +0200838#ifdef CONFIG_IP_VS_IPV6
839 if (svc->af == AF_INET6) {
840 atype = ipv6_addr_type(&udest->addr.in6);
Sven Wegener3bfb92f2008-09-05 16:53:49 +0200841 if ((!(atype & IPV6_ADDR_UNICAST) ||
842 atype & IPV6_ADDR_LINKLOCAL) &&
Hans Schillstrom4a984802011-01-03 14:45:02 +0100843 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
Vince Busam09571c72008-09-02 15:55:52 +0200844 return -EINVAL;
845 } else
846#endif
847 {
Hans Schillstrom4a984802011-01-03 14:45:02 +0100848 atype = inet_addr_type(svc->net, udest->addr.ip);
Vince Busam09571c72008-09-02 15:55:52 +0200849 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
850 return -EINVAL;
851 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700852
Simon Hormandee06e42010-08-26 02:54:31 +0000853 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000855 pr_err("%s(): no memory.\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 return -ENOMEM;
857 }
Hans Schillstromb17fc992011-01-03 14:44:56 +0100858 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
859 if (!dest->stats.cpustats) {
860 pr_err("%s() alloc_percpu failed\n", __func__);
861 goto err_alloc;
862 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863
Julius Volzc860c6b2008-09-02 15:55:36 +0200864 dest->af = svc->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865 dest->protocol = svc->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +0200866 dest->vaddr = svc->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 dest->vport = svc->port;
868 dest->vfwmark = svc->fwmark;
Julius Volzc860c6b2008-09-02 15:55:36 +0200869 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870 dest->port = udest->port;
871
872 atomic_set(&dest->activeconns, 0);
873 atomic_set(&dest->inactconns, 0);
874 atomic_set(&dest->persistconns, 0);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200875 atomic_set(&dest->refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876
877 INIT_LIST_HEAD(&dest->d_list);
878 spin_lock_init(&dest->dst_lock);
879 spin_lock_init(&dest->stats.lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200880 __ip_vs_update_dest(svc, dest, udest, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881
882 *dest_p = dest;
883
884 LeaveFunction(2);
885 return 0;
Hans Schillstromb17fc992011-01-03 14:44:56 +0100886
887err_alloc:
888 kfree(dest);
889 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890}
891
892
893/*
894 * Add a destination into an existing service
895 */
896static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200897ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898{
899 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200900 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700901 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902 int ret;
903
904 EnterFunction(2);
905
906 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000907 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908 return -ERANGE;
909 }
910
911 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000912 pr_err("%s(): lower threshold is higher than upper threshold\n",
913 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700914 return -ERANGE;
915 }
916
Julius Volzc860c6b2008-09-02 15:55:36 +0200917 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
918
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919 /*
920 * Check if the dest already exists in the list
921 */
Julius Volz7937df12008-09-02 15:55:48 +0200922 dest = ip_vs_lookup_dest(svc, &daddr, dport);
923
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924 if (dest != NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000925 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926 return -EEXIST;
927 }
928
929 /*
930 * Check if the dest already exists in the trash and
931 * is from the same service
932 */
Julius Volz7937df12008-09-02 15:55:48 +0200933 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
934
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935 if (dest != NULL) {
Julius Volzcfc78c52008-09-02 15:55:53 +0200936 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
937 "dest->refcnt=%d, service %u/%s:%u\n",
938 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
939 atomic_read(&dest->refcnt),
940 dest->vfwmark,
941 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
942 ntohs(dest->vport));
943
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 /*
945 * Get the destination from the trash
946 */
947 list_del(&dest->n_list);
948
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200949 __ip_vs_update_dest(svc, dest, udest, 1);
950 ret = 0;
951 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 /*
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200953 * Allocate and initialize the dest structure
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200955 ret = ip_vs_new_dest(svc, udest, &dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 LeaveFunction(2);
958
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200959 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960}
961
962
963/*
964 * Edit a destination in the given service
965 */
966static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200967ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968{
969 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200970 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700971 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972
973 EnterFunction(2);
974
975 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000976 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977 return -ERANGE;
978 }
979
980 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000981 pr_err("%s(): lower threshold is higher than upper threshold\n",
982 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 return -ERANGE;
984 }
985
Julius Volzc860c6b2008-09-02 15:55:36 +0200986 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
987
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988 /*
989 * Lookup the destination list
990 */
Julius Volz7937df12008-09-02 15:55:48 +0200991 dest = ip_vs_lookup_dest(svc, &daddr, dport);
992
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000994 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 return -ENOENT;
996 }
997
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200998 __ip_vs_update_dest(svc, dest, udest, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999 LeaveFunction(2);
1000
1001 return 0;
1002}
1003
1004
1005/*
1006 * Delete a destination (must be already unlinked from the service)
1007 */
Hans Schillstrom29c20262011-01-03 14:44:54 +01001008static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009{
Hans Schillstroma0840e22011-01-03 14:44:58 +01001010 struct netns_ipvs *ipvs = net_ipvs(net);
1011
Julian Anastasov6ef757f2011-03-14 01:44:28 +02001012 ip_vs_stop_estimator(net, &dest->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013
1014 /*
1015 * Remove it from the d-linked list with the real services.
1016 */
Hans Schillstroma0840e22011-01-03 14:44:58 +01001017 write_lock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018 ip_vs_rs_unhash(dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +01001019 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020
1021 /*
1022 * Decrease the refcnt of the dest, and free the dest
1023 * if nobody refers to it (refcnt=0). Otherwise, throw
1024 * the destination into the trash.
1025 */
1026 if (atomic_dec_and_test(&dest->refcnt)) {
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001027 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1028 dest->vfwmark,
1029 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1030 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031 ip_vs_dst_reset(dest);
1032 /* simply decrease svc->refcnt here, let the caller check
1033 and release the service if nobody refers to it.
1034 Only user context can release destination and service,
1035 and only one user context can update virtual service at a
1036 time, so the operation here is OK */
1037 atomic_dec(&dest->svc->refcnt);
Hans Schillstromb17fc992011-01-03 14:44:56 +01001038 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 kfree(dest);
1040 } else {
Julius Volzcfc78c52008-09-02 15:55:53 +02001041 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1042 "dest->refcnt=%d\n",
1043 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1044 ntohs(dest->port),
1045 atomic_read(&dest->refcnt));
Hans Schillstromf2431e62011-01-03 14:45:00 +01001046 list_add(&dest->n_list, &ipvs->dest_trash);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047 atomic_inc(&dest->refcnt);
1048 }
1049}
1050
1051
1052/*
1053 * Unlink a destination from the given service
1054 */
1055static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1056 struct ip_vs_dest *dest,
1057 int svcupd)
1058{
1059 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1060
1061 /*
1062 * Remove it from the d-linked destination list.
1063 */
1064 list_del(&dest->n_list);
1065 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001066
1067 /*
1068 * Call the update_service function of its scheduler
1069 */
1070 if (svcupd && svc->scheduler->update_service)
1071 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072}
1073
1074
1075/*
1076 * Delete a destination server in the given service
1077 */
1078static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001079ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080{
1081 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -07001082 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083
1084 EnterFunction(2);
1085
Julius Volz7937df12008-09-02 15:55:48 +02001086 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
Julius Volzc860c6b2008-09-02 15:55:36 +02001087
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001089 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090 return -ENOENT;
1091 }
1092
1093 write_lock_bh(&__ip_vs_svc_lock);
1094
1095 /*
1096 * Wait until all other svc users go away.
1097 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001098 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099
1100 /*
1101 * Unlink dest from the service
1102 */
1103 __ip_vs_unlink_dest(svc, dest, 1);
1104
1105 write_unlock_bh(&__ip_vs_svc_lock);
1106
1107 /*
1108 * Delete the destination
1109 */
Hans Schillstroma0840e22011-01-03 14:44:58 +01001110 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001111
1112 LeaveFunction(2);
1113
1114 return 0;
1115}
1116
1117
1118/*
1119 * Add a service into the service hash table
1120 */
1121static int
Hans Schillstromfc723252011-01-03 14:44:43 +01001122ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
Julius Volzc860c6b2008-09-02 15:55:36 +02001123 struct ip_vs_service **svc_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124{
1125 int ret = 0;
1126 struct ip_vs_scheduler *sched = NULL;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001127 struct ip_vs_pe *pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128 struct ip_vs_service *svc = NULL;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001129 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130
1131 /* increase the module use count */
1132 ip_vs_use_count_inc();
1133
1134 /* Lookup the scheduler by 'u->sched_name' */
1135 sched = ip_vs_scheduler_get(u->sched_name);
1136 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001137 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 ret = -ENOENT;
Simon Horman6e08bfb2010-08-22 21:37:52 +09001139 goto out_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 }
1141
Simon Horman0d1e71b2010-08-22 21:37:54 +09001142 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001143 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001144 if (pe == NULL) {
1145 pr_info("persistence engine module ip_vs_pe_%s "
1146 "not found\n", u->pe_name);
1147 ret = -ENOENT;
1148 goto out_err;
1149 }
1150 }
1151
Julius Volzf94fd042008-09-02 15:55:55 +02001152#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001153 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1154 ret = -EINVAL;
1155 goto out_err;
Julius Volzf94fd042008-09-02 15:55:55 +02001156 }
1157#endif
1158
Simon Hormandee06e42010-08-26 02:54:31 +00001159 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160 if (svc == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001161 IP_VS_DBG(1, "%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162 ret = -ENOMEM;
1163 goto out_err;
1164 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001165 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1166 if (!svc->stats.cpustats) {
1167 pr_err("%s() alloc_percpu failed\n", __func__);
1168 goto out_err;
1169 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170
1171 /* I'm the first user of the service */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001172 atomic_set(&svc->usecnt, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173 atomic_set(&svc->refcnt, 0);
1174
Julius Volzc860c6b2008-09-02 15:55:36 +02001175 svc->af = u->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176 svc->protocol = u->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +02001177 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178 svc->port = u->port;
1179 svc->fwmark = u->fwmark;
1180 svc->flags = u->flags;
1181 svc->timeout = u->timeout * HZ;
1182 svc->netmask = u->netmask;
Hans Schillstromfc723252011-01-03 14:44:43 +01001183 svc->net = net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184
1185 INIT_LIST_HEAD(&svc->destinations);
1186 rwlock_init(&svc->sched_lock);
1187 spin_lock_init(&svc->stats.lock);
1188
1189 /* Bind the scheduler */
1190 ret = ip_vs_bind_scheduler(svc, sched);
1191 if (ret)
1192 goto out_err;
1193 sched = NULL;
1194
Simon Horman0d1e71b2010-08-22 21:37:54 +09001195 /* Bind the ct retriever */
1196 ip_vs_bind_pe(svc, pe);
1197 pe = NULL;
1198
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199 /* Update the virtual service counters */
1200 if (svc->port == FTPPORT)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001201 atomic_inc(&ipvs->ftpsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001202 else if (svc->port == 0)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001203 atomic_inc(&ipvs->nullsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204
Julian Anastasov6ef757f2011-03-14 01:44:28 +02001205 ip_vs_start_estimator(net, &svc->stats);
Julius Volzf94fd042008-09-02 15:55:55 +02001206
1207 /* Count only IPv4 services for old get/setsockopt interface */
1208 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001209 ipvs->num_services++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210
1211 /* Hash the service into the service table */
1212 write_lock_bh(&__ip_vs_svc_lock);
1213 ip_vs_svc_hash(svc);
1214 write_unlock_bh(&__ip_vs_svc_lock);
1215
1216 *svc_p = svc;
1217 return 0;
1218
Hans Schillstromb17fc992011-01-03 14:44:56 +01001219
Simon Horman6e08bfb2010-08-22 21:37:52 +09001220 out_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221 if (svc != NULL) {
Simon Horman2fabf352010-08-22 21:37:52 +09001222 ip_vs_unbind_scheduler(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 if (svc->inc) {
1224 local_bh_disable();
1225 ip_vs_app_inc_put(svc->inc);
1226 local_bh_enable();
1227 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001228 if (svc->stats.cpustats)
1229 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230 kfree(svc);
1231 }
1232 ip_vs_scheduler_put(sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001233 ip_vs_pe_put(pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235 /* decrease the module use count */
1236 ip_vs_use_count_dec();
1237
1238 return ret;
1239}
1240
1241
1242/*
1243 * Edit a service and bind it with a new scheduler
1244 */
1245static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001246ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247{
1248 struct ip_vs_scheduler *sched, *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001249 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250 int ret = 0;
1251
1252 /*
1253 * Lookup the scheduler, by 'u->sched_name'
1254 */
1255 sched = ip_vs_scheduler_get(u->sched_name);
1256 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001257 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258 return -ENOENT;
1259 }
1260 old_sched = sched;
1261
Simon Horman0d1e71b2010-08-22 21:37:54 +09001262 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001263 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001264 if (pe == NULL) {
1265 pr_info("persistence engine module ip_vs_pe_%s "
1266 "not found\n", u->pe_name);
1267 ret = -ENOENT;
1268 goto out;
1269 }
1270 old_pe = pe;
1271 }
1272
Julius Volzf94fd042008-09-02 15:55:55 +02001273#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001274 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1275 ret = -EINVAL;
1276 goto out;
Julius Volzf94fd042008-09-02 15:55:55 +02001277 }
1278#endif
1279
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280 write_lock_bh(&__ip_vs_svc_lock);
1281
1282 /*
1283 * Wait until all other svc users go away.
1284 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001285 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286
1287 /*
1288 * Set the flags and timeout value
1289 */
1290 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1291 svc->timeout = u->timeout * HZ;
1292 svc->netmask = u->netmask;
1293
1294 old_sched = svc->scheduler;
1295 if (sched != old_sched) {
1296 /*
1297 * Unbind the old scheduler
1298 */
1299 if ((ret = ip_vs_unbind_scheduler(svc))) {
1300 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001301 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302 }
1303
1304 /*
1305 * Bind the new scheduler
1306 */
1307 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1308 /*
1309 * If ip_vs_bind_scheduler fails, restore the old
1310 * scheduler.
1311 * The main reason of failure is out of memory.
1312 *
1313 * The question is if the old scheduler can be
1314 * restored all the time. TODO: if it cannot be
1315 * restored some time, we must delete the service,
1316 * otherwise the system may crash.
1317 */
1318 ip_vs_bind_scheduler(svc, old_sched);
1319 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001320 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001321 }
1322 }
1323
Simon Horman0d1e71b2010-08-22 21:37:54 +09001324 old_pe = svc->pe;
1325 if (pe != old_pe) {
1326 ip_vs_unbind_pe(svc);
1327 ip_vs_bind_pe(svc, pe);
1328 }
1329
Simon Horman9e691ed2008-09-17 10:10:41 +10001330 out_unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331 write_unlock_bh(&__ip_vs_svc_lock);
Simon Horman9e691ed2008-09-17 10:10:41 +10001332 out:
Simon Horman6e08bfb2010-08-22 21:37:52 +09001333 ip_vs_scheduler_put(old_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001334 ip_vs_pe_put(old_pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001335 return ret;
1336}
1337
1338
1339/*
1340 * Delete a service from the service list
1341 * - The service must be unlinked, unlocked and not referenced!
1342 * - We are called under _bh lock
1343 */
1344static void __ip_vs_del_service(struct ip_vs_service *svc)
1345{
1346 struct ip_vs_dest *dest, *nxt;
1347 struct ip_vs_scheduler *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001348 struct ip_vs_pe *old_pe;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001349 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001350
1351 pr_info("%s: enter\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352
Julius Volzf94fd042008-09-02 15:55:55 +02001353 /* Count only IPv4 services for old get/setsockopt interface */
1354 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001355 ipvs->num_services--;
Julius Volzf94fd042008-09-02 15:55:55 +02001356
Julian Anastasov6ef757f2011-03-14 01:44:28 +02001357 ip_vs_stop_estimator(svc->net, &svc->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358
1359 /* Unbind scheduler */
1360 old_sched = svc->scheduler;
1361 ip_vs_unbind_scheduler(svc);
Simon Horman6e08bfb2010-08-22 21:37:52 +09001362 ip_vs_scheduler_put(old_sched);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363
Simon Horman0d1e71b2010-08-22 21:37:54 +09001364 /* Unbind persistence engine */
1365 old_pe = svc->pe;
1366 ip_vs_unbind_pe(svc);
1367 ip_vs_pe_put(old_pe);
1368
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 /* Unbind app inc */
1370 if (svc->inc) {
1371 ip_vs_app_inc_put(svc->inc);
1372 svc->inc = NULL;
1373 }
1374
1375 /*
1376 * Unlink the whole destination list
1377 */
1378 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1379 __ip_vs_unlink_dest(svc, dest, 0);
Hans Schillstrom29c20262011-01-03 14:44:54 +01001380 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381 }
1382
1383 /*
1384 * Update the virtual service counters
1385 */
1386 if (svc->port == FTPPORT)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001387 atomic_dec(&ipvs->ftpsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388 else if (svc->port == 0)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001389 atomic_dec(&ipvs->nullsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390
1391 /*
1392 * Free the service if nobody refers to it
1393 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001394 if (atomic_read(&svc->refcnt) == 0) {
1395 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1396 svc->fwmark,
1397 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1398 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +01001399 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001400 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001401 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402
1403 /* decrease the module use count */
1404 ip_vs_use_count_dec();
1405}
1406
1407/*
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001408 * Unlink a service from list and try to delete it if its refcnt reached 0
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001410static void ip_vs_unlink_service(struct ip_vs_service *svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001412 /*
1413 * Unhash it from the service table
1414 */
1415 write_lock_bh(&__ip_vs_svc_lock);
1416
1417 ip_vs_svc_unhash(svc);
1418
1419 /*
1420 * Wait until all the svc users go away.
1421 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001422 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423
1424 __ip_vs_del_service(svc);
1425
1426 write_unlock_bh(&__ip_vs_svc_lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001427}
1428
1429/*
1430 * Delete a service from the service list
1431 */
1432static int ip_vs_del_service(struct ip_vs_service *svc)
1433{
1434 if (svc == NULL)
1435 return -EEXIST;
1436 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001437
1438 return 0;
1439}
1440
1441
1442/*
1443 * Flush all the virtual services
1444 */
Hans Schillstromfc723252011-01-03 14:44:43 +01001445static int ip_vs_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446{
1447 int idx;
1448 struct ip_vs_service *svc, *nxt;
1449
1450 /*
Hans Schillstromfc723252011-01-03 14:44:43 +01001451 * Flush the service table hashed by <netns,protocol,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -07001452 */
1453 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001454 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1455 s_list) {
1456 if (net_eq(svc->net, net))
1457 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458 }
1459 }
1460
1461 /*
1462 * Flush the service table hashed by fwmark
1463 */
1464 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1465 list_for_each_entry_safe(svc, nxt,
1466 &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001467 if (net_eq(svc->net, net))
1468 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469 }
1470 }
1471
1472 return 0;
1473}
1474
1475
1476/*
1477 * Zero counters in a service or all services
1478 */
1479static int ip_vs_zero_service(struct ip_vs_service *svc)
1480{
1481 struct ip_vs_dest *dest;
1482
1483 write_lock_bh(&__ip_vs_svc_lock);
1484 list_for_each_entry(dest, &svc->destinations, n_list) {
1485 ip_vs_zero_stats(&dest->stats);
1486 }
1487 ip_vs_zero_stats(&svc->stats);
1488 write_unlock_bh(&__ip_vs_svc_lock);
1489 return 0;
1490}
1491
Hans Schillstromfc723252011-01-03 14:44:43 +01001492static int ip_vs_zero_all(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001493{
1494 int idx;
1495 struct ip_vs_service *svc;
1496
1497 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1498 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001499 if (net_eq(svc->net, net))
1500 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 }
1502 }
1503
1504 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1505 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001506 if (net_eq(svc->net, net))
1507 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508 }
1509 }
1510
Julian Anastasov2a0751a2011-03-04 12:20:35 +02001511 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512 return 0;
1513}
1514
Simon Horman14e40542011-02-04 18:33:02 +09001515#ifdef CONFIG_SYSCTL
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001517proc_do_defense_mode(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518 void __user *buffer, size_t *lenp, loff_t *ppos)
1519{
Hans Schillstrom93304192011-01-03 14:44:51 +01001520 struct net *net = current->nsproxy->net_ns;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521 int *valp = table->data;
1522 int val = *valp;
1523 int rc;
1524
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001525 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001526 if (write && (*valp != val)) {
1527 if ((*valp < 0) || (*valp > 3)) {
1528 /* Restore the correct value */
1529 *valp = val;
1530 } else {
Hans Schillstrom93304192011-01-03 14:44:51 +01001531 update_defense_level(net_ipvs(net));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 }
1533 }
1534 return rc;
1535}
1536
Linus Torvalds1da177e2005-04-16 15:20:36 -07001537static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001538proc_do_sync_threshold(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001539 void __user *buffer, size_t *lenp, loff_t *ppos)
1540{
1541 int *valp = table->data;
1542 int val[2];
1543 int rc;
1544
1545 /* backup the value first */
1546 memcpy(val, valp, sizeof(val));
1547
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001548 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1550 /* Restore the correct value */
1551 memcpy(valp, val, sizeof(val));
1552 }
1553 return rc;
1554}
1555
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001556static int
1557proc_do_sync_mode(ctl_table *table, int write,
1558 void __user *buffer, size_t *lenp, loff_t *ppos)
1559{
1560 int *valp = table->data;
1561 int val = *valp;
1562 int rc;
1563
1564 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1565 if (write && (*valp != val)) {
1566 if ((*valp < 0) || (*valp > 1)) {
1567 /* Restore the correct value */
1568 *valp = val;
1569 } else {
Hans Schillstromf1313152011-01-03 14:44:55 +01001570 struct net *net = current->nsproxy->net_ns;
1571 ip_vs_sync_switch_mode(net, val);
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001572 }
1573 }
1574 return rc;
1575}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576
1577/*
1578 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *	Do not change the order of, or insert, entries without making the
 *	matching change to the netns init in __ip_vs_control_init().
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581 */
1582
1583static struct ctl_table vs_vars[] = {
1584 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585 .procname = "amemthresh",
Hans Schillstroma0840e22011-01-03 14:44:58 +01001586 .maxlen = sizeof(int),
1587 .mode = 0644,
1588 .proc_handler = proc_dointvec,
1589 },
1590 {
1591 .procname = "am_droprate",
1592 .maxlen = sizeof(int),
1593 .mode = 0644,
1594 .proc_handler = proc_dointvec,
1595 },
1596 {
1597 .procname = "drop_entry",
1598 .maxlen = sizeof(int),
1599 .mode = 0644,
1600 .proc_handler = proc_do_defense_mode,
1601 },
1602 {
1603 .procname = "drop_packet",
1604 .maxlen = sizeof(int),
1605 .mode = 0644,
1606 .proc_handler = proc_do_defense_mode,
1607 },
1608#ifdef CONFIG_IP_VS_NFCT
1609 {
1610 .procname = "conntrack",
1611 .maxlen = sizeof(int),
1612 .mode = 0644,
1613 .proc_handler = &proc_dointvec,
1614 },
1615#endif
1616 {
1617 .procname = "secure_tcp",
1618 .maxlen = sizeof(int),
1619 .mode = 0644,
1620 .proc_handler = proc_do_defense_mode,
1621 },
1622 {
1623 .procname = "snat_reroute",
1624 .maxlen = sizeof(int),
1625 .mode = 0644,
1626 .proc_handler = &proc_dointvec,
1627 },
1628 {
1629 .procname = "sync_version",
1630 .maxlen = sizeof(int),
1631 .mode = 0644,
1632 .proc_handler = &proc_do_sync_mode,
1633 },
1634 {
1635 .procname = "cache_bypass",
1636 .maxlen = sizeof(int),
1637 .mode = 0644,
1638 .proc_handler = proc_dointvec,
1639 },
1640 {
1641 .procname = "expire_nodest_conn",
1642 .maxlen = sizeof(int),
1643 .mode = 0644,
1644 .proc_handler = proc_dointvec,
1645 },
1646 {
1647 .procname = "expire_quiescent_template",
1648 .maxlen = sizeof(int),
1649 .mode = 0644,
1650 .proc_handler = proc_dointvec,
1651 },
1652 {
1653 .procname = "sync_threshold",
1654 .maxlen =
1655 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1656 .mode = 0644,
1657 .proc_handler = proc_do_sync_threshold,
1658 },
1659 {
1660 .procname = "nat_icmp_send",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661 .maxlen = sizeof(int),
1662 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001663 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664 },
1665#ifdef CONFIG_IP_VS_DEBUG
1666 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667 .procname = "debug_level",
1668 .data = &sysctl_ip_vs_debug_level,
1669 .maxlen = sizeof(int),
1670 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001671 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672 },
1673#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674#if 0
1675 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 .procname = "timeout_established",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1678 .maxlen = sizeof(int),
1679 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001680 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 },
1682 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 .procname = "timeout_synsent",
1684 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1685 .maxlen = sizeof(int),
1686 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001687 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 },
1689 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690 .procname = "timeout_synrecv",
1691 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1692 .maxlen = sizeof(int),
1693 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001694 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 },
1696 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 .procname = "timeout_finwait",
1698 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1699 .maxlen = sizeof(int),
1700 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001701 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 },
1703 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704 .procname = "timeout_timewait",
1705 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1706 .maxlen = sizeof(int),
1707 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001708 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 },
1710 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 .procname = "timeout_close",
1712 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1713 .maxlen = sizeof(int),
1714 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001715 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716 },
1717 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718 .procname = "timeout_closewait",
1719 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1720 .maxlen = sizeof(int),
1721 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001722 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 },
1724 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 .procname = "timeout_lastack",
1726 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1727 .maxlen = sizeof(int),
1728 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001729 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730 },
1731 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732 .procname = "timeout_listen",
1733 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1734 .maxlen = sizeof(int),
1735 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001736 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737 },
1738 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001739 .procname = "timeout_synack",
1740 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1741 .maxlen = sizeof(int),
1742 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001743 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744 },
1745 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746 .procname = "timeout_udp",
1747 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1748 .maxlen = sizeof(int),
1749 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001750 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001751 },
1752 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 .procname = "timeout_icmp",
1754 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1755 .maxlen = sizeof(int),
1756 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001757 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758 },
1759#endif
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001760 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761};
1762
Sven Wegener5587da52008-08-10 18:24:40 +00001763const struct ctl_path net_vs_ctl_path[] = {
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001764 { .procname = "net", },
1765 { .procname = "ipv4", },
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001766 { .procname = "vs", },
1767 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001769EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Simon Horman14e40542011-02-04 18:33:02 +09001770#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772#ifdef CONFIG_PROC_FS
1773
1774struct ip_vs_iter {
Hans Schillstromfc723252011-01-03 14:44:43 +01001775 struct seq_net_private p; /* Do not move this, netns depends upon it*/
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776 struct list_head *table;
1777 int bucket;
1778};
1779
1780/*
1781 * Write the contents of the VS rule table to a PROCfs file.
1782 * (It is kept just for backward compatibility)
1783 */
1784static inline const char *ip_vs_fwd_name(unsigned flags)
1785{
1786 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1787 case IP_VS_CONN_F_LOCALNODE:
1788 return "Local";
1789 case IP_VS_CONN_F_TUNNEL:
1790 return "Tunnel";
1791 case IP_VS_CONN_F_DROUTE:
1792 return "Route";
1793 default:
1794 return "Masq";
1795 }
1796}
1797
1798
1799/* Get the Nth entry in the two lists */
1800static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1801{
Hans Schillstromfc723252011-01-03 14:44:43 +01001802 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001803 struct ip_vs_iter *iter = seq->private;
1804 int idx;
1805 struct ip_vs_service *svc;
1806
1807 /* look in hash by protocol */
1808 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1809 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001810 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811 iter->table = ip_vs_svc_table;
1812 iter->bucket = idx;
1813 return svc;
1814 }
1815 }
1816 }
1817
1818 /* keep looking in fwmark */
1819 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1820 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001821 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822 iter->table = ip_vs_svc_fwm_table;
1823 iter->bucket = idx;
1824 return svc;
1825 }
1826 }
1827 }
1828
1829 return NULL;
1830}
1831
1832static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
Simon Horman563e94f2008-09-17 10:10:42 +10001833__acquires(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834{
1835
1836 read_lock_bh(&__ip_vs_svc_lock);
1837 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1838}
1839
1840
1841static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1842{
1843 struct list_head *e;
1844 struct ip_vs_iter *iter;
1845 struct ip_vs_service *svc;
1846
1847 ++*pos;
1848 if (v == SEQ_START_TOKEN)
1849 return ip_vs_info_array(seq,0);
1850
1851 svc = v;
1852 iter = seq->private;
1853
1854 if (iter->table == ip_vs_svc_table) {
1855 /* next service in table hashed by protocol */
1856 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1857 return list_entry(e, struct ip_vs_service, s_list);
1858
1859
1860 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1861 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1862 s_list) {
1863 return svc;
1864 }
1865 }
1866
1867 iter->table = ip_vs_svc_fwm_table;
1868 iter->bucket = -1;
1869 goto scan_fwmark;
1870 }
1871
1872 /* next service in hashed by fwmark */
1873 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1874 return list_entry(e, struct ip_vs_service, f_list);
1875
1876 scan_fwmark:
1877 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1878 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1879 f_list)
1880 return svc;
1881 }
1882
1883 return NULL;
1884}
1885
1886static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
Simon Horman563e94f2008-09-17 10:10:42 +10001887__releases(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888{
1889 read_unlock_bh(&__ip_vs_svc_lock);
1890}
1891
1892
1893static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1894{
1895 if (v == SEQ_START_TOKEN) {
1896 seq_printf(seq,
1897 "IP Virtual Server version %d.%d.%d (size=%d)\n",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01001898 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001899 seq_puts(seq,
1900 "Prot LocalAddress:Port Scheduler Flags\n");
1901 seq_puts(seq,
1902 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1903 } else {
1904 const struct ip_vs_service *svc = v;
1905 const struct ip_vs_iter *iter = seq->private;
1906 const struct ip_vs_dest *dest;
1907
Vince Busam667a5f12008-09-02 15:55:49 +02001908 if (iter->table == ip_vs_svc_table) {
1909#ifdef CONFIG_IP_VS_IPV6
1910 if (svc->af == AF_INET6)
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001911 seq_printf(seq, "%s [%pI6]:%04X %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001912 ip_vs_proto_name(svc->protocol),
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001913 &svc->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001914 ntohs(svc->port),
1915 svc->scheduler->name);
1916 else
1917#endif
Nick Chalk26ec0372010-06-22 08:07:01 +02001918 seq_printf(seq, "%s %08X:%04X %s %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001919 ip_vs_proto_name(svc->protocol),
1920 ntohl(svc->addr.ip),
1921 ntohs(svc->port),
Nick Chalk26ec0372010-06-22 08:07:01 +02001922 svc->scheduler->name,
1923 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001924 } else {
Nick Chalk26ec0372010-06-22 08:07:01 +02001925 seq_printf(seq, "FWM %08X %s %s",
1926 svc->fwmark, svc->scheduler->name,
1927 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001928 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001929
1930 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1931 seq_printf(seq, "persistent %d %08X\n",
1932 svc->timeout,
1933 ntohl(svc->netmask));
1934 else
1935 seq_putc(seq, '\n');
1936
1937 list_for_each_entry(dest, &svc->destinations, n_list) {
Vince Busam667a5f12008-09-02 15:55:49 +02001938#ifdef CONFIG_IP_VS_IPV6
1939 if (dest->af == AF_INET6)
1940 seq_printf(seq,
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001941 " -> [%pI6]:%04X"
Vince Busam667a5f12008-09-02 15:55:49 +02001942 " %-7s %-6d %-10d %-10d\n",
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001943 &dest->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001944 ntohs(dest->port),
1945 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1946 atomic_read(&dest->weight),
1947 atomic_read(&dest->activeconns),
1948 atomic_read(&dest->inactconns));
1949 else
1950#endif
1951 seq_printf(seq,
1952 " -> %08X:%04X "
1953 "%-7s %-6d %-10d %-10d\n",
1954 ntohl(dest->addr.ip),
1955 ntohs(dest->port),
1956 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1957 atomic_read(&dest->weight),
1958 atomic_read(&dest->activeconns),
1959 atomic_read(&dest->inactconns));
1960
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961 }
1962 }
1963 return 0;
1964}
1965
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001966static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967 .start = ip_vs_info_seq_start,
1968 .next = ip_vs_info_seq_next,
1969 .stop = ip_vs_info_seq_stop,
1970 .show = ip_vs_info_seq_show,
1971};
1972
1973static int ip_vs_info_open(struct inode *inode, struct file *file)
1974{
Hans Schillstromfc723252011-01-03 14:44:43 +01001975 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001976 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001977}
1978
Arjan van de Ven9a321442007-02-12 00:55:35 -08001979static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001980 .owner = THIS_MODULE,
1981 .open = ip_vs_info_open,
1982 .read = seq_read,
1983 .llseek = seq_lseek,
1984 .release = seq_release_private,
1985};
1986
1987#endif
1988
Linus Torvalds1da177e2005-04-16 15:20:36 -07001989#ifdef CONFIG_PROC_FS
1990static int ip_vs_stats_show(struct seq_file *seq, void *v)
1991{
Hans Schillstromb17fc992011-01-03 14:44:56 +01001992 struct net *net = seq_file_single_net(seq);
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02001993 struct ip_vs_stats_user show;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001994
1995/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1996 seq_puts(seq,
1997 " Total Incoming Outgoing Incoming Outgoing\n");
1998 seq_printf(seq,
1999 " Conns Packets Packets Bytes Bytes\n");
2000
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002001 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2002 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2003 show.inpkts, show.outpkts,
2004 (unsigned long long) show.inbytes,
2005 (unsigned long long) show.outbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006
2007/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2008 seq_puts(seq,
2009 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002010 seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2011 show.cps, show.inpps, show.outpps,
2012 show.inbps, show.outbps);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013
2014 return 0;
2015}
2016
2017static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2018{
Hans Schillstromfc723252011-01-03 14:44:43 +01002019 return single_open_net(inode, file, ip_vs_stats_show);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002020}
2021
Arjan van de Ven9a321442007-02-12 00:55:35 -08002022static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023 .owner = THIS_MODULE,
2024 .open = ip_vs_stats_seq_open,
2025 .read = seq_read,
2026 .llseek = seq_lseek,
2027 .release = single_release,
2028};
2029
Hans Schillstromb17fc992011-01-03 14:44:56 +01002030static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2031{
2032 struct net *net = seq_file_single_net(seq);
Julian Anastasov2a0751a2011-03-04 12:20:35 +02002033 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2034 struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
Julian Anastasovea9f22c2011-03-14 01:41:54 +02002035 struct ip_vs_stats_user rates;
Hans Schillstromb17fc992011-01-03 14:44:56 +01002036 int i;
2037
2038/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2039 seq_puts(seq,
2040 " Total Incoming Outgoing Incoming Outgoing\n");
2041 seq_printf(seq,
2042 "CPU Conns Packets Packets Bytes Bytes\n");
2043
2044 for_each_possible_cpu(i) {
Julian Anastasov2a0751a2011-03-04 12:20:35 +02002045 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2046 unsigned int start;
2047 __u64 inbytes, outbytes;
2048
2049 do {
2050 start = u64_stats_fetch_begin_bh(&u->syncp);
2051 inbytes = u->ustats.inbytes;
2052 outbytes = u->ustats.outbytes;
2053 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2054
Hans Schillstromb17fc992011-01-03 14:44:56 +01002055 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
Julian Anastasov2a0751a2011-03-04 12:20:35 +02002056 i, u->ustats.conns, u->ustats.inpkts,
2057 u->ustats.outpkts, (__u64)inbytes,
2058 (__u64)outbytes);
Hans Schillstromb17fc992011-01-03 14:44:56 +01002059 }
2060
2061 spin_lock_bh(&tot_stats->lock);
Julian Anastasovea9f22c2011-03-14 01:41:54 +02002062
Hans Schillstromb17fc992011-01-03 14:44:56 +01002063 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2064 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2065 tot_stats->ustats.outpkts,
2066 (unsigned long long) tot_stats->ustats.inbytes,
2067 (unsigned long long) tot_stats->ustats.outbytes);
2068
Julian Anastasovea9f22c2011-03-14 01:41:54 +02002069 ip_vs_read_estimator(&rates, tot_stats);
2070
2071 spin_unlock_bh(&tot_stats->lock);
2072
Hans Schillstromb17fc992011-01-03 14:44:56 +01002073/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2074 seq_puts(seq,
2075 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2076 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
Julian Anastasovea9f22c2011-03-14 01:41:54 +02002077 rates.cps,
2078 rates.inpps,
2079 rates.outpps,
2080 rates.inbps,
2081 rates.outbps);
Hans Schillstromb17fc992011-01-03 14:44:56 +01002082
2083 return 0;
2084}
2085
2086static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2087{
2088 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2089}
2090
2091static const struct file_operations ip_vs_stats_percpu_fops = {
2092 .owner = THIS_MODULE,
2093 .open = ip_vs_stats_percpu_seq_open,
2094 .read = seq_read,
2095 .llseek = seq_lseek,
2096 .release = single_release,
2097};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002098#endif
2099
2100/*
2101 * Set timeout values for tcp tcpfin udp in the timeout_table.
2102 */
Hans Schillstrom93304192011-01-03 14:44:51 +01002103static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104{
Changli Gao091bb342011-01-21 18:02:13 +08002105#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
Hans Schillstrom93304192011-01-03 14:44:51 +01002106 struct ip_vs_proto_data *pd;
Changli Gao091bb342011-01-21 18:02:13 +08002107#endif
Hans Schillstrom93304192011-01-03 14:44:51 +01002108
Linus Torvalds1da177e2005-04-16 15:20:36 -07002109 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2110 u->tcp_timeout,
2111 u->tcp_fin_timeout,
2112 u->udp_timeout);
2113
2114#ifdef CONFIG_IP_VS_PROTO_TCP
2115 if (u->tcp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002116 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2117 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118 = u->tcp_timeout * HZ;
2119 }
2120
2121 if (u->tcp_fin_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002122 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2123 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002124 = u->tcp_fin_timeout * HZ;
2125 }
2126#endif
2127
2128#ifdef CONFIG_IP_VS_PROTO_UDP
2129 if (u->udp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002130 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2131 pd->timeout_table[IP_VS_UDP_S_NORMAL]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132 = u->udp_timeout * HZ;
2133 }
2134#endif
2135 return 0;
2136}
2137
2138
/*
 *	Helpers for the IP_VS_SO_SET_* sockopt commands.
 *
 *	SET_CMDID() turns a command number into a 0-based index relative
 *	to IP_VS_BASE_CTL; its argument is parenthesized so that
 *	expressions can be passed safely.  The *_ARG_LEN macros give the
 *	argument size for each family of commands, and MAX_ARG_LEN is
 *	defined as SVCDEST_ARG_LEN (service plus destination).
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2146
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002147static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002148 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2149 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2150 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2151 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2152 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2153 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2154 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2155 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2156 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2157 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2158 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2159};
2160
Julius Volzc860c6b2008-09-02 15:55:36 +02002161static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2162 struct ip_vs_service_user *usvc_compat)
2163{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002164 memset(usvc, 0, sizeof(*usvc));
2165
Julius Volzc860c6b2008-09-02 15:55:36 +02002166 usvc->af = AF_INET;
2167 usvc->protocol = usvc_compat->protocol;
2168 usvc->addr.ip = usvc_compat->addr;
2169 usvc->port = usvc_compat->port;
2170 usvc->fwmark = usvc_compat->fwmark;
2171
2172 /* Deep copy of sched_name is not needed here */
2173 usvc->sched_name = usvc_compat->sched_name;
2174
2175 usvc->flags = usvc_compat->flags;
2176 usvc->timeout = usvc_compat->timeout;
2177 usvc->netmask = usvc_compat->netmask;
2178}
2179
2180static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2181 struct ip_vs_dest_user *udest_compat)
2182{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002183 memset(udest, 0, sizeof(*udest));
2184
Julius Volzc860c6b2008-09-02 15:55:36 +02002185 udest->addr.ip = udest_compat->addr;
2186 udest->port = udest_compat->port;
2187 udest->conn_flags = udest_compat->conn_flags;
2188 udest->weight = udest_compat->weight;
2189 udest->u_threshold = udest_compat->u_threshold;
2190 udest->l_threshold = udest_compat->l_threshold;
2191}
2192
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193static int
2194do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2195{
Hans Schillstromfc723252011-01-03 14:44:43 +01002196 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002197 int ret;
2198 unsigned char arg[MAX_ARG_LEN];
Julius Volzc860c6b2008-09-02 15:55:36 +02002199 struct ip_vs_service_user *usvc_compat;
2200 struct ip_vs_service_user_kern usvc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002201 struct ip_vs_service *svc;
Julius Volzc860c6b2008-09-02 15:55:36 +02002202 struct ip_vs_dest_user *udest_compat;
2203 struct ip_vs_dest_user_kern udest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002204
2205 if (!capable(CAP_NET_ADMIN))
2206 return -EPERM;
2207
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002208 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2209 return -EINVAL;
2210 if (len < 0 || len > MAX_ARG_LEN)
2211 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212 if (len != set_arglen[SET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002213 pr_err("set_ctl: len %u != %u\n",
2214 len, set_arglen[SET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215 return -EINVAL;
2216 }
2217
2218 if (copy_from_user(arg, user, len) != 0)
2219 return -EFAULT;
2220
2221 /* increase the module use count */
2222 ip_vs_use_count_inc();
2223
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002224 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225 ret = -ERESTARTSYS;
2226 goto out_dec;
2227 }
2228
2229 if (cmd == IP_VS_SO_SET_FLUSH) {
2230 /* Flush the virtual service */
Hans Schillstromfc723252011-01-03 14:44:43 +01002231 ret = ip_vs_flush(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232 goto out_unlock;
2233 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2234 /* Set timeout values for (tcp tcpfin udp) */
Hans Schillstrom93304192011-01-03 14:44:51 +01002235 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002236 goto out_unlock;
2237 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2238 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002239 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2240 dm->syncid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002241 goto out_unlock;
2242 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2243 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002244 ret = stop_sync_thread(net, dm->state);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245 goto out_unlock;
2246 }
2247
Julius Volzc860c6b2008-09-02 15:55:36 +02002248 usvc_compat = (struct ip_vs_service_user *)arg;
2249 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2250
2251 /* We only use the new structs internally, so copy userspace compat
2252 * structs to extended internal versions */
2253 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2254 ip_vs_copy_udest_compat(&udest, udest_compat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002255
2256 if (cmd == IP_VS_SO_SET_ZERO) {
2257 /* if no service address is set, zero counters in all */
Julius Volzc860c6b2008-09-02 15:55:36 +02002258 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002259 ret = ip_vs_zero_all(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260 goto out_unlock;
2261 }
2262 }
2263
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +01002264 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2265 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2266 usvc.protocol != IPPROTO_SCTP) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002267 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2268 usvc.protocol, &usvc.addr.ip,
2269 ntohs(usvc.port), usvc.sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002270 ret = -EFAULT;
2271 goto out_unlock;
2272 }
2273
2274 /* Lookup the exact service by <protocol, addr, port> or fwmark */
Julius Volzc860c6b2008-09-02 15:55:36 +02002275 if (usvc.fwmark == 0)
Hans Schillstromfc723252011-01-03 14:44:43 +01002276 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002277 &usvc.addr, usvc.port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002279 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002280
2281 if (cmd != IP_VS_SO_SET_ADD
Julius Volzc860c6b2008-09-02 15:55:36 +02002282 && (svc == NULL || svc->protocol != usvc.protocol)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283 ret = -ESRCH;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002284 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285 }
2286
2287 switch (cmd) {
2288 case IP_VS_SO_SET_ADD:
2289 if (svc != NULL)
2290 ret = -EEXIST;
2291 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002292 ret = ip_vs_add_service(net, &usvc, &svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002293 break;
2294 case IP_VS_SO_SET_EDIT:
Julius Volzc860c6b2008-09-02 15:55:36 +02002295 ret = ip_vs_edit_service(svc, &usvc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296 break;
2297 case IP_VS_SO_SET_DEL:
2298 ret = ip_vs_del_service(svc);
2299 if (!ret)
2300 goto out_unlock;
2301 break;
2302 case IP_VS_SO_SET_ZERO:
2303 ret = ip_vs_zero_service(svc);
2304 break;
2305 case IP_VS_SO_SET_ADDDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002306 ret = ip_vs_add_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002307 break;
2308 case IP_VS_SO_SET_EDITDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002309 ret = ip_vs_edit_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002310 break;
2311 case IP_VS_SO_SET_DELDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002312 ret = ip_vs_del_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002313 break;
2314 default:
2315 ret = -EINVAL;
2316 }
2317
Linus Torvalds1da177e2005-04-16 15:20:36 -07002318 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002319 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002320 out_dec:
2321 /* decrease the module use count */
2322 ip_vs_use_count_dec();
2323
2324 return ret;
2325}
2326
2327
2328static void
Linus Torvalds1da177e2005-04-16 15:20:36 -07002329ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2330{
2331 dst->protocol = src->protocol;
Julius Volze7ade462008-09-02 15:55:33 +02002332 dst->addr = src->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002333 dst->port = src->port;
2334 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002335 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002336 dst->flags = src->flags;
2337 dst->timeout = src->timeout / HZ;
2338 dst->netmask = src->netmask;
2339 dst->num_dests = src->num_dests;
2340 ip_vs_copy_stats(&dst->stats, &src->stats);
2341}
2342
2343static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002344__ip_vs_get_service_entries(struct net *net,
2345 const struct ip_vs_get_services *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346 struct ip_vs_get_services __user *uptr)
2347{
2348 int idx, count=0;
2349 struct ip_vs_service *svc;
2350 struct ip_vs_service_entry entry;
2351 int ret = 0;
2352
2353 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2354 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002355 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002356 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002357 continue;
2358
Linus Torvalds1da177e2005-04-16 15:20:36 -07002359 if (count >= get->num_services)
2360 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002361 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002362 ip_vs_copy_service(&entry, svc);
2363 if (copy_to_user(&uptr->entrytable[count],
2364 &entry, sizeof(entry))) {
2365 ret = -EFAULT;
2366 goto out;
2367 }
2368 count++;
2369 }
2370 }
2371
2372 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2373 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002374 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002375 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002376 continue;
2377
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378 if (count >= get->num_services)
2379 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002380 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381 ip_vs_copy_service(&entry, svc);
2382 if (copy_to_user(&uptr->entrytable[count],
2383 &entry, sizeof(entry))) {
2384 ret = -EFAULT;
2385 goto out;
2386 }
2387 count++;
2388 }
2389 }
2390 out:
2391 return ret;
2392}
2393
2394static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002395__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002396 struct ip_vs_get_dests __user *uptr)
2397{
2398 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002399 union nf_inet_addr addr = { .ip = get->addr };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400 int ret = 0;
2401
2402 if (get->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002403 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002404 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002405 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002406 get->port);
Julius Volzb18610d2008-09-02 15:55:37 +02002407
Linus Torvalds1da177e2005-04-16 15:20:36 -07002408 if (svc) {
2409 int count = 0;
2410 struct ip_vs_dest *dest;
2411 struct ip_vs_dest_entry entry;
2412
2413 list_for_each_entry(dest, &svc->destinations, n_list) {
2414 if (count >= get->num_dests)
2415 break;
2416
Julius Volze7ade462008-09-02 15:55:33 +02002417 entry.addr = dest->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002418 entry.port = dest->port;
2419 entry.conn_flags = atomic_read(&dest->conn_flags);
2420 entry.weight = atomic_read(&dest->weight);
2421 entry.u_threshold = dest->u_threshold;
2422 entry.l_threshold = dest->l_threshold;
2423 entry.activeconns = atomic_read(&dest->activeconns);
2424 entry.inactconns = atomic_read(&dest->inactconns);
2425 entry.persistconns = atomic_read(&dest->persistconns);
2426 ip_vs_copy_stats(&entry.stats, &dest->stats);
2427 if (copy_to_user(&uptr->entrytable[count],
2428 &entry, sizeof(entry))) {
2429 ret = -EFAULT;
2430 break;
2431 }
2432 count++;
2433 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434 } else
2435 ret = -ESRCH;
2436 return ret;
2437}
2438
2439static inline void
Hans Schillstrom93304192011-01-03 14:44:51 +01002440__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441{
Changli Gao091bb342011-01-21 18:02:13 +08002442#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
Hans Schillstrom93304192011-01-03 14:44:51 +01002443 struct ip_vs_proto_data *pd;
Changli Gao091bb342011-01-21 18:02:13 +08002444#endif
Hans Schillstrom93304192011-01-03 14:44:51 +01002445
Linus Torvalds1da177e2005-04-16 15:20:36 -07002446#ifdef CONFIG_IP_VS_PROTO_TCP
Hans Schillstrom93304192011-01-03 14:44:51 +01002447 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2448 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2449 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450#endif
2451#ifdef CONFIG_IP_VS_PROTO_UDP
Hans Schillstrom93304192011-01-03 14:44:51 +01002452 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002453 u->udp_timeout =
Hans Schillstrom93304192011-01-03 14:44:51 +01002454 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002455#endif
2456}
2457
2458
2459#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2460#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2461#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2462#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2463#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2464#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2465#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2466
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002467static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2469 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2470 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2471 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2472 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2473 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2474 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2475};
2476
2477static int
2478do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2479{
2480 unsigned char arg[128];
2481 int ret = 0;
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002482 unsigned int copylen;
Hans Schillstromfc723252011-01-03 14:44:43 +01002483 struct net *net = sock_net(sk);
Hans Schillstromf1313152011-01-03 14:44:55 +01002484 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002485
Hans Schillstromfc723252011-01-03 14:44:43 +01002486 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002487 if (!capable(CAP_NET_ADMIN))
2488 return -EPERM;
2489
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002490 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2491 return -EINVAL;
2492
Linus Torvalds1da177e2005-04-16 15:20:36 -07002493 if (*len < get_arglen[GET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002494 pr_err("get_ctl: len %u < %u\n",
2495 *len, get_arglen[GET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002496 return -EINVAL;
2497 }
2498
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002499 copylen = get_arglen[GET_CMDID(cmd)];
2500 if (copylen > 128)
2501 return -EINVAL;
2502
2503 if (copy_from_user(arg, user, copylen) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002504 return -EFAULT;
2505
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002506 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002507 return -ERESTARTSYS;
2508
2509 switch (cmd) {
2510 case IP_VS_SO_GET_VERSION:
2511 {
2512 char buf[64];
2513
2514 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002515 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002516 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2517 ret = -EFAULT;
2518 goto out;
2519 }
2520 *len = strlen(buf)+1;
2521 }
2522 break;
2523
2524 case IP_VS_SO_GET_INFO:
2525 {
2526 struct ip_vs_getinfo info;
2527 info.version = IP_VS_VERSION_CODE;
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002528 info.size = ip_vs_conn_tab_size;
Hans Schillstroma0840e22011-01-03 14:44:58 +01002529 info.num_services = ipvs->num_services;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530 if (copy_to_user(user, &info, sizeof(info)) != 0)
2531 ret = -EFAULT;
2532 }
2533 break;
2534
2535 case IP_VS_SO_GET_SERVICES:
2536 {
2537 struct ip_vs_get_services *get;
2538 int size;
2539
2540 get = (struct ip_vs_get_services *)arg;
2541 size = sizeof(*get) +
2542 sizeof(struct ip_vs_service_entry) * get->num_services;
2543 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002544 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002545 ret = -EINVAL;
2546 goto out;
2547 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002548 ret = __ip_vs_get_service_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549 }
2550 break;
2551
2552 case IP_VS_SO_GET_SERVICE:
2553 {
2554 struct ip_vs_service_entry *entry;
2555 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002556 union nf_inet_addr addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002557
2558 entry = (struct ip_vs_service_entry *)arg;
Julius Volzb18610d2008-09-02 15:55:37 +02002559 addr.ip = entry->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560 if (entry->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002561 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002563 svc = __ip_vs_service_find(net, AF_INET,
2564 entry->protocol, &addr,
2565 entry->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002566 if (svc) {
2567 ip_vs_copy_service(entry, svc);
2568 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2569 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002570 } else
2571 ret = -ESRCH;
2572 }
2573 break;
2574
2575 case IP_VS_SO_GET_DESTS:
2576 {
2577 struct ip_vs_get_dests *get;
2578 int size;
2579
2580 get = (struct ip_vs_get_dests *)arg;
2581 size = sizeof(*get) +
2582 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2583 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002584 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585 ret = -EINVAL;
2586 goto out;
2587 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002588 ret = __ip_vs_get_dest_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002589 }
2590 break;
2591
2592 case IP_VS_SO_GET_TIMEOUT:
2593 {
2594 struct ip_vs_timeout_user t;
2595
Hans Schillstrom93304192011-01-03 14:44:51 +01002596 __ip_vs_get_timeouts(net, &t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002597 if (copy_to_user(user, &t, sizeof(t)) != 0)
2598 ret = -EFAULT;
2599 }
2600 break;
2601
2602 case IP_VS_SO_GET_DAEMON:
2603 {
2604 struct ip_vs_daemon_user d[2];
2605
2606 memset(&d, 0, sizeof(d));
Hans Schillstromf1313152011-01-03 14:44:55 +01002607 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002608 d[0].state = IP_VS_STATE_MASTER;
Hans Schillstromf1313152011-01-03 14:44:55 +01002609 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2610 sizeof(d[0].mcast_ifn));
2611 d[0].syncid = ipvs->master_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002612 }
Hans Schillstromf1313152011-01-03 14:44:55 +01002613 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002614 d[1].state = IP_VS_STATE_BACKUP;
Hans Schillstromf1313152011-01-03 14:44:55 +01002615 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2616 sizeof(d[1].mcast_ifn));
2617 d[1].syncid = ipvs->backup_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002618 }
2619 if (copy_to_user(user, &d, sizeof(d)) != 0)
2620 ret = -EFAULT;
2621 }
2622 break;
2623
2624 default:
2625 ret = -EINVAL;
2626 }
2627
2628 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002629 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630 return ret;
2631}
2632
2633
2634static struct nf_sockopt_ops ip_vs_sockopts = {
2635 .pf = PF_INET,
2636 .set_optmin = IP_VS_BASE_CTL,
2637 .set_optmax = IP_VS_SO_SET_MAX+1,
2638 .set = do_ip_vs_set_ctl,
2639 .get_optmin = IP_VS_BASE_CTL,
2640 .get_optmax = IP_VS_SO_GET_MAX+1,
2641 .get = do_ip_vs_get_ctl,
Neil Horman16fcec32007-09-11 11:28:26 +02002642 .owner = THIS_MODULE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002643};
2644
Julius Volz9a812192008-08-14 14:08:44 +02002645/*
2646 * Generic Netlink interface
2647 */
2648
2649/* IPVS genetlink family */
2650static struct genl_family ip_vs_genl_family = {
2651 .id = GENL_ID_GENERATE,
2652 .hdrsize = 0,
2653 .name = IPVS_GENL_NAME,
2654 .version = IPVS_GENL_VERSION,
2655 .maxattr = IPVS_CMD_MAX,
Hans Schillstromc6d2d442011-01-03 14:45:03 +01002656 .netnsok = true, /* Make ipvsadm to work on netns */
Julius Volz9a812192008-08-14 14:08:44 +02002657};
2658
2659/* Policy used for first-level command attributes */
2660static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2661 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2662 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2663 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2664 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2665 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2666 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2667};
2668
2669/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2670static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2671 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2672 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2673 .len = IP_VS_IFNAME_MAXLEN },
2674 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2675};
2676
2677/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2678static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2679 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2680 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2681 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2682 .len = sizeof(union nf_inet_addr) },
2683 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2684 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2685 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2686 .len = IP_VS_SCHEDNAME_MAXLEN },
Simon Horman0d1e71b2010-08-22 21:37:54 +09002687 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2688 .len = IP_VS_PENAME_MAXLEN },
Julius Volz9a812192008-08-14 14:08:44 +02002689 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2690 .len = sizeof(struct ip_vs_flags) },
2691 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2692 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2693 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2694};
2695
2696/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2697static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2698 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2699 .len = sizeof(union nf_inet_addr) },
2700 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2701 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2702 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2703 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2704 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2705 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2706 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2707 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2708 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2709};
2710
2711static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2712 struct ip_vs_stats *stats)
2713{
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002714 struct ip_vs_stats_user ustats;
Julius Volz9a812192008-08-14 14:08:44 +02002715 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2716 if (!nl_stats)
2717 return -EMSGSIZE;
2718
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002719 ip_vs_copy_stats(&ustats, stats);
Julius Volz9a812192008-08-14 14:08:44 +02002720
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002721 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2722 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2723 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2724 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2725 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2726 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2727 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2728 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2729 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2730 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
Julius Volz9a812192008-08-14 14:08:44 +02002731
2732 nla_nest_end(skb, nl_stats);
2733
2734 return 0;
2735
2736nla_put_failure:
Julius Volz9a812192008-08-14 14:08:44 +02002737 nla_nest_cancel(skb, nl_stats);
2738 return -EMSGSIZE;
2739}
2740
2741static int ip_vs_genl_fill_service(struct sk_buff *skb,
2742 struct ip_vs_service *svc)
2743{
2744 struct nlattr *nl_service;
2745 struct ip_vs_flags flags = { .flags = svc->flags,
2746 .mask = ~0 };
2747
2748 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2749 if (!nl_service)
2750 return -EMSGSIZE;
2751
Julius Volzf94fd042008-09-02 15:55:55 +02002752 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
Julius Volz9a812192008-08-14 14:08:44 +02002753
2754 if (svc->fwmark) {
2755 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2756 } else {
2757 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2758 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2759 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2760 }
2761
2762 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002763 if (svc->pe)
2764 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
Julius Volz9a812192008-08-14 14:08:44 +02002765 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2766 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2767 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2768
2769 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2770 goto nla_put_failure;
2771
2772 nla_nest_end(skb, nl_service);
2773
2774 return 0;
2775
2776nla_put_failure:
2777 nla_nest_cancel(skb, nl_service);
2778 return -EMSGSIZE;
2779}
2780
2781static int ip_vs_genl_dump_service(struct sk_buff *skb,
2782 struct ip_vs_service *svc,
2783 struct netlink_callback *cb)
2784{
2785 void *hdr;
2786
2787 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2788 &ip_vs_genl_family, NLM_F_MULTI,
2789 IPVS_CMD_NEW_SERVICE);
2790 if (!hdr)
2791 return -EMSGSIZE;
2792
2793 if (ip_vs_genl_fill_service(skb, svc) < 0)
2794 goto nla_put_failure;
2795
2796 return genlmsg_end(skb, hdr);
2797
2798nla_put_failure:
2799 genlmsg_cancel(skb, hdr);
2800 return -EMSGSIZE;
2801}
2802
2803static int ip_vs_genl_dump_services(struct sk_buff *skb,
2804 struct netlink_callback *cb)
2805{
2806 int idx = 0, i;
2807 int start = cb->args[0];
2808 struct ip_vs_service *svc;
Hans Schillstromfc723252011-01-03 14:44:43 +01002809 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002810
2811 mutex_lock(&__ip_vs_mutex);
2812 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2813 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002814 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002815 continue;
2816 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2817 idx--;
2818 goto nla_put_failure;
2819 }
2820 }
2821 }
2822
2823 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2824 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002825 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002826 continue;
2827 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2828 idx--;
2829 goto nla_put_failure;
2830 }
2831 }
2832 }
2833
2834nla_put_failure:
2835 mutex_unlock(&__ip_vs_mutex);
2836 cb->args[0] = idx;
2837
2838 return skb->len;
2839}
2840
Hans Schillstromfc723252011-01-03 14:44:43 +01002841static int ip_vs_genl_parse_service(struct net *net,
2842 struct ip_vs_service_user_kern *usvc,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002843 struct nlattr *nla, int full_entry,
2844 struct ip_vs_service **ret_svc)
Julius Volz9a812192008-08-14 14:08:44 +02002845{
2846 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2847 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002848 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002849
2850 /* Parse mandatory identifying service fields first */
2851 if (nla == NULL ||
2852 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2853 return -EINVAL;
2854
2855 nla_af = attrs[IPVS_SVC_ATTR_AF];
2856 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2857 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2858 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2859 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2860
2861 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2862 return -EINVAL;
2863
Simon Horman258c8892009-12-15 17:01:25 +01002864 memset(usvc, 0, sizeof(*usvc));
2865
Julius Volzc860c6b2008-09-02 15:55:36 +02002866 usvc->af = nla_get_u16(nla_af);
Julius Volzf94fd042008-09-02 15:55:55 +02002867#ifdef CONFIG_IP_VS_IPV6
2868 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2869#else
2870 if (usvc->af != AF_INET)
2871#endif
Julius Volz9a812192008-08-14 14:08:44 +02002872 return -EAFNOSUPPORT;
2873
2874 if (nla_fwmark) {
2875 usvc->protocol = IPPROTO_TCP;
2876 usvc->fwmark = nla_get_u32(nla_fwmark);
2877 } else {
2878 usvc->protocol = nla_get_u16(nla_protocol);
2879 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2880 usvc->port = nla_get_u16(nla_port);
2881 usvc->fwmark = 0;
2882 }
2883
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002884 if (usvc->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002885 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002886 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002887 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002888 &usvc->addr, usvc->port);
2889 *ret_svc = svc;
2890
Julius Volz9a812192008-08-14 14:08:44 +02002891 /* If a full entry was requested, check for the additional fields */
2892 if (full_entry) {
Simon Horman0d1e71b2010-08-22 21:37:54 +09002893 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
Julius Volz9a812192008-08-14 14:08:44 +02002894 *nla_netmask;
2895 struct ip_vs_flags flags;
Julius Volz9a812192008-08-14 14:08:44 +02002896
2897 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
Simon Horman0d1e71b2010-08-22 21:37:54 +09002898 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
Julius Volz9a812192008-08-14 14:08:44 +02002899 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2900 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2901 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2902
2903 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2904 return -EINVAL;
2905
2906 nla_memcpy(&flags, nla_flags, sizeof(flags));
2907
2908 /* prefill flags from service if it already exists */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002909 if (svc)
Julius Volz9a812192008-08-14 14:08:44 +02002910 usvc->flags = svc->flags;
Julius Volz9a812192008-08-14 14:08:44 +02002911
2912 /* set new flags from userland */
2913 usvc->flags = (usvc->flags & ~flags.mask) |
2914 (flags.flags & flags.mask);
Julius Volzc860c6b2008-09-02 15:55:36 +02002915 usvc->sched_name = nla_data(nla_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002916 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
Julius Volz9a812192008-08-14 14:08:44 +02002917 usvc->timeout = nla_get_u32(nla_timeout);
2918 usvc->netmask = nla_get_u32(nla_netmask);
2919 }
2920
2921 return 0;
2922}
2923
Hans Schillstromfc723252011-01-03 14:44:43 +01002924static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2925 struct nlattr *nla)
Julius Volz9a812192008-08-14 14:08:44 +02002926{
Julius Volzc860c6b2008-09-02 15:55:36 +02002927 struct ip_vs_service_user_kern usvc;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002928 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002929 int ret;
2930
Hans Schillstromfc723252011-01-03 14:44:43 +01002931 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002932 return ret ? ERR_PTR(ret) : svc;
Julius Volz9a812192008-08-14 14:08:44 +02002933}
2934
2935static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2936{
2937 struct nlattr *nl_dest;
2938
2939 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2940 if (!nl_dest)
2941 return -EMSGSIZE;
2942
2943 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2944 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2945
2946 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2947 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2948 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2949 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2950 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2951 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2952 atomic_read(&dest->activeconns));
2953 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2954 atomic_read(&dest->inactconns));
2955 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2956 atomic_read(&dest->persistconns));
2957
2958 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2959 goto nla_put_failure;
2960
2961 nla_nest_end(skb, nl_dest);
2962
2963 return 0;
2964
2965nla_put_failure:
2966 nla_nest_cancel(skb, nl_dest);
2967 return -EMSGSIZE;
2968}
2969
2970static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2971 struct netlink_callback *cb)
2972{
2973 void *hdr;
2974
2975 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2976 &ip_vs_genl_family, NLM_F_MULTI,
2977 IPVS_CMD_NEW_DEST);
2978 if (!hdr)
2979 return -EMSGSIZE;
2980
2981 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2982 goto nla_put_failure;
2983
2984 return genlmsg_end(skb, hdr);
2985
2986nla_put_failure:
2987 genlmsg_cancel(skb, hdr);
2988 return -EMSGSIZE;
2989}
2990
2991static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2992 struct netlink_callback *cb)
2993{
2994 int idx = 0;
2995 int start = cb->args[0];
2996 struct ip_vs_service *svc;
2997 struct ip_vs_dest *dest;
2998 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
Hans Schillstroma0840e22011-01-03 14:44:58 +01002999 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02003000
3001 mutex_lock(&__ip_vs_mutex);
3002
3003 /* Try to find the service for which to dump destinations */
3004 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3005 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3006 goto out_err;
3007
Hans Schillstroma0840e22011-01-03 14:44:58 +01003008
Hans Schillstromfc723252011-01-03 14:44:43 +01003009 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003010 if (IS_ERR(svc) || svc == NULL)
3011 goto out_err;
3012
3013 /* Dump the destinations */
3014 list_for_each_entry(dest, &svc->destinations, n_list) {
3015 if (++idx <= start)
3016 continue;
3017 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3018 idx--;
3019 goto nla_put_failure;
3020 }
3021 }
3022
3023nla_put_failure:
3024 cb->args[0] = idx;
Julius Volz9a812192008-08-14 14:08:44 +02003025
3026out_err:
3027 mutex_unlock(&__ip_vs_mutex);
3028
3029 return skb->len;
3030}
3031
Julius Volzc860c6b2008-09-02 15:55:36 +02003032static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
Julius Volz9a812192008-08-14 14:08:44 +02003033 struct nlattr *nla, int full_entry)
3034{
3035 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3036 struct nlattr *nla_addr, *nla_port;
3037
3038 /* Parse mandatory identifying destination fields first */
3039 if (nla == NULL ||
3040 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3041 return -EINVAL;
3042
3043 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3044 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3045
3046 if (!(nla_addr && nla_port))
3047 return -EINVAL;
3048
Simon Horman258c8892009-12-15 17:01:25 +01003049 memset(udest, 0, sizeof(*udest));
3050
Julius Volz9a812192008-08-14 14:08:44 +02003051 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3052 udest->port = nla_get_u16(nla_port);
3053
3054 /* If a full entry was requested, check for the additional fields */
3055 if (full_entry) {
3056 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3057 *nla_l_thresh;
3058
3059 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3060 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3061 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3062 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3063
3064 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3065 return -EINVAL;
3066
3067 udest->conn_flags = nla_get_u32(nla_fwd)
3068 & IP_VS_CONN_F_FWD_MASK;
3069 udest->weight = nla_get_u32(nla_weight);
3070 udest->u_threshold = nla_get_u32(nla_u_thresh);
3071 udest->l_threshold = nla_get_u32(nla_l_thresh);
3072 }
3073
3074 return 0;
3075}
3076
3077static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3078 const char *mcast_ifn, __be32 syncid)
3079{
3080 struct nlattr *nl_daemon;
3081
3082 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3083 if (!nl_daemon)
3084 return -EMSGSIZE;
3085
3086 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3087 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3088 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3089
3090 nla_nest_end(skb, nl_daemon);
3091
3092 return 0;
3093
3094nla_put_failure:
3095 nla_nest_cancel(skb, nl_daemon);
3096 return -EMSGSIZE;
3097}
3098
3099static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3100 const char *mcast_ifn, __be32 syncid,
3101 struct netlink_callback *cb)
3102{
3103 void *hdr;
3104 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3105 &ip_vs_genl_family, NLM_F_MULTI,
3106 IPVS_CMD_NEW_DAEMON);
3107 if (!hdr)
3108 return -EMSGSIZE;
3109
3110 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3111 goto nla_put_failure;
3112
3113 return genlmsg_end(skb, hdr);
3114
3115nla_put_failure:
3116 genlmsg_cancel(skb, hdr);
3117 return -EMSGSIZE;
3118}
3119
3120static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3121 struct netlink_callback *cb)
3122{
Hans Schillstroma09d1972011-04-04 15:25:18 +02003123 struct net *net = skb_sknet(skb);
Hans Schillstromf1313152011-01-03 14:44:55 +01003124 struct netns_ipvs *ipvs = net_ipvs(net);
3125
Julius Volz9a812192008-08-14 14:08:44 +02003126 mutex_lock(&__ip_vs_mutex);
Hans Schillstromf1313152011-01-03 14:44:55 +01003127 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
Julius Volz9a812192008-08-14 14:08:44 +02003128 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
Hans Schillstromf1313152011-01-03 14:44:55 +01003129 ipvs->master_mcast_ifn,
3130 ipvs->master_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003131 goto nla_put_failure;
3132
3133 cb->args[0] = 1;
3134 }
3135
Hans Schillstromf1313152011-01-03 14:44:55 +01003136 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
Julius Volz9a812192008-08-14 14:08:44 +02003137 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
Hans Schillstromf1313152011-01-03 14:44:55 +01003138 ipvs->backup_mcast_ifn,
3139 ipvs->backup_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003140 goto nla_put_failure;
3141
3142 cb->args[1] = 1;
3143 }
3144
3145nla_put_failure:
3146 mutex_unlock(&__ip_vs_mutex);
3147
3148 return skb->len;
3149}
3150
Hans Schillstromf1313152011-01-03 14:44:55 +01003151static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003152{
3153 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3154 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3155 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3156 return -EINVAL;
3157
Hans Schillstromf1313152011-01-03 14:44:55 +01003158 return start_sync_thread(net,
3159 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
Julius Volz9a812192008-08-14 14:08:44 +02003160 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3161 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3162}
3163
Hans Schillstromf1313152011-01-03 14:44:55 +01003164static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003165{
3166 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3167 return -EINVAL;
3168
Hans Schillstromf1313152011-01-03 14:44:55 +01003169 return stop_sync_thread(net,
3170 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
Julius Volz9a812192008-08-14 14:08:44 +02003171}
3172
Hans Schillstrom93304192011-01-03 14:44:51 +01003173static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003174{
3175 struct ip_vs_timeout_user t;
3176
Hans Schillstrom93304192011-01-03 14:44:51 +01003177 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003178
3179 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3180 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3181
3182 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3183 t.tcp_fin_timeout =
3184 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3185
3186 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3187 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3188
Hans Schillstrom93304192011-01-03 14:44:51 +01003189 return ip_vs_set_timeout(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003190}
3191
3192static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3193{
3194 struct ip_vs_service *svc = NULL;
Julius Volzc860c6b2008-09-02 15:55:36 +02003195 struct ip_vs_service_user_kern usvc;
3196 struct ip_vs_dest_user_kern udest;
Julius Volz9a812192008-08-14 14:08:44 +02003197 int ret = 0, cmd;
3198 int need_full_svc = 0, need_full_dest = 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003199 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003200 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003201
Hans Schillstromfc723252011-01-03 14:44:43 +01003202 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003203 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003204 cmd = info->genlhdr->cmd;
3205
3206 mutex_lock(&__ip_vs_mutex);
3207
3208 if (cmd == IPVS_CMD_FLUSH) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003209 ret = ip_vs_flush(net);
Julius Volz9a812192008-08-14 14:08:44 +02003210 goto out;
3211 } else if (cmd == IPVS_CMD_SET_CONFIG) {
Hans Schillstrom93304192011-01-03 14:44:51 +01003212 ret = ip_vs_genl_set_config(net, info->attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003213 goto out;
3214 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3215 cmd == IPVS_CMD_DEL_DAEMON) {
3216
3217 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3218
3219 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3220 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3221 info->attrs[IPVS_CMD_ATTR_DAEMON],
3222 ip_vs_daemon_policy)) {
3223 ret = -EINVAL;
3224 goto out;
3225 }
3226
3227 if (cmd == IPVS_CMD_NEW_DAEMON)
Hans Schillstromf1313152011-01-03 14:44:55 +01003228 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003229 else
Hans Schillstromf1313152011-01-03 14:44:55 +01003230 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003231 goto out;
3232 } else if (cmd == IPVS_CMD_ZERO &&
3233 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003234 ret = ip_vs_zero_all(net);
Julius Volz9a812192008-08-14 14:08:44 +02003235 goto out;
3236 }
3237
3238 /* All following commands require a service argument, so check if we
3239 * received a valid one. We need a full service specification when
3240 * adding / editing a service. Only identifying members otherwise. */
3241 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3242 need_full_svc = 1;
3243
Hans Schillstromfc723252011-01-03 14:44:43 +01003244 ret = ip_vs_genl_parse_service(net, &usvc,
Julius Volz9a812192008-08-14 14:08:44 +02003245 info->attrs[IPVS_CMD_ATTR_SERVICE],
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003246 need_full_svc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003247 if (ret)
3248 goto out;
3249
Julius Volz9a812192008-08-14 14:08:44 +02003250 /* Unless we're adding a new service, the service must already exist */
3251 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3252 ret = -ESRCH;
3253 goto out;
3254 }
3255
3256 /* Destination commands require a valid destination argument. For
3257 * adding / editing a destination, we need a full destination
3258 * specification. */
3259 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3260 cmd == IPVS_CMD_DEL_DEST) {
3261 if (cmd != IPVS_CMD_DEL_DEST)
3262 need_full_dest = 1;
3263
3264 ret = ip_vs_genl_parse_dest(&udest,
3265 info->attrs[IPVS_CMD_ATTR_DEST],
3266 need_full_dest);
3267 if (ret)
3268 goto out;
3269 }
3270
3271 switch (cmd) {
3272 case IPVS_CMD_NEW_SERVICE:
3273 if (svc == NULL)
Hans Schillstromfc723252011-01-03 14:44:43 +01003274 ret = ip_vs_add_service(net, &usvc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003275 else
3276 ret = -EEXIST;
3277 break;
3278 case IPVS_CMD_SET_SERVICE:
3279 ret = ip_vs_edit_service(svc, &usvc);
3280 break;
3281 case IPVS_CMD_DEL_SERVICE:
3282 ret = ip_vs_del_service(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003283 /* do not use svc, it can be freed */
Julius Volz9a812192008-08-14 14:08:44 +02003284 break;
3285 case IPVS_CMD_NEW_DEST:
3286 ret = ip_vs_add_dest(svc, &udest);
3287 break;
3288 case IPVS_CMD_SET_DEST:
3289 ret = ip_vs_edit_dest(svc, &udest);
3290 break;
3291 case IPVS_CMD_DEL_DEST:
3292 ret = ip_vs_del_dest(svc, &udest);
3293 break;
3294 case IPVS_CMD_ZERO:
3295 ret = ip_vs_zero_service(svc);
3296 break;
3297 default:
3298 ret = -EINVAL;
3299 }
3300
3301out:
Julius Volz9a812192008-08-14 14:08:44 +02003302 mutex_unlock(&__ip_vs_mutex);
3303
3304 return ret;
3305}
3306
3307static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3308{
3309 struct sk_buff *msg;
3310 void *reply;
3311 int ret, cmd, reply_cmd;
Hans Schillstromfc723252011-01-03 14:44:43 +01003312 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003313 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003314
Hans Schillstromfc723252011-01-03 14:44:43 +01003315 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003316 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003317 cmd = info->genlhdr->cmd;
3318
3319 if (cmd == IPVS_CMD_GET_SERVICE)
3320 reply_cmd = IPVS_CMD_NEW_SERVICE;
3321 else if (cmd == IPVS_CMD_GET_INFO)
3322 reply_cmd = IPVS_CMD_SET_INFO;
3323 else if (cmd == IPVS_CMD_GET_CONFIG)
3324 reply_cmd = IPVS_CMD_SET_CONFIG;
3325 else {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003326 pr_err("unknown Generic Netlink command\n");
Julius Volz9a812192008-08-14 14:08:44 +02003327 return -EINVAL;
3328 }
3329
3330 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3331 if (!msg)
3332 return -ENOMEM;
3333
3334 mutex_lock(&__ip_vs_mutex);
3335
3336 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3337 if (reply == NULL)
3338 goto nla_put_failure;
3339
3340 switch (cmd) {
3341 case IPVS_CMD_GET_SERVICE:
3342 {
3343 struct ip_vs_service *svc;
3344
Hans Schillstromfc723252011-01-03 14:44:43 +01003345 svc = ip_vs_genl_find_service(net,
3346 info->attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003347 if (IS_ERR(svc)) {
3348 ret = PTR_ERR(svc);
3349 goto out_err;
3350 } else if (svc) {
3351 ret = ip_vs_genl_fill_service(msg, svc);
Julius Volz9a812192008-08-14 14:08:44 +02003352 if (ret)
3353 goto nla_put_failure;
3354 } else {
3355 ret = -ESRCH;
3356 goto out_err;
3357 }
3358
3359 break;
3360 }
3361
3362 case IPVS_CMD_GET_CONFIG:
3363 {
3364 struct ip_vs_timeout_user t;
3365
Hans Schillstrom93304192011-01-03 14:44:51 +01003366 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003367#ifdef CONFIG_IP_VS_PROTO_TCP
3368 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3369 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3370 t.tcp_fin_timeout);
3371#endif
3372#ifdef CONFIG_IP_VS_PROTO_UDP
3373 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3374#endif
3375
3376 break;
3377 }
3378
3379 case IPVS_CMD_GET_INFO:
3380 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3381 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01003382 ip_vs_conn_tab_size);
Julius Volz9a812192008-08-14 14:08:44 +02003383 break;
3384 }
3385
3386 genlmsg_end(msg, reply);
Johannes Berg134e6372009-07-10 09:51:34 +00003387 ret = genlmsg_reply(msg, info);
Julius Volz9a812192008-08-14 14:08:44 +02003388 goto out;
3389
3390nla_put_failure:
Hannes Eder1e3e2382009-08-02 11:05:41 +00003391 pr_err("not enough space in Netlink message\n");
Julius Volz9a812192008-08-14 14:08:44 +02003392 ret = -EMSGSIZE;
3393
3394out_err:
3395 nlmsg_free(msg);
3396out:
3397 mutex_unlock(&__ip_vs_mutex);
3398
3399 return ret;
3400}
3401
3402
3403static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3404 {
3405 .cmd = IPVS_CMD_NEW_SERVICE,
3406 .flags = GENL_ADMIN_PERM,
3407 .policy = ip_vs_cmd_policy,
3408 .doit = ip_vs_genl_set_cmd,
3409 },
3410 {
3411 .cmd = IPVS_CMD_SET_SERVICE,
3412 .flags = GENL_ADMIN_PERM,
3413 .policy = ip_vs_cmd_policy,
3414 .doit = ip_vs_genl_set_cmd,
3415 },
3416 {
3417 .cmd = IPVS_CMD_DEL_SERVICE,
3418 .flags = GENL_ADMIN_PERM,
3419 .policy = ip_vs_cmd_policy,
3420 .doit = ip_vs_genl_set_cmd,
3421 },
3422 {
3423 .cmd = IPVS_CMD_GET_SERVICE,
3424 .flags = GENL_ADMIN_PERM,
3425 .doit = ip_vs_genl_get_cmd,
3426 .dumpit = ip_vs_genl_dump_services,
3427 .policy = ip_vs_cmd_policy,
3428 },
3429 {
3430 .cmd = IPVS_CMD_NEW_DEST,
3431 .flags = GENL_ADMIN_PERM,
3432 .policy = ip_vs_cmd_policy,
3433 .doit = ip_vs_genl_set_cmd,
3434 },
3435 {
3436 .cmd = IPVS_CMD_SET_DEST,
3437 .flags = GENL_ADMIN_PERM,
3438 .policy = ip_vs_cmd_policy,
3439 .doit = ip_vs_genl_set_cmd,
3440 },
3441 {
3442 .cmd = IPVS_CMD_DEL_DEST,
3443 .flags = GENL_ADMIN_PERM,
3444 .policy = ip_vs_cmd_policy,
3445 .doit = ip_vs_genl_set_cmd,
3446 },
3447 {
3448 .cmd = IPVS_CMD_GET_DEST,
3449 .flags = GENL_ADMIN_PERM,
3450 .policy = ip_vs_cmd_policy,
3451 .dumpit = ip_vs_genl_dump_dests,
3452 },
3453 {
3454 .cmd = IPVS_CMD_NEW_DAEMON,
3455 .flags = GENL_ADMIN_PERM,
3456 .policy = ip_vs_cmd_policy,
3457 .doit = ip_vs_genl_set_cmd,
3458 },
3459 {
3460 .cmd = IPVS_CMD_DEL_DAEMON,
3461 .flags = GENL_ADMIN_PERM,
3462 .policy = ip_vs_cmd_policy,
3463 .doit = ip_vs_genl_set_cmd,
3464 },
3465 {
3466 .cmd = IPVS_CMD_GET_DAEMON,
3467 .flags = GENL_ADMIN_PERM,
3468 .dumpit = ip_vs_genl_dump_daemons,
3469 },
3470 {
3471 .cmd = IPVS_CMD_SET_CONFIG,
3472 .flags = GENL_ADMIN_PERM,
3473 .policy = ip_vs_cmd_policy,
3474 .doit = ip_vs_genl_set_cmd,
3475 },
3476 {
3477 .cmd = IPVS_CMD_GET_CONFIG,
3478 .flags = GENL_ADMIN_PERM,
3479 .doit = ip_vs_genl_get_cmd,
3480 },
3481 {
3482 .cmd = IPVS_CMD_GET_INFO,
3483 .flags = GENL_ADMIN_PERM,
3484 .doit = ip_vs_genl_get_cmd,
3485 },
3486 {
3487 .cmd = IPVS_CMD_ZERO,
3488 .flags = GENL_ADMIN_PERM,
3489 .policy = ip_vs_cmd_policy,
3490 .doit = ip_vs_genl_set_cmd,
3491 },
3492 {
3493 .cmd = IPVS_CMD_FLUSH,
3494 .flags = GENL_ADMIN_PERM,
3495 .doit = ip_vs_genl_set_cmd,
3496 },
3497};
3498
3499static int __init ip_vs_genl_register(void)
3500{
Michał Mirosław8f698d52009-05-21 10:34:05 +00003501 return genl_register_family_with_ops(&ip_vs_genl_family,
3502 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
Julius Volz9a812192008-08-14 14:08:44 +02003503}
3504
3505static void ip_vs_genl_unregister(void)
3506{
3507 genl_unregister_family(&ip_vs_genl_family);
3508}
3509
3510/* End of Generic Netlink interface definitions */
3511
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003512/*
3513 * per netns intit/exit func.
3514 */
Simon Horman14e40542011-02-04 18:33:02 +09003515#ifdef CONFIG_SYSCTL
3516int __net_init __ip_vs_control_init_sysctl(struct net *net)
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003517{
Hans Schillstromfc723252011-01-03 14:44:43 +01003518 int idx;
3519 struct netns_ipvs *ipvs = net_ipvs(net);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003520 struct ctl_table *tbl;
Hans Schillstromfc723252011-01-03 14:44:43 +01003521
Hans Schillstroma0840e22011-01-03 14:44:58 +01003522 atomic_set(&ipvs->dropentry, 0);
3523 spin_lock_init(&ipvs->dropentry_lock);
3524 spin_lock_init(&ipvs->droppacket_lock);
3525 spin_lock_init(&ipvs->securetcp_lock);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003526
3527 if (!net_eq(net, &init_net)) {
3528 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3529 if (tbl == NULL)
Simon Horman14e40542011-02-04 18:33:02 +09003530 return -ENOMEM;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003531 } else
3532 tbl = vs_vars;
3533 /* Initialize sysctl defaults */
3534 idx = 0;
3535 ipvs->sysctl_amemthresh = 1024;
3536 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3537 ipvs->sysctl_am_droprate = 10;
3538 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3539 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3540 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3541#ifdef CONFIG_IP_VS_NFCT
3542 tbl[idx++].data = &ipvs->sysctl_conntrack;
3543#endif
3544 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3545 ipvs->sysctl_snat_reroute = 1;
3546 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3547 ipvs->sysctl_sync_ver = 1;
3548 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3549 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3550 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3551 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
Simon Horman59e03502011-02-04 18:33:01 +09003552 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3553 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003554 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3555 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3556 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3557
3558
3559 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
Hans Schillstrom07924702011-01-24 15:14:41 +01003560 tbl);
Simon Horman04439292011-02-01 18:29:04 +01003561 if (ipvs->sysctl_hdr == NULL) {
3562 if (!net_eq(net, &init_net))
3563 kfree(tbl);
Simon Horman14e40542011-02-04 18:33:02 +09003564 return -ENOMEM;
Simon Horman04439292011-02-01 18:29:04 +01003565 }
Julian Anastasov6ef757f2011-03-14 01:44:28 +02003566 ip_vs_start_estimator(net, &ipvs->tot_stats);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003567 ipvs->sysctl_tbl = tbl;
Hans Schillstromf6340ee2011-01-03 14:44:59 +01003568 /* Schedule defense work */
3569 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3570 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
Simon Horman14e40542011-02-04 18:33:02 +09003571
3572 return 0;
3573}
3574
3575void __net_init __ip_vs_control_cleanup_sysctl(struct net *net)
3576{
3577 struct netns_ipvs *ipvs = net_ipvs(net);
3578
3579 cancel_delayed_work_sync(&ipvs->defense_work);
3580 cancel_work_sync(&ipvs->defense_work.work);
3581 unregister_net_sysctl_table(ipvs->sysctl_hdr);
3582}
3583
3584#else
3585
3586int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
3587void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { }
3588
3589#endif
3590
3591int __net_init __ip_vs_control_init(struct net *net)
3592{
3593 int idx;
3594 struct netns_ipvs *ipvs = net_ipvs(net);
3595
3596 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3597
3598 /* Initialize rs_table */
3599 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3600 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3601
3602 INIT_LIST_HEAD(&ipvs->dest_trash);
3603 atomic_set(&ipvs->ftpsvc_counter, 0);
3604 atomic_set(&ipvs->nullsvc_counter, 0);
3605
3606 /* procfs stats */
3607 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
Eric Dumazetf40f94f2011-03-21 10:15:40 +00003608 if (!ipvs->tot_stats.cpustats) {
Simon Horman14e40542011-02-04 18:33:02 +09003609 pr_err("%s(): alloc_percpu.\n", __func__);
3610 return -ENOMEM;
3611 }
3612 spin_lock_init(&ipvs->tot_stats.lock);
3613
3614 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3615 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3616 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3617 &ip_vs_stats_percpu_fops);
3618
3619 if (__ip_vs_control_init_sysctl(net))
3620 goto err;
3621
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003622 return 0;
3623
Simon Horman14e40542011-02-04 18:33:02 +09003624err:
Julian Anastasov2a0751a2011-03-04 12:20:35 +02003625 free_percpu(ipvs->tot_stats.cpustats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003626 return -ENOMEM;
3627}
3628
3629static void __net_exit __ip_vs_control_cleanup(struct net *net)
3630{
Hans Schillstromb17fc992011-01-03 14:44:56 +01003631 struct netns_ipvs *ipvs = net_ipvs(net);
3632
Hans Schillstromf2431e62011-01-03 14:45:00 +01003633 ip_vs_trash_cleanup(net);
Julian Anastasov6ef757f2011-03-14 01:44:28 +02003634 ip_vs_stop_estimator(net, &ipvs->tot_stats);
Simon Horman14e40542011-02-04 18:33:02 +09003635 __ip_vs_control_cleanup_sysctl(net);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003636 proc_net_remove(net, "ip_vs_stats_percpu");
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003637 proc_net_remove(net, "ip_vs_stats");
3638 proc_net_remove(net, "ip_vs");
Julian Anastasov2a0751a2011-03-04 12:20:35 +02003639 free_percpu(ipvs->tot_stats.cpustats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003640}
3641
3642static struct pernet_operations ipvs_control_ops = {
3643 .init = __ip_vs_control_init,
3644 .exit = __ip_vs_control_cleanup,
3645};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003646
Sven Wegener048cf482008-08-10 18:24:35 +00003647int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003648{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003649 int idx;
Hans Schillstromfc723252011-01-03 14:44:43 +01003650 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003651
3652 EnterFunction(2);
3653
Hans Schillstromfc723252011-01-03 14:44:43 +01003654 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003655 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3656 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3657 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3658 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003659
3660 ret = register_pernet_subsys(&ipvs_control_ops);
3661 if (ret) {
3662 pr_err("cannot register namespace.\n");
3663 goto err;
Eduardo Blancod86bef72010-10-19 10:26:47 +01003664 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003665
3666 smp_wmb(); /* Do we really need it now ? */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003667
Linus Torvalds1da177e2005-04-16 15:20:36 -07003668 ret = nf_register_sockopt(&ip_vs_sockopts);
3669 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003670 pr_err("cannot register sockopt.\n");
Hans Schillstromfc723252011-01-03 14:44:43 +01003671 goto err_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003672 }
3673
Julius Volz9a812192008-08-14 14:08:44 +02003674 ret = ip_vs_genl_register();
3675 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003676 pr_err("cannot register Generic Netlink interface.\n");
Julius Volz9a812192008-08-14 14:08:44 +02003677 nf_unregister_sockopt(&ip_vs_sockopts);
Hans Schillstromfc723252011-01-03 14:44:43 +01003678 goto err_net;
Julius Volz9a812192008-08-14 14:08:44 +02003679 }
3680
Linus Torvalds1da177e2005-04-16 15:20:36 -07003681 LeaveFunction(2);
3682 return 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003683
3684err_net:
3685 unregister_pernet_subsys(&ipvs_control_ops);
3686err:
3687 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003688}
3689
3690
3691void ip_vs_control_cleanup(void)
3692{
3693 EnterFunction(2);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003694 unregister_pernet_subsys(&ipvs_control_ops);
Julius Volz9a812192008-08-14 14:08:44 +02003695 ip_vs_genl_unregister();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003696 nf_unregister_sockopt(&ip_vs_sockopts);
3697 LeaveFunction(2);
3698}