blob: 2d7c96bd21143de6d9b3a9f1c398f88a64e9ace3 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
Hannes Eder9aada7a2009-07-30 14:29:44 -070021#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/seq_file.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090034#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080038#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020040#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <net/ip.h>
Vince Busam09571c72008-09-02 15:55:52 +020042#ifdef CONFIG_IP_VS_IPV6
43#include <net/ipv6.h>
44#include <net/ip6_route.h>
45#endif
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020046#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020048#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
50#include <asm/uaccess.h>
51
52#include <net/ip_vs.h>
53
54/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
Ingo Molnar14cc3e22006-03-26 01:37:14 -080055static DEFINE_MUTEX(__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -070056
57/* lock for service table */
58static DEFINE_RWLOCK(__ip_vs_svc_lock);
59
60/* lock for table with the real services */
61static DEFINE_RWLOCK(__ip_vs_rs_lock);
62
63/* lock for state and timeout tables */
Simon Horman4f728162010-08-26 02:54:30 +000064static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070065
66/* lock for drop entry handling */
67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
68
69/* lock for drop packet handling */
70static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
71
72/* 1/rate drop and drop-entry variables */
73int ip_vs_drop_rate = 0;
74int ip_vs_drop_counter = 0;
75static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
76
77/* number of virtual services */
78static int ip_vs_num_services = 0;
79
80/* sysctl variables */
81static int sysctl_ip_vs_drop_entry = 0;
82static int sysctl_ip_vs_drop_packet = 0;
83static int sysctl_ip_vs_secure_tcp = 0;
84static int sysctl_ip_vs_amemthresh = 1024;
85static int sysctl_ip_vs_am_droprate = 10;
86int sysctl_ip_vs_cache_bypass = 0;
87int sysctl_ip_vs_expire_nodest_conn = 0;
88int sysctl_ip_vs_expire_quiescent_template = 0;
89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90int sysctl_ip_vs_nat_icmp_send = 0;
Julian Anastasovf4bc17c2010-09-21 17:35:41 +020091#ifdef CONFIG_IP_VS_NFCT
92int sysctl_ip_vs_conntrack;
93#endif
Julian Anastasov8a803042010-09-21 17:38:57 +020094int sysctl_ip_vs_snat_reroute = 1;
Hans Schillstromb880c1f2010-11-19 14:25:14 +010095int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */
Linus Torvalds1da177e2005-04-16 15:20:36 -070096
#ifdef CONFIG_IP_VS_DEBUG
/* debug verbosity; only present when CONFIG_IP_VS_DEBUG is set */
static int sysctl_ip_vs_debug_level;

/* Accessor returning the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
105
#ifdef CONFIG_IP_VS_IPV6
/*
 * Check whether @addr is a local IPv6 address.
 *
 * The address is routed in init_net with an unspecified source address.
 * It is reported as local when the lookup did not fail and the resulting
 * route points at a loopback device.
 *
 * Returns 1 if the address is considered local, 0 otherwise.
 *
 * Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct flowi fl = {
		.oif = 0,
		.fl6_dst = *addr,
		.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
	};
	struct dst_entry *dst;
	struct rt6_info *rt;
	int is_local;

	dst = ip6_route_output(&init_net, NULL, &fl);
	rt = (struct rt6_info *)dst;

	/*
	 * NOTE(review): a failed lookup is expected to come back as an
	 * entry with dst->error set rather than as NULL; such an entry must
	 * not be mistaken for a local route just because of its device.
	 */
	is_local = rt && !dst->error && rt->rt6i_dev &&
		   (rt->rt6i_dev->flags & IFF_LOOPBACK);

	/*
	 * The lookup hands back a referenced entry; drop that reference on
	 * every path, otherwise it is leaked on each call.
	 */
	dst_release(dst);

	return is_local;
}
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700125 * update_defense_level is called from keventd and from sysctl,
126 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127 */
128static void update_defense_level(void)
129{
130 struct sysinfo i;
131 static int old_secure_tcp = 0;
132 int availmem;
133 int nomem;
134 int to_change = -1;
135
136 /* we only count free and buffered memory (in pages) */
137 si_meminfo(&i);
138 availmem = i.freeram + i.bufferram;
139 /* however in linux 2.5 the i.bufferram is total page cache size,
140 we need adjust it */
141 /* si_swapinfo(&i); */
142 /* availmem = availmem - (i.totalswap - i.freeswap); */
143
144 nomem = (availmem < sysctl_ip_vs_amemthresh);
145
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700146 local_bh_disable();
147
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148 /* drop_entry */
149 spin_lock(&__ip_vs_dropentry_lock);
150 switch (sysctl_ip_vs_drop_entry) {
151 case 0:
152 atomic_set(&ip_vs_dropentry, 0);
153 break;
154 case 1:
155 if (nomem) {
156 atomic_set(&ip_vs_dropentry, 1);
157 sysctl_ip_vs_drop_entry = 2;
158 } else {
159 atomic_set(&ip_vs_dropentry, 0);
160 }
161 break;
162 case 2:
163 if (nomem) {
164 atomic_set(&ip_vs_dropentry, 1);
165 } else {
166 atomic_set(&ip_vs_dropentry, 0);
167 sysctl_ip_vs_drop_entry = 1;
168 };
169 break;
170 case 3:
171 atomic_set(&ip_vs_dropentry, 1);
172 break;
173 }
174 spin_unlock(&__ip_vs_dropentry_lock);
175
176 /* drop_packet */
177 spin_lock(&__ip_vs_droppacket_lock);
178 switch (sysctl_ip_vs_drop_packet) {
179 case 0:
180 ip_vs_drop_rate = 0;
181 break;
182 case 1:
183 if (nomem) {
184 ip_vs_drop_rate = ip_vs_drop_counter
185 = sysctl_ip_vs_amemthresh /
186 (sysctl_ip_vs_amemthresh-availmem);
187 sysctl_ip_vs_drop_packet = 2;
188 } else {
189 ip_vs_drop_rate = 0;
190 }
191 break;
192 case 2:
193 if (nomem) {
194 ip_vs_drop_rate = ip_vs_drop_counter
195 = sysctl_ip_vs_amemthresh /
196 (sysctl_ip_vs_amemthresh-availmem);
197 } else {
198 ip_vs_drop_rate = 0;
199 sysctl_ip_vs_drop_packet = 1;
200 }
201 break;
202 case 3:
203 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
204 break;
205 }
206 spin_unlock(&__ip_vs_droppacket_lock);
207
208 /* secure_tcp */
Simon Horman4f728162010-08-26 02:54:30 +0000209 spin_lock(&ip_vs_securetcp_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210 switch (sysctl_ip_vs_secure_tcp) {
211 case 0:
212 if (old_secure_tcp >= 2)
213 to_change = 0;
214 break;
215 case 1:
216 if (nomem) {
217 if (old_secure_tcp < 2)
218 to_change = 1;
219 sysctl_ip_vs_secure_tcp = 2;
220 } else {
221 if (old_secure_tcp >= 2)
222 to_change = 0;
223 }
224 break;
225 case 2:
226 if (nomem) {
227 if (old_secure_tcp < 2)
228 to_change = 1;
229 } else {
230 if (old_secure_tcp >= 2)
231 to_change = 0;
232 sysctl_ip_vs_secure_tcp = 1;
233 }
234 break;
235 case 3:
236 if (old_secure_tcp < 2)
237 to_change = 1;
238 break;
239 }
240 old_secure_tcp = sysctl_ip_vs_secure_tcp;
241 if (to_change >= 0)
242 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
Simon Horman4f728162010-08-26 02:54:30 +0000243 spin_unlock(&ip_vs_securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700244
245 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246}
247
248
249/*
250 * Timer for checking the defense
251 */
252#define DEFENSE_TIMER_PERIOD 1*HZ
David Howellsc4028952006-11-22 14:57:56 +0000253static void defense_work_handler(struct work_struct *work);
254static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255
David Howellsc4028952006-11-22 14:57:56 +0000256static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257{
258 update_defense_level();
259 if (atomic_read(&ip_vs_dropentry))
260 ip_vs_random_dropentry();
261
262 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
263}
264
265int
266ip_vs_use_count_inc(void)
267{
268 return try_module_get(THIS_MODULE);
269}
270
271void
272ip_vs_use_count_dec(void)
273{
274 module_put(THIS_MODULE);
275}
276
277
278/*
279 * Hash table: for virtual service lookups
280 */
281#define IP_VS_SVC_TAB_BITS 8
282#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
283#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
284
285/* the service table hashed by <protocol, addr, port> */
286static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
287/* the service table hashed by fwmark */
288static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
289
290/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 * Trash for destinations
292 */
293static LIST_HEAD(ip_vs_dest_trash);
294
295/*
296 * FTP & NULL virtual service counters
297 */
298static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
299static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
300
301
302/*
303 * Returns hash value for virtual service
304 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100305static inline unsigned
306ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
307 const union nf_inet_addr *addr, __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308{
309 register unsigned porth = ntohs(port);
Julius Volzb18610d2008-09-02 15:55:37 +0200310 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311
Julius Volzb18610d2008-09-02 15:55:37 +0200312#ifdef CONFIG_IP_VS_IPV6
313 if (af == AF_INET6)
314 addr_fold = addr->ip6[0]^addr->ip6[1]^
315 addr->ip6[2]^addr->ip6[3];
316#endif
Hans Schillstromfc723252011-01-03 14:44:43 +0100317 addr_fold ^= ((size_t)net>>8);
Julius Volzb18610d2008-09-02 15:55:37 +0200318
319 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320 & IP_VS_SVC_TAB_MASK;
321}
322
323/*
324 * Returns hash value of fwmark for virtual service lookup
325 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100326static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327{
Hans Schillstromfc723252011-01-03 14:44:43 +0100328 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329}
330
331/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100332 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 * or in the ip_vs_svc_fwm_table by fwmark.
334 * Should be called with locked tables.
335 */
336static int ip_vs_svc_hash(struct ip_vs_service *svc)
337{
338 unsigned hash;
339
340 if (svc->flags & IP_VS_SVC_F_HASHED) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000341 pr_err("%s(): request for already hashed, called from %pF\n",
342 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343 return 0;
344 }
345
346 if (svc->fwmark == 0) {
347 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100348 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100350 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
351 &svc->addr, svc->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
353 } else {
354 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100355 * Hash it by fwmark in svc_fwm_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700356 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100357 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
359 }
360
361 svc->flags |= IP_VS_SVC_F_HASHED;
362 /* increase its refcnt because it is referenced by the svc table */
363 atomic_inc(&svc->refcnt);
364 return 1;
365}
366
367
368/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100369 * Unhashes a service from svc_table / svc_fwm_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 * Should be called with locked tables.
371 */
372static int ip_vs_svc_unhash(struct ip_vs_service *svc)
373{
374 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000375 pr_err("%s(): request for unhash flagged, called from %pF\n",
376 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 return 0;
378 }
379
380 if (svc->fwmark == 0) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100381 /* Remove it from the svc_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 list_del(&svc->s_list);
383 } else {
Hans Schillstromfc723252011-01-03 14:44:43 +0100384 /* Remove it from the svc_fwm_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 list_del(&svc->f_list);
386 }
387
388 svc->flags &= ~IP_VS_SVC_F_HASHED;
389 atomic_dec(&svc->refcnt);
390 return 1;
391}
392
393
394/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100395 * Get service by {netns, proto,addr,port} in the service table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396 */
Julius Volzb18610d2008-09-02 15:55:37 +0200397static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100398__ip_vs_service_find(struct net *net, int af, __u16 protocol,
399 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400{
401 unsigned hash;
402 struct ip_vs_service *svc;
403
404 /* Check for "full" addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100405 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406
407 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
Julius Volzb18610d2008-09-02 15:55:37 +0200408 if ((svc->af == af)
409 && ip_vs_addr_equal(af, &svc->addr, vaddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410 && (svc->port == vport)
Hans Schillstromfc723252011-01-03 14:44:43 +0100411 && (svc->protocol == protocol)
412 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 return svc;
415 }
416 }
417
418 return NULL;
419}
420
421
422/*
423 * Get service by {fwmark} in the service table.
424 */
Julius Volzb18610d2008-09-02 15:55:37 +0200425static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100426__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427{
428 unsigned hash;
429 struct ip_vs_service *svc;
430
431 /* Check for fwmark addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100432 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433
434 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100435 if (svc->fwmark == fwmark && svc->af == af
436 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 return svc;
439 }
440 }
441
442 return NULL;
443}
444
445struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100446ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
Julius Volz3c2e0502008-09-02 15:55:38 +0200447 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448{
449 struct ip_vs_service *svc;
Julius Volz3c2e0502008-09-02 15:55:38 +0200450
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 read_lock(&__ip_vs_svc_lock);
452
453 /*
454 * Check the table hashed by fwmark first
455 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100456 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
457 if (fwmark && svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458 goto out;
459
460 /*
461 * Check the table hashed by <protocol,addr,port>
462 * for "full" addressed entries
463 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100464 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465
466 if (svc == NULL
467 && protocol == IPPROTO_TCP
468 && atomic_read(&ip_vs_ftpsvc_counter)
469 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
470 /*
471 * Check if ftp service entry exists, the packet
472 * might belong to FTP data connections.
473 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100474 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475 }
476
477 if (svc == NULL
478 && atomic_read(&ip_vs_nullsvc_counter)) {
479 /*
480 * Check if the catch-all port (port zero) exists
481 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100482 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 }
484
485 out:
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200486 if (svc)
487 atomic_inc(&svc->usecnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488 read_unlock(&__ip_vs_svc_lock);
489
Julius Volz3c2e0502008-09-02 15:55:38 +0200490 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
491 fwmark, ip_vs_proto_name(protocol),
492 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
493 svc ? "hit" : "not hit");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494
495 return svc;
496}
497
498
499static inline void
500__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
501{
502 atomic_inc(&svc->refcnt);
503 dest->svc = svc;
504}
505
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200506static void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507__ip_vs_unbind_svc(struct ip_vs_dest *dest)
508{
509 struct ip_vs_service *svc = dest->svc;
510
511 dest->svc = NULL;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200512 if (atomic_dec_and_test(&svc->refcnt)) {
513 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
514 svc->fwmark,
515 IP_VS_DBG_ADDR(svc->af, &svc->addr),
516 ntohs(svc->port), atomic_read(&svc->usecnt));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200518 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519}
520
521
522/*
523 * Returns hash value for real service
524 */
Julius Volz7937df12008-09-02 15:55:48 +0200525static inline unsigned ip_vs_rs_hashkey(int af,
526 const union nf_inet_addr *addr,
527 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528{
529 register unsigned porth = ntohs(port);
Julius Volz7937df12008-09-02 15:55:48 +0200530 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531
Julius Volz7937df12008-09-02 15:55:48 +0200532#ifdef CONFIG_IP_VS_IPV6
533 if (af == AF_INET6)
534 addr_fold = addr->ip6[0]^addr->ip6[1]^
535 addr->ip6[2]^addr->ip6[3];
536#endif
537
538 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539 & IP_VS_RTAB_MASK;
540}
541
542/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100543 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544 * should be called with locked tables.
545 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100546static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547{
548 unsigned hash;
549
550 if (!list_empty(&dest->d_list)) {
551 return 0;
552 }
553
554 /*
555 * Hash by proto,addr,port,
556 * which are the parameters of the real service.
557 */
Julius Volz7937df12008-09-02 15:55:48 +0200558 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
559
Hans Schillstromfc723252011-01-03 14:44:43 +0100560 list_add(&dest->d_list, &ipvs->rs_table[hash]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561
562 return 1;
563}
564
565/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100566 * UNhashes ip_vs_dest from rs_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567 * should be called with locked tables.
568 */
569static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
570{
571 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100572 * Remove it from the rs_table table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573 */
574 if (!list_empty(&dest->d_list)) {
575 list_del(&dest->d_list);
576 INIT_LIST_HEAD(&dest->d_list);
577 }
578
579 return 1;
580}
581
582/*
583 * Lookup real service by <proto,addr,port> in the real service table.
584 */
585struct ip_vs_dest *
Hans Schillstromfc723252011-01-03 14:44:43 +0100586ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
Julius Volz7937df12008-09-02 15:55:48 +0200587 const union nf_inet_addr *daddr,
588 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589{
Hans Schillstromfc723252011-01-03 14:44:43 +0100590 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 unsigned hash;
592 struct ip_vs_dest *dest;
593
594 /*
595 * Check for "full" addressed entries
596 * Return the first found entry
597 */
Julius Volz7937df12008-09-02 15:55:48 +0200598 hash = ip_vs_rs_hashkey(af, daddr, dport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599
600 read_lock(&__ip_vs_rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100601 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200602 if ((dest->af == af)
603 && ip_vs_addr_equal(af, &dest->addr, daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 && (dest->port == dport)
605 && ((dest->protocol == protocol) ||
606 dest->vfwmark)) {
607 /* HIT */
608 read_unlock(&__ip_vs_rs_lock);
609 return dest;
610 }
611 }
612 read_unlock(&__ip_vs_rs_lock);
613
614 return NULL;
615}
616
617/*
618 * Lookup destination by {addr,port} in the given service
619 */
620static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200621ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
622 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623{
624 struct ip_vs_dest *dest;
625
626 /*
627 * Find the destination for the given service
628 */
629 list_for_each_entry(dest, &svc->destinations, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200630 if ((dest->af == svc->af)
631 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
632 && (dest->port == dport)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 /* HIT */
634 return dest;
635 }
636 }
637
638 return NULL;
639}
640
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800641/*
642 * Find destination by {daddr,dport,vaddr,protocol}
643 * Cretaed to be used in ip_vs_process_message() in
644 * the backup synchronization daemon. It finds the
645 * destination to be bound to the received connection
646 * on the backup.
647 *
648 * ip_vs_lookup_real_service() looked promissing, but
649 * seems not working as expected.
650 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100651struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
652 const union nf_inet_addr *daddr,
Julius Volz7937df12008-09-02 15:55:48 +0200653 __be16 dport,
654 const union nf_inet_addr *vaddr,
Hans Schillstrom0e051e62010-11-19 14:25:07 +0100655 __be16 vport, __u16 protocol, __u32 fwmark)
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800656{
657 struct ip_vs_dest *dest;
658 struct ip_vs_service *svc;
659
Hans Schillstromfc723252011-01-03 14:44:43 +0100660 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800661 if (!svc)
662 return NULL;
663 dest = ip_vs_lookup_dest(svc, daddr, dport);
664 if (dest)
665 atomic_inc(&dest->refcnt);
666 ip_vs_service_put(svc);
667 return dest;
668}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669
670/*
671 * Lookup dest by {svc,addr,port} in the destination trash.
672 * The destination trash is used to hold the destinations that are removed
673 * from the service table but are still referenced by some conn entries.
674 * The reason to add the destination trash is when the dest is temporary
675 * down (either by administrator or by monitor program), the dest can be
676 * picked back from the trash, the remaining connections to the dest can
677 * continue, and the counting information of the dest is also useful for
678 * scheduling.
679 */
680static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200681ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
682 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683{
684 struct ip_vs_dest *dest, *nxt;
685
686 /*
687 * Find the destination in trash
688 */
689 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200690 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
691 "dest->refcnt=%d\n",
692 dest->vfwmark,
693 IP_VS_DBG_ADDR(svc->af, &dest->addr),
694 ntohs(dest->port),
695 atomic_read(&dest->refcnt));
696 if (dest->af == svc->af &&
697 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698 dest->port == dport &&
699 dest->vfwmark == svc->fwmark &&
700 dest->protocol == svc->protocol &&
701 (svc->fwmark ||
Julius Volz7937df12008-09-02 15:55:48 +0200702 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700703 dest->vport == svc->port))) {
704 /* HIT */
705 return dest;
706 }
707
708 /*
709 * Try to purge the destination from trash if not referenced
710 */
711 if (atomic_read(&dest->refcnt) == 1) {
Julius Volz7937df12008-09-02 15:55:48 +0200712 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
713 "from trash\n",
714 dest->vfwmark,
715 IP_VS_DBG_ADDR(svc->af, &dest->addr),
716 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717 list_del(&dest->n_list);
718 ip_vs_dst_reset(dest);
719 __ip_vs_unbind_svc(dest);
720 kfree(dest);
721 }
722 }
723
724 return NULL;
725}
726
727
728/*
729 * Clean up all the destinations in the trash
730 * Called by the ip_vs_control_cleanup()
731 *
732 * When the ip_vs_control_clearup is activated by ipvs module exit,
733 * the service tables must have been flushed and all the connections
734 * are expired, and the refcnt of each destination in the trash must
735 * be 1, so we simply release them here.
736 */
737static void ip_vs_trash_cleanup(void)
738{
739 struct ip_vs_dest *dest, *nxt;
740
741 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
742 list_del(&dest->n_list);
743 ip_vs_dst_reset(dest);
744 __ip_vs_unbind_svc(dest);
745 kfree(dest);
746 }
747}
748
749
750static void
751ip_vs_zero_stats(struct ip_vs_stats *stats)
752{
753 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000754
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200755 memset(&stats->ustats, 0, sizeof(stats->ustats));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000757
Sven Wegener3a14a3132008-08-10 18:24:41 +0000758 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759}
760
761/*
762 * Update a destination in the given service
763 */
764static void
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200765__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
766 struct ip_vs_dest_user_kern *udest, int add)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700767{
Hans Schillstromfc723252011-01-03 14:44:43 +0100768 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769 int conn_flags;
770
771 /* set the weight and the flags */
772 atomic_set(&dest->weight, udest->weight);
Julian Anastasov35757922010-09-17 14:18:16 +0200773 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
774 conn_flags |= IP_VS_CONN_F_INACTIVE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
Julian Anastasov35757922010-09-17 14:18:16 +0200777 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
779 } else {
780 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100781 * Put the real service in rs_table if not present.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782 * For now only for NAT!
783 */
784 write_lock_bh(&__ip_vs_rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100785 ip_vs_rs_hash(ipvs, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786 write_unlock_bh(&__ip_vs_rs_lock);
787 }
788 atomic_set(&dest->conn_flags, conn_flags);
789
790 /* bind the service */
791 if (!dest->svc) {
792 __ip_vs_bind_svc(dest, svc);
793 } else {
794 if (dest->svc != svc) {
795 __ip_vs_unbind_svc(dest);
796 ip_vs_zero_stats(&dest->stats);
797 __ip_vs_bind_svc(dest, svc);
798 }
799 }
800
801 /* set the dest status flags */
802 dest->flags |= IP_VS_DEST_F_AVAILABLE;
803
804 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
805 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
806 dest->u_threshold = udest->u_threshold;
807 dest->l_threshold = udest->l_threshold;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200808
Julian Anastasovfc604762010-10-17 16:38:15 +0300809 spin_lock(&dest->dst_lock);
810 ip_vs_dst_reset(dest);
811 spin_unlock(&dest->dst_lock);
812
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200813 if (add)
814 ip_vs_new_estimator(&dest->stats);
815
816 write_lock_bh(&__ip_vs_svc_lock);
817
818 /* Wait until all other svc users go away */
819 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
820
821 if (add) {
822 list_add(&dest->n_list, &svc->destinations);
823 svc->num_dests++;
824 }
825
826 /* call the update_service, because server weight may be changed */
827 if (svc->scheduler->update_service)
828 svc->scheduler->update_service(svc);
829
830 write_unlock_bh(&__ip_vs_svc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831}
832
833
834/*
835 * Create a destination for the given service
836 */
837static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200838ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 struct ip_vs_dest **dest_p)
840{
841 struct ip_vs_dest *dest;
842 unsigned atype;
843
844 EnterFunction(2);
845
Vince Busam09571c72008-09-02 15:55:52 +0200846#ifdef CONFIG_IP_VS_IPV6
847 if (svc->af == AF_INET6) {
848 atype = ipv6_addr_type(&udest->addr.in6);
Sven Wegener3bfb92f2008-09-05 16:53:49 +0200849 if ((!(atype & IPV6_ADDR_UNICAST) ||
850 atype & IPV6_ADDR_LINKLOCAL) &&
Vince Busam09571c72008-09-02 15:55:52 +0200851 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
852 return -EINVAL;
853 } else
854#endif
855 {
856 atype = inet_addr_type(&init_net, udest->addr.ip);
857 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
858 return -EINVAL;
859 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860
Simon Hormandee06e42010-08-26 02:54:31 +0000861 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000863 pr_err("%s(): no memory.\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864 return -ENOMEM;
865 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700866
Julius Volzc860c6b2008-09-02 15:55:36 +0200867 dest->af = svc->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868 dest->protocol = svc->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +0200869 dest->vaddr = svc->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870 dest->vport = svc->port;
871 dest->vfwmark = svc->fwmark;
Julius Volzc860c6b2008-09-02 15:55:36 +0200872 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873 dest->port = udest->port;
874
875 atomic_set(&dest->activeconns, 0);
876 atomic_set(&dest->inactconns, 0);
877 atomic_set(&dest->persistconns, 0);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200878 atomic_set(&dest->refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879
880 INIT_LIST_HEAD(&dest->d_list);
881 spin_lock_init(&dest->dst_lock);
882 spin_lock_init(&dest->stats.lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200883 __ip_vs_update_dest(svc, dest, udest, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884
885 *dest_p = dest;
886
887 LeaveFunction(2);
888 return 0;
889}
890
891
892/*
893 * Add a destination into an existing service
894 */
895static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200896ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897{
898 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200899 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700900 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901 int ret;
902
903 EnterFunction(2);
904
905 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000906 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907 return -ERANGE;
908 }
909
910 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000911 pr_err("%s(): lower threshold is higher than upper threshold\n",
912 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 return -ERANGE;
914 }
915
Julius Volzc860c6b2008-09-02 15:55:36 +0200916 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
917
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 /*
919 * Check if the dest already exists in the list
920 */
Julius Volz7937df12008-09-02 15:55:48 +0200921 dest = ip_vs_lookup_dest(svc, &daddr, dport);
922
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 if (dest != NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000924 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 return -EEXIST;
926 }
927
928 /*
929 * Check if the dest already exists in the trash and
930 * is from the same service
931 */
Julius Volz7937df12008-09-02 15:55:48 +0200932 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
933
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 if (dest != NULL) {
Julius Volzcfc78c52008-09-02 15:55:53 +0200935 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
936 "dest->refcnt=%d, service %u/%s:%u\n",
937 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
938 atomic_read(&dest->refcnt),
939 dest->vfwmark,
940 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
941 ntohs(dest->vport));
942
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943 /*
944 * Get the destination from the trash
945 */
946 list_del(&dest->n_list);
947
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200948 __ip_vs_update_dest(svc, dest, udest, 1);
949 ret = 0;
950 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951 /*
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200952 * Allocate and initialize the dest structure
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200954 ret = ip_vs_new_dest(svc, udest, &dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 LeaveFunction(2);
957
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200958 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959}
960
961
962/*
963 * Edit a destination in the given service
964 */
965static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200966ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967{
968 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200969 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700970 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971
972 EnterFunction(2);
973
974 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000975 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976 return -ERANGE;
977 }
978
979 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000980 pr_err("%s(): lower threshold is higher than upper threshold\n",
981 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982 return -ERANGE;
983 }
984
Julius Volzc860c6b2008-09-02 15:55:36 +0200985 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
986
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 /*
988 * Lookup the destination list
989 */
Julius Volz7937df12008-09-02 15:55:48 +0200990 dest = ip_vs_lookup_dest(svc, &daddr, dport);
991
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000993 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994 return -ENOENT;
995 }
996
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200997 __ip_vs_update_dest(svc, dest, udest, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 LeaveFunction(2);
999
1000 return 0;
1001}
1002
1003
1004/*
1005 * Delete a destination (must be already unlinked from the service)
1006 */
1007static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1008{
1009 ip_vs_kill_estimator(&dest->stats);
1010
1011 /*
1012 * Remove it from the d-linked list with the real services.
1013 */
1014 write_lock_bh(&__ip_vs_rs_lock);
1015 ip_vs_rs_unhash(dest);
1016 write_unlock_bh(&__ip_vs_rs_lock);
1017
1018 /*
1019 * Decrease the refcnt of the dest, and free the dest
1020 * if nobody refers to it (refcnt=0). Otherwise, throw
1021 * the destination into the trash.
1022 */
1023 if (atomic_dec_and_test(&dest->refcnt)) {
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001024 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1025 dest->vfwmark,
1026 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1027 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028 ip_vs_dst_reset(dest);
1029 /* simply decrease svc->refcnt here, let the caller check
1030 and release the service if nobody refers to it.
1031 Only user context can release destination and service,
1032 and only one user context can update virtual service at a
1033 time, so the operation here is OK */
1034 atomic_dec(&dest->svc->refcnt);
1035 kfree(dest);
1036 } else {
Julius Volzcfc78c52008-09-02 15:55:53 +02001037 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1038 "dest->refcnt=%d\n",
1039 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1040 ntohs(dest->port),
1041 atomic_read(&dest->refcnt));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042 list_add(&dest->n_list, &ip_vs_dest_trash);
1043 atomic_inc(&dest->refcnt);
1044 }
1045}
1046
1047
1048/*
1049 * Unlink a destination from the given service
1050 */
1051static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1052 struct ip_vs_dest *dest,
1053 int svcupd)
1054{
1055 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1056
1057 /*
1058 * Remove it from the d-linked destination list.
1059 */
1060 list_del(&dest->n_list);
1061 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001062
1063 /*
1064 * Call the update_service function of its scheduler
1065 */
1066 if (svcupd && svc->scheduler->update_service)
1067 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068}
1069
1070
1071/*
1072 * Delete a destination server in the given service
1073 */
1074static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001075ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076{
1077 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -07001078 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079
1080 EnterFunction(2);
1081
Julius Volz7937df12008-09-02 15:55:48 +02001082 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
Julius Volzc860c6b2008-09-02 15:55:36 +02001083
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001085 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086 return -ENOENT;
1087 }
1088
1089 write_lock_bh(&__ip_vs_svc_lock);
1090
1091 /*
1092 * Wait until all other svc users go away.
1093 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001094 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095
1096 /*
1097 * Unlink dest from the service
1098 */
1099 __ip_vs_unlink_dest(svc, dest, 1);
1100
1101 write_unlock_bh(&__ip_vs_svc_lock);
1102
1103 /*
1104 * Delete the destination
1105 */
1106 __ip_vs_del_dest(dest);
1107
1108 LeaveFunction(2);
1109
1110 return 0;
1111}
1112
1113
1114/*
1115 * Add a service into the service hash table
1116 */
1117static int
Hans Schillstromfc723252011-01-03 14:44:43 +01001118ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
Julius Volzc860c6b2008-09-02 15:55:36 +02001119 struct ip_vs_service **svc_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120{
1121 int ret = 0;
1122 struct ip_vs_scheduler *sched = NULL;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001123 struct ip_vs_pe *pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124 struct ip_vs_service *svc = NULL;
1125
1126 /* increase the module use count */
1127 ip_vs_use_count_inc();
1128
1129 /* Lookup the scheduler by 'u->sched_name' */
1130 sched = ip_vs_scheduler_get(u->sched_name);
1131 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001132 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 ret = -ENOENT;
Simon Horman6e08bfb2010-08-22 21:37:52 +09001134 goto out_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135 }
1136
Simon Horman0d1e71b2010-08-22 21:37:54 +09001137 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001138 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001139 if (pe == NULL) {
1140 pr_info("persistence engine module ip_vs_pe_%s "
1141 "not found\n", u->pe_name);
1142 ret = -ENOENT;
1143 goto out_err;
1144 }
1145 }
1146
Julius Volzf94fd042008-09-02 15:55:55 +02001147#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001148 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1149 ret = -EINVAL;
1150 goto out_err;
Julius Volzf94fd042008-09-02 15:55:55 +02001151 }
1152#endif
1153
Simon Hormandee06e42010-08-26 02:54:31 +00001154 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155 if (svc == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001156 IP_VS_DBG(1, "%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157 ret = -ENOMEM;
1158 goto out_err;
1159 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160
1161 /* I'm the first user of the service */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001162 atomic_set(&svc->usecnt, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163 atomic_set(&svc->refcnt, 0);
1164
Julius Volzc860c6b2008-09-02 15:55:36 +02001165 svc->af = u->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166 svc->protocol = u->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +02001167 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168 svc->port = u->port;
1169 svc->fwmark = u->fwmark;
1170 svc->flags = u->flags;
1171 svc->timeout = u->timeout * HZ;
1172 svc->netmask = u->netmask;
Hans Schillstromfc723252011-01-03 14:44:43 +01001173 svc->net = net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174
1175 INIT_LIST_HEAD(&svc->destinations);
1176 rwlock_init(&svc->sched_lock);
1177 spin_lock_init(&svc->stats.lock);
1178
1179 /* Bind the scheduler */
1180 ret = ip_vs_bind_scheduler(svc, sched);
1181 if (ret)
1182 goto out_err;
1183 sched = NULL;
1184
Simon Horman0d1e71b2010-08-22 21:37:54 +09001185 /* Bind the ct retriever */
1186 ip_vs_bind_pe(svc, pe);
1187 pe = NULL;
1188
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189 /* Update the virtual service counters */
1190 if (svc->port == FTPPORT)
1191 atomic_inc(&ip_vs_ftpsvc_counter);
1192 else if (svc->port == 0)
1193 atomic_inc(&ip_vs_nullsvc_counter);
1194
1195 ip_vs_new_estimator(&svc->stats);
Julius Volzf94fd042008-09-02 15:55:55 +02001196
1197 /* Count only IPv4 services for old get/setsockopt interface */
1198 if (svc->af == AF_INET)
1199 ip_vs_num_services++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200
1201 /* Hash the service into the service table */
1202 write_lock_bh(&__ip_vs_svc_lock);
1203 ip_vs_svc_hash(svc);
1204 write_unlock_bh(&__ip_vs_svc_lock);
1205
1206 *svc_p = svc;
1207 return 0;
1208
Simon Horman6e08bfb2010-08-22 21:37:52 +09001209 out_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210 if (svc != NULL) {
Simon Horman2fabf352010-08-22 21:37:52 +09001211 ip_vs_unbind_scheduler(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 if (svc->inc) {
1213 local_bh_disable();
1214 ip_vs_app_inc_put(svc->inc);
1215 local_bh_enable();
1216 }
1217 kfree(svc);
1218 }
1219 ip_vs_scheduler_put(sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001220 ip_vs_pe_put(pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221
Linus Torvalds1da177e2005-04-16 15:20:36 -07001222 /* decrease the module use count */
1223 ip_vs_use_count_dec();
1224
1225 return ret;
1226}
1227
1228
1229/*
1230 * Edit a service and bind it with a new scheduler
1231 */
1232static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001233ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234{
1235 struct ip_vs_scheduler *sched, *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001236 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237 int ret = 0;
1238
1239 /*
1240 * Lookup the scheduler, by 'u->sched_name'
1241 */
1242 sched = ip_vs_scheduler_get(u->sched_name);
1243 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001244 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245 return -ENOENT;
1246 }
1247 old_sched = sched;
1248
Simon Horman0d1e71b2010-08-22 21:37:54 +09001249 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001250 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001251 if (pe == NULL) {
1252 pr_info("persistence engine module ip_vs_pe_%s "
1253 "not found\n", u->pe_name);
1254 ret = -ENOENT;
1255 goto out;
1256 }
1257 old_pe = pe;
1258 }
1259
Julius Volzf94fd042008-09-02 15:55:55 +02001260#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001261 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1262 ret = -EINVAL;
1263 goto out;
Julius Volzf94fd042008-09-02 15:55:55 +02001264 }
1265#endif
1266
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267 write_lock_bh(&__ip_vs_svc_lock);
1268
1269 /*
1270 * Wait until all other svc users go away.
1271 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001272 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273
1274 /*
1275 * Set the flags and timeout value
1276 */
1277 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1278 svc->timeout = u->timeout * HZ;
1279 svc->netmask = u->netmask;
1280
1281 old_sched = svc->scheduler;
1282 if (sched != old_sched) {
1283 /*
1284 * Unbind the old scheduler
1285 */
1286 if ((ret = ip_vs_unbind_scheduler(svc))) {
1287 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001288 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289 }
1290
1291 /*
1292 * Bind the new scheduler
1293 */
1294 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1295 /*
1296 * If ip_vs_bind_scheduler fails, restore the old
1297 * scheduler.
1298 * The main reason of failure is out of memory.
1299 *
1300 * The question is if the old scheduler can be
1301 * restored all the time. TODO: if it cannot be
1302 * restored some time, we must delete the service,
1303 * otherwise the system may crash.
1304 */
1305 ip_vs_bind_scheduler(svc, old_sched);
1306 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001307 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308 }
1309 }
1310
Simon Horman0d1e71b2010-08-22 21:37:54 +09001311 old_pe = svc->pe;
1312 if (pe != old_pe) {
1313 ip_vs_unbind_pe(svc);
1314 ip_vs_bind_pe(svc, pe);
1315 }
1316
Simon Horman9e691ed2008-09-17 10:10:41 +10001317 out_unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318 write_unlock_bh(&__ip_vs_svc_lock);
Simon Horman9e691ed2008-09-17 10:10:41 +10001319 out:
Simon Horman6e08bfb2010-08-22 21:37:52 +09001320 ip_vs_scheduler_put(old_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001321 ip_vs_pe_put(old_pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322 return ret;
1323}
1324
1325
1326/*
1327 * Delete a service from the service list
1328 * - The service must be unlinked, unlocked and not referenced!
1329 * - We are called under _bh lock
1330 */
1331static void __ip_vs_del_service(struct ip_vs_service *svc)
1332{
1333 struct ip_vs_dest *dest, *nxt;
1334 struct ip_vs_scheduler *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001335 struct ip_vs_pe *old_pe;
1336
1337 pr_info("%s: enter\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338
Julius Volzf94fd042008-09-02 15:55:55 +02001339 /* Count only IPv4 services for old get/setsockopt interface */
1340 if (svc->af == AF_INET)
1341 ip_vs_num_services--;
1342
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343 ip_vs_kill_estimator(&svc->stats);
1344
1345 /* Unbind scheduler */
1346 old_sched = svc->scheduler;
1347 ip_vs_unbind_scheduler(svc);
Simon Horman6e08bfb2010-08-22 21:37:52 +09001348 ip_vs_scheduler_put(old_sched);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349
Simon Horman0d1e71b2010-08-22 21:37:54 +09001350 /* Unbind persistence engine */
1351 old_pe = svc->pe;
1352 ip_vs_unbind_pe(svc);
1353 ip_vs_pe_put(old_pe);
1354
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355 /* Unbind app inc */
1356 if (svc->inc) {
1357 ip_vs_app_inc_put(svc->inc);
1358 svc->inc = NULL;
1359 }
1360
1361 /*
1362 * Unlink the whole destination list
1363 */
1364 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1365 __ip_vs_unlink_dest(svc, dest, 0);
1366 __ip_vs_del_dest(dest);
1367 }
1368
1369 /*
1370 * Update the virtual service counters
1371 */
1372 if (svc->port == FTPPORT)
1373 atomic_dec(&ip_vs_ftpsvc_counter);
1374 else if (svc->port == 0)
1375 atomic_dec(&ip_vs_nullsvc_counter);
1376
1377 /*
1378 * Free the service if nobody refers to it
1379 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001380 if (atomic_read(&svc->refcnt) == 0) {
1381 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1382 svc->fwmark,
1383 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1384 ntohs(svc->port), atomic_read(&svc->usecnt));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001386 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387
1388 /* decrease the module use count */
1389 ip_vs_use_count_dec();
1390}
1391
1392/*
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001393 * Unlink a service from list and try to delete it if its refcnt reached 0
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001395static void ip_vs_unlink_service(struct ip_vs_service *svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397 /*
1398 * Unhash it from the service table
1399 */
1400 write_lock_bh(&__ip_vs_svc_lock);
1401
1402 ip_vs_svc_unhash(svc);
1403
1404 /*
1405 * Wait until all the svc users go away.
1406 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001407 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408
1409 __ip_vs_del_service(svc);
1410
1411 write_unlock_bh(&__ip_vs_svc_lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001412}
1413
1414/*
1415 * Delete a service from the service list
1416 */
1417static int ip_vs_del_service(struct ip_vs_service *svc)
1418{
1419 if (svc == NULL)
1420 return -EEXIST;
1421 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422
1423 return 0;
1424}
1425
1426
1427/*
1428 * Flush all the virtual services
1429 */
Hans Schillstromfc723252011-01-03 14:44:43 +01001430static int ip_vs_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001431{
1432 int idx;
1433 struct ip_vs_service *svc, *nxt;
1434
1435 /*
Hans Schillstromfc723252011-01-03 14:44:43 +01001436 * Flush the service table hashed by <netns,protocol,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -07001437 */
1438 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001439 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1440 s_list) {
1441 if (net_eq(svc->net, net))
1442 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001443 }
1444 }
1445
1446 /*
1447 * Flush the service table hashed by fwmark
1448 */
1449 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1450 list_for_each_entry_safe(svc, nxt,
1451 &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001452 if (net_eq(svc->net, net))
1453 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454 }
1455 }
1456
1457 return 0;
1458}
1459
1460
1461/*
1462 * Zero counters in a service or all services
1463 */
1464static int ip_vs_zero_service(struct ip_vs_service *svc)
1465{
1466 struct ip_vs_dest *dest;
1467
1468 write_lock_bh(&__ip_vs_svc_lock);
1469 list_for_each_entry(dest, &svc->destinations, n_list) {
1470 ip_vs_zero_stats(&dest->stats);
1471 }
1472 ip_vs_zero_stats(&svc->stats);
1473 write_unlock_bh(&__ip_vs_svc_lock);
1474 return 0;
1475}
1476
Hans Schillstromfc723252011-01-03 14:44:43 +01001477static int ip_vs_zero_all(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478{
1479 int idx;
1480 struct ip_vs_service *svc;
1481
1482 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1483 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001484 if (net_eq(svc->net, net))
1485 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001486 }
1487 }
1488
1489 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1490 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001491 if (net_eq(svc->net, net))
1492 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001493 }
1494 }
1495
1496 ip_vs_zero_stats(&ip_vs_stats);
1497 return 0;
1498}
1499
1500
1501static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001502proc_do_defense_mode(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001503 void __user *buffer, size_t *lenp, loff_t *ppos)
1504{
1505 int *valp = table->data;
1506 int val = *valp;
1507 int rc;
1508
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001509 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510 if (write && (*valp != val)) {
1511 if ((*valp < 0) || (*valp > 3)) {
1512 /* Restore the correct value */
1513 *valp = val;
1514 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001515 update_defense_level();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516 }
1517 }
1518 return rc;
1519}
1520
1521
1522static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001523proc_do_sync_threshold(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001524 void __user *buffer, size_t *lenp, loff_t *ppos)
1525{
1526 int *valp = table->data;
1527 int val[2];
1528 int rc;
1529
1530 /* backup the value first */
1531 memcpy(val, valp, sizeof(val));
1532
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001533 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1535 /* Restore the correct value */
1536 memcpy(valp, val, sizeof(val));
1537 }
1538 return rc;
1539}
1540
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001541static int
1542proc_do_sync_mode(ctl_table *table, int write,
1543 void __user *buffer, size_t *lenp, loff_t *ppos)
1544{
1545 int *valp = table->data;
1546 int val = *valp;
1547 int rc;
1548
1549 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1550 if (write && (*valp != val)) {
1551 if ((*valp < 0) || (*valp > 1)) {
1552 /* Restore the correct value */
1553 *valp = val;
1554 } else {
1555 ip_vs_sync_switch_mode(val);
1556 }
1557 }
1558 return rc;
1559}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001560
1561/*
1562 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1563 */
1564
1565static struct ctl_table vs_vars[] = {
1566 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567 .procname = "amemthresh",
1568 .data = &sysctl_ip_vs_amemthresh,
1569 .maxlen = sizeof(int),
1570 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001571 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 },
1573#ifdef CONFIG_IP_VS_DEBUG
1574 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575 .procname = "debug_level",
1576 .data = &sysctl_ip_vs_debug_level,
1577 .maxlen = sizeof(int),
1578 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001579 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580 },
1581#endif
1582 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583 .procname = "am_droprate",
1584 .data = &sysctl_ip_vs_am_droprate,
1585 .maxlen = sizeof(int),
1586 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001587 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 },
1589 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590 .procname = "drop_entry",
1591 .data = &sysctl_ip_vs_drop_entry,
1592 .maxlen = sizeof(int),
1593 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001594 .proc_handler = proc_do_defense_mode,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001595 },
1596 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597 .procname = "drop_packet",
1598 .data = &sysctl_ip_vs_drop_packet,
1599 .maxlen = sizeof(int),
1600 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001601 .proc_handler = proc_do_defense_mode,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001602 },
Julian Anastasovf4bc17c2010-09-21 17:35:41 +02001603#ifdef CONFIG_IP_VS_NFCT
1604 {
1605 .procname = "conntrack",
1606 .data = &sysctl_ip_vs_conntrack,
1607 .maxlen = sizeof(int),
1608 .mode = 0644,
1609 .proc_handler = &proc_dointvec,
1610 },
1611#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613 .procname = "secure_tcp",
1614 .data = &sysctl_ip_vs_secure_tcp,
1615 .maxlen = sizeof(int),
1616 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001617 .proc_handler = proc_do_defense_mode,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618 },
Julian Anastasov8a803042010-09-21 17:38:57 +02001619 {
1620 .procname = "snat_reroute",
1621 .data = &sysctl_ip_vs_snat_reroute,
1622 .maxlen = sizeof(int),
1623 .mode = 0644,
1624 .proc_handler = &proc_dointvec,
1625 },
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001626 {
1627 .procname = "sync_version",
1628 .data = &sysctl_ip_vs_sync_ver,
1629 .maxlen = sizeof(int),
1630 .mode = 0644,
1631 .proc_handler = &proc_do_sync_mode,
1632 },
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633#if 0
1634 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635 .procname = "timeout_established",
1636 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1637 .maxlen = sizeof(int),
1638 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001639 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001640 },
1641 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642 .procname = "timeout_synsent",
1643 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1644 .maxlen = sizeof(int),
1645 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001646 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647 },
1648 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649 .procname = "timeout_synrecv",
1650 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1651 .maxlen = sizeof(int),
1652 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001653 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654 },
1655 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656 .procname = "timeout_finwait",
1657 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1658 .maxlen = sizeof(int),
1659 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001660 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661 },
1662 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001663 .procname = "timeout_timewait",
1664 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1665 .maxlen = sizeof(int),
1666 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001667 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 },
1669 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670 .procname = "timeout_close",
1671 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1672 .maxlen = sizeof(int),
1673 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001674 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001675 },
1676 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001677 .procname = "timeout_closewait",
1678 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1679 .maxlen = sizeof(int),
1680 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001681 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682 },
1683 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 .procname = "timeout_lastack",
1685 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1686 .maxlen = sizeof(int),
1687 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001688 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689 },
1690 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 .procname = "timeout_listen",
1692 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1693 .maxlen = sizeof(int),
1694 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001695 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696 },
1697 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 .procname = "timeout_synack",
1699 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1700 .maxlen = sizeof(int),
1701 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001702 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001703 },
1704 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705 .procname = "timeout_udp",
1706 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1707 .maxlen = sizeof(int),
1708 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001709 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 },
1711 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712 .procname = "timeout_icmp",
1713 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1714 .maxlen = sizeof(int),
1715 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001716 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717 },
1718#endif
1719 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720 .procname = "cache_bypass",
1721 .data = &sysctl_ip_vs_cache_bypass,
1722 .maxlen = sizeof(int),
1723 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001724 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 },
1726 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727 .procname = "expire_nodest_conn",
1728 .data = &sysctl_ip_vs_expire_nodest_conn,
1729 .maxlen = sizeof(int),
1730 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001731 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732 },
1733 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734 .procname = "expire_quiescent_template",
1735 .data = &sysctl_ip_vs_expire_quiescent_template,
1736 .maxlen = sizeof(int),
1737 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001738 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001739 },
1740 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741 .procname = "sync_threshold",
1742 .data = &sysctl_ip_vs_sync_threshold,
1743 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1744 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001745 .proc_handler = proc_do_sync_threshold,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746 },
1747 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748 .procname = "nat_icmp_send",
1749 .data = &sysctl_ip_vs_nat_icmp_send,
1750 .maxlen = sizeof(int),
1751 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001752 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001754 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755};
1756
Sven Wegener5587da52008-08-10 18:24:40 +00001757const struct ctl_path net_vs_ctl_path[] = {
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001758 { .procname = "net", },
1759 { .procname = "ipv4", },
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001760 { .procname = "vs", },
1761 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001763EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764
/* NOTE(review): presumably the handle of the registered sysctl table - confirm at the registration site. */
static struct ctl_table_header *sysctl_header;
1766
1767#ifdef CONFIG_PROC_FS
1768
1769struct ip_vs_iter {
Hans Schillstromfc723252011-01-03 14:44:43 +01001770 struct seq_net_private p; /* Do not move this, netns depends upon it*/
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771 struct list_head *table;
1772 int bucket;
1773};
1774
1775/*
1776 * Write the contents of the VS rule table to a PROCfs file.
1777 * (It is kept just for backward compatibility)
1778 */
1779static inline const char *ip_vs_fwd_name(unsigned flags)
1780{
1781 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1782 case IP_VS_CONN_F_LOCALNODE:
1783 return "Local";
1784 case IP_VS_CONN_F_TUNNEL:
1785 return "Tunnel";
1786 case IP_VS_CONN_F_DROUTE:
1787 return "Route";
1788 default:
1789 return "Masq";
1790 }
1791}
1792
1793
1794/* Get the Nth entry in the two lists */
1795static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1796{
Hans Schillstromfc723252011-01-03 14:44:43 +01001797 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798 struct ip_vs_iter *iter = seq->private;
1799 int idx;
1800 struct ip_vs_service *svc;
1801
1802 /* look in hash by protocol */
1803 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1804 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001805 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806 iter->table = ip_vs_svc_table;
1807 iter->bucket = idx;
1808 return svc;
1809 }
1810 }
1811 }
1812
1813 /* keep looking in fwmark */
1814 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1815 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001816 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817 iter->table = ip_vs_svc_fwm_table;
1818 iter->bucket = idx;
1819 return svc;
1820 }
1821 }
1822 }
1823
1824 return NULL;
1825}
1826
1827static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
Simon Horman563e94f2008-09-17 10:10:42 +10001828__acquires(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829{
1830
1831 read_lock_bh(&__ip_vs_svc_lock);
1832 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1833}
1834
1835
1836static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1837{
1838 struct list_head *e;
1839 struct ip_vs_iter *iter;
1840 struct ip_vs_service *svc;
1841
1842 ++*pos;
1843 if (v == SEQ_START_TOKEN)
1844 return ip_vs_info_array(seq,0);
1845
1846 svc = v;
1847 iter = seq->private;
1848
1849 if (iter->table == ip_vs_svc_table) {
1850 /* next service in table hashed by protocol */
1851 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1852 return list_entry(e, struct ip_vs_service, s_list);
1853
1854
1855 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1856 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1857 s_list) {
1858 return svc;
1859 }
1860 }
1861
1862 iter->table = ip_vs_svc_fwm_table;
1863 iter->bucket = -1;
1864 goto scan_fwmark;
1865 }
1866
1867 /* next service in hashed by fwmark */
1868 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1869 return list_entry(e, struct ip_vs_service, f_list);
1870
1871 scan_fwmark:
1872 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1873 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1874 f_list)
1875 return svc;
1876 }
1877
1878 return NULL;
1879}
1880
1881static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
Simon Horman563e94f2008-09-17 10:10:42 +10001882__releases(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001883{
1884 read_unlock_bh(&__ip_vs_svc_lock);
1885}
1886
1887
1888static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1889{
1890 if (v == SEQ_START_TOKEN) {
1891 seq_printf(seq,
1892 "IP Virtual Server version %d.%d.%d (size=%d)\n",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01001893 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894 seq_puts(seq,
1895 "Prot LocalAddress:Port Scheduler Flags\n");
1896 seq_puts(seq,
1897 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1898 } else {
1899 const struct ip_vs_service *svc = v;
1900 const struct ip_vs_iter *iter = seq->private;
1901 const struct ip_vs_dest *dest;
1902
Vince Busam667a5f12008-09-02 15:55:49 +02001903 if (iter->table == ip_vs_svc_table) {
1904#ifdef CONFIG_IP_VS_IPV6
1905 if (svc->af == AF_INET6)
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001906 seq_printf(seq, "%s [%pI6]:%04X %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001907 ip_vs_proto_name(svc->protocol),
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001908 &svc->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001909 ntohs(svc->port),
1910 svc->scheduler->name);
1911 else
1912#endif
Nick Chalk26ec0372010-06-22 08:07:01 +02001913 seq_printf(seq, "%s %08X:%04X %s %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001914 ip_vs_proto_name(svc->protocol),
1915 ntohl(svc->addr.ip),
1916 ntohs(svc->port),
Nick Chalk26ec0372010-06-22 08:07:01 +02001917 svc->scheduler->name,
1918 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001919 } else {
Nick Chalk26ec0372010-06-22 08:07:01 +02001920 seq_printf(seq, "FWM %08X %s %s",
1921 svc->fwmark, svc->scheduler->name,
1922 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001923 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924
1925 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1926 seq_printf(seq, "persistent %d %08X\n",
1927 svc->timeout,
1928 ntohl(svc->netmask));
1929 else
1930 seq_putc(seq, '\n');
1931
1932 list_for_each_entry(dest, &svc->destinations, n_list) {
Vince Busam667a5f12008-09-02 15:55:49 +02001933#ifdef CONFIG_IP_VS_IPV6
1934 if (dest->af == AF_INET6)
1935 seq_printf(seq,
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001936 " -> [%pI6]:%04X"
Vince Busam667a5f12008-09-02 15:55:49 +02001937 " %-7s %-6d %-10d %-10d\n",
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001938 &dest->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001939 ntohs(dest->port),
1940 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1941 atomic_read(&dest->weight),
1942 atomic_read(&dest->activeconns),
1943 atomic_read(&dest->inactconns));
1944 else
1945#endif
1946 seq_printf(seq,
1947 " -> %08X:%04X "
1948 "%-7s %-6d %-10d %-10d\n",
1949 ntohl(dest->addr.ip),
1950 ntohs(dest->port),
1951 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1952 atomic_read(&dest->weight),
1953 atomic_read(&dest->activeconns),
1954 atomic_read(&dest->inactconns));
1955
Linus Torvalds1da177e2005-04-16 15:20:36 -07001956 }
1957 }
1958 return 0;
1959}
1960
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001961static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001962 .start = ip_vs_info_seq_start,
1963 .next = ip_vs_info_seq_next,
1964 .stop = ip_vs_info_seq_stop,
1965 .show = ip_vs_info_seq_show,
1966};
1967
1968static int ip_vs_info_open(struct inode *inode, struct file *file)
1969{
Hans Schillstromfc723252011-01-03 14:44:43 +01001970 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001971 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972}
1973
Arjan van de Ven9a321442007-02-12 00:55:35 -08001974static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001975 .owner = THIS_MODULE,
1976 .open = ip_vs_info_open,
1977 .read = seq_read,
1978 .llseek = seq_lseek,
1979 .release = seq_release_private,
1980};
1981
1982#endif
1983
Sven Wegener519e49e2008-08-10 18:24:41 +00001984struct ip_vs_stats ip_vs_stats = {
1985 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1986};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001987
1988#ifdef CONFIG_PROC_FS
1989static int ip_vs_stats_show(struct seq_file *seq, void *v)
1990{
1991
1992/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1993 seq_puts(seq,
1994 " Total Incoming Outgoing Incoming Outgoing\n");
1995 seq_printf(seq,
1996 " Conns Packets Packets Bytes Bytes\n");
1997
1998 spin_lock_bh(&ip_vs_stats.lock);
Sven Wegenere9c0ce22008-09-08 13:39:04 +02001999 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
2000 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
2001 (unsigned long long) ip_vs_stats.ustats.inbytes,
2002 (unsigned long long) ip_vs_stats.ustats.outbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002003
2004/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2005 seq_puts(seq,
2006 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2007 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002008 ip_vs_stats.ustats.cps,
2009 ip_vs_stats.ustats.inpps,
2010 ip_vs_stats.ustats.outpps,
2011 ip_vs_stats.ustats.inbps,
2012 ip_vs_stats.ustats.outbps);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013 spin_unlock_bh(&ip_vs_stats.lock);
2014
2015 return 0;
2016}
2017
2018static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2019{
Hans Schillstromfc723252011-01-03 14:44:43 +01002020 return single_open_net(inode, file, ip_vs_stats_show);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002021}
2022
Arjan van de Ven9a321442007-02-12 00:55:35 -08002023static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024 .owner = THIS_MODULE,
2025 .open = ip_vs_stats_seq_open,
2026 .read = seq_read,
2027 .llseek = seq_lseek,
2028 .release = single_release,
2029};
2030
2031#endif
2032
2033/*
2034 * Set timeout values for tcp tcpfin udp in the timeout_table.
2035 */
2036static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2037{
2038 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2039 u->tcp_timeout,
2040 u->tcp_fin_timeout,
2041 u->udp_timeout);
2042
2043#ifdef CONFIG_IP_VS_PROTO_TCP
2044 if (u->tcp_timeout) {
2045 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2046 = u->tcp_timeout * HZ;
2047 }
2048
2049 if (u->tcp_fin_timeout) {
2050 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2051 = u->tcp_fin_timeout * HZ;
2052 }
2053#endif
2054
2055#ifdef CONFIG_IP_VS_PROTO_UDP
2056 if (u->udp_timeout) {
2057 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2058 = u->udp_timeout * HZ;
2059 }
2060#endif
2061 return 0;
2062}
2063
2064
2065#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2066#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2067#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2068 sizeof(struct ip_vs_dest_user))
2069#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2070#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2071#define MAX_ARG_LEN SVCDEST_ARG_LEN
2072
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002073static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2075 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2076 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2077 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2078 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2079 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2080 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2081 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2082 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2083 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2084 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2085};
2086
Julius Volzc860c6b2008-09-02 15:55:36 +02002087static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2088 struct ip_vs_service_user *usvc_compat)
2089{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002090 memset(usvc, 0, sizeof(*usvc));
2091
Julius Volzc860c6b2008-09-02 15:55:36 +02002092 usvc->af = AF_INET;
2093 usvc->protocol = usvc_compat->protocol;
2094 usvc->addr.ip = usvc_compat->addr;
2095 usvc->port = usvc_compat->port;
2096 usvc->fwmark = usvc_compat->fwmark;
2097
2098 /* Deep copy of sched_name is not needed here */
2099 usvc->sched_name = usvc_compat->sched_name;
2100
2101 usvc->flags = usvc_compat->flags;
2102 usvc->timeout = usvc_compat->timeout;
2103 usvc->netmask = usvc_compat->netmask;
2104}
2105
2106static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2107 struct ip_vs_dest_user *udest_compat)
2108{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002109 memset(udest, 0, sizeof(*udest));
2110
Julius Volzc860c6b2008-09-02 15:55:36 +02002111 udest->addr.ip = udest_compat->addr;
2112 udest->port = udest_compat->port;
2113 udest->conn_flags = udest_compat->conn_flags;
2114 udest->weight = udest_compat->weight;
2115 udest->u_threshold = udest_compat->u_threshold;
2116 udest->l_threshold = udest_compat->l_threshold;
2117}
2118
Linus Torvalds1da177e2005-04-16 15:20:36 -07002119static int
2120do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2121{
Hans Schillstromfc723252011-01-03 14:44:43 +01002122 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123 int ret;
2124 unsigned char arg[MAX_ARG_LEN];
Julius Volzc860c6b2008-09-02 15:55:36 +02002125 struct ip_vs_service_user *usvc_compat;
2126 struct ip_vs_service_user_kern usvc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127 struct ip_vs_service *svc;
Julius Volzc860c6b2008-09-02 15:55:36 +02002128 struct ip_vs_dest_user *udest_compat;
2129 struct ip_vs_dest_user_kern udest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002130
2131 if (!capable(CAP_NET_ADMIN))
2132 return -EPERM;
2133
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002134 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2135 return -EINVAL;
2136 if (len < 0 || len > MAX_ARG_LEN)
2137 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138 if (len != set_arglen[SET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002139 pr_err("set_ctl: len %u != %u\n",
2140 len, set_arglen[SET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002141 return -EINVAL;
2142 }
2143
2144 if (copy_from_user(arg, user, len) != 0)
2145 return -EFAULT;
2146
2147 /* increase the module use count */
2148 ip_vs_use_count_inc();
2149
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002150 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151 ret = -ERESTARTSYS;
2152 goto out_dec;
2153 }
2154
2155 if (cmd == IP_VS_SO_SET_FLUSH) {
2156 /* Flush the virtual service */
Hans Schillstromfc723252011-01-03 14:44:43 +01002157 ret = ip_vs_flush(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002158 goto out_unlock;
2159 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2160 /* Set timeout values for (tcp tcpfin udp) */
2161 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2162 goto out_unlock;
2163 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2164 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2165 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2166 goto out_unlock;
2167 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2168 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2169 ret = stop_sync_thread(dm->state);
2170 goto out_unlock;
2171 }
2172
Julius Volzc860c6b2008-09-02 15:55:36 +02002173 usvc_compat = (struct ip_vs_service_user *)arg;
2174 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2175
2176 /* We only use the new structs internally, so copy userspace compat
2177 * structs to extended internal versions */
2178 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2179 ip_vs_copy_udest_compat(&udest, udest_compat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180
2181 if (cmd == IP_VS_SO_SET_ZERO) {
2182 /* if no service address is set, zero counters in all */
Julius Volzc860c6b2008-09-02 15:55:36 +02002183 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002184 ret = ip_vs_zero_all(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002185 goto out_unlock;
2186 }
2187 }
2188
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +01002189 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2190 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2191 usvc.protocol != IPPROTO_SCTP) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002192 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2193 usvc.protocol, &usvc.addr.ip,
2194 ntohs(usvc.port), usvc.sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195 ret = -EFAULT;
2196 goto out_unlock;
2197 }
2198
2199 /* Lookup the exact service by <protocol, addr, port> or fwmark */
Julius Volzc860c6b2008-09-02 15:55:36 +02002200 if (usvc.fwmark == 0)
Hans Schillstromfc723252011-01-03 14:44:43 +01002201 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002202 &usvc.addr, usvc.port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002204 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205
2206 if (cmd != IP_VS_SO_SET_ADD
Julius Volzc860c6b2008-09-02 15:55:36 +02002207 && (svc == NULL || svc->protocol != usvc.protocol)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002208 ret = -ESRCH;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002209 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210 }
2211
2212 switch (cmd) {
2213 case IP_VS_SO_SET_ADD:
2214 if (svc != NULL)
2215 ret = -EEXIST;
2216 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002217 ret = ip_vs_add_service(net, &usvc, &svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218 break;
2219 case IP_VS_SO_SET_EDIT:
Julius Volzc860c6b2008-09-02 15:55:36 +02002220 ret = ip_vs_edit_service(svc, &usvc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002221 break;
2222 case IP_VS_SO_SET_DEL:
2223 ret = ip_vs_del_service(svc);
2224 if (!ret)
2225 goto out_unlock;
2226 break;
2227 case IP_VS_SO_SET_ZERO:
2228 ret = ip_vs_zero_service(svc);
2229 break;
2230 case IP_VS_SO_SET_ADDDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002231 ret = ip_vs_add_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232 break;
2233 case IP_VS_SO_SET_EDITDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002234 ret = ip_vs_edit_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002235 break;
2236 case IP_VS_SO_SET_DELDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002237 ret = ip_vs_del_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238 break;
2239 default:
2240 ret = -EINVAL;
2241 }
2242
Linus Torvalds1da177e2005-04-16 15:20:36 -07002243 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002244 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245 out_dec:
2246 /* decrease the module use count */
2247 ip_vs_use_count_dec();
2248
2249 return ret;
2250}
2251
2252
2253static void
2254ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2255{
2256 spin_lock_bh(&src->lock);
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002257 memcpy(dst, &src->ustats, sizeof(*dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002258 spin_unlock_bh(&src->lock);
2259}
2260
2261static void
2262ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2263{
2264 dst->protocol = src->protocol;
Julius Volze7ade462008-09-02 15:55:33 +02002265 dst->addr = src->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002266 dst->port = src->port;
2267 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002268 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269 dst->flags = src->flags;
2270 dst->timeout = src->timeout / HZ;
2271 dst->netmask = src->netmask;
2272 dst->num_dests = src->num_dests;
2273 ip_vs_copy_stats(&dst->stats, &src->stats);
2274}
2275
2276static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002277__ip_vs_get_service_entries(struct net *net,
2278 const struct ip_vs_get_services *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279 struct ip_vs_get_services __user *uptr)
2280{
2281 int idx, count=0;
2282 struct ip_vs_service *svc;
2283 struct ip_vs_service_entry entry;
2284 int ret = 0;
2285
2286 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2287 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002288 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002289 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002290 continue;
2291
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292 if (count >= get->num_services)
2293 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002294 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002295 ip_vs_copy_service(&entry, svc);
2296 if (copy_to_user(&uptr->entrytable[count],
2297 &entry, sizeof(entry))) {
2298 ret = -EFAULT;
2299 goto out;
2300 }
2301 count++;
2302 }
2303 }
2304
2305 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2306 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002307 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002308 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002309 continue;
2310
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311 if (count >= get->num_services)
2312 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002313 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002314 ip_vs_copy_service(&entry, svc);
2315 if (copy_to_user(&uptr->entrytable[count],
2316 &entry, sizeof(entry))) {
2317 ret = -EFAULT;
2318 goto out;
2319 }
2320 count++;
2321 }
2322 }
2323 out:
2324 return ret;
2325}
2326
2327static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002328__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002329 struct ip_vs_get_dests __user *uptr)
2330{
2331 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002332 union nf_inet_addr addr = { .ip = get->addr };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002333 int ret = 0;
2334
2335 if (get->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002336 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002337 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002338 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002339 get->port);
Julius Volzb18610d2008-09-02 15:55:37 +02002340
Linus Torvalds1da177e2005-04-16 15:20:36 -07002341 if (svc) {
2342 int count = 0;
2343 struct ip_vs_dest *dest;
2344 struct ip_vs_dest_entry entry;
2345
2346 list_for_each_entry(dest, &svc->destinations, n_list) {
2347 if (count >= get->num_dests)
2348 break;
2349
Julius Volze7ade462008-09-02 15:55:33 +02002350 entry.addr = dest->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002351 entry.port = dest->port;
2352 entry.conn_flags = atomic_read(&dest->conn_flags);
2353 entry.weight = atomic_read(&dest->weight);
2354 entry.u_threshold = dest->u_threshold;
2355 entry.l_threshold = dest->l_threshold;
2356 entry.activeconns = atomic_read(&dest->activeconns);
2357 entry.inactconns = atomic_read(&dest->inactconns);
2358 entry.persistconns = atomic_read(&dest->persistconns);
2359 ip_vs_copy_stats(&entry.stats, &dest->stats);
2360 if (copy_to_user(&uptr->entrytable[count],
2361 &entry, sizeof(entry))) {
2362 ret = -EFAULT;
2363 break;
2364 }
2365 count++;
2366 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002367 } else
2368 ret = -ESRCH;
2369 return ret;
2370}
2371
2372static inline void
2373__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2374{
2375#ifdef CONFIG_IP_VS_PROTO_TCP
2376 u->tcp_timeout =
2377 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2378 u->tcp_fin_timeout =
2379 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2380#endif
2381#ifdef CONFIG_IP_VS_PROTO_UDP
2382 u->udp_timeout =
2383 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2384#endif
2385}
2386
2387
2388#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2389#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2390#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2391#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2392#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2393#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2394#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2395
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002396static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002397 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2398 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2399 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2400 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2401 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2402 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2403 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2404};
2405
2406static int
2407do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2408{
2409 unsigned char arg[128];
2410 int ret = 0;
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002411 unsigned int copylen;
Hans Schillstromfc723252011-01-03 14:44:43 +01002412 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413
Hans Schillstromfc723252011-01-03 14:44:43 +01002414 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002415 if (!capable(CAP_NET_ADMIN))
2416 return -EPERM;
2417
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002418 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2419 return -EINVAL;
2420
Linus Torvalds1da177e2005-04-16 15:20:36 -07002421 if (*len < get_arglen[GET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002422 pr_err("get_ctl: len %u < %u\n",
2423 *len, get_arglen[GET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002424 return -EINVAL;
2425 }
2426
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002427 copylen = get_arglen[GET_CMDID(cmd)];
2428 if (copylen > 128)
2429 return -EINVAL;
2430
2431 if (copy_from_user(arg, user, copylen) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002432 return -EFAULT;
2433
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002434 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002435 return -ERESTARTSYS;
2436
2437 switch (cmd) {
2438 case IP_VS_SO_GET_VERSION:
2439 {
2440 char buf[64];
2441
2442 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002443 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002444 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2445 ret = -EFAULT;
2446 goto out;
2447 }
2448 *len = strlen(buf)+1;
2449 }
2450 break;
2451
2452 case IP_VS_SO_GET_INFO:
2453 {
2454 struct ip_vs_getinfo info;
2455 info.version = IP_VS_VERSION_CODE;
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002456 info.size = ip_vs_conn_tab_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002457 info.num_services = ip_vs_num_services;
2458 if (copy_to_user(user, &info, sizeof(info)) != 0)
2459 ret = -EFAULT;
2460 }
2461 break;
2462
2463 case IP_VS_SO_GET_SERVICES:
2464 {
2465 struct ip_vs_get_services *get;
2466 int size;
2467
2468 get = (struct ip_vs_get_services *)arg;
2469 size = sizeof(*get) +
2470 sizeof(struct ip_vs_service_entry) * get->num_services;
2471 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002472 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002473 ret = -EINVAL;
2474 goto out;
2475 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002476 ret = __ip_vs_get_service_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002477 }
2478 break;
2479
2480 case IP_VS_SO_GET_SERVICE:
2481 {
2482 struct ip_vs_service_entry *entry;
2483 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002484 union nf_inet_addr addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002485
2486 entry = (struct ip_vs_service_entry *)arg;
Julius Volzb18610d2008-09-02 15:55:37 +02002487 addr.ip = entry->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488 if (entry->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002489 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002490 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002491 svc = __ip_vs_service_find(net, AF_INET,
2492 entry->protocol, &addr,
2493 entry->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002494 if (svc) {
2495 ip_vs_copy_service(entry, svc);
2496 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2497 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498 } else
2499 ret = -ESRCH;
2500 }
2501 break;
2502
2503 case IP_VS_SO_GET_DESTS:
2504 {
2505 struct ip_vs_get_dests *get;
2506 int size;
2507
2508 get = (struct ip_vs_get_dests *)arg;
2509 size = sizeof(*get) +
2510 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2511 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002512 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002513 ret = -EINVAL;
2514 goto out;
2515 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002516 ret = __ip_vs_get_dest_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002517 }
2518 break;
2519
2520 case IP_VS_SO_GET_TIMEOUT:
2521 {
2522 struct ip_vs_timeout_user t;
2523
2524 __ip_vs_get_timeouts(&t);
2525 if (copy_to_user(user, &t, sizeof(t)) != 0)
2526 ret = -EFAULT;
2527 }
2528 break;
2529
2530 case IP_VS_SO_GET_DAEMON:
2531 {
2532 struct ip_vs_daemon_user d[2];
2533
2534 memset(&d, 0, sizeof(d));
2535 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2536 d[0].state = IP_VS_STATE_MASTER;
pageexec4da62fc2005-06-26 16:00:19 -07002537 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002538 d[0].syncid = ip_vs_master_syncid;
2539 }
2540 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2541 d[1].state = IP_VS_STATE_BACKUP;
pageexec4da62fc2005-06-26 16:00:19 -07002542 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543 d[1].syncid = ip_vs_backup_syncid;
2544 }
2545 if (copy_to_user(user, &d, sizeof(d)) != 0)
2546 ret = -EFAULT;
2547 }
2548 break;
2549
2550 default:
2551 ret = -EINVAL;
2552 }
2553
2554 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002555 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556 return ret;
2557}
2558
2559
2560static struct nf_sockopt_ops ip_vs_sockopts = {
2561 .pf = PF_INET,
2562 .set_optmin = IP_VS_BASE_CTL,
2563 .set_optmax = IP_VS_SO_SET_MAX+1,
2564 .set = do_ip_vs_set_ctl,
2565 .get_optmin = IP_VS_BASE_CTL,
2566 .get_optmax = IP_VS_SO_GET_MAX+1,
2567 .get = do_ip_vs_get_ctl,
Neil Horman16fcec32007-09-11 11:28:26 +02002568 .owner = THIS_MODULE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569};
2570
Julius Volz9a812192008-08-14 14:08:44 +02002571/*
2572 * Generic Netlink interface
2573 */
2574
2575/* IPVS genetlink family */
2576static struct genl_family ip_vs_genl_family = {
2577 .id = GENL_ID_GENERATE,
2578 .hdrsize = 0,
2579 .name = IPVS_GENL_NAME,
2580 .version = IPVS_GENL_VERSION,
2581 .maxattr = IPVS_CMD_MAX,
2582};
2583
2584/* Policy used for first-level command attributes */
2585static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2586 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2587 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2588 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2589 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2590 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2591 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2592};
2593
2594/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2595static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2596 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2597 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2598 .len = IP_VS_IFNAME_MAXLEN },
2599 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2600};
2601
2602/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2603static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2604 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2605 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2606 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2607 .len = sizeof(union nf_inet_addr) },
2608 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2609 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2610 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2611 .len = IP_VS_SCHEDNAME_MAXLEN },
Simon Horman0d1e71b2010-08-22 21:37:54 +09002612 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2613 .len = IP_VS_PENAME_MAXLEN },
Julius Volz9a812192008-08-14 14:08:44 +02002614 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2615 .len = sizeof(struct ip_vs_flags) },
2616 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2617 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2618 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2619};
2620
2621/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2622static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2623 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2624 .len = sizeof(union nf_inet_addr) },
2625 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2626 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2627 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2628 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2629 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2630 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2631 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2632 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2633 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2634};
2635
2636static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2637 struct ip_vs_stats *stats)
2638{
2639 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2640 if (!nl_stats)
2641 return -EMSGSIZE;
2642
2643 spin_lock_bh(&stats->lock);
2644
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002645 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2646 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2647 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2648 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2649 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2650 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2651 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2652 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2653 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2654 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
Julius Volz9a812192008-08-14 14:08:44 +02002655
2656 spin_unlock_bh(&stats->lock);
2657
2658 nla_nest_end(skb, nl_stats);
2659
2660 return 0;
2661
2662nla_put_failure:
2663 spin_unlock_bh(&stats->lock);
2664 nla_nest_cancel(skb, nl_stats);
2665 return -EMSGSIZE;
2666}
2667
2668static int ip_vs_genl_fill_service(struct sk_buff *skb,
2669 struct ip_vs_service *svc)
2670{
2671 struct nlattr *nl_service;
2672 struct ip_vs_flags flags = { .flags = svc->flags,
2673 .mask = ~0 };
2674
2675 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2676 if (!nl_service)
2677 return -EMSGSIZE;
2678
Julius Volzf94fd042008-09-02 15:55:55 +02002679 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
Julius Volz9a812192008-08-14 14:08:44 +02002680
2681 if (svc->fwmark) {
2682 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2683 } else {
2684 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2685 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2686 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2687 }
2688
2689 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002690 if (svc->pe)
2691 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
Julius Volz9a812192008-08-14 14:08:44 +02002692 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2693 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2694 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2695
2696 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2697 goto nla_put_failure;
2698
2699 nla_nest_end(skb, nl_service);
2700
2701 return 0;
2702
2703nla_put_failure:
2704 nla_nest_cancel(skb, nl_service);
2705 return -EMSGSIZE;
2706}
2707
2708static int ip_vs_genl_dump_service(struct sk_buff *skb,
2709 struct ip_vs_service *svc,
2710 struct netlink_callback *cb)
2711{
2712 void *hdr;
2713
2714 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2715 &ip_vs_genl_family, NLM_F_MULTI,
2716 IPVS_CMD_NEW_SERVICE);
2717 if (!hdr)
2718 return -EMSGSIZE;
2719
2720 if (ip_vs_genl_fill_service(skb, svc) < 0)
2721 goto nla_put_failure;
2722
2723 return genlmsg_end(skb, hdr);
2724
2725nla_put_failure:
2726 genlmsg_cancel(skb, hdr);
2727 return -EMSGSIZE;
2728}
2729
2730static int ip_vs_genl_dump_services(struct sk_buff *skb,
2731 struct netlink_callback *cb)
2732{
2733 int idx = 0, i;
2734 int start = cb->args[0];
2735 struct ip_vs_service *svc;
Hans Schillstromfc723252011-01-03 14:44:43 +01002736 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002737
2738 mutex_lock(&__ip_vs_mutex);
2739 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2740 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002741 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002742 continue;
2743 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2744 idx--;
2745 goto nla_put_failure;
2746 }
2747 }
2748 }
2749
2750 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2751 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002752 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002753 continue;
2754 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2755 idx--;
2756 goto nla_put_failure;
2757 }
2758 }
2759 }
2760
2761nla_put_failure:
2762 mutex_unlock(&__ip_vs_mutex);
2763 cb->args[0] = idx;
2764
2765 return skb->len;
2766}
2767
Hans Schillstromfc723252011-01-03 14:44:43 +01002768static int ip_vs_genl_parse_service(struct net *net,
2769 struct ip_vs_service_user_kern *usvc,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002770 struct nlattr *nla, int full_entry,
2771 struct ip_vs_service **ret_svc)
Julius Volz9a812192008-08-14 14:08:44 +02002772{
2773 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2774 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002775 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002776
2777 /* Parse mandatory identifying service fields first */
2778 if (nla == NULL ||
2779 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2780 return -EINVAL;
2781
2782 nla_af = attrs[IPVS_SVC_ATTR_AF];
2783 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2784 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2785 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2786 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2787
2788 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2789 return -EINVAL;
2790
Simon Horman258c8892009-12-15 17:01:25 +01002791 memset(usvc, 0, sizeof(*usvc));
2792
Julius Volzc860c6b2008-09-02 15:55:36 +02002793 usvc->af = nla_get_u16(nla_af);
Julius Volzf94fd042008-09-02 15:55:55 +02002794#ifdef CONFIG_IP_VS_IPV6
2795 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2796#else
2797 if (usvc->af != AF_INET)
2798#endif
Julius Volz9a812192008-08-14 14:08:44 +02002799 return -EAFNOSUPPORT;
2800
2801 if (nla_fwmark) {
2802 usvc->protocol = IPPROTO_TCP;
2803 usvc->fwmark = nla_get_u32(nla_fwmark);
2804 } else {
2805 usvc->protocol = nla_get_u16(nla_protocol);
2806 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2807 usvc->port = nla_get_u16(nla_port);
2808 usvc->fwmark = 0;
2809 }
2810
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002811 if (usvc->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002812 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002813 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002814 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002815 &usvc->addr, usvc->port);
2816 *ret_svc = svc;
2817
Julius Volz9a812192008-08-14 14:08:44 +02002818 /* If a full entry was requested, check for the additional fields */
2819 if (full_entry) {
Simon Horman0d1e71b2010-08-22 21:37:54 +09002820 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
Julius Volz9a812192008-08-14 14:08:44 +02002821 *nla_netmask;
2822 struct ip_vs_flags flags;
Julius Volz9a812192008-08-14 14:08:44 +02002823
2824 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
Simon Horman0d1e71b2010-08-22 21:37:54 +09002825 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
Julius Volz9a812192008-08-14 14:08:44 +02002826 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2827 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2828 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2829
2830 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2831 return -EINVAL;
2832
2833 nla_memcpy(&flags, nla_flags, sizeof(flags));
2834
2835 /* prefill flags from service if it already exists */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002836 if (svc)
Julius Volz9a812192008-08-14 14:08:44 +02002837 usvc->flags = svc->flags;
Julius Volz9a812192008-08-14 14:08:44 +02002838
2839 /* set new flags from userland */
2840 usvc->flags = (usvc->flags & ~flags.mask) |
2841 (flags.flags & flags.mask);
Julius Volzc860c6b2008-09-02 15:55:36 +02002842 usvc->sched_name = nla_data(nla_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002843 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
Julius Volz9a812192008-08-14 14:08:44 +02002844 usvc->timeout = nla_get_u32(nla_timeout);
2845 usvc->netmask = nla_get_u32(nla_netmask);
2846 }
2847
2848 return 0;
2849}
2850
Hans Schillstromfc723252011-01-03 14:44:43 +01002851static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2852 struct nlattr *nla)
Julius Volz9a812192008-08-14 14:08:44 +02002853{
Julius Volzc860c6b2008-09-02 15:55:36 +02002854 struct ip_vs_service_user_kern usvc;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002855 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002856 int ret;
2857
Hans Schillstromfc723252011-01-03 14:44:43 +01002858 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002859 return ret ? ERR_PTR(ret) : svc;
Julius Volz9a812192008-08-14 14:08:44 +02002860}
2861
2862static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2863{
2864 struct nlattr *nl_dest;
2865
2866 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2867 if (!nl_dest)
2868 return -EMSGSIZE;
2869
2870 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2871 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2872
2873 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2874 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2875 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2876 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2877 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2878 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2879 atomic_read(&dest->activeconns));
2880 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2881 atomic_read(&dest->inactconns));
2882 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2883 atomic_read(&dest->persistconns));
2884
2885 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2886 goto nla_put_failure;
2887
2888 nla_nest_end(skb, nl_dest);
2889
2890 return 0;
2891
2892nla_put_failure:
2893 nla_nest_cancel(skb, nl_dest);
2894 return -EMSGSIZE;
2895}
2896
2897static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2898 struct netlink_callback *cb)
2899{
2900 void *hdr;
2901
2902 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2903 &ip_vs_genl_family, NLM_F_MULTI,
2904 IPVS_CMD_NEW_DEST);
2905 if (!hdr)
2906 return -EMSGSIZE;
2907
2908 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2909 goto nla_put_failure;
2910
2911 return genlmsg_end(skb, hdr);
2912
2913nla_put_failure:
2914 genlmsg_cancel(skb, hdr);
2915 return -EMSGSIZE;
2916}
2917
2918static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2919 struct netlink_callback *cb)
2920{
2921 int idx = 0;
2922 int start = cb->args[0];
2923 struct ip_vs_service *svc;
2924 struct ip_vs_dest *dest;
2925 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
Hans Schillstromfc723252011-01-03 14:44:43 +01002926 struct net *net;
Julius Volz9a812192008-08-14 14:08:44 +02002927
2928 mutex_lock(&__ip_vs_mutex);
2929
2930 /* Try to find the service for which to dump destinations */
2931 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2932 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2933 goto out_err;
2934
Hans Schillstromfc723252011-01-03 14:44:43 +01002935 net = skb_sknet(skb);
2936 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02002937 if (IS_ERR(svc) || svc == NULL)
2938 goto out_err;
2939
2940 /* Dump the destinations */
2941 list_for_each_entry(dest, &svc->destinations, n_list) {
2942 if (++idx <= start)
2943 continue;
2944 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2945 idx--;
2946 goto nla_put_failure;
2947 }
2948 }
2949
2950nla_put_failure:
2951 cb->args[0] = idx;
Julius Volz9a812192008-08-14 14:08:44 +02002952
2953out_err:
2954 mutex_unlock(&__ip_vs_mutex);
2955
2956 return skb->len;
2957}
2958
Julius Volzc860c6b2008-09-02 15:55:36 +02002959static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
Julius Volz9a812192008-08-14 14:08:44 +02002960 struct nlattr *nla, int full_entry)
2961{
2962 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2963 struct nlattr *nla_addr, *nla_port;
2964
2965 /* Parse mandatory identifying destination fields first */
2966 if (nla == NULL ||
2967 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2968 return -EINVAL;
2969
2970 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2971 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2972
2973 if (!(nla_addr && nla_port))
2974 return -EINVAL;
2975
Simon Horman258c8892009-12-15 17:01:25 +01002976 memset(udest, 0, sizeof(*udest));
2977
Julius Volz9a812192008-08-14 14:08:44 +02002978 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2979 udest->port = nla_get_u16(nla_port);
2980
2981 /* If a full entry was requested, check for the additional fields */
2982 if (full_entry) {
2983 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2984 *nla_l_thresh;
2985
2986 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2987 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2988 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2989 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2990
2991 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2992 return -EINVAL;
2993
2994 udest->conn_flags = nla_get_u32(nla_fwd)
2995 & IP_VS_CONN_F_FWD_MASK;
2996 udest->weight = nla_get_u32(nla_weight);
2997 udest->u_threshold = nla_get_u32(nla_u_thresh);
2998 udest->l_threshold = nla_get_u32(nla_l_thresh);
2999 }
3000
3001 return 0;
3002}
3003
3004static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3005 const char *mcast_ifn, __be32 syncid)
3006{
3007 struct nlattr *nl_daemon;
3008
3009 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3010 if (!nl_daemon)
3011 return -EMSGSIZE;
3012
3013 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3014 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3015 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3016
3017 nla_nest_end(skb, nl_daemon);
3018
3019 return 0;
3020
3021nla_put_failure:
3022 nla_nest_cancel(skb, nl_daemon);
3023 return -EMSGSIZE;
3024}
3025
3026static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3027 const char *mcast_ifn, __be32 syncid,
3028 struct netlink_callback *cb)
3029{
3030 void *hdr;
3031 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3032 &ip_vs_genl_family, NLM_F_MULTI,
3033 IPVS_CMD_NEW_DAEMON);
3034 if (!hdr)
3035 return -EMSGSIZE;
3036
3037 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3038 goto nla_put_failure;
3039
3040 return genlmsg_end(skb, hdr);
3041
3042nla_put_failure:
3043 genlmsg_cancel(skb, hdr);
3044 return -EMSGSIZE;
3045}
3046
3047static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3048 struct netlink_callback *cb)
3049{
3050 mutex_lock(&__ip_vs_mutex);
3051 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3052 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3053 ip_vs_master_mcast_ifn,
3054 ip_vs_master_syncid, cb) < 0)
3055 goto nla_put_failure;
3056
3057 cb->args[0] = 1;
3058 }
3059
3060 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3061 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3062 ip_vs_backup_mcast_ifn,
3063 ip_vs_backup_syncid, cb) < 0)
3064 goto nla_put_failure;
3065
3066 cb->args[1] = 1;
3067 }
3068
3069nla_put_failure:
3070 mutex_unlock(&__ip_vs_mutex);
3071
3072 return skb->len;
3073}
3074
3075static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3076{
3077 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3078 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3079 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3080 return -EINVAL;
3081
3082 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3083 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3084 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3085}
3086
3087static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3088{
3089 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3090 return -EINVAL;
3091
3092 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3093}
3094
3095static int ip_vs_genl_set_config(struct nlattr **attrs)
3096{
3097 struct ip_vs_timeout_user t;
3098
3099 __ip_vs_get_timeouts(&t);
3100
3101 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3102 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3103
3104 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3105 t.tcp_fin_timeout =
3106 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3107
3108 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3109 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3110
3111 return ip_vs_set_timeout(&t);
3112}
3113
3114static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3115{
3116 struct ip_vs_service *svc = NULL;
Julius Volzc860c6b2008-09-02 15:55:36 +02003117 struct ip_vs_service_user_kern usvc;
3118 struct ip_vs_dest_user_kern udest;
Julius Volz9a812192008-08-14 14:08:44 +02003119 int ret = 0, cmd;
3120 int need_full_svc = 0, need_full_dest = 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003121 struct net *net;
Julius Volz9a812192008-08-14 14:08:44 +02003122
Hans Schillstromfc723252011-01-03 14:44:43 +01003123 net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02003124 cmd = info->genlhdr->cmd;
3125
3126 mutex_lock(&__ip_vs_mutex);
3127
3128 if (cmd == IPVS_CMD_FLUSH) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003129 ret = ip_vs_flush(net);
Julius Volz9a812192008-08-14 14:08:44 +02003130 goto out;
3131 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3132 ret = ip_vs_genl_set_config(info->attrs);
3133 goto out;
3134 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3135 cmd == IPVS_CMD_DEL_DAEMON) {
3136
3137 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3138
3139 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3140 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3141 info->attrs[IPVS_CMD_ATTR_DAEMON],
3142 ip_vs_daemon_policy)) {
3143 ret = -EINVAL;
3144 goto out;
3145 }
3146
3147 if (cmd == IPVS_CMD_NEW_DAEMON)
3148 ret = ip_vs_genl_new_daemon(daemon_attrs);
3149 else
3150 ret = ip_vs_genl_del_daemon(daemon_attrs);
3151 goto out;
3152 } else if (cmd == IPVS_CMD_ZERO &&
3153 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003154 ret = ip_vs_zero_all(net);
Julius Volz9a812192008-08-14 14:08:44 +02003155 goto out;
3156 }
3157
3158 /* All following commands require a service argument, so check if we
3159 * received a valid one. We need a full service specification when
3160 * adding / editing a service. Only identifying members otherwise. */
3161 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3162 need_full_svc = 1;
3163
Hans Schillstromfc723252011-01-03 14:44:43 +01003164 ret = ip_vs_genl_parse_service(net, &usvc,
Julius Volz9a812192008-08-14 14:08:44 +02003165 info->attrs[IPVS_CMD_ATTR_SERVICE],
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003166 need_full_svc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003167 if (ret)
3168 goto out;
3169
Julius Volz9a812192008-08-14 14:08:44 +02003170 /* Unless we're adding a new service, the service must already exist */
3171 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3172 ret = -ESRCH;
3173 goto out;
3174 }
3175
3176 /* Destination commands require a valid destination argument. For
3177 * adding / editing a destination, we need a full destination
3178 * specification. */
3179 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3180 cmd == IPVS_CMD_DEL_DEST) {
3181 if (cmd != IPVS_CMD_DEL_DEST)
3182 need_full_dest = 1;
3183
3184 ret = ip_vs_genl_parse_dest(&udest,
3185 info->attrs[IPVS_CMD_ATTR_DEST],
3186 need_full_dest);
3187 if (ret)
3188 goto out;
3189 }
3190
3191 switch (cmd) {
3192 case IPVS_CMD_NEW_SERVICE:
3193 if (svc == NULL)
Hans Schillstromfc723252011-01-03 14:44:43 +01003194 ret = ip_vs_add_service(net, &usvc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003195 else
3196 ret = -EEXIST;
3197 break;
3198 case IPVS_CMD_SET_SERVICE:
3199 ret = ip_vs_edit_service(svc, &usvc);
3200 break;
3201 case IPVS_CMD_DEL_SERVICE:
3202 ret = ip_vs_del_service(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003203 /* do not use svc, it can be freed */
Julius Volz9a812192008-08-14 14:08:44 +02003204 break;
3205 case IPVS_CMD_NEW_DEST:
3206 ret = ip_vs_add_dest(svc, &udest);
3207 break;
3208 case IPVS_CMD_SET_DEST:
3209 ret = ip_vs_edit_dest(svc, &udest);
3210 break;
3211 case IPVS_CMD_DEL_DEST:
3212 ret = ip_vs_del_dest(svc, &udest);
3213 break;
3214 case IPVS_CMD_ZERO:
3215 ret = ip_vs_zero_service(svc);
3216 break;
3217 default:
3218 ret = -EINVAL;
3219 }
3220
3221out:
Julius Volz9a812192008-08-14 14:08:44 +02003222 mutex_unlock(&__ip_vs_mutex);
3223
3224 return ret;
3225}
3226
3227static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3228{
3229 struct sk_buff *msg;
3230 void *reply;
3231 int ret, cmd, reply_cmd;
Hans Schillstromfc723252011-01-03 14:44:43 +01003232 struct net *net;
Julius Volz9a812192008-08-14 14:08:44 +02003233
Hans Schillstromfc723252011-01-03 14:44:43 +01003234 net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02003235 cmd = info->genlhdr->cmd;
3236
3237 if (cmd == IPVS_CMD_GET_SERVICE)
3238 reply_cmd = IPVS_CMD_NEW_SERVICE;
3239 else if (cmd == IPVS_CMD_GET_INFO)
3240 reply_cmd = IPVS_CMD_SET_INFO;
3241 else if (cmd == IPVS_CMD_GET_CONFIG)
3242 reply_cmd = IPVS_CMD_SET_CONFIG;
3243 else {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003244 pr_err("unknown Generic Netlink command\n");
Julius Volz9a812192008-08-14 14:08:44 +02003245 return -EINVAL;
3246 }
3247
3248 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3249 if (!msg)
3250 return -ENOMEM;
3251
3252 mutex_lock(&__ip_vs_mutex);
3253
3254 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3255 if (reply == NULL)
3256 goto nla_put_failure;
3257
3258 switch (cmd) {
3259 case IPVS_CMD_GET_SERVICE:
3260 {
3261 struct ip_vs_service *svc;
3262
Hans Schillstromfc723252011-01-03 14:44:43 +01003263 svc = ip_vs_genl_find_service(net,
3264 info->attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003265 if (IS_ERR(svc)) {
3266 ret = PTR_ERR(svc);
3267 goto out_err;
3268 } else if (svc) {
3269 ret = ip_vs_genl_fill_service(msg, svc);
Julius Volz9a812192008-08-14 14:08:44 +02003270 if (ret)
3271 goto nla_put_failure;
3272 } else {
3273 ret = -ESRCH;
3274 goto out_err;
3275 }
3276
3277 break;
3278 }
3279
3280 case IPVS_CMD_GET_CONFIG:
3281 {
3282 struct ip_vs_timeout_user t;
3283
3284 __ip_vs_get_timeouts(&t);
3285#ifdef CONFIG_IP_VS_PROTO_TCP
3286 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3287 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3288 t.tcp_fin_timeout);
3289#endif
3290#ifdef CONFIG_IP_VS_PROTO_UDP
3291 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3292#endif
3293
3294 break;
3295 }
3296
3297 case IPVS_CMD_GET_INFO:
3298 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3299 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01003300 ip_vs_conn_tab_size);
Julius Volz9a812192008-08-14 14:08:44 +02003301 break;
3302 }
3303
3304 genlmsg_end(msg, reply);
Johannes Berg134e6372009-07-10 09:51:34 +00003305 ret = genlmsg_reply(msg, info);
Julius Volz9a812192008-08-14 14:08:44 +02003306 goto out;
3307
3308nla_put_failure:
Hannes Eder1e3e2382009-08-02 11:05:41 +00003309 pr_err("not enough space in Netlink message\n");
Julius Volz9a812192008-08-14 14:08:44 +02003310 ret = -EMSGSIZE;
3311
3312out_err:
3313 nlmsg_free(msg);
3314out:
3315 mutex_unlock(&__ip_vs_mutex);
3316
3317 return ret;
3318}
3319
3320
3321static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3322 {
3323 .cmd = IPVS_CMD_NEW_SERVICE,
3324 .flags = GENL_ADMIN_PERM,
3325 .policy = ip_vs_cmd_policy,
3326 .doit = ip_vs_genl_set_cmd,
3327 },
3328 {
3329 .cmd = IPVS_CMD_SET_SERVICE,
3330 .flags = GENL_ADMIN_PERM,
3331 .policy = ip_vs_cmd_policy,
3332 .doit = ip_vs_genl_set_cmd,
3333 },
3334 {
3335 .cmd = IPVS_CMD_DEL_SERVICE,
3336 .flags = GENL_ADMIN_PERM,
3337 .policy = ip_vs_cmd_policy,
3338 .doit = ip_vs_genl_set_cmd,
3339 },
3340 {
3341 .cmd = IPVS_CMD_GET_SERVICE,
3342 .flags = GENL_ADMIN_PERM,
3343 .doit = ip_vs_genl_get_cmd,
3344 .dumpit = ip_vs_genl_dump_services,
3345 .policy = ip_vs_cmd_policy,
3346 },
3347 {
3348 .cmd = IPVS_CMD_NEW_DEST,
3349 .flags = GENL_ADMIN_PERM,
3350 .policy = ip_vs_cmd_policy,
3351 .doit = ip_vs_genl_set_cmd,
3352 },
3353 {
3354 .cmd = IPVS_CMD_SET_DEST,
3355 .flags = GENL_ADMIN_PERM,
3356 .policy = ip_vs_cmd_policy,
3357 .doit = ip_vs_genl_set_cmd,
3358 },
3359 {
3360 .cmd = IPVS_CMD_DEL_DEST,
3361 .flags = GENL_ADMIN_PERM,
3362 .policy = ip_vs_cmd_policy,
3363 .doit = ip_vs_genl_set_cmd,
3364 },
3365 {
3366 .cmd = IPVS_CMD_GET_DEST,
3367 .flags = GENL_ADMIN_PERM,
3368 .policy = ip_vs_cmd_policy,
3369 .dumpit = ip_vs_genl_dump_dests,
3370 },
3371 {
3372 .cmd = IPVS_CMD_NEW_DAEMON,
3373 .flags = GENL_ADMIN_PERM,
3374 .policy = ip_vs_cmd_policy,
3375 .doit = ip_vs_genl_set_cmd,
3376 },
3377 {
3378 .cmd = IPVS_CMD_DEL_DAEMON,
3379 .flags = GENL_ADMIN_PERM,
3380 .policy = ip_vs_cmd_policy,
3381 .doit = ip_vs_genl_set_cmd,
3382 },
3383 {
3384 .cmd = IPVS_CMD_GET_DAEMON,
3385 .flags = GENL_ADMIN_PERM,
3386 .dumpit = ip_vs_genl_dump_daemons,
3387 },
3388 {
3389 .cmd = IPVS_CMD_SET_CONFIG,
3390 .flags = GENL_ADMIN_PERM,
3391 .policy = ip_vs_cmd_policy,
3392 .doit = ip_vs_genl_set_cmd,
3393 },
3394 {
3395 .cmd = IPVS_CMD_GET_CONFIG,
3396 .flags = GENL_ADMIN_PERM,
3397 .doit = ip_vs_genl_get_cmd,
3398 },
3399 {
3400 .cmd = IPVS_CMD_GET_INFO,
3401 .flags = GENL_ADMIN_PERM,
3402 .doit = ip_vs_genl_get_cmd,
3403 },
3404 {
3405 .cmd = IPVS_CMD_ZERO,
3406 .flags = GENL_ADMIN_PERM,
3407 .policy = ip_vs_cmd_policy,
3408 .doit = ip_vs_genl_set_cmd,
3409 },
3410 {
3411 .cmd = IPVS_CMD_FLUSH,
3412 .flags = GENL_ADMIN_PERM,
3413 .doit = ip_vs_genl_set_cmd,
3414 },
3415};
3416
3417static int __init ip_vs_genl_register(void)
3418{
Michał Mirosław8f698d52009-05-21 10:34:05 +00003419 return genl_register_family_with_ops(&ip_vs_genl_family,
3420 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
Julius Volz9a812192008-08-14 14:08:44 +02003421}
3422
3423static void ip_vs_genl_unregister(void)
3424{
3425 genl_unregister_family(&ip_vs_genl_family);
3426}
3427
3428/* End of Generic Netlink interface definitions */
3429
/*
 * per netns init/exit func.
 */
3433int __net_init __ip_vs_control_init(struct net *net)
3434{
Hans Schillstromfc723252011-01-03 14:44:43 +01003435 int idx;
3436 struct netns_ipvs *ipvs = net_ipvs(net);
3437
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003438 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3439 return -EPERM;
3440
Hans Schillstromfc723252011-01-03 14:44:43 +01003441 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3442 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3443
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003444 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3445 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3446 sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path,
3447 vs_vars);
3448 if (sysctl_header == NULL)
3449 goto err_reg;
3450 ip_vs_new_estimator(&ip_vs_stats);
3451 return 0;
3452
3453err_reg:
3454 return -ENOMEM;
3455}
3456
3457static void __net_exit __ip_vs_control_cleanup(struct net *net)
3458{
3459 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3460 return;
3461
3462 ip_vs_kill_estimator(&ip_vs_stats);
3463 unregister_net_sysctl_table(sysctl_header);
3464 proc_net_remove(net, "ip_vs_stats");
3465 proc_net_remove(net, "ip_vs");
3466}
3467
3468static struct pernet_operations ipvs_control_ops = {
3469 .init = __ip_vs_control_init,
3470 .exit = __ip_vs_control_cleanup,
3471};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003472
Sven Wegener048cf482008-08-10 18:24:35 +00003473int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003474{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003475 int idx;
Hans Schillstromfc723252011-01-03 14:44:43 +01003476 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003477
3478 EnterFunction(2);
3479
Hans Schillstromfc723252011-01-03 14:44:43 +01003480 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003481 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3482 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3483 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3484 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003485
3486 ret = register_pernet_subsys(&ipvs_control_ops);
3487 if (ret) {
3488 pr_err("cannot register namespace.\n");
3489 goto err;
Eduardo Blancod86bef72010-10-19 10:26:47 +01003490 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003491
3492 smp_wmb(); /* Do we really need it now ? */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003493
Linus Torvalds1da177e2005-04-16 15:20:36 -07003494 ret = nf_register_sockopt(&ip_vs_sockopts);
3495 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003496 pr_err("cannot register sockopt.\n");
Hans Schillstromfc723252011-01-03 14:44:43 +01003497 goto err_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003498 }
3499
Julius Volz9a812192008-08-14 14:08:44 +02003500 ret = ip_vs_genl_register();
3501 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003502 pr_err("cannot register Generic Netlink interface.\n");
Julius Volz9a812192008-08-14 14:08:44 +02003503 nf_unregister_sockopt(&ip_vs_sockopts);
Hans Schillstromfc723252011-01-03 14:44:43 +01003504 goto err_net;
Julius Volz9a812192008-08-14 14:08:44 +02003505 }
3506
Linus Torvalds1da177e2005-04-16 15:20:36 -07003507 /* Hook the defense timer */
3508 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3509
3510 LeaveFunction(2);
3511 return 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003512
3513err_net:
3514 unregister_pernet_subsys(&ipvs_control_ops);
3515err:
3516 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003517}
3518
3519
3520void ip_vs_control_cleanup(void)
3521{
3522 EnterFunction(2);
3523 ip_vs_trash_cleanup();
Tejun Heoafe2c512010-12-14 16:21:17 +01003524 cancel_delayed_work_sync(&defense_work);
Oleg Nesterov28e53bd2007-05-09 02:34:22 -07003525 cancel_work_sync(&defense_work.work);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003526 ip_vs_kill_estimator(&ip_vs_stats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003527 unregister_pernet_subsys(&ipvs_control_ops);
Julius Volz9a812192008-08-14 14:08:44 +02003528 ip_vs_genl_unregister();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003529 nf_unregister_sockopt(&ip_vs_sockopts);
3530 LeaveFunction(2);
3531}