blob: 640203a153c61c453ba4330655e98ffd7da90686 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080024#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080034#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020036#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include <net/ip.h>
Vince Busam09571c72008-09-02 15:55:52 +020038#ifdef CONFIG_IP_VS_IPV6
39#include <net/ipv6.h>
40#include <net/ip6_route.h>
41#endif
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020042#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070043#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020044#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045
46#include <asm/uaccess.h>
47
48#include <net/ip_vs.h>
49
50/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
Ingo Molnar14cc3e22006-03-26 01:37:14 -080051static DEFINE_MUTEX(__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -070052
53/* lock for service table */
54static DEFINE_RWLOCK(__ip_vs_svc_lock);
55
56/* lock for table with the real services */
57static DEFINE_RWLOCK(__ip_vs_rs_lock);
58
59/* lock for state and timeout tables */
60static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
61
62/* lock for drop entry handling */
63static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
64
65/* lock for drop packet handling */
66static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
67
68/* 1/rate drop and drop-entry variables */
69int ip_vs_drop_rate = 0;
70int ip_vs_drop_counter = 0;
71static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
72
73/* number of virtual services */
74static int ip_vs_num_services = 0;
75
76/* sysctl variables */
77static int sysctl_ip_vs_drop_entry = 0;
78static int sysctl_ip_vs_drop_packet = 0;
79static int sysctl_ip_vs_secure_tcp = 0;
80static int sysctl_ip_vs_amemthresh = 1024;
81static int sysctl_ip_vs_am_droprate = 10;
82int sysctl_ip_vs_cache_bypass = 0;
83int sysctl_ip_vs_expire_nodest_conn = 0;
84int sysctl_ip_vs_expire_quiescent_template = 0;
85int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
86int sysctl_ip_vs_nat_icmp_send = 0;
87
88
#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; only present when CONFIG_IP_VS_DEBUG is set. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of the debug level variable. */
int ip_vs_get_debug_level(void)
{
	const int level = sysctl_ip_vs_debug_level;

	return level;
}
#endif
97
#ifdef CONFIG_IP_VS_IPV6
/*
 *	Check whether an IPv6 address is local, i.e. whether the route
 *	towards it goes out through a loopback device.
 *
 *	Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 *
 *	Returns 1 if the output route for @addr uses an IFF_LOOPBACK device,
 *	0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	int is_local;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = *addr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	rt = (struct rt6_info *)dst;

	/*
	 * NOTE(review): a failed lookup may still hand back an entry with a
	 * device attached; consider also testing dst->error here - confirm
	 * against net/ipv6/route.c.
	 */
	is_local = rt && rt->rt6i_dev &&
		   (rt->rt6i_dev->flags & IFF_LOOPBACK);

	/*
	 * The route lookup returns a referenced entry.  We only peek at
	 * the device, so drop the reference again instead of leaking it
	 * on every call.
	 */
	if (dst)
		dst_release(dst);

	return is_local;
}
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700119 * update_defense_level is called from keventd and from sysctl,
120 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121 */
122static void update_defense_level(void)
123{
124 struct sysinfo i;
125 static int old_secure_tcp = 0;
126 int availmem;
127 int nomem;
128 int to_change = -1;
129
130 /* we only count free and buffered memory (in pages) */
131 si_meminfo(&i);
132 availmem = i.freeram + i.bufferram;
133 /* however in linux 2.5 the i.bufferram is total page cache size,
134 we need adjust it */
135 /* si_swapinfo(&i); */
136 /* availmem = availmem - (i.totalswap - i.freeswap); */
137
138 nomem = (availmem < sysctl_ip_vs_amemthresh);
139
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700140 local_bh_disable();
141
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142 /* drop_entry */
143 spin_lock(&__ip_vs_dropentry_lock);
144 switch (sysctl_ip_vs_drop_entry) {
145 case 0:
146 atomic_set(&ip_vs_dropentry, 0);
147 break;
148 case 1:
149 if (nomem) {
150 atomic_set(&ip_vs_dropentry, 1);
151 sysctl_ip_vs_drop_entry = 2;
152 } else {
153 atomic_set(&ip_vs_dropentry, 0);
154 }
155 break;
156 case 2:
157 if (nomem) {
158 atomic_set(&ip_vs_dropentry, 1);
159 } else {
160 atomic_set(&ip_vs_dropentry, 0);
161 sysctl_ip_vs_drop_entry = 1;
162 };
163 break;
164 case 3:
165 atomic_set(&ip_vs_dropentry, 1);
166 break;
167 }
168 spin_unlock(&__ip_vs_dropentry_lock);
169
170 /* drop_packet */
171 spin_lock(&__ip_vs_droppacket_lock);
172 switch (sysctl_ip_vs_drop_packet) {
173 case 0:
174 ip_vs_drop_rate = 0;
175 break;
176 case 1:
177 if (nomem) {
178 ip_vs_drop_rate = ip_vs_drop_counter
179 = sysctl_ip_vs_amemthresh /
180 (sysctl_ip_vs_amemthresh-availmem);
181 sysctl_ip_vs_drop_packet = 2;
182 } else {
183 ip_vs_drop_rate = 0;
184 }
185 break;
186 case 2:
187 if (nomem) {
188 ip_vs_drop_rate = ip_vs_drop_counter
189 = sysctl_ip_vs_amemthresh /
190 (sysctl_ip_vs_amemthresh-availmem);
191 } else {
192 ip_vs_drop_rate = 0;
193 sysctl_ip_vs_drop_packet = 1;
194 }
195 break;
196 case 3:
197 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
198 break;
199 }
200 spin_unlock(&__ip_vs_droppacket_lock);
201
202 /* secure_tcp */
203 write_lock(&__ip_vs_securetcp_lock);
204 switch (sysctl_ip_vs_secure_tcp) {
205 case 0:
206 if (old_secure_tcp >= 2)
207 to_change = 0;
208 break;
209 case 1:
210 if (nomem) {
211 if (old_secure_tcp < 2)
212 to_change = 1;
213 sysctl_ip_vs_secure_tcp = 2;
214 } else {
215 if (old_secure_tcp >= 2)
216 to_change = 0;
217 }
218 break;
219 case 2:
220 if (nomem) {
221 if (old_secure_tcp < 2)
222 to_change = 1;
223 } else {
224 if (old_secure_tcp >= 2)
225 to_change = 0;
226 sysctl_ip_vs_secure_tcp = 1;
227 }
228 break;
229 case 3:
230 if (old_secure_tcp < 2)
231 to_change = 1;
232 break;
233 }
234 old_secure_tcp = sysctl_ip_vs_secure_tcp;
235 if (to_change >= 0)
236 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
237 write_unlock(&__ip_vs_securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700238
239 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240}
241
242
243/*
244 * Timer for checking the defense
245 */
246#define DEFENSE_TIMER_PERIOD 1*HZ
David Howellsc4028952006-11-22 14:57:56 +0000247static void defense_work_handler(struct work_struct *work);
248static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249
David Howellsc4028952006-11-22 14:57:56 +0000250static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251{
252 update_defense_level();
253 if (atomic_read(&ip_vs_dropentry))
254 ip_vs_random_dropentry();
255
256 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
257}
258
259int
260ip_vs_use_count_inc(void)
261{
262 return try_module_get(THIS_MODULE);
263}
264
265void
266ip_vs_use_count_dec(void)
267{
268 module_put(THIS_MODULE);
269}
270
271
272/*
273 * Hash table: for virtual service lookups
274 */
275#define IP_VS_SVC_TAB_BITS 8
276#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
277#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
278
279/* the service table hashed by <protocol, addr, port> */
280static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
281/* the service table hashed by fwmark */
282static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
283
284/*
285 * Hash table: for real service lookups
286 */
287#define IP_VS_RTAB_BITS 4
288#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
289#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
290
291static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
292
293/*
294 * Trash for destinations
295 */
296static LIST_HEAD(ip_vs_dest_trash);
297
298/*
299 * FTP & NULL virtual service counters
300 */
301static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
302static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
303
304
305/*
306 * Returns hash value for virtual service
307 */
308static __inline__ unsigned
Julius Volzb18610d2008-09-02 15:55:37 +0200309ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
310 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311{
312 register unsigned porth = ntohs(port);
Julius Volzb18610d2008-09-02 15:55:37 +0200313 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314
Julius Volzb18610d2008-09-02 15:55:37 +0200315#ifdef CONFIG_IP_VS_IPV6
316 if (af == AF_INET6)
317 addr_fold = addr->ip6[0]^addr->ip6[1]^
318 addr->ip6[2]^addr->ip6[3];
319#endif
320
321 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322 & IP_VS_SVC_TAB_MASK;
323}
324
325/*
326 * Returns hash value of fwmark for virtual service lookup
327 */
328static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
329{
330 return fwmark & IP_VS_SVC_TAB_MASK;
331}
332
333/*
334 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
335 * or in the ip_vs_svc_fwm_table by fwmark.
336 * Should be called with locked tables.
337 */
338static int ip_vs_svc_hash(struct ip_vs_service *svc)
339{
340 unsigned hash;
341
342 if (svc->flags & IP_VS_SVC_F_HASHED) {
343 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
344 "called from %p\n", __builtin_return_address(0));
345 return 0;
346 }
347
348 if (svc->fwmark == 0) {
349 /*
350 * Hash it by <protocol,addr,port> in ip_vs_svc_table
351 */
Julius Volzb18610d2008-09-02 15:55:37 +0200352 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
Julius Volze7ade462008-09-02 15:55:33 +0200353 svc->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
355 } else {
356 /*
357 * Hash it by fwmark in ip_vs_svc_fwm_table
358 */
359 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
360 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
361 }
362
363 svc->flags |= IP_VS_SVC_F_HASHED;
364 /* increase its refcnt because it is referenced by the svc table */
365 atomic_inc(&svc->refcnt);
366 return 1;
367}
368
369
370/*
371 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
372 * Should be called with locked tables.
373 */
374static int ip_vs_svc_unhash(struct ip_vs_service *svc)
375{
376 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
377 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
378 "called from %p\n", __builtin_return_address(0));
379 return 0;
380 }
381
382 if (svc->fwmark == 0) {
383 /* Remove it from the ip_vs_svc_table table */
384 list_del(&svc->s_list);
385 } else {
386 /* Remove it from the ip_vs_svc_fwm_table table */
387 list_del(&svc->f_list);
388 }
389
390 svc->flags &= ~IP_VS_SVC_F_HASHED;
391 atomic_dec(&svc->refcnt);
392 return 1;
393}
394
395
396/*
397 * Get service by {proto,addr,port} in the service table.
398 */
Julius Volzb18610d2008-09-02 15:55:37 +0200399static inline struct ip_vs_service *
400__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
401 __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402{
403 unsigned hash;
404 struct ip_vs_service *svc;
405
406 /* Check for "full" addressed entries */
Julius Volzb18610d2008-09-02 15:55:37 +0200407 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408
409 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
Julius Volzb18610d2008-09-02 15:55:37 +0200410 if ((svc->af == af)
411 && ip_vs_addr_equal(af, &svc->addr, vaddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 && (svc->port == vport)
413 && (svc->protocol == protocol)) {
414 /* HIT */
415 atomic_inc(&svc->usecnt);
416 return svc;
417 }
418 }
419
420 return NULL;
421}
422
423
424/*
425 * Get service by {fwmark} in the service table.
426 */
Julius Volzb18610d2008-09-02 15:55:37 +0200427static inline struct ip_vs_service *
428__ip_vs_svc_fwm_get(int af, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429{
430 unsigned hash;
431 struct ip_vs_service *svc;
432
433 /* Check for fwmark addressed entries */
434 hash = ip_vs_svc_fwm_hashkey(fwmark);
435
436 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
Julius Volzb18610d2008-09-02 15:55:37 +0200437 if (svc->fwmark == fwmark && svc->af == af) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 /* HIT */
439 atomic_inc(&svc->usecnt);
440 return svc;
441 }
442 }
443
444 return NULL;
445}
446
447struct ip_vs_service *
Julius Volz3c2e0502008-09-02 15:55:38 +0200448ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
449 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450{
451 struct ip_vs_service *svc;
Julius Volz3c2e0502008-09-02 15:55:38 +0200452
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453 read_lock(&__ip_vs_svc_lock);
454
455 /*
456 * Check the table hashed by fwmark first
457 */
Julius Volz3c2e0502008-09-02 15:55:38 +0200458 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 goto out;
460
461 /*
462 * Check the table hashed by <protocol,addr,port>
463 * for "full" addressed entries
464 */
Julius Volz3c2e0502008-09-02 15:55:38 +0200465 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466
467 if (svc == NULL
468 && protocol == IPPROTO_TCP
469 && atomic_read(&ip_vs_ftpsvc_counter)
470 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
471 /*
472 * Check if ftp service entry exists, the packet
473 * might belong to FTP data connections.
474 */
Julius Volz3c2e0502008-09-02 15:55:38 +0200475 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 }
477
478 if (svc == NULL
479 && atomic_read(&ip_vs_nullsvc_counter)) {
480 /*
481 * Check if the catch-all port (port zero) exists
482 */
Julius Volz3c2e0502008-09-02 15:55:38 +0200483 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484 }
485
486 out:
487 read_unlock(&__ip_vs_svc_lock);
488
Julius Volz3c2e0502008-09-02 15:55:38 +0200489 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
490 fwmark, ip_vs_proto_name(protocol),
491 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
492 svc ? "hit" : "not hit");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493
494 return svc;
495}
496
497
498static inline void
499__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
500{
501 atomic_inc(&svc->refcnt);
502 dest->svc = svc;
503}
504
505static inline void
506__ip_vs_unbind_svc(struct ip_vs_dest *dest)
507{
508 struct ip_vs_service *svc = dest->svc;
509
510 dest->svc = NULL;
511 if (atomic_dec_and_test(&svc->refcnt))
512 kfree(svc);
513}
514
515
516/*
517 * Returns hash value for real service
518 */
Julius Volz7937df12008-09-02 15:55:48 +0200519static inline unsigned ip_vs_rs_hashkey(int af,
520 const union nf_inet_addr *addr,
521 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522{
523 register unsigned porth = ntohs(port);
Julius Volz7937df12008-09-02 15:55:48 +0200524 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525
Julius Volz7937df12008-09-02 15:55:48 +0200526#ifdef CONFIG_IP_VS_IPV6
527 if (af == AF_INET6)
528 addr_fold = addr->ip6[0]^addr->ip6[1]^
529 addr->ip6[2]^addr->ip6[3];
530#endif
531
532 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533 & IP_VS_RTAB_MASK;
534}
535
536/*
537 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
538 * should be called with locked tables.
539 */
540static int ip_vs_rs_hash(struct ip_vs_dest *dest)
541{
542 unsigned hash;
543
544 if (!list_empty(&dest->d_list)) {
545 return 0;
546 }
547
548 /*
549 * Hash by proto,addr,port,
550 * which are the parameters of the real service.
551 */
Julius Volz7937df12008-09-02 15:55:48 +0200552 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
553
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554 list_add(&dest->d_list, &ip_vs_rtable[hash]);
555
556 return 1;
557}
558
559/*
560 * UNhashes ip_vs_dest from ip_vs_rtable.
561 * should be called with locked tables.
562 */
563static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
564{
565 /*
566 * Remove it from the ip_vs_rtable table.
567 */
568 if (!list_empty(&dest->d_list)) {
569 list_del(&dest->d_list);
570 INIT_LIST_HEAD(&dest->d_list);
571 }
572
573 return 1;
574}
575
576/*
577 * Lookup real service by <proto,addr,port> in the real service table.
578 */
579struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200580ip_vs_lookup_real_service(int af, __u16 protocol,
581 const union nf_inet_addr *daddr,
582 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583{
584 unsigned hash;
585 struct ip_vs_dest *dest;
586
587 /*
588 * Check for "full" addressed entries
589 * Return the first found entry
590 */
Julius Volz7937df12008-09-02 15:55:48 +0200591 hash = ip_vs_rs_hashkey(af, daddr, dport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592
593 read_lock(&__ip_vs_rs_lock);
594 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200595 if ((dest->af == af)
596 && ip_vs_addr_equal(af, &dest->addr, daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 && (dest->port == dport)
598 && ((dest->protocol == protocol) ||
599 dest->vfwmark)) {
600 /* HIT */
601 read_unlock(&__ip_vs_rs_lock);
602 return dest;
603 }
604 }
605 read_unlock(&__ip_vs_rs_lock);
606
607 return NULL;
608}
609
610/*
611 * Lookup destination by {addr,port} in the given service
612 */
613static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200614ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
615 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616{
617 struct ip_vs_dest *dest;
618
619 /*
620 * Find the destination for the given service
621 */
622 list_for_each_entry(dest, &svc->destinations, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200623 if ((dest->af == svc->af)
624 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
625 && (dest->port == dport)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626 /* HIT */
627 return dest;
628 }
629 }
630
631 return NULL;
632}
633
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800634/*
635 * Find destination by {daddr,dport,vaddr,protocol}
636 * Cretaed to be used in ip_vs_process_message() in
637 * the backup synchronization daemon. It finds the
638 * destination to be bound to the received connection
639 * on the backup.
640 *
641 * ip_vs_lookup_real_service() looked promissing, but
642 * seems not working as expected.
643 */
Julius Volz7937df12008-09-02 15:55:48 +0200644struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
645 __be16 dport,
646 const union nf_inet_addr *vaddr,
647 __be16 vport, __u16 protocol)
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800648{
649 struct ip_vs_dest *dest;
650 struct ip_vs_service *svc;
651
Julius Volz7937df12008-09-02 15:55:48 +0200652 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800653 if (!svc)
654 return NULL;
655 dest = ip_vs_lookup_dest(svc, daddr, dport);
656 if (dest)
657 atomic_inc(&dest->refcnt);
658 ip_vs_service_put(svc);
659 return dest;
660}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661
662/*
663 * Lookup dest by {svc,addr,port} in the destination trash.
664 * The destination trash is used to hold the destinations that are removed
665 * from the service table but are still referenced by some conn entries.
666 * The reason to add the destination trash is when the dest is temporary
667 * down (either by administrator or by monitor program), the dest can be
668 * picked back from the trash, the remaining connections to the dest can
669 * continue, and the counting information of the dest is also useful for
670 * scheduling.
671 */
672static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200673ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
674 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675{
676 struct ip_vs_dest *dest, *nxt;
677
678 /*
679 * Find the destination in trash
680 */
681 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200682 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
683 "dest->refcnt=%d\n",
684 dest->vfwmark,
685 IP_VS_DBG_ADDR(svc->af, &dest->addr),
686 ntohs(dest->port),
687 atomic_read(&dest->refcnt));
688 if (dest->af == svc->af &&
689 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 dest->port == dport &&
691 dest->vfwmark == svc->fwmark &&
692 dest->protocol == svc->protocol &&
693 (svc->fwmark ||
Julius Volz7937df12008-09-02 15:55:48 +0200694 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 dest->vport == svc->port))) {
696 /* HIT */
697 return dest;
698 }
699
700 /*
701 * Try to purge the destination from trash if not referenced
702 */
703 if (atomic_read(&dest->refcnt) == 1) {
Julius Volz7937df12008-09-02 15:55:48 +0200704 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
705 "from trash\n",
706 dest->vfwmark,
707 IP_VS_DBG_ADDR(svc->af, &dest->addr),
708 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 list_del(&dest->n_list);
710 ip_vs_dst_reset(dest);
711 __ip_vs_unbind_svc(dest);
712 kfree(dest);
713 }
714 }
715
716 return NULL;
717}
718
719
720/*
721 * Clean up all the destinations in the trash
722 * Called by the ip_vs_control_cleanup()
723 *
724 * When the ip_vs_control_clearup is activated by ipvs module exit,
725 * the service tables must have been flushed and all the connections
726 * are expired, and the refcnt of each destination in the trash must
727 * be 1, so we simply release them here.
728 */
729static void ip_vs_trash_cleanup(void)
730{
731 struct ip_vs_dest *dest, *nxt;
732
733 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
734 list_del(&dest->n_list);
735 ip_vs_dst_reset(dest);
736 __ip_vs_unbind_svc(dest);
737 kfree(dest);
738 }
739}
740
741
742static void
743ip_vs_zero_stats(struct ip_vs_stats *stats)
744{
745 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000746
747 stats->conns = 0;
748 stats->inpkts = 0;
749 stats->outpkts = 0;
750 stats->inbytes = 0;
751 stats->outbytes = 0;
752
753 stats->cps = 0;
754 stats->inpps = 0;
755 stats->outpps = 0;
756 stats->inbps = 0;
757 stats->outbps = 0;
758
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000760
Sven Wegener3a14a3132008-08-10 18:24:41 +0000761 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762}
763
764/*
765 * Update a destination in the given service
766 */
767static void
768__ip_vs_update_dest(struct ip_vs_service *svc,
Julius Volzc860c6b2008-09-02 15:55:36 +0200769 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770{
771 int conn_flags;
772
773 /* set the weight and the flags */
774 atomic_set(&dest->weight, udest->weight);
775 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
776
777 /* check if local node and update the flags */
Vince Busam09571c72008-09-02 15:55:52 +0200778#ifdef CONFIG_IP_VS_IPV6
779 if (svc->af == AF_INET6) {
780 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
781 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
782 | IP_VS_CONN_F_LOCALNODE;
783 }
784 } else
785#endif
786 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
787 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
788 | IP_VS_CONN_F_LOCALNODE;
789 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790
791 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
792 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
793 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
794 } else {
795 /*
796 * Put the real service in ip_vs_rtable if not present.
797 * For now only for NAT!
798 */
799 write_lock_bh(&__ip_vs_rs_lock);
800 ip_vs_rs_hash(dest);
801 write_unlock_bh(&__ip_vs_rs_lock);
802 }
803 atomic_set(&dest->conn_flags, conn_flags);
804
805 /* bind the service */
806 if (!dest->svc) {
807 __ip_vs_bind_svc(dest, svc);
808 } else {
809 if (dest->svc != svc) {
810 __ip_vs_unbind_svc(dest);
811 ip_vs_zero_stats(&dest->stats);
812 __ip_vs_bind_svc(dest, svc);
813 }
814 }
815
816 /* set the dest status flags */
817 dest->flags |= IP_VS_DEST_F_AVAILABLE;
818
819 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
820 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
821 dest->u_threshold = udest->u_threshold;
822 dest->l_threshold = udest->l_threshold;
823}
824
825
826/*
827 * Create a destination for the given service
828 */
829static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200830ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831 struct ip_vs_dest **dest_p)
832{
833 struct ip_vs_dest *dest;
834 unsigned atype;
835
836 EnterFunction(2);
837
Vince Busam09571c72008-09-02 15:55:52 +0200838#ifdef CONFIG_IP_VS_IPV6
839 if (svc->af == AF_INET6) {
840 atype = ipv6_addr_type(&udest->addr.in6);
841 if (!(atype & IPV6_ADDR_UNICAST) &&
842 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
843 return -EINVAL;
844 } else
845#endif
846 {
847 atype = inet_addr_type(&init_net, udest->addr.ip);
848 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
849 return -EINVAL;
850 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700852 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853 if (dest == NULL) {
854 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
855 return -ENOMEM;
856 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857
Julius Volzc860c6b2008-09-02 15:55:36 +0200858 dest->af = svc->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859 dest->protocol = svc->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +0200860 dest->vaddr = svc->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861 dest->vport = svc->port;
862 dest->vfwmark = svc->fwmark;
Julius Volzc860c6b2008-09-02 15:55:36 +0200863 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864 dest->port = udest->port;
865
866 atomic_set(&dest->activeconns, 0);
867 atomic_set(&dest->inactconns, 0);
868 atomic_set(&dest->persistconns, 0);
869 atomic_set(&dest->refcnt, 0);
870
871 INIT_LIST_HEAD(&dest->d_list);
872 spin_lock_init(&dest->dst_lock);
873 spin_lock_init(&dest->stats.lock);
874 __ip_vs_update_dest(svc, dest, udest);
875 ip_vs_new_estimator(&dest->stats);
876
877 *dest_p = dest;
878
879 LeaveFunction(2);
880 return 0;
881}
882
883
884/*
885 * Add a destination into an existing service
886 */
887static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200888ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889{
890 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200891 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700892 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 int ret;
894
895 EnterFunction(2);
896
897 if (udest->weight < 0) {
898 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
899 return -ERANGE;
900 }
901
902 if (udest->l_threshold > udest->u_threshold) {
903 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
904 "upper threshold\n");
905 return -ERANGE;
906 }
907
Julius Volzc860c6b2008-09-02 15:55:36 +0200908 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
909
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910 /*
911 * Check if the dest already exists in the list
912 */
Julius Volz7937df12008-09-02 15:55:48 +0200913 dest = ip_vs_lookup_dest(svc, &daddr, dport);
914
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 if (dest != NULL) {
916 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
917 return -EEXIST;
918 }
919
920 /*
921 * Check if the dest already exists in the trash and
922 * is from the same service
923 */
Julius Volz7937df12008-09-02 15:55:48 +0200924 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
925
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926 if (dest != NULL) {
927 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
Roberto Nibali4b5bdf52006-01-03 14:22:59 -0800928 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 NIPQUAD(daddr), ntohs(dport),
930 atomic_read(&dest->refcnt),
931 dest->vfwmark,
Julius Volze7ade462008-09-02 15:55:33 +0200932 NIPQUAD(dest->vaddr.ip),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933 ntohs(dest->vport));
934 __ip_vs_update_dest(svc, dest, udest);
935
936 /*
937 * Get the destination from the trash
938 */
939 list_del(&dest->n_list);
940
941 ip_vs_new_estimator(&dest->stats);
942
943 write_lock_bh(&__ip_vs_svc_lock);
944
945 /*
946 * Wait until all other svc users go away.
947 */
948 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
949
950 list_add(&dest->n_list, &svc->destinations);
951 svc->num_dests++;
952
953 /* call the update_service function of its scheduler */
Sven Wegener82dfb6f2008-08-11 19:36:06 +0000954 if (svc->scheduler->update_service)
955 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956
957 write_unlock_bh(&__ip_vs_svc_lock);
958 return 0;
959 }
960
961 /*
962 * Allocate and initialize the dest structure
963 */
964 ret = ip_vs_new_dest(svc, udest, &dest);
965 if (ret) {
966 return ret;
967 }
968
969 /*
970 * Add the dest entry into the list
971 */
972 atomic_inc(&dest->refcnt);
973
974 write_lock_bh(&__ip_vs_svc_lock);
975
976 /*
977 * Wait until all other svc users go away.
978 */
979 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
980
981 list_add(&dest->n_list, &svc->destinations);
982 svc->num_dests++;
983
984 /* call the update_service function of its scheduler */
Sven Wegener82dfb6f2008-08-11 19:36:06 +0000985 if (svc->scheduler->update_service)
986 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987
988 write_unlock_bh(&__ip_vs_svc_lock);
989
990 LeaveFunction(2);
991
992 return 0;
993}
994
995
996/*
997 * Edit a destination in the given service
998 */
999static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001000ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001001{
1002 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +02001003 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -07001004 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005
1006 EnterFunction(2);
1007
1008 if (udest->weight < 0) {
1009 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
1010 return -ERANGE;
1011 }
1012
1013 if (udest->l_threshold > udest->u_threshold) {
1014 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
1015 "upper threshold\n");
1016 return -ERANGE;
1017 }
1018
Julius Volzc860c6b2008-09-02 15:55:36 +02001019 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1020
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021 /*
1022 * Lookup the destination list
1023 */
Julius Volz7937df12008-09-02 15:55:48 +02001024 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1025
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 if (dest == NULL) {
1027 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
1028 return -ENOENT;
1029 }
1030
1031 __ip_vs_update_dest(svc, dest, udest);
1032
1033 write_lock_bh(&__ip_vs_svc_lock);
1034
1035 /* Wait until all other svc users go away */
Heiko Carstenscae7ca32007-08-10 15:50:30 -07001036 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037
1038 /* call the update_service, because server weight may be changed */
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001039 if (svc->scheduler->update_service)
1040 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041
1042 write_unlock_bh(&__ip_vs_svc_lock);
1043
1044 LeaveFunction(2);
1045
1046 return 0;
1047}
1048
1049
1050/*
1051 * Delete a destination (must be already unlinked from the service)
1052 */
1053static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1054{
1055 ip_vs_kill_estimator(&dest->stats);
1056
1057 /*
1058 * Remove it from the d-linked list with the real services.
1059 */
1060 write_lock_bh(&__ip_vs_rs_lock);
1061 ip_vs_rs_unhash(dest);
1062 write_unlock_bh(&__ip_vs_rs_lock);
1063
1064 /*
1065 * Decrease the refcnt of the dest, and free the dest
1066 * if nobody refers to it (refcnt=0). Otherwise, throw
1067 * the destination into the trash.
1068 */
1069 if (atomic_dec_and_test(&dest->refcnt)) {
1070 ip_vs_dst_reset(dest);
1071 /* simply decrease svc->refcnt here, let the caller check
1072 and release the service if nobody refers to it.
1073 Only user context can release destination and service,
1074 and only one user context can update virtual service at a
1075 time, so the operation here is OK */
1076 atomic_dec(&dest->svc->refcnt);
1077 kfree(dest);
1078 } else {
Roberto Nibali4b5bdf52006-01-03 14:22:59 -08001079 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
1080 "dest->refcnt=%d\n",
Julius Volze7ade462008-09-02 15:55:33 +02001081 NIPQUAD(dest->addr.ip), ntohs(dest->port),
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082 atomic_read(&dest->refcnt));
1083 list_add(&dest->n_list, &ip_vs_dest_trash);
1084 atomic_inc(&dest->refcnt);
1085 }
1086}
1087
1088
1089/*
1090 * Unlink a destination from the given service
1091 */
1092static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1093 struct ip_vs_dest *dest,
1094 int svcupd)
1095{
1096 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1097
1098 /*
1099 * Remove it from the d-linked destination list.
1100 */
1101 list_del(&dest->n_list);
1102 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001103
1104 /*
1105 * Call the update_service function of its scheduler
1106 */
1107 if (svcupd && svc->scheduler->update_service)
1108 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109}
1110
1111
1112/*
1113 * Delete a destination server in the given service
1114 */
1115static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001116ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117{
1118 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -07001119 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120
1121 EnterFunction(2);
1122
Julius Volz7937df12008-09-02 15:55:48 +02001123 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
Julius Volzc860c6b2008-09-02 15:55:36 +02001124
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125 if (dest == NULL) {
1126 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1127 return -ENOENT;
1128 }
1129
1130 write_lock_bh(&__ip_vs_svc_lock);
1131
1132 /*
1133 * Wait until all other svc users go away.
1134 */
1135 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1136
1137 /*
1138 * Unlink dest from the service
1139 */
1140 __ip_vs_unlink_dest(svc, dest, 1);
1141
1142 write_unlock_bh(&__ip_vs_svc_lock);
1143
1144 /*
1145 * Delete the destination
1146 */
1147 __ip_vs_del_dest(dest);
1148
1149 LeaveFunction(2);
1150
1151 return 0;
1152}
1153
1154
1155/*
1156 * Add a service into the service hash table
1157 */
1158static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001159ip_vs_add_service(struct ip_vs_service_user_kern *u,
1160 struct ip_vs_service **svc_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161{
1162 int ret = 0;
1163 struct ip_vs_scheduler *sched = NULL;
1164 struct ip_vs_service *svc = NULL;
1165
1166 /* increase the module use count */
1167 ip_vs_use_count_inc();
1168
1169 /* Lookup the scheduler by 'u->sched_name' */
1170 sched = ip_vs_scheduler_get(u->sched_name);
1171 if (sched == NULL) {
1172 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1173 u->sched_name);
1174 ret = -ENOENT;
1175 goto out_mod_dec;
1176 }
1177
Panagiotis Issaris0da974f2006-07-21 14:51:30 -07001178 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179 if (svc == NULL) {
1180 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1181 ret = -ENOMEM;
1182 goto out_err;
1183 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184
1185 /* I'm the first user of the service */
1186 atomic_set(&svc->usecnt, 1);
1187 atomic_set(&svc->refcnt, 0);
1188
Julius Volzc860c6b2008-09-02 15:55:36 +02001189 svc->af = u->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190 svc->protocol = u->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +02001191 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192 svc->port = u->port;
1193 svc->fwmark = u->fwmark;
1194 svc->flags = u->flags;
1195 svc->timeout = u->timeout * HZ;
1196 svc->netmask = u->netmask;
1197
1198 INIT_LIST_HEAD(&svc->destinations);
1199 rwlock_init(&svc->sched_lock);
1200 spin_lock_init(&svc->stats.lock);
1201
1202 /* Bind the scheduler */
1203 ret = ip_vs_bind_scheduler(svc, sched);
1204 if (ret)
1205 goto out_err;
1206 sched = NULL;
1207
1208 /* Update the virtual service counters */
1209 if (svc->port == FTPPORT)
1210 atomic_inc(&ip_vs_ftpsvc_counter);
1211 else if (svc->port == 0)
1212 atomic_inc(&ip_vs_nullsvc_counter);
1213
1214 ip_vs_new_estimator(&svc->stats);
1215 ip_vs_num_services++;
1216
1217 /* Hash the service into the service table */
1218 write_lock_bh(&__ip_vs_svc_lock);
1219 ip_vs_svc_hash(svc);
1220 write_unlock_bh(&__ip_vs_svc_lock);
1221
1222 *svc_p = svc;
1223 return 0;
1224
1225 out_err:
1226 if (svc != NULL) {
1227 if (svc->scheduler)
1228 ip_vs_unbind_scheduler(svc);
1229 if (svc->inc) {
1230 local_bh_disable();
1231 ip_vs_app_inc_put(svc->inc);
1232 local_bh_enable();
1233 }
1234 kfree(svc);
1235 }
1236 ip_vs_scheduler_put(sched);
1237
1238 out_mod_dec:
1239 /* decrease the module use count */
1240 ip_vs_use_count_dec();
1241
1242 return ret;
1243}
1244
1245
1246/*
1247 * Edit a service and bind it with a new scheduler
1248 */
1249static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001250ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251{
1252 struct ip_vs_scheduler *sched, *old_sched;
1253 int ret = 0;
1254
1255 /*
1256 * Lookup the scheduler, by 'u->sched_name'
1257 */
1258 sched = ip_vs_scheduler_get(u->sched_name);
1259 if (sched == NULL) {
1260 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1261 u->sched_name);
1262 return -ENOENT;
1263 }
1264 old_sched = sched;
1265
1266 write_lock_bh(&__ip_vs_svc_lock);
1267
1268 /*
1269 * Wait until all other svc users go away.
1270 */
1271 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1272
1273 /*
1274 * Set the flags and timeout value
1275 */
1276 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1277 svc->timeout = u->timeout * HZ;
1278 svc->netmask = u->netmask;
1279
1280 old_sched = svc->scheduler;
1281 if (sched != old_sched) {
1282 /*
1283 * Unbind the old scheduler
1284 */
1285 if ((ret = ip_vs_unbind_scheduler(svc))) {
1286 old_sched = sched;
1287 goto out;
1288 }
1289
1290 /*
1291 * Bind the new scheduler
1292 */
1293 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1294 /*
1295 * If ip_vs_bind_scheduler fails, restore the old
1296 * scheduler.
1297 * The main reason of failure is out of memory.
1298 *
1299 * The question is if the old scheduler can be
1300 * restored all the time. TODO: if it cannot be
1301 * restored some time, we must delete the service,
1302 * otherwise the system may crash.
1303 */
1304 ip_vs_bind_scheduler(svc, old_sched);
1305 old_sched = sched;
1306 goto out;
1307 }
1308 }
1309
1310 out:
1311 write_unlock_bh(&__ip_vs_svc_lock);
1312
1313 if (old_sched)
1314 ip_vs_scheduler_put(old_sched);
1315
1316 return ret;
1317}
1318
1319
1320/*
1321 * Delete a service from the service list
1322 * - The service must be unlinked, unlocked and not referenced!
1323 * - We are called under _bh lock
1324 */
1325static void __ip_vs_del_service(struct ip_vs_service *svc)
1326{
1327 struct ip_vs_dest *dest, *nxt;
1328 struct ip_vs_scheduler *old_sched;
1329
1330 ip_vs_num_services--;
1331 ip_vs_kill_estimator(&svc->stats);
1332
1333 /* Unbind scheduler */
1334 old_sched = svc->scheduler;
1335 ip_vs_unbind_scheduler(svc);
1336 if (old_sched)
1337 ip_vs_scheduler_put(old_sched);
1338
1339 /* Unbind app inc */
1340 if (svc->inc) {
1341 ip_vs_app_inc_put(svc->inc);
1342 svc->inc = NULL;
1343 }
1344
1345 /*
1346 * Unlink the whole destination list
1347 */
1348 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1349 __ip_vs_unlink_dest(svc, dest, 0);
1350 __ip_vs_del_dest(dest);
1351 }
1352
1353 /*
1354 * Update the virtual service counters
1355 */
1356 if (svc->port == FTPPORT)
1357 atomic_dec(&ip_vs_ftpsvc_counter);
1358 else if (svc->port == 0)
1359 atomic_dec(&ip_vs_nullsvc_counter);
1360
1361 /*
1362 * Free the service if nobody refers to it
1363 */
1364 if (atomic_read(&svc->refcnt) == 0)
1365 kfree(svc);
1366
1367 /* decrease the module use count */
1368 ip_vs_use_count_dec();
1369}
1370
1371/*
1372 * Delete a service from the service list
1373 */
1374static int ip_vs_del_service(struct ip_vs_service *svc)
1375{
1376 if (svc == NULL)
1377 return -EEXIST;
1378
1379 /*
1380 * Unhash it from the service table
1381 */
1382 write_lock_bh(&__ip_vs_svc_lock);
1383
1384 ip_vs_svc_unhash(svc);
1385
1386 /*
1387 * Wait until all the svc users go away.
1388 */
1389 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1390
1391 __ip_vs_del_service(svc);
1392
1393 write_unlock_bh(&__ip_vs_svc_lock);
1394
1395 return 0;
1396}
1397
1398
1399/*
1400 * Flush all the virtual services
1401 */
1402static int ip_vs_flush(void)
1403{
1404 int idx;
1405 struct ip_vs_service *svc, *nxt;
1406
1407 /*
1408 * Flush the service table hashed by <protocol,addr,port>
1409 */
1410 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1411 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1412 write_lock_bh(&__ip_vs_svc_lock);
1413 ip_vs_svc_unhash(svc);
1414 /*
1415 * Wait until all the svc users go away.
1416 */
1417 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1418 __ip_vs_del_service(svc);
1419 write_unlock_bh(&__ip_vs_svc_lock);
1420 }
1421 }
1422
1423 /*
1424 * Flush the service table hashed by fwmark
1425 */
1426 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1427 list_for_each_entry_safe(svc, nxt,
1428 &ip_vs_svc_fwm_table[idx], f_list) {
1429 write_lock_bh(&__ip_vs_svc_lock);
1430 ip_vs_svc_unhash(svc);
1431 /*
1432 * Wait until all the svc users go away.
1433 */
1434 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1435 __ip_vs_del_service(svc);
1436 write_unlock_bh(&__ip_vs_svc_lock);
1437 }
1438 }
1439
1440 return 0;
1441}
1442
1443
1444/*
1445 * Zero counters in a service or all services
1446 */
1447static int ip_vs_zero_service(struct ip_vs_service *svc)
1448{
1449 struct ip_vs_dest *dest;
1450
1451 write_lock_bh(&__ip_vs_svc_lock);
1452 list_for_each_entry(dest, &svc->destinations, n_list) {
1453 ip_vs_zero_stats(&dest->stats);
1454 }
1455 ip_vs_zero_stats(&svc->stats);
1456 write_unlock_bh(&__ip_vs_svc_lock);
1457 return 0;
1458}
1459
1460static int ip_vs_zero_all(void)
1461{
1462 int idx;
1463 struct ip_vs_service *svc;
1464
1465 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1466 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1467 ip_vs_zero_service(svc);
1468 }
1469 }
1470
1471 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1472 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1473 ip_vs_zero_service(svc);
1474 }
1475 }
1476
1477 ip_vs_zero_stats(&ip_vs_stats);
1478 return 0;
1479}
1480
1481
1482static int
1483proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1484 void __user *buffer, size_t *lenp, loff_t *ppos)
1485{
1486 int *valp = table->data;
1487 int val = *valp;
1488 int rc;
1489
1490 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1491 if (write && (*valp != val)) {
1492 if ((*valp < 0) || (*valp > 3)) {
1493 /* Restore the correct value */
1494 *valp = val;
1495 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001496 update_defense_level();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001497 }
1498 }
1499 return rc;
1500}
1501
1502
1503static int
1504proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1505 void __user *buffer, size_t *lenp, loff_t *ppos)
1506{
1507 int *valp = table->data;
1508 int val[2];
1509 int rc;
1510
1511 /* backup the value first */
1512 memcpy(val, valp, sizeof(val));
1513
1514 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1515 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1516 /* Restore the correct value */
1517 memcpy(valp, val, sizeof(val));
1518 }
1519 return rc;
1520}
1521
1522
1523/*
1524 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1525 */
1526
1527static struct ctl_table vs_vars[] = {
1528 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529 .procname = "amemthresh",
1530 .data = &sysctl_ip_vs_amemthresh,
1531 .maxlen = sizeof(int),
1532 .mode = 0644,
1533 .proc_handler = &proc_dointvec,
1534 },
1535#ifdef CONFIG_IP_VS_DEBUG
1536 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001537 .procname = "debug_level",
1538 .data = &sysctl_ip_vs_debug_level,
1539 .maxlen = sizeof(int),
1540 .mode = 0644,
1541 .proc_handler = &proc_dointvec,
1542 },
1543#endif
1544 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001545 .procname = "am_droprate",
1546 .data = &sysctl_ip_vs_am_droprate,
1547 .maxlen = sizeof(int),
1548 .mode = 0644,
1549 .proc_handler = &proc_dointvec,
1550 },
1551 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001552 .procname = "drop_entry",
1553 .data = &sysctl_ip_vs_drop_entry,
1554 .maxlen = sizeof(int),
1555 .mode = 0644,
1556 .proc_handler = &proc_do_defense_mode,
1557 },
1558 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001559 .procname = "drop_packet",
1560 .data = &sysctl_ip_vs_drop_packet,
1561 .maxlen = sizeof(int),
1562 .mode = 0644,
1563 .proc_handler = &proc_do_defense_mode,
1564 },
1565 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001566 .procname = "secure_tcp",
1567 .data = &sysctl_ip_vs_secure_tcp,
1568 .maxlen = sizeof(int),
1569 .mode = 0644,
1570 .proc_handler = &proc_do_defense_mode,
1571 },
1572#if 0
1573 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574 .procname = "timeout_established",
1575 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1576 .maxlen = sizeof(int),
1577 .mode = 0644,
1578 .proc_handler = &proc_dointvec_jiffies,
1579 },
1580 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581 .procname = "timeout_synsent",
1582 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1583 .maxlen = sizeof(int),
1584 .mode = 0644,
1585 .proc_handler = &proc_dointvec_jiffies,
1586 },
1587 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 .procname = "timeout_synrecv",
1589 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1590 .maxlen = sizeof(int),
1591 .mode = 0644,
1592 .proc_handler = &proc_dointvec_jiffies,
1593 },
1594 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001595 .procname = "timeout_finwait",
1596 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1597 .maxlen = sizeof(int),
1598 .mode = 0644,
1599 .proc_handler = &proc_dointvec_jiffies,
1600 },
1601 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001602 .procname = "timeout_timewait",
1603 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1604 .maxlen = sizeof(int),
1605 .mode = 0644,
1606 .proc_handler = &proc_dointvec_jiffies,
1607 },
1608 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 .procname = "timeout_close",
1610 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1611 .maxlen = sizeof(int),
1612 .mode = 0644,
1613 .proc_handler = &proc_dointvec_jiffies,
1614 },
1615 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616 .procname = "timeout_closewait",
1617 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1618 .maxlen = sizeof(int),
1619 .mode = 0644,
1620 .proc_handler = &proc_dointvec_jiffies,
1621 },
1622 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623 .procname = "timeout_lastack",
1624 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1625 .maxlen = sizeof(int),
1626 .mode = 0644,
1627 .proc_handler = &proc_dointvec_jiffies,
1628 },
1629 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630 .procname = "timeout_listen",
1631 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1632 .maxlen = sizeof(int),
1633 .mode = 0644,
1634 .proc_handler = &proc_dointvec_jiffies,
1635 },
1636 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001637 .procname = "timeout_synack",
1638 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1639 .maxlen = sizeof(int),
1640 .mode = 0644,
1641 .proc_handler = &proc_dointvec_jiffies,
1642 },
1643 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644 .procname = "timeout_udp",
1645 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1646 .maxlen = sizeof(int),
1647 .mode = 0644,
1648 .proc_handler = &proc_dointvec_jiffies,
1649 },
1650 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651 .procname = "timeout_icmp",
1652 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1653 .maxlen = sizeof(int),
1654 .mode = 0644,
1655 .proc_handler = &proc_dointvec_jiffies,
1656 },
1657#endif
1658 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001659 .procname = "cache_bypass",
1660 .data = &sysctl_ip_vs_cache_bypass,
1661 .maxlen = sizeof(int),
1662 .mode = 0644,
1663 .proc_handler = &proc_dointvec,
1664 },
1665 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666 .procname = "expire_nodest_conn",
1667 .data = &sysctl_ip_vs_expire_nodest_conn,
1668 .maxlen = sizeof(int),
1669 .mode = 0644,
1670 .proc_handler = &proc_dointvec,
1671 },
1672 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673 .procname = "expire_quiescent_template",
1674 .data = &sysctl_ip_vs_expire_quiescent_template,
1675 .maxlen = sizeof(int),
1676 .mode = 0644,
1677 .proc_handler = &proc_dointvec,
1678 },
1679 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680 .procname = "sync_threshold",
1681 .data = &sysctl_ip_vs_sync_threshold,
1682 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1683 .mode = 0644,
1684 .proc_handler = &proc_do_sync_threshold,
1685 },
1686 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687 .procname = "nat_icmp_send",
1688 .data = &sysctl_ip_vs_nat_icmp_send,
1689 .maxlen = sizeof(int),
1690 .mode = 0644,
1691 .proc_handler = &proc_dointvec,
1692 },
1693 { .ctl_name = 0 }
1694};
1695
Sven Wegener5587da52008-08-10 18:24:40 +00001696const struct ctl_path net_vs_ctl_path[] = {
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001697 { .procname = "net", .ctl_name = CTL_NET, },
1698 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1699 { .procname = "vs", },
1700 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001702EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001703
/* NOTE(review): presumably the handle returned when vs_vars is registered */
static struct ctl_table_header *sysctl_header;
1705
1706#ifdef CONFIG_PROC_FS
1707
/* Position of the /proc service listing iterator (seq_file private data) */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* current bucket index within 'table' */
};
1712
1713/*
1714 * Write the contents of the VS rule table to a PROCfs file.
1715 * (It is kept just for backward compatibility)
1716 */
1717static inline const char *ip_vs_fwd_name(unsigned flags)
1718{
1719 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1720 case IP_VS_CONN_F_LOCALNODE:
1721 return "Local";
1722 case IP_VS_CONN_F_TUNNEL:
1723 return "Tunnel";
1724 case IP_VS_CONN_F_DROUTE:
1725 return "Route";
1726 default:
1727 return "Masq";
1728 }
1729}
1730
1731
1732/* Get the Nth entry in the two lists */
1733static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1734{
1735 struct ip_vs_iter *iter = seq->private;
1736 int idx;
1737 struct ip_vs_service *svc;
1738
1739 /* look in hash by protocol */
1740 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1741 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1742 if (pos-- == 0){
1743 iter->table = ip_vs_svc_table;
1744 iter->bucket = idx;
1745 return svc;
1746 }
1747 }
1748 }
1749
1750 /* keep looking in fwmark */
1751 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1752 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1753 if (pos-- == 0) {
1754 iter->table = ip_vs_svc_fwm_table;
1755 iter->bucket = idx;
1756 return svc;
1757 }
1758 }
1759 }
1760
1761 return NULL;
1762}
1763
1764static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1765{
1766
1767 read_lock_bh(&__ip_vs_svc_lock);
1768 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1769}
1770
1771
1772static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1773{
1774 struct list_head *e;
1775 struct ip_vs_iter *iter;
1776 struct ip_vs_service *svc;
1777
1778 ++*pos;
1779 if (v == SEQ_START_TOKEN)
1780 return ip_vs_info_array(seq,0);
1781
1782 svc = v;
1783 iter = seq->private;
1784
1785 if (iter->table == ip_vs_svc_table) {
1786 /* next service in table hashed by protocol */
1787 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1788 return list_entry(e, struct ip_vs_service, s_list);
1789
1790
1791 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1792 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1793 s_list) {
1794 return svc;
1795 }
1796 }
1797
1798 iter->table = ip_vs_svc_fwm_table;
1799 iter->bucket = -1;
1800 goto scan_fwmark;
1801 }
1802
1803 /* next service in hashed by fwmark */
1804 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1805 return list_entry(e, struct ip_vs_service, f_list);
1806
1807 scan_fwmark:
1808 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1809 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1810 f_list)
1811 return svc;
1812 }
1813
1814 return NULL;
1815}
1816
1817static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1818{
1819 read_unlock_bh(&__ip_vs_svc_lock);
1820}
1821
1822
1823static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1824{
1825 if (v == SEQ_START_TOKEN) {
1826 seq_printf(seq,
1827 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1828 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1829 seq_puts(seq,
1830 "Prot LocalAddress:Port Scheduler Flags\n");
1831 seq_puts(seq,
1832 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1833 } else {
1834 const struct ip_vs_service *svc = v;
1835 const struct ip_vs_iter *iter = seq->private;
1836 const struct ip_vs_dest *dest;
1837
Vince Busam667a5f12008-09-02 15:55:49 +02001838 if (iter->table == ip_vs_svc_table) {
1839#ifdef CONFIG_IP_VS_IPV6
1840 if (svc->af == AF_INET6)
1841 seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
1842 ip_vs_proto_name(svc->protocol),
1843 NIP6(svc->addr.in6),
1844 ntohs(svc->port),
1845 svc->scheduler->name);
1846 else
1847#endif
1848 seq_printf(seq, "%s %08X:%04X %s ",
1849 ip_vs_proto_name(svc->protocol),
1850 ntohl(svc->addr.ip),
1851 ntohs(svc->port),
1852 svc->scheduler->name);
1853 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854 seq_printf(seq, "FWM %08X %s ",
1855 svc->fwmark, svc->scheduler->name);
Vince Busam667a5f12008-09-02 15:55:49 +02001856 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857
1858 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1859 seq_printf(seq, "persistent %d %08X\n",
1860 svc->timeout,
1861 ntohl(svc->netmask));
1862 else
1863 seq_putc(seq, '\n');
1864
1865 list_for_each_entry(dest, &svc->destinations, n_list) {
Vince Busam667a5f12008-09-02 15:55:49 +02001866#ifdef CONFIG_IP_VS_IPV6
1867 if (dest->af == AF_INET6)
1868 seq_printf(seq,
1869 " -> [" NIP6_FMT "]:%04X"
1870 " %-7s %-6d %-10d %-10d\n",
1871 NIP6(dest->addr.in6),
1872 ntohs(dest->port),
1873 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1874 atomic_read(&dest->weight),
1875 atomic_read(&dest->activeconns),
1876 atomic_read(&dest->inactconns));
1877 else
1878#endif
1879 seq_printf(seq,
1880 " -> %08X:%04X "
1881 "%-7s %-6d %-10d %-10d\n",
1882 ntohl(dest->addr.ip),
1883 ntohs(dest->port),
1884 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1885 atomic_read(&dest->weight),
1886 atomic_read(&dest->activeconns),
1887 atomic_read(&dest->inactconns));
1888
Linus Torvalds1da177e2005-04-16 15:20:36 -07001889 }
1890 }
1891 return 0;
1892}
1893
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001894static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001895 .start = ip_vs_info_seq_start,
1896 .next = ip_vs_info_seq_next,
1897 .stop = ip_vs_info_seq_stop,
1898 .show = ip_vs_info_seq_show,
1899};
1900
1901static int ip_vs_info_open(struct inode *inode, struct file *file)
1902{
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001903 return seq_open_private(file, &ip_vs_info_seq_ops,
1904 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001905}
1906
Arjan van de Ven9a321442007-02-12 00:55:35 -08001907static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908 .owner = THIS_MODULE,
1909 .open = ip_vs_info_open,
1910 .read = seq_read,
1911 .llseek = seq_lseek,
1912 .release = seq_release_private,
1913};
1914
1915#endif
1916
Sven Wegener519e49e2008-08-10 18:24:41 +00001917struct ip_vs_stats ip_vs_stats = {
1918 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1919};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001920
1921#ifdef CONFIG_PROC_FS
1922static int ip_vs_stats_show(struct seq_file *seq, void *v)
1923{
1924
1925/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1926 seq_puts(seq,
1927 " Total Incoming Outgoing Incoming Outgoing\n");
1928 seq_printf(seq,
1929 " Conns Packets Packets Bytes Bytes\n");
1930
1931 spin_lock_bh(&ip_vs_stats.lock);
1932 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1933 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1934 (unsigned long long) ip_vs_stats.inbytes,
1935 (unsigned long long) ip_vs_stats.outbytes);
1936
1937/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1938 seq_puts(seq,
1939 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1940 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1941 ip_vs_stats.cps,
1942 ip_vs_stats.inpps,
1943 ip_vs_stats.outpps,
1944 ip_vs_stats.inbps,
1945 ip_vs_stats.outbps);
1946 spin_unlock_bh(&ip_vs_stats.lock);
1947
1948 return 0;
1949}
1950
1951static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1952{
1953 return single_open(file, ip_vs_stats_show, NULL);
1954}
1955
Arjan van de Ven9a321442007-02-12 00:55:35 -08001956static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957 .owner = THIS_MODULE,
1958 .open = ip_vs_stats_seq_open,
1959 .read = seq_read,
1960 .llseek = seq_lseek,
1961 .release = single_release,
1962};
1963
1964#endif
1965
1966/*
1967 * Set timeout values for tcp tcpfin udp in the timeout_table.
1968 */
1969static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1970{
1971 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1972 u->tcp_timeout,
1973 u->tcp_fin_timeout,
1974 u->udp_timeout);
1975
1976#ifdef CONFIG_IP_VS_PROTO_TCP
1977 if (u->tcp_timeout) {
1978 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1979 = u->tcp_timeout * HZ;
1980 }
1981
1982 if (u->tcp_fin_timeout) {
1983 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1984 = u->tcp_fin_timeout * HZ;
1985 }
1986#endif
1987
1988#ifdef CONFIG_IP_VS_PROTO_UDP
1989 if (u->udp_timeout) {
1990 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1991 = u->udp_timeout * HZ;
1992 }
1993#endif
1994 return 0;
1995}
1996
1997
/*
 * Map a set-sockopt command number to a zero-based index.  The argument
 * is parenthesized so that expressions (e.g. a ?: expression) expand
 * correctly.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)

/* Expected user-space argument sizes of the set-sockopt commands */
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
/* NOTE(review): assumed to be the largest of the lengths above -- confirm */
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2005
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002006static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002007 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2008 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2009 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2010 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2011 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2012 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2013 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2014 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2015 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2016 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2017 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2018};
2019
Julius Volzc860c6b2008-09-02 15:55:36 +02002020static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2021 struct ip_vs_service_user *usvc_compat)
2022{
2023 usvc->af = AF_INET;
2024 usvc->protocol = usvc_compat->protocol;
2025 usvc->addr.ip = usvc_compat->addr;
2026 usvc->port = usvc_compat->port;
2027 usvc->fwmark = usvc_compat->fwmark;
2028
2029 /* Deep copy of sched_name is not needed here */
2030 usvc->sched_name = usvc_compat->sched_name;
2031
2032 usvc->flags = usvc_compat->flags;
2033 usvc->timeout = usvc_compat->timeout;
2034 usvc->netmask = usvc_compat->netmask;
2035}
2036
2037static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2038 struct ip_vs_dest_user *udest_compat)
2039{
2040 udest->addr.ip = udest_compat->addr;
2041 udest->port = udest_compat->port;
2042 udest->conn_flags = udest_compat->conn_flags;
2043 udest->weight = udest_compat->weight;
2044 udest->u_threshold = udest_compat->u_threshold;
2045 udest->l_threshold = udest_compat->l_threshold;
2046}
2047
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048static int
2049do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2050{
2051 int ret;
2052 unsigned char arg[MAX_ARG_LEN];
Julius Volzc860c6b2008-09-02 15:55:36 +02002053 struct ip_vs_service_user *usvc_compat;
2054 struct ip_vs_service_user_kern usvc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055 struct ip_vs_service *svc;
Julius Volzc860c6b2008-09-02 15:55:36 +02002056 struct ip_vs_dest_user *udest_compat;
2057 struct ip_vs_dest_user_kern udest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058
2059 if (!capable(CAP_NET_ADMIN))
2060 return -EPERM;
2061
2062 if (len != set_arglen[SET_CMDID(cmd)]) {
2063 IP_VS_ERR("set_ctl: len %u != %u\n",
2064 len, set_arglen[SET_CMDID(cmd)]);
2065 return -EINVAL;
2066 }
2067
2068 if (copy_from_user(arg, user, len) != 0)
2069 return -EFAULT;
2070
2071 /* increase the module use count */
2072 ip_vs_use_count_inc();
2073
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002074 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002075 ret = -ERESTARTSYS;
2076 goto out_dec;
2077 }
2078
2079 if (cmd == IP_VS_SO_SET_FLUSH) {
2080 /* Flush the virtual service */
2081 ret = ip_vs_flush();
2082 goto out_unlock;
2083 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2084 /* Set timeout values for (tcp tcpfin udp) */
2085 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2086 goto out_unlock;
2087 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2088 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2089 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2090 goto out_unlock;
2091 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2092 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2093 ret = stop_sync_thread(dm->state);
2094 goto out_unlock;
2095 }
2096
Julius Volzc860c6b2008-09-02 15:55:36 +02002097 usvc_compat = (struct ip_vs_service_user *)arg;
2098 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2099
2100 /* We only use the new structs internally, so copy userspace compat
2101 * structs to extended internal versions */
2102 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2103 ip_vs_copy_udest_compat(&udest, udest_compat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104
2105 if (cmd == IP_VS_SO_SET_ZERO) {
2106 /* if no service address is set, zero counters in all */
Julius Volzc860c6b2008-09-02 15:55:36 +02002107 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002108 ret = ip_vs_zero_all();
2109 goto out_unlock;
2110 }
2111 }
2112
2113 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
Julius Volzc860c6b2008-09-02 15:55:36 +02002114 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
Julius Volzc860c6b2008-09-02 15:55:36 +02002116 usvc.protocol, NIPQUAD(usvc.addr.ip),
2117 ntohs(usvc.port), usvc.sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118 ret = -EFAULT;
2119 goto out_unlock;
2120 }
2121
2122 /* Lookup the exact service by <protocol, addr, port> or fwmark */
Julius Volzc860c6b2008-09-02 15:55:36 +02002123 if (usvc.fwmark == 0)
Julius Volzb18610d2008-09-02 15:55:37 +02002124 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2125 &usvc.addr, usvc.port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126 else
Julius Volzb18610d2008-09-02 15:55:37 +02002127 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128
2129 if (cmd != IP_VS_SO_SET_ADD
Julius Volzc860c6b2008-09-02 15:55:36 +02002130 && (svc == NULL || svc->protocol != usvc.protocol)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131 ret = -ESRCH;
2132 goto out_unlock;
2133 }
2134
2135 switch (cmd) {
2136 case IP_VS_SO_SET_ADD:
2137 if (svc != NULL)
2138 ret = -EEXIST;
2139 else
Julius Volzc860c6b2008-09-02 15:55:36 +02002140 ret = ip_vs_add_service(&usvc, &svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002141 break;
2142 case IP_VS_SO_SET_EDIT:
Julius Volzc860c6b2008-09-02 15:55:36 +02002143 ret = ip_vs_edit_service(svc, &usvc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002144 break;
2145 case IP_VS_SO_SET_DEL:
2146 ret = ip_vs_del_service(svc);
2147 if (!ret)
2148 goto out_unlock;
2149 break;
2150 case IP_VS_SO_SET_ZERO:
2151 ret = ip_vs_zero_service(svc);
2152 break;
2153 case IP_VS_SO_SET_ADDDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002154 ret = ip_vs_add_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 break;
2156 case IP_VS_SO_SET_EDITDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002157 ret = ip_vs_edit_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002158 break;
2159 case IP_VS_SO_SET_DELDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002160 ret = ip_vs_del_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002161 break;
2162 default:
2163 ret = -EINVAL;
2164 }
2165
2166 if (svc)
2167 ip_vs_service_put(svc);
2168
2169 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002170 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171 out_dec:
2172 /* decrease the module use count */
2173 ip_vs_use_count_dec();
2174
2175 return ret;
2176}
2177
2178
2179static void
2180ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2181{
2182 spin_lock_bh(&src->lock);
2183 memcpy(dst, src, (char*)&src->lock - (char*)src);
2184 spin_unlock_bh(&src->lock);
2185}
2186
2187static void
2188ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2189{
2190 dst->protocol = src->protocol;
Julius Volze7ade462008-09-02 15:55:33 +02002191 dst->addr = src->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192 dst->port = src->port;
2193 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002194 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195 dst->flags = src->flags;
2196 dst->timeout = src->timeout / HZ;
2197 dst->netmask = src->netmask;
2198 dst->num_dests = src->num_dests;
2199 ip_vs_copy_stats(&dst->stats, &src->stats);
2200}
2201
2202static inline int
2203__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2204 struct ip_vs_get_services __user *uptr)
2205{
2206 int idx, count=0;
2207 struct ip_vs_service *svc;
2208 struct ip_vs_service_entry entry;
2209 int ret = 0;
2210
2211 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2212 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2213 if (count >= get->num_services)
2214 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002215 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002216 ip_vs_copy_service(&entry, svc);
2217 if (copy_to_user(&uptr->entrytable[count],
2218 &entry, sizeof(entry))) {
2219 ret = -EFAULT;
2220 goto out;
2221 }
2222 count++;
2223 }
2224 }
2225
2226 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2227 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2228 if (count >= get->num_services)
2229 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002230 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231 ip_vs_copy_service(&entry, svc);
2232 if (copy_to_user(&uptr->entrytable[count],
2233 &entry, sizeof(entry))) {
2234 ret = -EFAULT;
2235 goto out;
2236 }
2237 count++;
2238 }
2239 }
2240 out:
2241 return ret;
2242}
2243
2244static inline int
2245__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2246 struct ip_vs_get_dests __user *uptr)
2247{
2248 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002249 union nf_inet_addr addr = { .ip = get->addr };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002250 int ret = 0;
2251
2252 if (get->fwmark)
Julius Volzb18610d2008-09-02 15:55:37 +02002253 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254 else
Julius Volzb18610d2008-09-02 15:55:37 +02002255 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2256 get->port);
2257
Linus Torvalds1da177e2005-04-16 15:20:36 -07002258 if (svc) {
2259 int count = 0;
2260 struct ip_vs_dest *dest;
2261 struct ip_vs_dest_entry entry;
2262
2263 list_for_each_entry(dest, &svc->destinations, n_list) {
2264 if (count >= get->num_dests)
2265 break;
2266
Julius Volze7ade462008-09-02 15:55:33 +02002267 entry.addr = dest->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 entry.port = dest->port;
2269 entry.conn_flags = atomic_read(&dest->conn_flags);
2270 entry.weight = atomic_read(&dest->weight);
2271 entry.u_threshold = dest->u_threshold;
2272 entry.l_threshold = dest->l_threshold;
2273 entry.activeconns = atomic_read(&dest->activeconns);
2274 entry.inactconns = atomic_read(&dest->inactconns);
2275 entry.persistconns = atomic_read(&dest->persistconns);
2276 ip_vs_copy_stats(&entry.stats, &dest->stats);
2277 if (copy_to_user(&uptr->entrytable[count],
2278 &entry, sizeof(entry))) {
2279 ret = -EFAULT;
2280 break;
2281 }
2282 count++;
2283 }
2284 ip_vs_service_put(svc);
2285 } else
2286 ret = -ESRCH;
2287 return ret;
2288}
2289
2290static inline void
2291__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2292{
2293#ifdef CONFIG_IP_VS_PROTO_TCP
2294 u->tcp_timeout =
2295 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2296 u->tcp_fin_timeout =
2297 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2298#endif
2299#ifdef CONFIG_IP_VS_PROTO_UDP
2300 u->udp_timeout =
2301 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2302#endif
2303}
2304
2305
2306#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2307#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2308#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2309#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2310#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2311#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2312#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2313
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002314static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002315 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2316 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2317 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2318 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2319 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2320 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2321 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2322};
2323
2324static int
2325do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2326{
2327 unsigned char arg[128];
2328 int ret = 0;
2329
2330 if (!capable(CAP_NET_ADMIN))
2331 return -EPERM;
2332
2333 if (*len < get_arglen[GET_CMDID(cmd)]) {
2334 IP_VS_ERR("get_ctl: len %u < %u\n",
2335 *len, get_arglen[GET_CMDID(cmd)]);
2336 return -EINVAL;
2337 }
2338
2339 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2340 return -EFAULT;
2341
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002342 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002343 return -ERESTARTSYS;
2344
2345 switch (cmd) {
2346 case IP_VS_SO_GET_VERSION:
2347 {
2348 char buf[64];
2349
2350 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2351 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2352 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2353 ret = -EFAULT;
2354 goto out;
2355 }
2356 *len = strlen(buf)+1;
2357 }
2358 break;
2359
2360 case IP_VS_SO_GET_INFO:
2361 {
2362 struct ip_vs_getinfo info;
2363 info.version = IP_VS_VERSION_CODE;
2364 info.size = IP_VS_CONN_TAB_SIZE;
2365 info.num_services = ip_vs_num_services;
2366 if (copy_to_user(user, &info, sizeof(info)) != 0)
2367 ret = -EFAULT;
2368 }
2369 break;
2370
2371 case IP_VS_SO_GET_SERVICES:
2372 {
2373 struct ip_vs_get_services *get;
2374 int size;
2375
2376 get = (struct ip_vs_get_services *)arg;
2377 size = sizeof(*get) +
2378 sizeof(struct ip_vs_service_entry) * get->num_services;
2379 if (*len != size) {
2380 IP_VS_ERR("length: %u != %u\n", *len, size);
2381 ret = -EINVAL;
2382 goto out;
2383 }
2384 ret = __ip_vs_get_service_entries(get, user);
2385 }
2386 break;
2387
2388 case IP_VS_SO_GET_SERVICE:
2389 {
2390 struct ip_vs_service_entry *entry;
2391 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002392 union nf_inet_addr addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002393
2394 entry = (struct ip_vs_service_entry *)arg;
Julius Volzb18610d2008-09-02 15:55:37 +02002395 addr.ip = entry->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002396 if (entry->fwmark)
Julius Volzb18610d2008-09-02 15:55:37 +02002397 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398 else
Julius Volzb18610d2008-09-02 15:55:37 +02002399 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2400 &addr, entry->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401 if (svc) {
2402 ip_vs_copy_service(entry, svc);
2403 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2404 ret = -EFAULT;
2405 ip_vs_service_put(svc);
2406 } else
2407 ret = -ESRCH;
2408 }
2409 break;
2410
2411 case IP_VS_SO_GET_DESTS:
2412 {
2413 struct ip_vs_get_dests *get;
2414 int size;
2415
2416 get = (struct ip_vs_get_dests *)arg;
2417 size = sizeof(*get) +
2418 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2419 if (*len != size) {
2420 IP_VS_ERR("length: %u != %u\n", *len, size);
2421 ret = -EINVAL;
2422 goto out;
2423 }
2424 ret = __ip_vs_get_dest_entries(get, user);
2425 }
2426 break;
2427
2428 case IP_VS_SO_GET_TIMEOUT:
2429 {
2430 struct ip_vs_timeout_user t;
2431
2432 __ip_vs_get_timeouts(&t);
2433 if (copy_to_user(user, &t, sizeof(t)) != 0)
2434 ret = -EFAULT;
2435 }
2436 break;
2437
2438 case IP_VS_SO_GET_DAEMON:
2439 {
2440 struct ip_vs_daemon_user d[2];
2441
2442 memset(&d, 0, sizeof(d));
2443 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2444 d[0].state = IP_VS_STATE_MASTER;
pageexec4da62fc2005-06-26 16:00:19 -07002445 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002446 d[0].syncid = ip_vs_master_syncid;
2447 }
2448 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2449 d[1].state = IP_VS_STATE_BACKUP;
pageexec4da62fc2005-06-26 16:00:19 -07002450 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002451 d[1].syncid = ip_vs_backup_syncid;
2452 }
2453 if (copy_to_user(user, &d, sizeof(d)) != 0)
2454 ret = -EFAULT;
2455 }
2456 break;
2457
2458 default:
2459 ret = -EINVAL;
2460 }
2461
2462 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002463 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002464 return ret;
2465}
2466
2467
2468static struct nf_sockopt_ops ip_vs_sockopts = {
2469 .pf = PF_INET,
2470 .set_optmin = IP_VS_BASE_CTL,
2471 .set_optmax = IP_VS_SO_SET_MAX+1,
2472 .set = do_ip_vs_set_ctl,
2473 .get_optmin = IP_VS_BASE_CTL,
2474 .get_optmax = IP_VS_SO_GET_MAX+1,
2475 .get = do_ip_vs_get_ctl,
Neil Horman16fcec32007-09-11 11:28:26 +02002476 .owner = THIS_MODULE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002477};
2478
Julius Volz9a812192008-08-14 14:08:44 +02002479/*
2480 * Generic Netlink interface
2481 */
2482
2483/* IPVS genetlink family */
2484static struct genl_family ip_vs_genl_family = {
2485 .id = GENL_ID_GENERATE,
2486 .hdrsize = 0,
2487 .name = IPVS_GENL_NAME,
2488 .version = IPVS_GENL_VERSION,
2489 .maxattr = IPVS_CMD_MAX,
2490};
2491
2492/* Policy used for first-level command attributes */
2493static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2494 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2495 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2496 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2497 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2498 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2499 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2500};
2501
2502/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2503static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2504 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2505 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2506 .len = IP_VS_IFNAME_MAXLEN },
2507 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2508};
2509
2510/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2511static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2512 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2513 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2514 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2515 .len = sizeof(union nf_inet_addr) },
2516 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2517 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2518 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2519 .len = IP_VS_SCHEDNAME_MAXLEN },
2520 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2521 .len = sizeof(struct ip_vs_flags) },
2522 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2523 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2524 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2525};
2526
2527/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2528static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2529 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2530 .len = sizeof(union nf_inet_addr) },
2531 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2532 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2533 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2534 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2535 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2536 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2537 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2538 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2539 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2540};
2541
2542static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2543 struct ip_vs_stats *stats)
2544{
2545 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2546 if (!nl_stats)
2547 return -EMSGSIZE;
2548
2549 spin_lock_bh(&stats->lock);
2550
2551 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2552 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2553 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2554 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2555 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2556 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2557 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2558 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2559 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2560 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2561
2562 spin_unlock_bh(&stats->lock);
2563
2564 nla_nest_end(skb, nl_stats);
2565
2566 return 0;
2567
2568nla_put_failure:
2569 spin_unlock_bh(&stats->lock);
2570 nla_nest_cancel(skb, nl_stats);
2571 return -EMSGSIZE;
2572}
2573
2574static int ip_vs_genl_fill_service(struct sk_buff *skb,
2575 struct ip_vs_service *svc)
2576{
2577 struct nlattr *nl_service;
2578 struct ip_vs_flags flags = { .flags = svc->flags,
2579 .mask = ~0 };
2580
2581 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2582 if (!nl_service)
2583 return -EMSGSIZE;
2584
2585 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2586
2587 if (svc->fwmark) {
2588 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2589 } else {
2590 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2591 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2592 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2593 }
2594
2595 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2596 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2597 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2598 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2599
2600 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2601 goto nla_put_failure;
2602
2603 nla_nest_end(skb, nl_service);
2604
2605 return 0;
2606
2607nla_put_failure:
2608 nla_nest_cancel(skb, nl_service);
2609 return -EMSGSIZE;
2610}
2611
2612static int ip_vs_genl_dump_service(struct sk_buff *skb,
2613 struct ip_vs_service *svc,
2614 struct netlink_callback *cb)
2615{
2616 void *hdr;
2617
2618 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2619 &ip_vs_genl_family, NLM_F_MULTI,
2620 IPVS_CMD_NEW_SERVICE);
2621 if (!hdr)
2622 return -EMSGSIZE;
2623
2624 if (ip_vs_genl_fill_service(skb, svc) < 0)
2625 goto nla_put_failure;
2626
2627 return genlmsg_end(skb, hdr);
2628
2629nla_put_failure:
2630 genlmsg_cancel(skb, hdr);
2631 return -EMSGSIZE;
2632}
2633
2634static int ip_vs_genl_dump_services(struct sk_buff *skb,
2635 struct netlink_callback *cb)
2636{
2637 int idx = 0, i;
2638 int start = cb->args[0];
2639 struct ip_vs_service *svc;
2640
2641 mutex_lock(&__ip_vs_mutex);
2642 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2643 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2644 if (++idx <= start)
2645 continue;
2646 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2647 idx--;
2648 goto nla_put_failure;
2649 }
2650 }
2651 }
2652
2653 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2654 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2655 if (++idx <= start)
2656 continue;
2657 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2658 idx--;
2659 goto nla_put_failure;
2660 }
2661 }
2662 }
2663
2664nla_put_failure:
2665 mutex_unlock(&__ip_vs_mutex);
2666 cb->args[0] = idx;
2667
2668 return skb->len;
2669}
2670
Julius Volzc860c6b2008-09-02 15:55:36 +02002671static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
Julius Volz9a812192008-08-14 14:08:44 +02002672 struct nlattr *nla, int full_entry)
2673{
2674 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2675 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2676
2677 /* Parse mandatory identifying service fields first */
2678 if (nla == NULL ||
2679 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2680 return -EINVAL;
2681
2682 nla_af = attrs[IPVS_SVC_ATTR_AF];
2683 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2684 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2685 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2686 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2687
2688 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2689 return -EINVAL;
2690
Julius Volzc860c6b2008-09-02 15:55:36 +02002691 usvc->af = nla_get_u16(nla_af);
Julius Volz9a812192008-08-14 14:08:44 +02002692 /* For now, only support IPv4 */
2693 if (nla_get_u16(nla_af) != AF_INET)
2694 return -EAFNOSUPPORT;
2695
2696 if (nla_fwmark) {
2697 usvc->protocol = IPPROTO_TCP;
2698 usvc->fwmark = nla_get_u32(nla_fwmark);
2699 } else {
2700 usvc->protocol = nla_get_u16(nla_protocol);
2701 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2702 usvc->port = nla_get_u16(nla_port);
2703 usvc->fwmark = 0;
2704 }
2705
2706 /* If a full entry was requested, check for the additional fields */
2707 if (full_entry) {
2708 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2709 *nla_netmask;
2710 struct ip_vs_flags flags;
2711 struct ip_vs_service *svc;
2712
2713 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2714 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2715 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2716 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2717
2718 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2719 return -EINVAL;
2720
2721 nla_memcpy(&flags, nla_flags, sizeof(flags));
2722
2723 /* prefill flags from service if it already exists */
2724 if (usvc->fwmark)
Julius Volzb18610d2008-09-02 15:55:37 +02002725 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
Julius Volz9a812192008-08-14 14:08:44 +02002726 else
Julius Volzb18610d2008-09-02 15:55:37 +02002727 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2728 &usvc->addr, usvc->port);
Julius Volz9a812192008-08-14 14:08:44 +02002729 if (svc) {
2730 usvc->flags = svc->flags;
2731 ip_vs_service_put(svc);
2732 } else
2733 usvc->flags = 0;
2734
2735 /* set new flags from userland */
2736 usvc->flags = (usvc->flags & ~flags.mask) |
2737 (flags.flags & flags.mask);
Julius Volzc860c6b2008-09-02 15:55:36 +02002738 usvc->sched_name = nla_data(nla_sched);
Julius Volz9a812192008-08-14 14:08:44 +02002739 usvc->timeout = nla_get_u32(nla_timeout);
2740 usvc->netmask = nla_get_u32(nla_netmask);
2741 }
2742
2743 return 0;
2744}
2745
2746static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2747{
Julius Volzc860c6b2008-09-02 15:55:36 +02002748 struct ip_vs_service_user_kern usvc;
Julius Volz9a812192008-08-14 14:08:44 +02002749 int ret;
2750
2751 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2752 if (ret)
2753 return ERR_PTR(ret);
2754
2755 if (usvc.fwmark)
Julius Volzb18610d2008-09-02 15:55:37 +02002756 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
Julius Volz9a812192008-08-14 14:08:44 +02002757 else
Julius Volzb18610d2008-09-02 15:55:37 +02002758 return __ip_vs_service_get(usvc.af, usvc.protocol,
2759 &usvc.addr, usvc.port);
Julius Volz9a812192008-08-14 14:08:44 +02002760}
2761
2762static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2763{
2764 struct nlattr *nl_dest;
2765
2766 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2767 if (!nl_dest)
2768 return -EMSGSIZE;
2769
2770 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2771 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2772
2773 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2774 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2775 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2776 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2777 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2778 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2779 atomic_read(&dest->activeconns));
2780 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2781 atomic_read(&dest->inactconns));
2782 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2783 atomic_read(&dest->persistconns));
2784
2785 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2786 goto nla_put_failure;
2787
2788 nla_nest_end(skb, nl_dest);
2789
2790 return 0;
2791
2792nla_put_failure:
2793 nla_nest_cancel(skb, nl_dest);
2794 return -EMSGSIZE;
2795}
2796
2797static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2798 struct netlink_callback *cb)
2799{
2800 void *hdr;
2801
2802 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2803 &ip_vs_genl_family, NLM_F_MULTI,
2804 IPVS_CMD_NEW_DEST);
2805 if (!hdr)
2806 return -EMSGSIZE;
2807
2808 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2809 goto nla_put_failure;
2810
2811 return genlmsg_end(skb, hdr);
2812
2813nla_put_failure:
2814 genlmsg_cancel(skb, hdr);
2815 return -EMSGSIZE;
2816}
2817
2818static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2819 struct netlink_callback *cb)
2820{
2821 int idx = 0;
2822 int start = cb->args[0];
2823 struct ip_vs_service *svc;
2824 struct ip_vs_dest *dest;
2825 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2826
2827 mutex_lock(&__ip_vs_mutex);
2828
2829 /* Try to find the service for which to dump destinations */
2830 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2831 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2832 goto out_err;
2833
2834 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2835 if (IS_ERR(svc) || svc == NULL)
2836 goto out_err;
2837
2838 /* Dump the destinations */
2839 list_for_each_entry(dest, &svc->destinations, n_list) {
2840 if (++idx <= start)
2841 continue;
2842 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2843 idx--;
2844 goto nla_put_failure;
2845 }
2846 }
2847
2848nla_put_failure:
2849 cb->args[0] = idx;
2850 ip_vs_service_put(svc);
2851
2852out_err:
2853 mutex_unlock(&__ip_vs_mutex);
2854
2855 return skb->len;
2856}
2857
Julius Volzc860c6b2008-09-02 15:55:36 +02002858static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
Julius Volz9a812192008-08-14 14:08:44 +02002859 struct nlattr *nla, int full_entry)
2860{
2861 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2862 struct nlattr *nla_addr, *nla_port;
2863
2864 /* Parse mandatory identifying destination fields first */
2865 if (nla == NULL ||
2866 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2867 return -EINVAL;
2868
2869 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2870 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2871
2872 if (!(nla_addr && nla_port))
2873 return -EINVAL;
2874
2875 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2876 udest->port = nla_get_u16(nla_port);
2877
2878 /* If a full entry was requested, check for the additional fields */
2879 if (full_entry) {
2880 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2881 *nla_l_thresh;
2882
2883 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2884 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2885 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2886 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2887
2888 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2889 return -EINVAL;
2890
2891 udest->conn_flags = nla_get_u32(nla_fwd)
2892 & IP_VS_CONN_F_FWD_MASK;
2893 udest->weight = nla_get_u32(nla_weight);
2894 udest->u_threshold = nla_get_u32(nla_u_thresh);
2895 udest->l_threshold = nla_get_u32(nla_l_thresh);
2896 }
2897
2898 return 0;
2899}
2900
2901static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2902 const char *mcast_ifn, __be32 syncid)
2903{
2904 struct nlattr *nl_daemon;
2905
2906 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2907 if (!nl_daemon)
2908 return -EMSGSIZE;
2909
2910 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2911 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2912 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2913
2914 nla_nest_end(skb, nl_daemon);
2915
2916 return 0;
2917
2918nla_put_failure:
2919 nla_nest_cancel(skb, nl_daemon);
2920 return -EMSGSIZE;
2921}
2922
2923static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2924 const char *mcast_ifn, __be32 syncid,
2925 struct netlink_callback *cb)
2926{
2927 void *hdr;
2928 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2929 &ip_vs_genl_family, NLM_F_MULTI,
2930 IPVS_CMD_NEW_DAEMON);
2931 if (!hdr)
2932 return -EMSGSIZE;
2933
2934 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2935 goto nla_put_failure;
2936
2937 return genlmsg_end(skb, hdr);
2938
2939nla_put_failure:
2940 genlmsg_cancel(skb, hdr);
2941 return -EMSGSIZE;
2942}
2943
2944static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2945 struct netlink_callback *cb)
2946{
2947 mutex_lock(&__ip_vs_mutex);
2948 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2949 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2950 ip_vs_master_mcast_ifn,
2951 ip_vs_master_syncid, cb) < 0)
2952 goto nla_put_failure;
2953
2954 cb->args[0] = 1;
2955 }
2956
2957 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2958 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2959 ip_vs_backup_mcast_ifn,
2960 ip_vs_backup_syncid, cb) < 0)
2961 goto nla_put_failure;
2962
2963 cb->args[1] = 1;
2964 }
2965
2966nla_put_failure:
2967 mutex_unlock(&__ip_vs_mutex);
2968
2969 return skb->len;
2970}
2971
2972static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2973{
2974 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2975 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2976 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2977 return -EINVAL;
2978
2979 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2980 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2981 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2982}
2983
2984static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2985{
2986 if (!attrs[IPVS_DAEMON_ATTR_STATE])
2987 return -EINVAL;
2988
2989 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2990}
2991
2992static int ip_vs_genl_set_config(struct nlattr **attrs)
2993{
2994 struct ip_vs_timeout_user t;
2995
2996 __ip_vs_get_timeouts(&t);
2997
2998 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2999 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3000
3001 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3002 t.tcp_fin_timeout =
3003 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3004
3005 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3006 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3007
3008 return ip_vs_set_timeout(&t);
3009}
3010
3011static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3012{
3013 struct ip_vs_service *svc = NULL;
Julius Volzc860c6b2008-09-02 15:55:36 +02003014 struct ip_vs_service_user_kern usvc;
3015 struct ip_vs_dest_user_kern udest;
Julius Volz9a812192008-08-14 14:08:44 +02003016 int ret = 0, cmd;
3017 int need_full_svc = 0, need_full_dest = 0;
3018
3019 cmd = info->genlhdr->cmd;
3020
3021 mutex_lock(&__ip_vs_mutex);
3022
3023 if (cmd == IPVS_CMD_FLUSH) {
3024 ret = ip_vs_flush();
3025 goto out;
3026 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3027 ret = ip_vs_genl_set_config(info->attrs);
3028 goto out;
3029 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3030 cmd == IPVS_CMD_DEL_DAEMON) {
3031
3032 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3033
3034 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3035 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3036 info->attrs[IPVS_CMD_ATTR_DAEMON],
3037 ip_vs_daemon_policy)) {
3038 ret = -EINVAL;
3039 goto out;
3040 }
3041
3042 if (cmd == IPVS_CMD_NEW_DAEMON)
3043 ret = ip_vs_genl_new_daemon(daemon_attrs);
3044 else
3045 ret = ip_vs_genl_del_daemon(daemon_attrs);
3046 goto out;
3047 } else if (cmd == IPVS_CMD_ZERO &&
3048 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3049 ret = ip_vs_zero_all();
3050 goto out;
3051 }
3052
3053 /* All following commands require a service argument, so check if we
3054 * received a valid one. We need a full service specification when
3055 * adding / editing a service. Only identifying members otherwise. */
3056 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3057 need_full_svc = 1;
3058
3059 ret = ip_vs_genl_parse_service(&usvc,
3060 info->attrs[IPVS_CMD_ATTR_SERVICE],
3061 need_full_svc);
3062 if (ret)
3063 goto out;
3064
3065 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3066 if (usvc.fwmark == 0)
Julius Volzb18610d2008-09-02 15:55:37 +02003067 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3068 &usvc.addr, usvc.port);
Julius Volz9a812192008-08-14 14:08:44 +02003069 else
Julius Volzb18610d2008-09-02 15:55:37 +02003070 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
Julius Volz9a812192008-08-14 14:08:44 +02003071
3072 /* Unless we're adding a new service, the service must already exist */
3073 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3074 ret = -ESRCH;
3075 goto out;
3076 }
3077
3078 /* Destination commands require a valid destination argument. For
3079 * adding / editing a destination, we need a full destination
3080 * specification. */
3081 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3082 cmd == IPVS_CMD_DEL_DEST) {
3083 if (cmd != IPVS_CMD_DEL_DEST)
3084 need_full_dest = 1;
3085
3086 ret = ip_vs_genl_parse_dest(&udest,
3087 info->attrs[IPVS_CMD_ATTR_DEST],
3088 need_full_dest);
3089 if (ret)
3090 goto out;
3091 }
3092
3093 switch (cmd) {
3094 case IPVS_CMD_NEW_SERVICE:
3095 if (svc == NULL)
3096 ret = ip_vs_add_service(&usvc, &svc);
3097 else
3098 ret = -EEXIST;
3099 break;
3100 case IPVS_CMD_SET_SERVICE:
3101 ret = ip_vs_edit_service(svc, &usvc);
3102 break;
3103 case IPVS_CMD_DEL_SERVICE:
3104 ret = ip_vs_del_service(svc);
3105 break;
3106 case IPVS_CMD_NEW_DEST:
3107 ret = ip_vs_add_dest(svc, &udest);
3108 break;
3109 case IPVS_CMD_SET_DEST:
3110 ret = ip_vs_edit_dest(svc, &udest);
3111 break;
3112 case IPVS_CMD_DEL_DEST:
3113 ret = ip_vs_del_dest(svc, &udest);
3114 break;
3115 case IPVS_CMD_ZERO:
3116 ret = ip_vs_zero_service(svc);
3117 break;
3118 default:
3119 ret = -EINVAL;
3120 }
3121
3122out:
3123 if (svc)
3124 ip_vs_service_put(svc);
3125 mutex_unlock(&__ip_vs_mutex);
3126
3127 return ret;
3128}
3129
3130static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3131{
3132 struct sk_buff *msg;
3133 void *reply;
3134 int ret, cmd, reply_cmd;
3135
3136 cmd = info->genlhdr->cmd;
3137
3138 if (cmd == IPVS_CMD_GET_SERVICE)
3139 reply_cmd = IPVS_CMD_NEW_SERVICE;
3140 else if (cmd == IPVS_CMD_GET_INFO)
3141 reply_cmd = IPVS_CMD_SET_INFO;
3142 else if (cmd == IPVS_CMD_GET_CONFIG)
3143 reply_cmd = IPVS_CMD_SET_CONFIG;
3144 else {
3145 IP_VS_ERR("unknown Generic Netlink command\n");
3146 return -EINVAL;
3147 }
3148
3149 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3150 if (!msg)
3151 return -ENOMEM;
3152
3153 mutex_lock(&__ip_vs_mutex);
3154
3155 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3156 if (reply == NULL)
3157 goto nla_put_failure;
3158
3159 switch (cmd) {
3160 case IPVS_CMD_GET_SERVICE:
3161 {
3162 struct ip_vs_service *svc;
3163
3164 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3165 if (IS_ERR(svc)) {
3166 ret = PTR_ERR(svc);
3167 goto out_err;
3168 } else if (svc) {
3169 ret = ip_vs_genl_fill_service(msg, svc);
3170 ip_vs_service_put(svc);
3171 if (ret)
3172 goto nla_put_failure;
3173 } else {
3174 ret = -ESRCH;
3175 goto out_err;
3176 }
3177
3178 break;
3179 }
3180
3181 case IPVS_CMD_GET_CONFIG:
3182 {
3183 struct ip_vs_timeout_user t;
3184
3185 __ip_vs_get_timeouts(&t);
3186#ifdef CONFIG_IP_VS_PROTO_TCP
3187 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3188 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3189 t.tcp_fin_timeout);
3190#endif
3191#ifdef CONFIG_IP_VS_PROTO_UDP
3192 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3193#endif
3194
3195 break;
3196 }
3197
3198 case IPVS_CMD_GET_INFO:
3199 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3200 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3201 IP_VS_CONN_TAB_SIZE);
3202 break;
3203 }
3204
3205 genlmsg_end(msg, reply);
3206 ret = genlmsg_unicast(msg, info->snd_pid);
3207 goto out;
3208
3209nla_put_failure:
3210 IP_VS_ERR("not enough space in Netlink message\n");
3211 ret = -EMSGSIZE;
3212
3213out_err:
3214 nlmsg_free(msg);
3215out:
3216 mutex_unlock(&__ip_vs_mutex);
3217
3218 return ret;
3219}
3220
3221
3222static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3223 {
3224 .cmd = IPVS_CMD_NEW_SERVICE,
3225 .flags = GENL_ADMIN_PERM,
3226 .policy = ip_vs_cmd_policy,
3227 .doit = ip_vs_genl_set_cmd,
3228 },
3229 {
3230 .cmd = IPVS_CMD_SET_SERVICE,
3231 .flags = GENL_ADMIN_PERM,
3232 .policy = ip_vs_cmd_policy,
3233 .doit = ip_vs_genl_set_cmd,
3234 },
3235 {
3236 .cmd = IPVS_CMD_DEL_SERVICE,
3237 .flags = GENL_ADMIN_PERM,
3238 .policy = ip_vs_cmd_policy,
3239 .doit = ip_vs_genl_set_cmd,
3240 },
3241 {
3242 .cmd = IPVS_CMD_GET_SERVICE,
3243 .flags = GENL_ADMIN_PERM,
3244 .doit = ip_vs_genl_get_cmd,
3245 .dumpit = ip_vs_genl_dump_services,
3246 .policy = ip_vs_cmd_policy,
3247 },
3248 {
3249 .cmd = IPVS_CMD_NEW_DEST,
3250 .flags = GENL_ADMIN_PERM,
3251 .policy = ip_vs_cmd_policy,
3252 .doit = ip_vs_genl_set_cmd,
3253 },
3254 {
3255 .cmd = IPVS_CMD_SET_DEST,
3256 .flags = GENL_ADMIN_PERM,
3257 .policy = ip_vs_cmd_policy,
3258 .doit = ip_vs_genl_set_cmd,
3259 },
3260 {
3261 .cmd = IPVS_CMD_DEL_DEST,
3262 .flags = GENL_ADMIN_PERM,
3263 .policy = ip_vs_cmd_policy,
3264 .doit = ip_vs_genl_set_cmd,
3265 },
3266 {
3267 .cmd = IPVS_CMD_GET_DEST,
3268 .flags = GENL_ADMIN_PERM,
3269 .policy = ip_vs_cmd_policy,
3270 .dumpit = ip_vs_genl_dump_dests,
3271 },
3272 {
3273 .cmd = IPVS_CMD_NEW_DAEMON,
3274 .flags = GENL_ADMIN_PERM,
3275 .policy = ip_vs_cmd_policy,
3276 .doit = ip_vs_genl_set_cmd,
3277 },
3278 {
3279 .cmd = IPVS_CMD_DEL_DAEMON,
3280 .flags = GENL_ADMIN_PERM,
3281 .policy = ip_vs_cmd_policy,
3282 .doit = ip_vs_genl_set_cmd,
3283 },
3284 {
3285 .cmd = IPVS_CMD_GET_DAEMON,
3286 .flags = GENL_ADMIN_PERM,
3287 .dumpit = ip_vs_genl_dump_daemons,
3288 },
3289 {
3290 .cmd = IPVS_CMD_SET_CONFIG,
3291 .flags = GENL_ADMIN_PERM,
3292 .policy = ip_vs_cmd_policy,
3293 .doit = ip_vs_genl_set_cmd,
3294 },
3295 {
3296 .cmd = IPVS_CMD_GET_CONFIG,
3297 .flags = GENL_ADMIN_PERM,
3298 .doit = ip_vs_genl_get_cmd,
3299 },
3300 {
3301 .cmd = IPVS_CMD_GET_INFO,
3302 .flags = GENL_ADMIN_PERM,
3303 .doit = ip_vs_genl_get_cmd,
3304 },
3305 {
3306 .cmd = IPVS_CMD_ZERO,
3307 .flags = GENL_ADMIN_PERM,
3308 .policy = ip_vs_cmd_policy,
3309 .doit = ip_vs_genl_set_cmd,
3310 },
3311 {
3312 .cmd = IPVS_CMD_FLUSH,
3313 .flags = GENL_ADMIN_PERM,
3314 .doit = ip_vs_genl_set_cmd,
3315 },
3316};
3317
3318static int __init ip_vs_genl_register(void)
3319{
3320 int ret, i;
3321
3322 ret = genl_register_family(&ip_vs_genl_family);
3323 if (ret)
3324 return ret;
3325
3326 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3327 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3328 if (ret)
3329 goto err_out;
3330 }
3331 return 0;
3332
3333err_out:
3334 genl_unregister_family(&ip_vs_genl_family);
3335 return ret;
3336}
3337
3338static void ip_vs_genl_unregister(void)
3339{
3340 genl_unregister_family(&ip_vs_genl_family);
3341}
3342
3343/* End of Generic Netlink interface definitions */
3344
Linus Torvalds1da177e2005-04-16 15:20:36 -07003345
Sven Wegener048cf482008-08-10 18:24:35 +00003346int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003347{
3348 int ret;
3349 int idx;
3350
3351 EnterFunction(2);
3352
3353 ret = nf_register_sockopt(&ip_vs_sockopts);
3354 if (ret) {
3355 IP_VS_ERR("cannot register sockopt.\n");
3356 return ret;
3357 }
3358
Julius Volz9a812192008-08-14 14:08:44 +02003359 ret = ip_vs_genl_register();
3360 if (ret) {
3361 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3362 nf_unregister_sockopt(&ip_vs_sockopts);
3363 return ret;
3364 }
3365
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003366 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3367 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003368
Pavel Emelyanov90754f82008-01-12 02:33:50 -08003369 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003370
3371 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3372 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3373 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3374 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3375 }
3376 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3377 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3378 }
3379
Linus Torvalds1da177e2005-04-16 15:20:36 -07003380 ip_vs_new_estimator(&ip_vs_stats);
3381
3382 /* Hook the defense timer */
3383 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3384
3385 LeaveFunction(2);
3386 return 0;
3387}
3388
3389
3390void ip_vs_control_cleanup(void)
3391{
3392 EnterFunction(2);
3393 ip_vs_trash_cleanup();
3394 cancel_rearming_delayed_work(&defense_work);
Oleg Nesterov28e53bd2007-05-09 02:34:22 -07003395 cancel_work_sync(&defense_work.work);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003396 ip_vs_kill_estimator(&ip_vs_stats);
3397 unregister_sysctl_table(sysctl_header);
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003398 proc_net_remove(&init_net, "ip_vs_stats");
3399 proc_net_remove(&init_net, "ip_vs");
Julius Volz9a812192008-08-14 14:08:44 +02003400 ip_vs_genl_unregister();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003401 nf_unregister_sockopt(&ip_vs_sockopts);
3402 LeaveFunction(2);
3403}