blob: 60ca24b9ec00ca69c5ade801e085c41fcd660187 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080024#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080034#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020036#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include <net/ip.h>
Vince Busam09571c72008-09-02 15:55:52 +020038#ifdef CONFIG_IP_VS_IPV6
39#include <net/ipv6.h>
40#include <net/ip6_route.h>
41#endif
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020042#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070043#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020044#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045
46#include <asm/uaccess.h>
47
48#include <net/ip_vs.h>
49
50/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
Ingo Molnar14cc3e22006-03-26 01:37:14 -080051static DEFINE_MUTEX(__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -070052
53/* lock for service table */
54static DEFINE_RWLOCK(__ip_vs_svc_lock);
55
56/* lock for table with the real services */
57static DEFINE_RWLOCK(__ip_vs_rs_lock);
58
59/* lock for state and timeout tables */
60static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
61
62/* lock for drop entry handling */
63static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
64
65/* lock for drop packet handling */
66static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
67
68/* 1/rate drop and drop-entry variables */
69int ip_vs_drop_rate = 0;
70int ip_vs_drop_counter = 0;
71static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
72
73/* number of virtual services */
74static int ip_vs_num_services = 0;
75
76/* sysctl variables */
77static int sysctl_ip_vs_drop_entry = 0;
78static int sysctl_ip_vs_drop_packet = 0;
79static int sysctl_ip_vs_secure_tcp = 0;
80static int sysctl_ip_vs_amemthresh = 1024;
81static int sysctl_ip_vs_am_droprate = 10;
82int sysctl_ip_vs_cache_bypass = 0;
83int sysctl_ip_vs_expire_nodest_conn = 0;
84int sysctl_ip_vs_expire_quiescent_template = 0;
85int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
86int sysctl_ip_vs_nat_icmp_send = 0;
87
88
89#ifdef CONFIG_IP_VS_DEBUG
90static int sysctl_ip_vs_debug_level = 0;
91
92int ip_vs_get_debug_level(void)
93{
94 return sysctl_ip_vs_debug_level;
95}
96#endif
97
Vince Busam09571c72008-09-02 15:55:52 +020098#ifdef CONFIG_IP_VS_IPV6
99/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
100static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
101{
102 struct rt6_info *rt;
103 struct flowi fl = {
104 .oif = 0,
105 .nl_u = {
106 .ip6_u = {
107 .daddr = *addr,
108 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
109 };
110
111 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
112 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
113 return 1;
114
115 return 0;
116}
117#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700119 * update_defense_level is called from keventd and from sysctl,
120 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121 */
122static void update_defense_level(void)
123{
124 struct sysinfo i;
125 static int old_secure_tcp = 0;
126 int availmem;
127 int nomem;
128 int to_change = -1;
129
130 /* we only count free and buffered memory (in pages) */
131 si_meminfo(&i);
132 availmem = i.freeram + i.bufferram;
133 /* however in linux 2.5 the i.bufferram is total page cache size,
134 we need adjust it */
135 /* si_swapinfo(&i); */
136 /* availmem = availmem - (i.totalswap - i.freeswap); */
137
138 nomem = (availmem < sysctl_ip_vs_amemthresh);
139
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700140 local_bh_disable();
141
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142 /* drop_entry */
143 spin_lock(&__ip_vs_dropentry_lock);
144 switch (sysctl_ip_vs_drop_entry) {
145 case 0:
146 atomic_set(&ip_vs_dropentry, 0);
147 break;
148 case 1:
149 if (nomem) {
150 atomic_set(&ip_vs_dropentry, 1);
151 sysctl_ip_vs_drop_entry = 2;
152 } else {
153 atomic_set(&ip_vs_dropentry, 0);
154 }
155 break;
156 case 2:
157 if (nomem) {
158 atomic_set(&ip_vs_dropentry, 1);
159 } else {
160 atomic_set(&ip_vs_dropentry, 0);
161 sysctl_ip_vs_drop_entry = 1;
162 };
163 break;
164 case 3:
165 atomic_set(&ip_vs_dropentry, 1);
166 break;
167 }
168 spin_unlock(&__ip_vs_dropentry_lock);
169
170 /* drop_packet */
171 spin_lock(&__ip_vs_droppacket_lock);
172 switch (sysctl_ip_vs_drop_packet) {
173 case 0:
174 ip_vs_drop_rate = 0;
175 break;
176 case 1:
177 if (nomem) {
178 ip_vs_drop_rate = ip_vs_drop_counter
179 = sysctl_ip_vs_amemthresh /
180 (sysctl_ip_vs_amemthresh-availmem);
181 sysctl_ip_vs_drop_packet = 2;
182 } else {
183 ip_vs_drop_rate = 0;
184 }
185 break;
186 case 2:
187 if (nomem) {
188 ip_vs_drop_rate = ip_vs_drop_counter
189 = sysctl_ip_vs_amemthresh /
190 (sysctl_ip_vs_amemthresh-availmem);
191 } else {
192 ip_vs_drop_rate = 0;
193 sysctl_ip_vs_drop_packet = 1;
194 }
195 break;
196 case 3:
197 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
198 break;
199 }
200 spin_unlock(&__ip_vs_droppacket_lock);
201
202 /* secure_tcp */
203 write_lock(&__ip_vs_securetcp_lock);
204 switch (sysctl_ip_vs_secure_tcp) {
205 case 0:
206 if (old_secure_tcp >= 2)
207 to_change = 0;
208 break;
209 case 1:
210 if (nomem) {
211 if (old_secure_tcp < 2)
212 to_change = 1;
213 sysctl_ip_vs_secure_tcp = 2;
214 } else {
215 if (old_secure_tcp >= 2)
216 to_change = 0;
217 }
218 break;
219 case 2:
220 if (nomem) {
221 if (old_secure_tcp < 2)
222 to_change = 1;
223 } else {
224 if (old_secure_tcp >= 2)
225 to_change = 0;
226 sysctl_ip_vs_secure_tcp = 1;
227 }
228 break;
229 case 3:
230 if (old_secure_tcp < 2)
231 to_change = 1;
232 break;
233 }
234 old_secure_tcp = sysctl_ip_vs_secure_tcp;
235 if (to_change >= 0)
236 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
237 write_unlock(&__ip_vs_securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700238
239 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240}
241
242
243/*
244 * Timer for checking the defense
245 */
246#define DEFENSE_TIMER_PERIOD 1*HZ
David Howellsc4028952006-11-22 14:57:56 +0000247static void defense_work_handler(struct work_struct *work);
248static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249
David Howellsc4028952006-11-22 14:57:56 +0000250static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251{
252 update_defense_level();
253 if (atomic_read(&ip_vs_dropentry))
254 ip_vs_random_dropentry();
255
256 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
257}
258
259int
260ip_vs_use_count_inc(void)
261{
262 return try_module_get(THIS_MODULE);
263}
264
265void
266ip_vs_use_count_dec(void)
267{
268 module_put(THIS_MODULE);
269}
270
271
272/*
273 * Hash table: for virtual service lookups
274 */
275#define IP_VS_SVC_TAB_BITS 8
276#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
277#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
278
279/* the service table hashed by <protocol, addr, port> */
280static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
281/* the service table hashed by fwmark */
282static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
283
284/*
285 * Hash table: for real service lookups
286 */
287#define IP_VS_RTAB_BITS 4
288#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
289#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
290
291static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
292
293/*
294 * Trash for destinations
295 */
296static LIST_HEAD(ip_vs_dest_trash);
297
298/*
299 * FTP & NULL virtual service counters
300 */
301static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
302static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
303
304
305/*
306 * Returns hash value for virtual service
307 */
308static __inline__ unsigned
Julius Volzb18610d2008-09-02 15:55:37 +0200309ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
310 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311{
312 register unsigned porth = ntohs(port);
Julius Volzb18610d2008-09-02 15:55:37 +0200313 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314
Julius Volzb18610d2008-09-02 15:55:37 +0200315#ifdef CONFIG_IP_VS_IPV6
316 if (af == AF_INET6)
317 addr_fold = addr->ip6[0]^addr->ip6[1]^
318 addr->ip6[2]^addr->ip6[3];
319#endif
320
321 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322 & IP_VS_SVC_TAB_MASK;
323}
324
325/*
326 * Returns hash value of fwmark for virtual service lookup
327 */
328static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
329{
330 return fwmark & IP_VS_SVC_TAB_MASK;
331}
332
333/*
334 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
335 * or in the ip_vs_svc_fwm_table by fwmark.
336 * Should be called with locked tables.
337 */
338static int ip_vs_svc_hash(struct ip_vs_service *svc)
339{
340 unsigned hash;
341
342 if (svc->flags & IP_VS_SVC_F_HASHED) {
343 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
344 "called from %p\n", __builtin_return_address(0));
345 return 0;
346 }
347
348 if (svc->fwmark == 0) {
349 /*
350 * Hash it by <protocol,addr,port> in ip_vs_svc_table
351 */
Julius Volzb18610d2008-09-02 15:55:37 +0200352 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
Julius Volze7ade462008-09-02 15:55:33 +0200353 svc->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
355 } else {
356 /*
357 * Hash it by fwmark in ip_vs_svc_fwm_table
358 */
359 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
360 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
361 }
362
363 svc->flags |= IP_VS_SVC_F_HASHED;
364 /* increase its refcnt because it is referenced by the svc table */
365 atomic_inc(&svc->refcnt);
366 return 1;
367}
368
369
370/*
371 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
372 * Should be called with locked tables.
373 */
374static int ip_vs_svc_unhash(struct ip_vs_service *svc)
375{
376 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
377 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
378 "called from %p\n", __builtin_return_address(0));
379 return 0;
380 }
381
382 if (svc->fwmark == 0) {
383 /* Remove it from the ip_vs_svc_table table */
384 list_del(&svc->s_list);
385 } else {
386 /* Remove it from the ip_vs_svc_fwm_table table */
387 list_del(&svc->f_list);
388 }
389
390 svc->flags &= ~IP_VS_SVC_F_HASHED;
391 atomic_dec(&svc->refcnt);
392 return 1;
393}
394
395
396/*
397 * Get service by {proto,addr,port} in the service table.
398 */
Julius Volzb18610d2008-09-02 15:55:37 +0200399static inline struct ip_vs_service *
400__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
401 __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402{
403 unsigned hash;
404 struct ip_vs_service *svc;
405
406 /* Check for "full" addressed entries */
Julius Volzb18610d2008-09-02 15:55:37 +0200407 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408
409 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
Julius Volzb18610d2008-09-02 15:55:37 +0200410 if ((svc->af == af)
411 && ip_vs_addr_equal(af, &svc->addr, vaddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 && (svc->port == vport)
413 && (svc->protocol == protocol)) {
414 /* HIT */
415 atomic_inc(&svc->usecnt);
416 return svc;
417 }
418 }
419
420 return NULL;
421}
422
423
424/*
425 * Get service by {fwmark} in the service table.
426 */
Julius Volzb18610d2008-09-02 15:55:37 +0200427static inline struct ip_vs_service *
428__ip_vs_svc_fwm_get(int af, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429{
430 unsigned hash;
431 struct ip_vs_service *svc;
432
433 /* Check for fwmark addressed entries */
434 hash = ip_vs_svc_fwm_hashkey(fwmark);
435
436 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
Julius Volzb18610d2008-09-02 15:55:37 +0200437 if (svc->fwmark == fwmark && svc->af == af) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 /* HIT */
439 atomic_inc(&svc->usecnt);
440 return svc;
441 }
442 }
443
444 return NULL;
445}
446
447struct ip_vs_service *
Julius Volz3c2e0502008-09-02 15:55:38 +0200448ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
449 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450{
451 struct ip_vs_service *svc;
Julius Volz3c2e0502008-09-02 15:55:38 +0200452
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453 read_lock(&__ip_vs_svc_lock);
454
455 /*
456 * Check the table hashed by fwmark first
457 */
Julius Volz3c2e0502008-09-02 15:55:38 +0200458 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 goto out;
460
461 /*
462 * Check the table hashed by <protocol,addr,port>
463 * for "full" addressed entries
464 */
Julius Volz3c2e0502008-09-02 15:55:38 +0200465 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466
467 if (svc == NULL
468 && protocol == IPPROTO_TCP
469 && atomic_read(&ip_vs_ftpsvc_counter)
470 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
471 /*
472 * Check if ftp service entry exists, the packet
473 * might belong to FTP data connections.
474 */
Julius Volz3c2e0502008-09-02 15:55:38 +0200475 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 }
477
478 if (svc == NULL
479 && atomic_read(&ip_vs_nullsvc_counter)) {
480 /*
481 * Check if the catch-all port (port zero) exists
482 */
Julius Volz3c2e0502008-09-02 15:55:38 +0200483 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484 }
485
486 out:
487 read_unlock(&__ip_vs_svc_lock);
488
Julius Volz3c2e0502008-09-02 15:55:38 +0200489 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
490 fwmark, ip_vs_proto_name(protocol),
491 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
492 svc ? "hit" : "not hit");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493
494 return svc;
495}
496
497
498static inline void
499__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
500{
501 atomic_inc(&svc->refcnt);
502 dest->svc = svc;
503}
504
505static inline void
506__ip_vs_unbind_svc(struct ip_vs_dest *dest)
507{
508 struct ip_vs_service *svc = dest->svc;
509
510 dest->svc = NULL;
511 if (atomic_dec_and_test(&svc->refcnt))
512 kfree(svc);
513}
514
515
516/*
517 * Returns hash value for real service
518 */
Julius Volz7937df12008-09-02 15:55:48 +0200519static inline unsigned ip_vs_rs_hashkey(int af,
520 const union nf_inet_addr *addr,
521 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522{
523 register unsigned porth = ntohs(port);
Julius Volz7937df12008-09-02 15:55:48 +0200524 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525
Julius Volz7937df12008-09-02 15:55:48 +0200526#ifdef CONFIG_IP_VS_IPV6
527 if (af == AF_INET6)
528 addr_fold = addr->ip6[0]^addr->ip6[1]^
529 addr->ip6[2]^addr->ip6[3];
530#endif
531
532 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533 & IP_VS_RTAB_MASK;
534}
535
536/*
537 * Hashes ip_vs_dest in ip_vs_rtable by <addr,port>.
538 * should be called with locked tables.
539 */
540static int ip_vs_rs_hash(struct ip_vs_dest *dest)
541{
542 unsigned hash;
543
544 if (!list_empty(&dest->d_list)) {
545 return 0;
546 }
547
548 /*
549 * Hash by proto,addr,port,
550 * which are the parameters of the real service.
551 */
Julius Volz7937df12008-09-02 15:55:48 +0200552 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
553
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554 list_add(&dest->d_list, &ip_vs_rtable[hash]);
555
556 return 1;
557}
558
559/*
560 * UNhashes ip_vs_dest from ip_vs_rtable.
561 * should be called with locked tables.
562 */
563static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
564{
565 /*
566 * Remove it from the ip_vs_rtable table.
567 */
568 if (!list_empty(&dest->d_list)) {
569 list_del(&dest->d_list);
570 INIT_LIST_HEAD(&dest->d_list);
571 }
572
573 return 1;
574}
575
576/*
577 * Lookup real service by <proto,addr,port> in the real service table.
578 */
579struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200580ip_vs_lookup_real_service(int af, __u16 protocol,
581 const union nf_inet_addr *daddr,
582 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583{
584 unsigned hash;
585 struct ip_vs_dest *dest;
586
587 /*
588 * Check for "full" addressed entries
589 * Return the first found entry
590 */
Julius Volz7937df12008-09-02 15:55:48 +0200591 hash = ip_vs_rs_hashkey(af, daddr, dport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592
593 read_lock(&__ip_vs_rs_lock);
594 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200595 if ((dest->af == af)
596 && ip_vs_addr_equal(af, &dest->addr, daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 && (dest->port == dport)
598 && ((dest->protocol == protocol) ||
599 dest->vfwmark)) {
600 /* HIT */
601 read_unlock(&__ip_vs_rs_lock);
602 return dest;
603 }
604 }
605 read_unlock(&__ip_vs_rs_lock);
606
607 return NULL;
608}
609
610/*
611 * Lookup destination by {addr,port} in the given service
612 */
613static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200614ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
615 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616{
617 struct ip_vs_dest *dest;
618
619 /*
620 * Find the destination for the given service
621 */
622 list_for_each_entry(dest, &svc->destinations, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200623 if ((dest->af == svc->af)
624 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
625 && (dest->port == dport)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626 /* HIT */
627 return dest;
628 }
629 }
630
631 return NULL;
632}
633
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800634/*
635 * Find destination by {daddr,dport,vaddr,protocol}
636 * Created to be used in ip_vs_process_message() in
637 * the backup synchronization daemon. It finds the
638 * destination to be bound to the received connection
639 * on the backup.
640 *
641 * ip_vs_lookup_real_service() looked promising, but
642 * seems not working as expected.
643 */
Julius Volz7937df12008-09-02 15:55:48 +0200644struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
645 __be16 dport,
646 const union nf_inet_addr *vaddr,
647 __be16 vport, __u16 protocol)
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800648{
649 struct ip_vs_dest *dest;
650 struct ip_vs_service *svc;
651
Julius Volz7937df12008-09-02 15:55:48 +0200652 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800653 if (!svc)
654 return NULL;
655 dest = ip_vs_lookup_dest(svc, daddr, dport);
656 if (dest)
657 atomic_inc(&dest->refcnt);
658 ip_vs_service_put(svc);
659 return dest;
660}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661
662/*
663 * Lookup dest by {svc,addr,port} in the destination trash.
664 * The destination trash is used to hold the destinations that are removed
665 * from the service table but are still referenced by some conn entries.
666 * The reason to add the destination trash is when the dest is temporary
667 * down (either by administrator or by monitor program), the dest can be
668 * picked back from the trash, the remaining connections to the dest can
669 * continue, and the counting information of the dest is also useful for
670 * scheduling.
671 */
672static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200673ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
674 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675{
676 struct ip_vs_dest *dest, *nxt;
677
678 /*
679 * Find the destination in trash
680 */
681 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200682 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
683 "dest->refcnt=%d\n",
684 dest->vfwmark,
685 IP_VS_DBG_ADDR(svc->af, &dest->addr),
686 ntohs(dest->port),
687 atomic_read(&dest->refcnt));
688 if (dest->af == svc->af &&
689 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 dest->port == dport &&
691 dest->vfwmark == svc->fwmark &&
692 dest->protocol == svc->protocol &&
693 (svc->fwmark ||
Julius Volz7937df12008-09-02 15:55:48 +0200694 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 dest->vport == svc->port))) {
696 /* HIT */
697 return dest;
698 }
699
700 /*
701 * Try to purge the destination from trash if not referenced
702 */
703 if (atomic_read(&dest->refcnt) == 1) {
Julius Volz7937df12008-09-02 15:55:48 +0200704 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
705 "from trash\n",
706 dest->vfwmark,
707 IP_VS_DBG_ADDR(svc->af, &dest->addr),
708 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 list_del(&dest->n_list);
710 ip_vs_dst_reset(dest);
711 __ip_vs_unbind_svc(dest);
712 kfree(dest);
713 }
714 }
715
716 return NULL;
717}
718
719
720/*
721 * Clean up all the destinations in the trash
722 * Called by the ip_vs_control_cleanup()
723 *
724 * When the ip_vs_control_cleanup is activated by ipvs module exit,
725 * the service tables must have been flushed and all the connections
726 * are expired, and the refcnt of each destination in the trash must
727 * be 1, so we simply release them here.
728 */
729static void ip_vs_trash_cleanup(void)
730{
731 struct ip_vs_dest *dest, *nxt;
732
733 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
734 list_del(&dest->n_list);
735 ip_vs_dst_reset(dest);
736 __ip_vs_unbind_svc(dest);
737 kfree(dest);
738 }
739}
740
741
742static void
743ip_vs_zero_stats(struct ip_vs_stats *stats)
744{
745 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000746
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200747 memset(&stats->ustats, 0, sizeof(stats->ustats));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000749
Sven Wegener3a14a3132008-08-10 18:24:41 +0000750 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751}
752
753/*
754 * Update a destination in the given service
755 */
756static void
757__ip_vs_update_dest(struct ip_vs_service *svc,
Julius Volzc860c6b2008-09-02 15:55:36 +0200758 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759{
760 int conn_flags;
761
762 /* set the weight and the flags */
763 atomic_set(&dest->weight, udest->weight);
764 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
765
766 /* check if local node and update the flags */
Vince Busam09571c72008-09-02 15:55:52 +0200767#ifdef CONFIG_IP_VS_IPV6
768 if (svc->af == AF_INET6) {
769 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
770 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
771 | IP_VS_CONN_F_LOCALNODE;
772 }
773 } else
774#endif
775 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
776 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
777 | IP_VS_CONN_F_LOCALNODE;
778 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779
780 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
781 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
782 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
783 } else {
784 /*
785 * Put the real service in ip_vs_rtable if not present.
786 * For now only for NAT!
787 */
788 write_lock_bh(&__ip_vs_rs_lock);
789 ip_vs_rs_hash(dest);
790 write_unlock_bh(&__ip_vs_rs_lock);
791 }
792 atomic_set(&dest->conn_flags, conn_flags);
793
794 /* bind the service */
795 if (!dest->svc) {
796 __ip_vs_bind_svc(dest, svc);
797 } else {
798 if (dest->svc != svc) {
799 __ip_vs_unbind_svc(dest);
800 ip_vs_zero_stats(&dest->stats);
801 __ip_vs_bind_svc(dest, svc);
802 }
803 }
804
805 /* set the dest status flags */
806 dest->flags |= IP_VS_DEST_F_AVAILABLE;
807
808 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
809 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
810 dest->u_threshold = udest->u_threshold;
811 dest->l_threshold = udest->l_threshold;
812}
813
814
815/*
816 * Create a destination for the given service
817 */
818static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200819ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820 struct ip_vs_dest **dest_p)
821{
822 struct ip_vs_dest *dest;
823 unsigned atype;
824
825 EnterFunction(2);
826
Vince Busam09571c72008-09-02 15:55:52 +0200827#ifdef CONFIG_IP_VS_IPV6
828 if (svc->af == AF_INET6) {
829 atype = ipv6_addr_type(&udest->addr.in6);
Sven Wegener3bfb92f2008-09-05 16:53:49 +0200830 if ((!(atype & IPV6_ADDR_UNICAST) ||
831 atype & IPV6_ADDR_LINKLOCAL) &&
Vince Busam09571c72008-09-02 15:55:52 +0200832 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
833 return -EINVAL;
834 } else
835#endif
836 {
837 atype = inet_addr_type(&init_net, udest->addr.ip);
838 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
839 return -EINVAL;
840 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700842 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 if (dest == NULL) {
844 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
845 return -ENOMEM;
846 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847
Julius Volzc860c6b2008-09-02 15:55:36 +0200848 dest->af = svc->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 dest->protocol = svc->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +0200850 dest->vaddr = svc->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 dest->vport = svc->port;
852 dest->vfwmark = svc->fwmark;
Julius Volzc860c6b2008-09-02 15:55:36 +0200853 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854 dest->port = udest->port;
855
856 atomic_set(&dest->activeconns, 0);
857 atomic_set(&dest->inactconns, 0);
858 atomic_set(&dest->persistconns, 0);
859 atomic_set(&dest->refcnt, 0);
860
861 INIT_LIST_HEAD(&dest->d_list);
862 spin_lock_init(&dest->dst_lock);
863 spin_lock_init(&dest->stats.lock);
864 __ip_vs_update_dest(svc, dest, udest);
865 ip_vs_new_estimator(&dest->stats);
866
867 *dest_p = dest;
868
869 LeaveFunction(2);
870 return 0;
871}
872
873
874/*
875 * Add a destination into an existing service
876 */
877static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200878ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879{
880 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200881 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700882 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883 int ret;
884
885 EnterFunction(2);
886
887 if (udest->weight < 0) {
888 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
889 return -ERANGE;
890 }
891
892 if (udest->l_threshold > udest->u_threshold) {
893 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
894 "upper threshold\n");
895 return -ERANGE;
896 }
897
Julius Volzc860c6b2008-09-02 15:55:36 +0200898 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
899
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900 /*
901 * Check if the dest already exists in the list
902 */
Julius Volz7937df12008-09-02 15:55:48 +0200903 dest = ip_vs_lookup_dest(svc, &daddr, dport);
904
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 if (dest != NULL) {
906 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
907 return -EEXIST;
908 }
909
910 /*
911 * Check if the dest already exists in the trash and
912 * is from the same service
913 */
Julius Volz7937df12008-09-02 15:55:48 +0200914 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
915
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916 if (dest != NULL) {
Julius Volzcfc78c52008-09-02 15:55:53 +0200917 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
918 "dest->refcnt=%d, service %u/%s:%u\n",
919 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
920 atomic_read(&dest->refcnt),
921 dest->vfwmark,
922 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
923 ntohs(dest->vport));
924
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 __ip_vs_update_dest(svc, dest, udest);
926
927 /*
928 * Get the destination from the trash
929 */
930 list_del(&dest->n_list);
931
932 ip_vs_new_estimator(&dest->stats);
933
934 write_lock_bh(&__ip_vs_svc_lock);
935
936 /*
937 * Wait until all other svc users go away.
938 */
939 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
940
941 list_add(&dest->n_list, &svc->destinations);
942 svc->num_dests++;
943
944 /* call the update_service function of its scheduler */
Sven Wegener82dfb6f2008-08-11 19:36:06 +0000945 if (svc->scheduler->update_service)
946 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947
948 write_unlock_bh(&__ip_vs_svc_lock);
949 return 0;
950 }
951
952 /*
953 * Allocate and initialize the dest structure
954 */
955 ret = ip_vs_new_dest(svc, udest, &dest);
956 if (ret) {
957 return ret;
958 }
959
960 /*
961 * Add the dest entry into the list
962 */
963 atomic_inc(&dest->refcnt);
964
965 write_lock_bh(&__ip_vs_svc_lock);
966
967 /*
968 * Wait until all other svc users go away.
969 */
970 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
971
972 list_add(&dest->n_list, &svc->destinations);
973 svc->num_dests++;
974
975 /* call the update_service function of its scheduler */
Sven Wegener82dfb6f2008-08-11 19:36:06 +0000976 if (svc->scheduler->update_service)
977 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978
979 write_unlock_bh(&__ip_vs_svc_lock);
980
981 LeaveFunction(2);
982
983 return 0;
984}
985
986
987/*
988 * Edit a destination in the given service
989 */
990static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200991ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992{
993 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200994 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700995 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996
997 EnterFunction(2);
998
999 if (udest->weight < 0) {
1000 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
1001 return -ERANGE;
1002 }
1003
1004 if (udest->l_threshold > udest->u_threshold) {
1005 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
1006 "upper threshold\n");
1007 return -ERANGE;
1008 }
1009
Julius Volzc860c6b2008-09-02 15:55:36 +02001010 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1011
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012 /*
1013 * Lookup the destination list
1014 */
Julius Volz7937df12008-09-02 15:55:48 +02001015 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1016
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 if (dest == NULL) {
1018 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
1019 return -ENOENT;
1020 }
1021
1022 __ip_vs_update_dest(svc, dest, udest);
1023
1024 write_lock_bh(&__ip_vs_svc_lock);
1025
1026 /* Wait until all other svc users go away */
Heiko Carstenscae7ca32007-08-10 15:50:30 -07001027 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028
1029 /* call the update_service, because server weight may be changed */
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001030 if (svc->scheduler->update_service)
1031 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032
1033 write_unlock_bh(&__ip_vs_svc_lock);
1034
1035 LeaveFunction(2);
1036
1037 return 0;
1038}
1039
1040
1041/*
1042 * Delete a destination (must be already unlinked from the service)
1043 */
1044static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1045{
1046 ip_vs_kill_estimator(&dest->stats);
1047
1048 /*
1049 * Remove it from the d-linked list with the real services.
1050 */
1051 write_lock_bh(&__ip_vs_rs_lock);
1052 ip_vs_rs_unhash(dest);
1053 write_unlock_bh(&__ip_vs_rs_lock);
1054
1055 /*
1056 * Decrease the refcnt of the dest, and free the dest
1057 * if nobody refers to it (refcnt=0). Otherwise, throw
1058 * the destination into the trash.
1059 */
1060 if (atomic_dec_and_test(&dest->refcnt)) {
1061 ip_vs_dst_reset(dest);
1062 /* simply decrease svc->refcnt here, let the caller check
1063 and release the service if nobody refers to it.
1064 Only user context can release destination and service,
1065 and only one user context can update virtual service at a
1066 time, so the operation here is OK */
1067 atomic_dec(&dest->svc->refcnt);
1068 kfree(dest);
1069 } else {
Julius Volzcfc78c52008-09-02 15:55:53 +02001070 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1071 "dest->refcnt=%d\n",
1072 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1073 ntohs(dest->port),
1074 atomic_read(&dest->refcnt));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 list_add(&dest->n_list, &ip_vs_dest_trash);
1076 atomic_inc(&dest->refcnt);
1077 }
1078}
1079
1080
1081/*
1082 * Unlink a destination from the given service
1083 */
1084static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1085 struct ip_vs_dest *dest,
1086 int svcupd)
1087{
1088 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1089
1090 /*
1091 * Remove it from the d-linked destination list.
1092 */
1093 list_del(&dest->n_list);
1094 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001095
1096 /*
1097 * Call the update_service function of its scheduler
1098 */
1099 if (svcupd && svc->scheduler->update_service)
1100 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101}
1102
1103
1104/*
1105 * Delete a destination server in the given service
1106 */
1107static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001108ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109{
1110 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -07001111 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112
1113 EnterFunction(2);
1114
Julius Volz7937df12008-09-02 15:55:48 +02001115 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
Julius Volzc860c6b2008-09-02 15:55:36 +02001116
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117 if (dest == NULL) {
1118 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1119 return -ENOENT;
1120 }
1121
1122 write_lock_bh(&__ip_vs_svc_lock);
1123
1124 /*
1125 * Wait until all other svc users go away.
1126 */
1127 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1128
1129 /*
1130 * Unlink dest from the service
1131 */
1132 __ip_vs_unlink_dest(svc, dest, 1);
1133
1134 write_unlock_bh(&__ip_vs_svc_lock);
1135
1136 /*
1137 * Delete the destination
1138 */
1139 __ip_vs_del_dest(dest);
1140
1141 LeaveFunction(2);
1142
1143 return 0;
1144}
1145
1146
1147/*
1148 * Add a service into the service hash table
1149 */
1150static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001151ip_vs_add_service(struct ip_vs_service_user_kern *u,
1152 struct ip_vs_service **svc_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153{
1154 int ret = 0;
1155 struct ip_vs_scheduler *sched = NULL;
1156 struct ip_vs_service *svc = NULL;
1157
1158 /* increase the module use count */
1159 ip_vs_use_count_inc();
1160
1161 /* Lookup the scheduler by 'u->sched_name' */
1162 sched = ip_vs_scheduler_get(u->sched_name);
1163 if (sched == NULL) {
1164 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1165 u->sched_name);
1166 ret = -ENOENT;
1167 goto out_mod_dec;
1168 }
1169
Julius Volzf94fd042008-09-02 15:55:55 +02001170#ifdef CONFIG_IP_VS_IPV6
1171 if (u->af == AF_INET6) {
1172 if (!sched->supports_ipv6) {
1173 ret = -EAFNOSUPPORT;
1174 goto out_err;
1175 }
1176 if ((u->netmask < 1) || (u->netmask > 128)) {
1177 ret = -EINVAL;
1178 goto out_err;
1179 }
1180 }
1181#endif
1182
Panagiotis Issaris0da974f2006-07-21 14:51:30 -07001183 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184 if (svc == NULL) {
1185 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1186 ret = -ENOMEM;
1187 goto out_err;
1188 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189
1190 /* I'm the first user of the service */
1191 atomic_set(&svc->usecnt, 1);
1192 atomic_set(&svc->refcnt, 0);
1193
Julius Volzc860c6b2008-09-02 15:55:36 +02001194 svc->af = u->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 svc->protocol = u->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +02001196 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197 svc->port = u->port;
1198 svc->fwmark = u->fwmark;
1199 svc->flags = u->flags;
1200 svc->timeout = u->timeout * HZ;
1201 svc->netmask = u->netmask;
1202
1203 INIT_LIST_HEAD(&svc->destinations);
1204 rwlock_init(&svc->sched_lock);
1205 spin_lock_init(&svc->stats.lock);
1206
1207 /* Bind the scheduler */
1208 ret = ip_vs_bind_scheduler(svc, sched);
1209 if (ret)
1210 goto out_err;
1211 sched = NULL;
1212
1213 /* Update the virtual service counters */
1214 if (svc->port == FTPPORT)
1215 atomic_inc(&ip_vs_ftpsvc_counter);
1216 else if (svc->port == 0)
1217 atomic_inc(&ip_vs_nullsvc_counter);
1218
1219 ip_vs_new_estimator(&svc->stats);
Julius Volzf94fd042008-09-02 15:55:55 +02001220
1221 /* Count only IPv4 services for old get/setsockopt interface */
1222 if (svc->af == AF_INET)
1223 ip_vs_num_services++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224
1225 /* Hash the service into the service table */
1226 write_lock_bh(&__ip_vs_svc_lock);
1227 ip_vs_svc_hash(svc);
1228 write_unlock_bh(&__ip_vs_svc_lock);
1229
1230 *svc_p = svc;
1231 return 0;
1232
1233 out_err:
1234 if (svc != NULL) {
1235 if (svc->scheduler)
1236 ip_vs_unbind_scheduler(svc);
1237 if (svc->inc) {
1238 local_bh_disable();
1239 ip_vs_app_inc_put(svc->inc);
1240 local_bh_enable();
1241 }
1242 kfree(svc);
1243 }
1244 ip_vs_scheduler_put(sched);
1245
1246 out_mod_dec:
1247 /* decrease the module use count */
1248 ip_vs_use_count_dec();
1249
1250 return ret;
1251}
1252
1253
1254/*
1255 * Edit a service and bind it with a new scheduler
1256 */
1257static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001258ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259{
1260 struct ip_vs_scheduler *sched, *old_sched;
1261 int ret = 0;
1262
1263 /*
1264 * Lookup the scheduler, by 'u->sched_name'
1265 */
1266 sched = ip_vs_scheduler_get(u->sched_name);
1267 if (sched == NULL) {
1268 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1269 u->sched_name);
1270 return -ENOENT;
1271 }
1272 old_sched = sched;
1273
Julius Volzf94fd042008-09-02 15:55:55 +02001274#ifdef CONFIG_IP_VS_IPV6
1275 if (u->af == AF_INET6) {
1276 if (!sched->supports_ipv6) {
Sven Wegenera5ba4bf2008-09-05 13:47:37 +02001277 ret = -EAFNOSUPPORT;
Julius Volzf94fd042008-09-02 15:55:55 +02001278 goto out;
1279 }
1280 if ((u->netmask < 1) || (u->netmask > 128)) {
Sven Wegenera5ba4bf2008-09-05 13:47:37 +02001281 ret = -EINVAL;
Julius Volzf94fd042008-09-02 15:55:55 +02001282 goto out;
1283 }
1284 }
1285#endif
1286
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287 write_lock_bh(&__ip_vs_svc_lock);
1288
1289 /*
1290 * Wait until all other svc users go away.
1291 */
1292 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1293
1294 /*
1295 * Set the flags and timeout value
1296 */
1297 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1298 svc->timeout = u->timeout * HZ;
1299 svc->netmask = u->netmask;
1300
1301 old_sched = svc->scheduler;
1302 if (sched != old_sched) {
1303 /*
1304 * Unbind the old scheduler
1305 */
1306 if ((ret = ip_vs_unbind_scheduler(svc))) {
1307 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001308 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 }
1310
1311 /*
1312 * Bind the new scheduler
1313 */
1314 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1315 /*
1316 * If ip_vs_bind_scheduler fails, restore the old
1317 * scheduler.
1318 * The main reason of failure is out of memory.
1319 *
1320 * The question is if the old scheduler can be
1321 * restored all the time. TODO: if it cannot be
1322 * restored some time, we must delete the service,
1323 * otherwise the system may crash.
1324 */
1325 ip_vs_bind_scheduler(svc, old_sched);
1326 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001327 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328 }
1329 }
1330
Simon Horman9e691ed2008-09-17 10:10:41 +10001331 out_unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332 write_unlock_bh(&__ip_vs_svc_lock);
Simon Horman9e691ed2008-09-17 10:10:41 +10001333 out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334
1335 if (old_sched)
1336 ip_vs_scheduler_put(old_sched);
1337
1338 return ret;
1339}
1340
1341
1342/*
1343 * Delete a service from the service list
1344 * - The service must be unlinked, unlocked and not referenced!
1345 * - We are called under _bh lock
1346 */
1347static void __ip_vs_del_service(struct ip_vs_service *svc)
1348{
1349 struct ip_vs_dest *dest, *nxt;
1350 struct ip_vs_scheduler *old_sched;
1351
Julius Volzf94fd042008-09-02 15:55:55 +02001352 /* Count only IPv4 services for old get/setsockopt interface */
1353 if (svc->af == AF_INET)
1354 ip_vs_num_services--;
1355
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356 ip_vs_kill_estimator(&svc->stats);
1357
1358 /* Unbind scheduler */
1359 old_sched = svc->scheduler;
1360 ip_vs_unbind_scheduler(svc);
1361 if (old_sched)
1362 ip_vs_scheduler_put(old_sched);
1363
1364 /* Unbind app inc */
1365 if (svc->inc) {
1366 ip_vs_app_inc_put(svc->inc);
1367 svc->inc = NULL;
1368 }
1369
1370 /*
1371 * Unlink the whole destination list
1372 */
1373 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1374 __ip_vs_unlink_dest(svc, dest, 0);
1375 __ip_vs_del_dest(dest);
1376 }
1377
1378 /*
1379 * Update the virtual service counters
1380 */
1381 if (svc->port == FTPPORT)
1382 atomic_dec(&ip_vs_ftpsvc_counter);
1383 else if (svc->port == 0)
1384 atomic_dec(&ip_vs_nullsvc_counter);
1385
1386 /*
1387 * Free the service if nobody refers to it
1388 */
1389 if (atomic_read(&svc->refcnt) == 0)
1390 kfree(svc);
1391
1392 /* decrease the module use count */
1393 ip_vs_use_count_dec();
1394}
1395
1396/*
1397 * Delete a service from the service list
1398 */
1399static int ip_vs_del_service(struct ip_vs_service *svc)
1400{
1401 if (svc == NULL)
1402 return -EEXIST;
1403
1404 /*
1405 * Unhash it from the service table
1406 */
1407 write_lock_bh(&__ip_vs_svc_lock);
1408
1409 ip_vs_svc_unhash(svc);
1410
1411 /*
1412 * Wait until all the svc users go away.
1413 */
1414 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1415
1416 __ip_vs_del_service(svc);
1417
1418 write_unlock_bh(&__ip_vs_svc_lock);
1419
1420 return 0;
1421}
1422
1423
1424/*
1425 * Flush all the virtual services
1426 */
1427static int ip_vs_flush(void)
1428{
1429 int idx;
1430 struct ip_vs_service *svc, *nxt;
1431
1432 /*
1433 * Flush the service table hashed by <protocol,addr,port>
1434 */
1435 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1436 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1437 write_lock_bh(&__ip_vs_svc_lock);
1438 ip_vs_svc_unhash(svc);
1439 /*
1440 * Wait until all the svc users go away.
1441 */
1442 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1443 __ip_vs_del_service(svc);
1444 write_unlock_bh(&__ip_vs_svc_lock);
1445 }
1446 }
1447
1448 /*
1449 * Flush the service table hashed by fwmark
1450 */
1451 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1452 list_for_each_entry_safe(svc, nxt,
1453 &ip_vs_svc_fwm_table[idx], f_list) {
1454 write_lock_bh(&__ip_vs_svc_lock);
1455 ip_vs_svc_unhash(svc);
1456 /*
1457 * Wait until all the svc users go away.
1458 */
1459 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1460 __ip_vs_del_service(svc);
1461 write_unlock_bh(&__ip_vs_svc_lock);
1462 }
1463 }
1464
1465 return 0;
1466}
1467
1468
1469/*
1470 * Zero counters in a service or all services
1471 */
1472static int ip_vs_zero_service(struct ip_vs_service *svc)
1473{
1474 struct ip_vs_dest *dest;
1475
1476 write_lock_bh(&__ip_vs_svc_lock);
1477 list_for_each_entry(dest, &svc->destinations, n_list) {
1478 ip_vs_zero_stats(&dest->stats);
1479 }
1480 ip_vs_zero_stats(&svc->stats);
1481 write_unlock_bh(&__ip_vs_svc_lock);
1482 return 0;
1483}
1484
1485static int ip_vs_zero_all(void)
1486{
1487 int idx;
1488 struct ip_vs_service *svc;
1489
1490 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1491 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1492 ip_vs_zero_service(svc);
1493 }
1494 }
1495
1496 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1497 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1498 ip_vs_zero_service(svc);
1499 }
1500 }
1501
1502 ip_vs_zero_stats(&ip_vs_stats);
1503 return 0;
1504}
1505
1506
1507static int
1508proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1509 void __user *buffer, size_t *lenp, loff_t *ppos)
1510{
1511 int *valp = table->data;
1512 int val = *valp;
1513 int rc;
1514
1515 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1516 if (write && (*valp != val)) {
1517 if ((*valp < 0) || (*valp > 3)) {
1518 /* Restore the correct value */
1519 *valp = val;
1520 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521 update_defense_level();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522 }
1523 }
1524 return rc;
1525}
1526
1527
1528static int
1529proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1530 void __user *buffer, size_t *lenp, loff_t *ppos)
1531{
1532 int *valp = table->data;
1533 int val[2];
1534 int rc;
1535
1536 /* backup the value first */
1537 memcpy(val, valp, sizeof(val));
1538
1539 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1540 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1541 /* Restore the correct value */
1542 memcpy(valp, val, sizeof(val));
1543 }
1544 return rc;
1545}
1546
1547
1548/*
1549 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1550 */
1551
1552static struct ctl_table vs_vars[] = {
1553 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001554 .procname = "amemthresh",
1555 .data = &sysctl_ip_vs_amemthresh,
1556 .maxlen = sizeof(int),
1557 .mode = 0644,
1558 .proc_handler = &proc_dointvec,
1559 },
1560#ifdef CONFIG_IP_VS_DEBUG
1561 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562 .procname = "debug_level",
1563 .data = &sysctl_ip_vs_debug_level,
1564 .maxlen = sizeof(int),
1565 .mode = 0644,
1566 .proc_handler = &proc_dointvec,
1567 },
1568#endif
1569 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570 .procname = "am_droprate",
1571 .data = &sysctl_ip_vs_am_droprate,
1572 .maxlen = sizeof(int),
1573 .mode = 0644,
1574 .proc_handler = &proc_dointvec,
1575 },
1576 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001577 .procname = "drop_entry",
1578 .data = &sysctl_ip_vs_drop_entry,
1579 .maxlen = sizeof(int),
1580 .mode = 0644,
1581 .proc_handler = &proc_do_defense_mode,
1582 },
1583 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001584 .procname = "drop_packet",
1585 .data = &sysctl_ip_vs_drop_packet,
1586 .maxlen = sizeof(int),
1587 .mode = 0644,
1588 .proc_handler = &proc_do_defense_mode,
1589 },
1590 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591 .procname = "secure_tcp",
1592 .data = &sysctl_ip_vs_secure_tcp,
1593 .maxlen = sizeof(int),
1594 .mode = 0644,
1595 .proc_handler = &proc_do_defense_mode,
1596 },
1597#if 0
1598 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 .procname = "timeout_established",
1600 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1601 .maxlen = sizeof(int),
1602 .mode = 0644,
1603 .proc_handler = &proc_dointvec_jiffies,
1604 },
1605 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606 .procname = "timeout_synsent",
1607 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1608 .maxlen = sizeof(int),
1609 .mode = 0644,
1610 .proc_handler = &proc_dointvec_jiffies,
1611 },
1612 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613 .procname = "timeout_synrecv",
1614 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1615 .maxlen = sizeof(int),
1616 .mode = 0644,
1617 .proc_handler = &proc_dointvec_jiffies,
1618 },
1619 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 .procname = "timeout_finwait",
1621 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1622 .maxlen = sizeof(int),
1623 .mode = 0644,
1624 .proc_handler = &proc_dointvec_jiffies,
1625 },
1626 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627 .procname = "timeout_timewait",
1628 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1629 .maxlen = sizeof(int),
1630 .mode = 0644,
1631 .proc_handler = &proc_dointvec_jiffies,
1632 },
1633 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634 .procname = "timeout_close",
1635 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1636 .maxlen = sizeof(int),
1637 .mode = 0644,
1638 .proc_handler = &proc_dointvec_jiffies,
1639 },
1640 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641 .procname = "timeout_closewait",
1642 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1643 .maxlen = sizeof(int),
1644 .mode = 0644,
1645 .proc_handler = &proc_dointvec_jiffies,
1646 },
1647 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648 .procname = "timeout_lastack",
1649 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1650 .maxlen = sizeof(int),
1651 .mode = 0644,
1652 .proc_handler = &proc_dointvec_jiffies,
1653 },
1654 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655 .procname = "timeout_listen",
1656 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1657 .maxlen = sizeof(int),
1658 .mode = 0644,
1659 .proc_handler = &proc_dointvec_jiffies,
1660 },
1661 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662 .procname = "timeout_synack",
1663 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1664 .maxlen = sizeof(int),
1665 .mode = 0644,
1666 .proc_handler = &proc_dointvec_jiffies,
1667 },
1668 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 .procname = "timeout_udp",
1670 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1671 .maxlen = sizeof(int),
1672 .mode = 0644,
1673 .proc_handler = &proc_dointvec_jiffies,
1674 },
1675 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 .procname = "timeout_icmp",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1678 .maxlen = sizeof(int),
1679 .mode = 0644,
1680 .proc_handler = &proc_dointvec_jiffies,
1681 },
1682#endif
1683 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 .procname = "cache_bypass",
1685 .data = &sysctl_ip_vs_cache_bypass,
1686 .maxlen = sizeof(int),
1687 .mode = 0644,
1688 .proc_handler = &proc_dointvec,
1689 },
1690 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 .procname = "expire_nodest_conn",
1692 .data = &sysctl_ip_vs_expire_nodest_conn,
1693 .maxlen = sizeof(int),
1694 .mode = 0644,
1695 .proc_handler = &proc_dointvec,
1696 },
1697 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 .procname = "expire_quiescent_template",
1699 .data = &sysctl_ip_vs_expire_quiescent_template,
1700 .maxlen = sizeof(int),
1701 .mode = 0644,
1702 .proc_handler = &proc_dointvec,
1703 },
1704 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705 .procname = "sync_threshold",
1706 .data = &sysctl_ip_vs_sync_threshold,
1707 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1708 .mode = 0644,
1709 .proc_handler = &proc_do_sync_threshold,
1710 },
1711 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712 .procname = "nat_icmp_send",
1713 .data = &sysctl_ip_vs_nat_icmp_send,
1714 .maxlen = sizeof(int),
1715 .mode = 0644,
1716 .proc_handler = &proc_dointvec,
1717 },
1718 { .ctl_name = 0 }
1719};
1720
Sven Wegener5587da52008-08-10 18:24:40 +00001721const struct ctl_path net_vs_ctl_path[] = {
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001722 { .procname = "net", .ctl_name = CTL_NET, },
1723 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1724 { .procname = "vs", },
1725 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001726};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001727EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728
/* Header of the registered sysctl table; presumably set at init time elsewhere in this file */
static struct ctl_table_header *sysctl_header;
1730
1731#ifdef CONFIG_PROC_FS
1732
/*
 *	Private seq_file cursor for the service listing: which hash table
 *	the current service came from and which bucket of that table.
 */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* bucket index within 'table' */
};
1737
1738/*
1739 * Write the contents of the VS rule table to a PROCfs file.
1740 * (It is kept just for backward compatibility)
1741 */
1742static inline const char *ip_vs_fwd_name(unsigned flags)
1743{
1744 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1745 case IP_VS_CONN_F_LOCALNODE:
1746 return "Local";
1747 case IP_VS_CONN_F_TUNNEL:
1748 return "Tunnel";
1749 case IP_VS_CONN_F_DROUTE:
1750 return "Route";
1751 default:
1752 return "Masq";
1753 }
1754}
1755
1756
1757/* Get the Nth entry in the two lists */
1758static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1759{
1760 struct ip_vs_iter *iter = seq->private;
1761 int idx;
1762 struct ip_vs_service *svc;
1763
1764 /* look in hash by protocol */
1765 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1766 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1767 if (pos-- == 0){
1768 iter->table = ip_vs_svc_table;
1769 iter->bucket = idx;
1770 return svc;
1771 }
1772 }
1773 }
1774
1775 /* keep looking in fwmark */
1776 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1777 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1778 if (pos-- == 0) {
1779 iter->table = ip_vs_svc_fwm_table;
1780 iter->bucket = idx;
1781 return svc;
1782 }
1783 }
1784 }
1785
1786 return NULL;
1787}
1788
1789static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1790{
1791
1792 read_lock_bh(&__ip_vs_svc_lock);
1793 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1794}
1795
1796
1797static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1798{
1799 struct list_head *e;
1800 struct ip_vs_iter *iter;
1801 struct ip_vs_service *svc;
1802
1803 ++*pos;
1804 if (v == SEQ_START_TOKEN)
1805 return ip_vs_info_array(seq,0);
1806
1807 svc = v;
1808 iter = seq->private;
1809
1810 if (iter->table == ip_vs_svc_table) {
1811 /* next service in table hashed by protocol */
1812 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1813 return list_entry(e, struct ip_vs_service, s_list);
1814
1815
1816 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1817 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1818 s_list) {
1819 return svc;
1820 }
1821 }
1822
1823 iter->table = ip_vs_svc_fwm_table;
1824 iter->bucket = -1;
1825 goto scan_fwmark;
1826 }
1827
1828 /* next service in hashed by fwmark */
1829 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1830 return list_entry(e, struct ip_vs_service, f_list);
1831
1832 scan_fwmark:
1833 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1834 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1835 f_list)
1836 return svc;
1837 }
1838
1839 return NULL;
1840}
1841
1842static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1843{
1844 read_unlock_bh(&__ip_vs_svc_lock);
1845}
1846
1847
1848static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1849{
1850 if (v == SEQ_START_TOKEN) {
1851 seq_printf(seq,
1852 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1853 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1854 seq_puts(seq,
1855 "Prot LocalAddress:Port Scheduler Flags\n");
1856 seq_puts(seq,
1857 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1858 } else {
1859 const struct ip_vs_service *svc = v;
1860 const struct ip_vs_iter *iter = seq->private;
1861 const struct ip_vs_dest *dest;
1862
Vince Busam667a5f12008-09-02 15:55:49 +02001863 if (iter->table == ip_vs_svc_table) {
1864#ifdef CONFIG_IP_VS_IPV6
1865 if (svc->af == AF_INET6)
1866 seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
1867 ip_vs_proto_name(svc->protocol),
1868 NIP6(svc->addr.in6),
1869 ntohs(svc->port),
1870 svc->scheduler->name);
1871 else
1872#endif
1873 seq_printf(seq, "%s %08X:%04X %s ",
1874 ip_vs_proto_name(svc->protocol),
1875 ntohl(svc->addr.ip),
1876 ntohs(svc->port),
1877 svc->scheduler->name);
1878 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879 seq_printf(seq, "FWM %08X %s ",
1880 svc->fwmark, svc->scheduler->name);
Vince Busam667a5f12008-09-02 15:55:49 +02001881 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882
1883 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1884 seq_printf(seq, "persistent %d %08X\n",
1885 svc->timeout,
1886 ntohl(svc->netmask));
1887 else
1888 seq_putc(seq, '\n');
1889
1890 list_for_each_entry(dest, &svc->destinations, n_list) {
Vince Busam667a5f12008-09-02 15:55:49 +02001891#ifdef CONFIG_IP_VS_IPV6
1892 if (dest->af == AF_INET6)
1893 seq_printf(seq,
1894 " -> [" NIP6_FMT "]:%04X"
1895 " %-7s %-6d %-10d %-10d\n",
1896 NIP6(dest->addr.in6),
1897 ntohs(dest->port),
1898 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1899 atomic_read(&dest->weight),
1900 atomic_read(&dest->activeconns),
1901 atomic_read(&dest->inactconns));
1902 else
1903#endif
1904 seq_printf(seq,
1905 " -> %08X:%04X "
1906 "%-7s %-6d %-10d %-10d\n",
1907 ntohl(dest->addr.ip),
1908 ntohs(dest->port),
1909 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1910 atomic_read(&dest->weight),
1911 atomic_read(&dest->activeconns),
1912 atomic_read(&dest->inactconns));
1913
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914 }
1915 }
1916 return 0;
1917}
1918
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001919static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001920 .start = ip_vs_info_seq_start,
1921 .next = ip_vs_info_seq_next,
1922 .stop = ip_vs_info_seq_stop,
1923 .show = ip_vs_info_seq_show,
1924};
1925
1926static int ip_vs_info_open(struct inode *inode, struct file *file)
1927{
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001928 return seq_open_private(file, &ip_vs_info_seq_ops,
1929 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001930}
1931
Arjan van de Ven9a321442007-02-12 00:55:35 -08001932static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933 .owner = THIS_MODULE,
1934 .open = ip_vs_info_open,
1935 .read = seq_read,
1936 .llseek = seq_lseek,
1937 .release = seq_release_private,
1938};
1939
1940#endif
1941
Sven Wegener519e49e2008-08-10 18:24:41 +00001942struct ip_vs_stats ip_vs_stats = {
1943 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1944};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945
1946#ifdef CONFIG_PROC_FS
1947static int ip_vs_stats_show(struct seq_file *seq, void *v)
1948{
1949
1950/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1951 seq_puts(seq,
1952 " Total Incoming Outgoing Incoming Outgoing\n");
1953 seq_printf(seq,
1954 " Conns Packets Packets Bytes Bytes\n");
1955
1956 spin_lock_bh(&ip_vs_stats.lock);
Sven Wegenere9c0ce22008-09-08 13:39:04 +02001957 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1958 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1959 (unsigned long long) ip_vs_stats.ustats.inbytes,
1960 (unsigned long long) ip_vs_stats.ustats.outbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961
1962/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1963 seq_puts(seq,
1964 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1965 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
Sven Wegenere9c0ce22008-09-08 13:39:04 +02001966 ip_vs_stats.ustats.cps,
1967 ip_vs_stats.ustats.inpps,
1968 ip_vs_stats.ustats.outpps,
1969 ip_vs_stats.ustats.inbps,
1970 ip_vs_stats.ustats.outbps);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001971 spin_unlock_bh(&ip_vs_stats.lock);
1972
1973 return 0;
1974}
1975
1976static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1977{
1978 return single_open(file, ip_vs_stats_show, NULL);
1979}
1980
Arjan van de Ven9a321442007-02-12 00:55:35 -08001981static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001982 .owner = THIS_MODULE,
1983 .open = ip_vs_stats_seq_open,
1984 .read = seq_read,
1985 .llseek = seq_lseek,
1986 .release = single_release,
1987};
1988
1989#endif
1990
1991/*
1992 * Set timeout values for tcp tcpfin udp in the timeout_table.
1993 */
1994static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1995{
1996 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1997 u->tcp_timeout,
1998 u->tcp_fin_timeout,
1999 u->udp_timeout);
2000
2001#ifdef CONFIG_IP_VS_PROTO_TCP
2002 if (u->tcp_timeout) {
2003 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2004 = u->tcp_timeout * HZ;
2005 }
2006
2007 if (u->tcp_fin_timeout) {
2008 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2009 = u->tcp_fin_timeout * HZ;
2010 }
2011#endif
2012
2013#ifdef CONFIG_IP_VS_PROTO_UDP
2014 if (u->udp_timeout) {
2015 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2016 = u->udp_timeout * HZ;
2017 }
2018#endif
2019 return 0;
2020}
2021
2022
/*
 *	Helpers for validating the argument of the set sockopts.
 *
 *	SET_CMDID() maps a sockopt command number to a zero-based index by
 *	subtracting IP_VS_BASE_CTL.  Its argument is parenthesized so that
 *	an expression with low-precedence operators expands correctly.
 *	The *_ARG_LEN macros give the expected argument size per command
 *	class; MAX_ARG_LEN is the largest of them (service plus
 *	destination).
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2030
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002031static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002032 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2033 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2034 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2035 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2036 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2037 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2038 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2039 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2040 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2041 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2042 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2043};
2044
Julius Volzc860c6b2008-09-02 15:55:36 +02002045static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2046 struct ip_vs_service_user *usvc_compat)
2047{
2048 usvc->af = AF_INET;
2049 usvc->protocol = usvc_compat->protocol;
2050 usvc->addr.ip = usvc_compat->addr;
2051 usvc->port = usvc_compat->port;
2052 usvc->fwmark = usvc_compat->fwmark;
2053
2054 /* Deep copy of sched_name is not needed here */
2055 usvc->sched_name = usvc_compat->sched_name;
2056
2057 usvc->flags = usvc_compat->flags;
2058 usvc->timeout = usvc_compat->timeout;
2059 usvc->netmask = usvc_compat->netmask;
2060}
2061
2062static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2063 struct ip_vs_dest_user *udest_compat)
2064{
2065 udest->addr.ip = udest_compat->addr;
2066 udest->port = udest_compat->port;
2067 udest->conn_flags = udest_compat->conn_flags;
2068 udest->weight = udest_compat->weight;
2069 udest->u_threshold = udest_compat->u_threshold;
2070 udest->l_threshold = udest_compat->l_threshold;
2071}
2072
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073static int
2074do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2075{
2076 int ret;
2077 unsigned char arg[MAX_ARG_LEN];
Julius Volzc860c6b2008-09-02 15:55:36 +02002078 struct ip_vs_service_user *usvc_compat;
2079 struct ip_vs_service_user_kern usvc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002080 struct ip_vs_service *svc;
Julius Volzc860c6b2008-09-02 15:55:36 +02002081 struct ip_vs_dest_user *udest_compat;
2082 struct ip_vs_dest_user_kern udest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083
2084 if (!capable(CAP_NET_ADMIN))
2085 return -EPERM;
2086
2087 if (len != set_arglen[SET_CMDID(cmd)]) {
2088 IP_VS_ERR("set_ctl: len %u != %u\n",
2089 len, set_arglen[SET_CMDID(cmd)]);
2090 return -EINVAL;
2091 }
2092
2093 if (copy_from_user(arg, user, len) != 0)
2094 return -EFAULT;
2095
2096 /* increase the module use count */
2097 ip_vs_use_count_inc();
2098
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002099 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100 ret = -ERESTARTSYS;
2101 goto out_dec;
2102 }
2103
2104 if (cmd == IP_VS_SO_SET_FLUSH) {
2105 /* Flush the virtual service */
2106 ret = ip_vs_flush();
2107 goto out_unlock;
2108 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2109 /* Set timeout values for (tcp tcpfin udp) */
2110 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2111 goto out_unlock;
2112 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2113 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2114 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2115 goto out_unlock;
2116 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2117 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2118 ret = stop_sync_thread(dm->state);
2119 goto out_unlock;
2120 }
2121
Julius Volzc860c6b2008-09-02 15:55:36 +02002122 usvc_compat = (struct ip_vs_service_user *)arg;
2123 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2124
2125 /* We only use the new structs internally, so copy userspace compat
2126 * structs to extended internal versions */
2127 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2128 ip_vs_copy_udest_compat(&udest, udest_compat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002129
2130 if (cmd == IP_VS_SO_SET_ZERO) {
2131 /* if no service address is set, zero counters in all */
Julius Volzc860c6b2008-09-02 15:55:36 +02002132 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133 ret = ip_vs_zero_all();
2134 goto out_unlock;
2135 }
2136 }
2137
2138 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
Julius Volzc860c6b2008-09-02 15:55:36 +02002139 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002140 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
Julius Volzc860c6b2008-09-02 15:55:36 +02002141 usvc.protocol, NIPQUAD(usvc.addr.ip),
2142 ntohs(usvc.port), usvc.sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002143 ret = -EFAULT;
2144 goto out_unlock;
2145 }
2146
2147 /* Lookup the exact service by <protocol, addr, port> or fwmark */
Julius Volzc860c6b2008-09-02 15:55:36 +02002148 if (usvc.fwmark == 0)
Julius Volzb18610d2008-09-02 15:55:37 +02002149 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2150 &usvc.addr, usvc.port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151 else
Julius Volzb18610d2008-09-02 15:55:37 +02002152 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153
2154 if (cmd != IP_VS_SO_SET_ADD
Julius Volzc860c6b2008-09-02 15:55:36 +02002155 && (svc == NULL || svc->protocol != usvc.protocol)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002156 ret = -ESRCH;
2157 goto out_unlock;
2158 }
2159
2160 switch (cmd) {
2161 case IP_VS_SO_SET_ADD:
2162 if (svc != NULL)
2163 ret = -EEXIST;
2164 else
Julius Volzc860c6b2008-09-02 15:55:36 +02002165 ret = ip_vs_add_service(&usvc, &svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002166 break;
2167 case IP_VS_SO_SET_EDIT:
Julius Volzc860c6b2008-09-02 15:55:36 +02002168 ret = ip_vs_edit_service(svc, &usvc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002169 break;
2170 case IP_VS_SO_SET_DEL:
2171 ret = ip_vs_del_service(svc);
2172 if (!ret)
2173 goto out_unlock;
2174 break;
2175 case IP_VS_SO_SET_ZERO:
2176 ret = ip_vs_zero_service(svc);
2177 break;
2178 case IP_VS_SO_SET_ADDDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002179 ret = ip_vs_add_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180 break;
2181 case IP_VS_SO_SET_EDITDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002182 ret = ip_vs_edit_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002183 break;
2184 case IP_VS_SO_SET_DELDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002185 ret = ip_vs_del_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186 break;
2187 default:
2188 ret = -EINVAL;
2189 }
2190
2191 if (svc)
2192 ip_vs_service_put(svc);
2193
2194 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002195 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002196 out_dec:
2197 /* decrease the module use count */
2198 ip_vs_use_count_dec();
2199
2200 return ret;
2201}
2202
2203
2204static void
2205ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2206{
2207 spin_lock_bh(&src->lock);
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002208 memcpy(dst, &src->ustats, sizeof(*dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002209 spin_unlock_bh(&src->lock);
2210}
2211
2212static void
2213ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2214{
2215 dst->protocol = src->protocol;
Julius Volze7ade462008-09-02 15:55:33 +02002216 dst->addr = src->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217 dst->port = src->port;
2218 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002219 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002220 dst->flags = src->flags;
2221 dst->timeout = src->timeout / HZ;
2222 dst->netmask = src->netmask;
2223 dst->num_dests = src->num_dests;
2224 ip_vs_copy_stats(&dst->stats, &src->stats);
2225}
2226
2227static inline int
2228__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2229 struct ip_vs_get_services __user *uptr)
2230{
2231 int idx, count=0;
2232 struct ip_vs_service *svc;
2233 struct ip_vs_service_entry entry;
2234 int ret = 0;
2235
2236 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2237 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002238 /* Only expose IPv4 entries to old interface */
2239 if (svc->af != AF_INET)
2240 continue;
2241
Linus Torvalds1da177e2005-04-16 15:20:36 -07002242 if (count >= get->num_services)
2243 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002244 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245 ip_vs_copy_service(&entry, svc);
2246 if (copy_to_user(&uptr->entrytable[count],
2247 &entry, sizeof(entry))) {
2248 ret = -EFAULT;
2249 goto out;
2250 }
2251 count++;
2252 }
2253 }
2254
2255 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2256 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002257 /* Only expose IPv4 entries to old interface */
2258 if (svc->af != AF_INET)
2259 continue;
2260
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261 if (count >= get->num_services)
2262 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002263 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264 ip_vs_copy_service(&entry, svc);
2265 if (copy_to_user(&uptr->entrytable[count],
2266 &entry, sizeof(entry))) {
2267 ret = -EFAULT;
2268 goto out;
2269 }
2270 count++;
2271 }
2272 }
2273 out:
2274 return ret;
2275}
2276
2277static inline int
2278__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2279 struct ip_vs_get_dests __user *uptr)
2280{
2281 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002282 union nf_inet_addr addr = { .ip = get->addr };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283 int ret = 0;
2284
2285 if (get->fwmark)
Julius Volzb18610d2008-09-02 15:55:37 +02002286 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002287 else
Julius Volzb18610d2008-09-02 15:55:37 +02002288 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2289 get->port);
2290
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291 if (svc) {
2292 int count = 0;
2293 struct ip_vs_dest *dest;
2294 struct ip_vs_dest_entry entry;
2295
2296 list_for_each_entry(dest, &svc->destinations, n_list) {
2297 if (count >= get->num_dests)
2298 break;
2299
Julius Volze7ade462008-09-02 15:55:33 +02002300 entry.addr = dest->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002301 entry.port = dest->port;
2302 entry.conn_flags = atomic_read(&dest->conn_flags);
2303 entry.weight = atomic_read(&dest->weight);
2304 entry.u_threshold = dest->u_threshold;
2305 entry.l_threshold = dest->l_threshold;
2306 entry.activeconns = atomic_read(&dest->activeconns);
2307 entry.inactconns = atomic_read(&dest->inactconns);
2308 entry.persistconns = atomic_read(&dest->persistconns);
2309 ip_vs_copy_stats(&entry.stats, &dest->stats);
2310 if (copy_to_user(&uptr->entrytable[count],
2311 &entry, sizeof(entry))) {
2312 ret = -EFAULT;
2313 break;
2314 }
2315 count++;
2316 }
2317 ip_vs_service_put(svc);
2318 } else
2319 ret = -ESRCH;
2320 return ret;
2321}
2322
2323static inline void
2324__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2325{
2326#ifdef CONFIG_IP_VS_PROTO_TCP
2327 u->tcp_timeout =
2328 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2329 u->tcp_fin_timeout =
2330 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2331#endif
2332#ifdef CONFIG_IP_VS_PROTO_UDP
2333 u->udp_timeout =
2334 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2335#endif
2336}
2337
2338
/* Zero-based index of a get-sockopt command, relative to IP_VS_BASE_CTL */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
/* Minimum argument sizes expected from userspace for the get commands */
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
/* one entry each for the master and the backup daemon */
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2346
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002347static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2349 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2350 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2351 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2352 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2353 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2354 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2355};
2356
2357static int
2358do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2359{
2360 unsigned char arg[128];
2361 int ret = 0;
2362
2363 if (!capable(CAP_NET_ADMIN))
2364 return -EPERM;
2365
2366 if (*len < get_arglen[GET_CMDID(cmd)]) {
2367 IP_VS_ERR("get_ctl: len %u < %u\n",
2368 *len, get_arglen[GET_CMDID(cmd)]);
2369 return -EINVAL;
2370 }
2371
2372 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2373 return -EFAULT;
2374
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002375 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376 return -ERESTARTSYS;
2377
2378 switch (cmd) {
2379 case IP_VS_SO_GET_VERSION:
2380 {
2381 char buf[64];
2382
2383 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2384 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2385 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2386 ret = -EFAULT;
2387 goto out;
2388 }
2389 *len = strlen(buf)+1;
2390 }
2391 break;
2392
2393 case IP_VS_SO_GET_INFO:
2394 {
2395 struct ip_vs_getinfo info;
2396 info.version = IP_VS_VERSION_CODE;
2397 info.size = IP_VS_CONN_TAB_SIZE;
2398 info.num_services = ip_vs_num_services;
2399 if (copy_to_user(user, &info, sizeof(info)) != 0)
2400 ret = -EFAULT;
2401 }
2402 break;
2403
2404 case IP_VS_SO_GET_SERVICES:
2405 {
2406 struct ip_vs_get_services *get;
2407 int size;
2408
2409 get = (struct ip_vs_get_services *)arg;
2410 size = sizeof(*get) +
2411 sizeof(struct ip_vs_service_entry) * get->num_services;
2412 if (*len != size) {
2413 IP_VS_ERR("length: %u != %u\n", *len, size);
2414 ret = -EINVAL;
2415 goto out;
2416 }
2417 ret = __ip_vs_get_service_entries(get, user);
2418 }
2419 break;
2420
2421 case IP_VS_SO_GET_SERVICE:
2422 {
2423 struct ip_vs_service_entry *entry;
2424 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002425 union nf_inet_addr addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002426
2427 entry = (struct ip_vs_service_entry *)arg;
Julius Volzb18610d2008-09-02 15:55:37 +02002428 addr.ip = entry->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002429 if (entry->fwmark)
Julius Volzb18610d2008-09-02 15:55:37 +02002430 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002431 else
Julius Volzb18610d2008-09-02 15:55:37 +02002432 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2433 &addr, entry->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434 if (svc) {
2435 ip_vs_copy_service(entry, svc);
2436 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2437 ret = -EFAULT;
2438 ip_vs_service_put(svc);
2439 } else
2440 ret = -ESRCH;
2441 }
2442 break;
2443
2444 case IP_VS_SO_GET_DESTS:
2445 {
2446 struct ip_vs_get_dests *get;
2447 int size;
2448
2449 get = (struct ip_vs_get_dests *)arg;
2450 size = sizeof(*get) +
2451 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2452 if (*len != size) {
2453 IP_VS_ERR("length: %u != %u\n", *len, size);
2454 ret = -EINVAL;
2455 goto out;
2456 }
2457 ret = __ip_vs_get_dest_entries(get, user);
2458 }
2459 break;
2460
2461 case IP_VS_SO_GET_TIMEOUT:
2462 {
2463 struct ip_vs_timeout_user t;
2464
2465 __ip_vs_get_timeouts(&t);
2466 if (copy_to_user(user, &t, sizeof(t)) != 0)
2467 ret = -EFAULT;
2468 }
2469 break;
2470
2471 case IP_VS_SO_GET_DAEMON:
2472 {
2473 struct ip_vs_daemon_user d[2];
2474
2475 memset(&d, 0, sizeof(d));
2476 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2477 d[0].state = IP_VS_STATE_MASTER;
pageexec4da62fc2005-06-26 16:00:19 -07002478 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002479 d[0].syncid = ip_vs_master_syncid;
2480 }
2481 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2482 d[1].state = IP_VS_STATE_BACKUP;
pageexec4da62fc2005-06-26 16:00:19 -07002483 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002484 d[1].syncid = ip_vs_backup_syncid;
2485 }
2486 if (copy_to_user(user, &d, sizeof(d)) != 0)
2487 ret = -EFAULT;
2488 }
2489 break;
2490
2491 default:
2492 ret = -EINVAL;
2493 }
2494
2495 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002496 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002497 return ret;
2498}
2499
2500
2501static struct nf_sockopt_ops ip_vs_sockopts = {
2502 .pf = PF_INET,
2503 .set_optmin = IP_VS_BASE_CTL,
2504 .set_optmax = IP_VS_SO_SET_MAX+1,
2505 .set = do_ip_vs_set_ctl,
2506 .get_optmin = IP_VS_BASE_CTL,
2507 .get_optmax = IP_VS_SO_GET_MAX+1,
2508 .get = do_ip_vs_get_ctl,
Neil Horman16fcec32007-09-11 11:28:26 +02002509 .owner = THIS_MODULE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002510};
2511
Julius Volz9a812192008-08-14 14:08:44 +02002512/*
2513 * Generic Netlink interface
2514 */
2515
2516/* IPVS genetlink family */
2517static struct genl_family ip_vs_genl_family = {
2518 .id = GENL_ID_GENERATE,
2519 .hdrsize = 0,
2520 .name = IPVS_GENL_NAME,
2521 .version = IPVS_GENL_VERSION,
2522 .maxattr = IPVS_CMD_MAX,
2523};
2524
2525/* Policy used for first-level command attributes */
2526static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2527 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2528 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2529 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2530 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2531 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2532 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2533};
2534
2535/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2536static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2537 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2538 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2539 .len = IP_VS_IFNAME_MAXLEN },
2540 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2541};
2542
2543/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2544static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2545 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2546 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2547 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2548 .len = sizeof(union nf_inet_addr) },
2549 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2550 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2551 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2552 .len = IP_VS_SCHEDNAME_MAXLEN },
2553 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2554 .len = sizeof(struct ip_vs_flags) },
2555 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2556 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2557 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2558};
2559
2560/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2561static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2562 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2563 .len = sizeof(union nf_inet_addr) },
2564 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2565 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2566 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2567 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2568 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2569 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2570 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2571 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2572 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2573};
2574
2575static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2576 struct ip_vs_stats *stats)
2577{
2578 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2579 if (!nl_stats)
2580 return -EMSGSIZE;
2581
2582 spin_lock_bh(&stats->lock);
2583
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002584 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2585 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2586 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2587 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2588 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2589 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2590 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2591 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2592 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2593 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
Julius Volz9a812192008-08-14 14:08:44 +02002594
2595 spin_unlock_bh(&stats->lock);
2596
2597 nla_nest_end(skb, nl_stats);
2598
2599 return 0;
2600
2601nla_put_failure:
2602 spin_unlock_bh(&stats->lock);
2603 nla_nest_cancel(skb, nl_stats);
2604 return -EMSGSIZE;
2605}
2606
2607static int ip_vs_genl_fill_service(struct sk_buff *skb,
2608 struct ip_vs_service *svc)
2609{
2610 struct nlattr *nl_service;
2611 struct ip_vs_flags flags = { .flags = svc->flags,
2612 .mask = ~0 };
2613
2614 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2615 if (!nl_service)
2616 return -EMSGSIZE;
2617
Julius Volzf94fd042008-09-02 15:55:55 +02002618 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
Julius Volz9a812192008-08-14 14:08:44 +02002619
2620 if (svc->fwmark) {
2621 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2622 } else {
2623 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2624 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2625 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2626 }
2627
2628 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2629 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2630 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2631 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2632
2633 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2634 goto nla_put_failure;
2635
2636 nla_nest_end(skb, nl_service);
2637
2638 return 0;
2639
2640nla_put_failure:
2641 nla_nest_cancel(skb, nl_service);
2642 return -EMSGSIZE;
2643}
2644
2645static int ip_vs_genl_dump_service(struct sk_buff *skb,
2646 struct ip_vs_service *svc,
2647 struct netlink_callback *cb)
2648{
2649 void *hdr;
2650
2651 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2652 &ip_vs_genl_family, NLM_F_MULTI,
2653 IPVS_CMD_NEW_SERVICE);
2654 if (!hdr)
2655 return -EMSGSIZE;
2656
2657 if (ip_vs_genl_fill_service(skb, svc) < 0)
2658 goto nla_put_failure;
2659
2660 return genlmsg_end(skb, hdr);
2661
2662nla_put_failure:
2663 genlmsg_cancel(skb, hdr);
2664 return -EMSGSIZE;
2665}
2666
2667static int ip_vs_genl_dump_services(struct sk_buff *skb,
2668 struct netlink_callback *cb)
2669{
2670 int idx = 0, i;
2671 int start = cb->args[0];
2672 struct ip_vs_service *svc;
2673
2674 mutex_lock(&__ip_vs_mutex);
2675 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2676 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2677 if (++idx <= start)
2678 continue;
2679 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2680 idx--;
2681 goto nla_put_failure;
2682 }
2683 }
2684 }
2685
2686 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2687 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2688 if (++idx <= start)
2689 continue;
2690 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2691 idx--;
2692 goto nla_put_failure;
2693 }
2694 }
2695 }
2696
2697nla_put_failure:
2698 mutex_unlock(&__ip_vs_mutex);
2699 cb->args[0] = idx;
2700
2701 return skb->len;
2702}
2703
Julius Volzc860c6b2008-09-02 15:55:36 +02002704static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
Julius Volz9a812192008-08-14 14:08:44 +02002705 struct nlattr *nla, int full_entry)
2706{
2707 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2708 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2709
2710 /* Parse mandatory identifying service fields first */
2711 if (nla == NULL ||
2712 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2713 return -EINVAL;
2714
2715 nla_af = attrs[IPVS_SVC_ATTR_AF];
2716 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2717 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2718 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2719 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2720
2721 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2722 return -EINVAL;
2723
Julius Volzc860c6b2008-09-02 15:55:36 +02002724 usvc->af = nla_get_u16(nla_af);
Julius Volzf94fd042008-09-02 15:55:55 +02002725#ifdef CONFIG_IP_VS_IPV6
2726 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2727#else
2728 if (usvc->af != AF_INET)
2729#endif
Julius Volz9a812192008-08-14 14:08:44 +02002730 return -EAFNOSUPPORT;
2731
2732 if (nla_fwmark) {
2733 usvc->protocol = IPPROTO_TCP;
2734 usvc->fwmark = nla_get_u32(nla_fwmark);
2735 } else {
2736 usvc->protocol = nla_get_u16(nla_protocol);
2737 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2738 usvc->port = nla_get_u16(nla_port);
2739 usvc->fwmark = 0;
2740 }
2741
2742 /* If a full entry was requested, check for the additional fields */
2743 if (full_entry) {
2744 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2745 *nla_netmask;
2746 struct ip_vs_flags flags;
2747 struct ip_vs_service *svc;
2748
2749 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2750 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2751 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2752 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2753
2754 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2755 return -EINVAL;
2756
2757 nla_memcpy(&flags, nla_flags, sizeof(flags));
2758
2759 /* prefill flags from service if it already exists */
2760 if (usvc->fwmark)
Julius Volzb18610d2008-09-02 15:55:37 +02002761 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
Julius Volz9a812192008-08-14 14:08:44 +02002762 else
Julius Volzb18610d2008-09-02 15:55:37 +02002763 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2764 &usvc->addr, usvc->port);
Julius Volz9a812192008-08-14 14:08:44 +02002765 if (svc) {
2766 usvc->flags = svc->flags;
2767 ip_vs_service_put(svc);
2768 } else
2769 usvc->flags = 0;
2770
2771 /* set new flags from userland */
2772 usvc->flags = (usvc->flags & ~flags.mask) |
2773 (flags.flags & flags.mask);
Julius Volzc860c6b2008-09-02 15:55:36 +02002774 usvc->sched_name = nla_data(nla_sched);
Julius Volz9a812192008-08-14 14:08:44 +02002775 usvc->timeout = nla_get_u32(nla_timeout);
2776 usvc->netmask = nla_get_u32(nla_netmask);
2777 }
2778
2779 return 0;
2780}
2781
2782static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2783{
Julius Volzc860c6b2008-09-02 15:55:36 +02002784 struct ip_vs_service_user_kern usvc;
Julius Volz9a812192008-08-14 14:08:44 +02002785 int ret;
2786
2787 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2788 if (ret)
2789 return ERR_PTR(ret);
2790
2791 if (usvc.fwmark)
Julius Volzb18610d2008-09-02 15:55:37 +02002792 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
Julius Volz9a812192008-08-14 14:08:44 +02002793 else
Julius Volzb18610d2008-09-02 15:55:37 +02002794 return __ip_vs_service_get(usvc.af, usvc.protocol,
2795 &usvc.addr, usvc.port);
Julius Volz9a812192008-08-14 14:08:44 +02002796}
2797
2798static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2799{
2800 struct nlattr *nl_dest;
2801
2802 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2803 if (!nl_dest)
2804 return -EMSGSIZE;
2805
2806 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2807 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2808
2809 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2810 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2811 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2812 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2813 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2814 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2815 atomic_read(&dest->activeconns));
2816 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2817 atomic_read(&dest->inactconns));
2818 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2819 atomic_read(&dest->persistconns));
2820
2821 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2822 goto nla_put_failure;
2823
2824 nla_nest_end(skb, nl_dest);
2825
2826 return 0;
2827
2828nla_put_failure:
2829 nla_nest_cancel(skb, nl_dest);
2830 return -EMSGSIZE;
2831}
2832
2833static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2834 struct netlink_callback *cb)
2835{
2836 void *hdr;
2837
2838 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2839 &ip_vs_genl_family, NLM_F_MULTI,
2840 IPVS_CMD_NEW_DEST);
2841 if (!hdr)
2842 return -EMSGSIZE;
2843
2844 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2845 goto nla_put_failure;
2846
2847 return genlmsg_end(skb, hdr);
2848
2849nla_put_failure:
2850 genlmsg_cancel(skb, hdr);
2851 return -EMSGSIZE;
2852}
2853
2854static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2855 struct netlink_callback *cb)
2856{
2857 int idx = 0;
2858 int start = cb->args[0];
2859 struct ip_vs_service *svc;
2860 struct ip_vs_dest *dest;
2861 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2862
2863 mutex_lock(&__ip_vs_mutex);
2864
2865 /* Try to find the service for which to dump destinations */
2866 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2867 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2868 goto out_err;
2869
2870 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2871 if (IS_ERR(svc) || svc == NULL)
2872 goto out_err;
2873
2874 /* Dump the destinations */
2875 list_for_each_entry(dest, &svc->destinations, n_list) {
2876 if (++idx <= start)
2877 continue;
2878 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2879 idx--;
2880 goto nla_put_failure;
2881 }
2882 }
2883
2884nla_put_failure:
2885 cb->args[0] = idx;
2886 ip_vs_service_put(svc);
2887
2888out_err:
2889 mutex_unlock(&__ip_vs_mutex);
2890
2891 return skb->len;
2892}
2893
Julius Volzc860c6b2008-09-02 15:55:36 +02002894static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
Julius Volz9a812192008-08-14 14:08:44 +02002895 struct nlattr *nla, int full_entry)
2896{
2897 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2898 struct nlattr *nla_addr, *nla_port;
2899
2900 /* Parse mandatory identifying destination fields first */
2901 if (nla == NULL ||
2902 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2903 return -EINVAL;
2904
2905 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2906 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2907
2908 if (!(nla_addr && nla_port))
2909 return -EINVAL;
2910
2911 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2912 udest->port = nla_get_u16(nla_port);
2913
2914 /* If a full entry was requested, check for the additional fields */
2915 if (full_entry) {
2916 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2917 *nla_l_thresh;
2918
2919 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2920 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2921 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2922 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2923
2924 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2925 return -EINVAL;
2926
2927 udest->conn_flags = nla_get_u32(nla_fwd)
2928 & IP_VS_CONN_F_FWD_MASK;
2929 udest->weight = nla_get_u32(nla_weight);
2930 udest->u_threshold = nla_get_u32(nla_u_thresh);
2931 udest->l_threshold = nla_get_u32(nla_l_thresh);
2932 }
2933
2934 return 0;
2935}
2936
2937static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2938 const char *mcast_ifn, __be32 syncid)
2939{
2940 struct nlattr *nl_daemon;
2941
2942 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2943 if (!nl_daemon)
2944 return -EMSGSIZE;
2945
2946 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2947 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2948 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2949
2950 nla_nest_end(skb, nl_daemon);
2951
2952 return 0;
2953
2954nla_put_failure:
2955 nla_nest_cancel(skb, nl_daemon);
2956 return -EMSGSIZE;
2957}
2958
2959static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2960 const char *mcast_ifn, __be32 syncid,
2961 struct netlink_callback *cb)
2962{
2963 void *hdr;
2964 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2965 &ip_vs_genl_family, NLM_F_MULTI,
2966 IPVS_CMD_NEW_DAEMON);
2967 if (!hdr)
2968 return -EMSGSIZE;
2969
2970 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2971 goto nla_put_failure;
2972
2973 return genlmsg_end(skb, hdr);
2974
2975nla_put_failure:
2976 genlmsg_cancel(skb, hdr);
2977 return -EMSGSIZE;
2978}
2979
2980static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2981 struct netlink_callback *cb)
2982{
2983 mutex_lock(&__ip_vs_mutex);
2984 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2985 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2986 ip_vs_master_mcast_ifn,
2987 ip_vs_master_syncid, cb) < 0)
2988 goto nla_put_failure;
2989
2990 cb->args[0] = 1;
2991 }
2992
2993 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2994 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2995 ip_vs_backup_mcast_ifn,
2996 ip_vs_backup_syncid, cb) < 0)
2997 goto nla_put_failure;
2998
2999 cb->args[1] = 1;
3000 }
3001
3002nla_put_failure:
3003 mutex_unlock(&__ip_vs_mutex);
3004
3005 return skb->len;
3006}
3007
3008static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3009{
3010 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3011 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3012 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3013 return -EINVAL;
3014
3015 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3016 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3017 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3018}
3019
3020static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3021{
3022 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3023 return -EINVAL;
3024
3025 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3026}
3027
3028static int ip_vs_genl_set_config(struct nlattr **attrs)
3029{
3030 struct ip_vs_timeout_user t;
3031
3032 __ip_vs_get_timeouts(&t);
3033
3034 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3035 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3036
3037 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3038 t.tcp_fin_timeout =
3039 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3040
3041 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3042 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3043
3044 return ip_vs_set_timeout(&t);
3045}
3046
3047static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3048{
3049 struct ip_vs_service *svc = NULL;
Julius Volzc860c6b2008-09-02 15:55:36 +02003050 struct ip_vs_service_user_kern usvc;
3051 struct ip_vs_dest_user_kern udest;
Julius Volz9a812192008-08-14 14:08:44 +02003052 int ret = 0, cmd;
3053 int need_full_svc = 0, need_full_dest = 0;
3054
3055 cmd = info->genlhdr->cmd;
3056
3057 mutex_lock(&__ip_vs_mutex);
3058
3059 if (cmd == IPVS_CMD_FLUSH) {
3060 ret = ip_vs_flush();
3061 goto out;
3062 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3063 ret = ip_vs_genl_set_config(info->attrs);
3064 goto out;
3065 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3066 cmd == IPVS_CMD_DEL_DAEMON) {
3067
3068 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3069
3070 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3071 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3072 info->attrs[IPVS_CMD_ATTR_DAEMON],
3073 ip_vs_daemon_policy)) {
3074 ret = -EINVAL;
3075 goto out;
3076 }
3077
3078 if (cmd == IPVS_CMD_NEW_DAEMON)
3079 ret = ip_vs_genl_new_daemon(daemon_attrs);
3080 else
3081 ret = ip_vs_genl_del_daemon(daemon_attrs);
3082 goto out;
3083 } else if (cmd == IPVS_CMD_ZERO &&
3084 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3085 ret = ip_vs_zero_all();
3086 goto out;
3087 }
3088
3089 /* All following commands require a service argument, so check if we
3090 * received a valid one. We need a full service specification when
3091 * adding / editing a service. Only identifying members otherwise. */
3092 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3093 need_full_svc = 1;
3094
3095 ret = ip_vs_genl_parse_service(&usvc,
3096 info->attrs[IPVS_CMD_ATTR_SERVICE],
3097 need_full_svc);
3098 if (ret)
3099 goto out;
3100
3101 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3102 if (usvc.fwmark == 0)
Julius Volzb18610d2008-09-02 15:55:37 +02003103 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3104 &usvc.addr, usvc.port);
Julius Volz9a812192008-08-14 14:08:44 +02003105 else
Julius Volzb18610d2008-09-02 15:55:37 +02003106 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
Julius Volz9a812192008-08-14 14:08:44 +02003107
3108 /* Unless we're adding a new service, the service must already exist */
3109 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3110 ret = -ESRCH;
3111 goto out;
3112 }
3113
3114 /* Destination commands require a valid destination argument. For
3115 * adding / editing a destination, we need a full destination
3116 * specification. */
3117 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3118 cmd == IPVS_CMD_DEL_DEST) {
3119 if (cmd != IPVS_CMD_DEL_DEST)
3120 need_full_dest = 1;
3121
3122 ret = ip_vs_genl_parse_dest(&udest,
3123 info->attrs[IPVS_CMD_ATTR_DEST],
3124 need_full_dest);
3125 if (ret)
3126 goto out;
3127 }
3128
3129 switch (cmd) {
3130 case IPVS_CMD_NEW_SERVICE:
3131 if (svc == NULL)
3132 ret = ip_vs_add_service(&usvc, &svc);
3133 else
3134 ret = -EEXIST;
3135 break;
3136 case IPVS_CMD_SET_SERVICE:
3137 ret = ip_vs_edit_service(svc, &usvc);
3138 break;
3139 case IPVS_CMD_DEL_SERVICE:
3140 ret = ip_vs_del_service(svc);
3141 break;
3142 case IPVS_CMD_NEW_DEST:
3143 ret = ip_vs_add_dest(svc, &udest);
3144 break;
3145 case IPVS_CMD_SET_DEST:
3146 ret = ip_vs_edit_dest(svc, &udest);
3147 break;
3148 case IPVS_CMD_DEL_DEST:
3149 ret = ip_vs_del_dest(svc, &udest);
3150 break;
3151 case IPVS_CMD_ZERO:
3152 ret = ip_vs_zero_service(svc);
3153 break;
3154 default:
3155 ret = -EINVAL;
3156 }
3157
3158out:
3159 if (svc)
3160 ip_vs_service_put(svc);
3161 mutex_unlock(&__ip_vs_mutex);
3162
3163 return ret;
3164}
3165
3166static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3167{
3168 struct sk_buff *msg;
3169 void *reply;
3170 int ret, cmd, reply_cmd;
3171
3172 cmd = info->genlhdr->cmd;
3173
3174 if (cmd == IPVS_CMD_GET_SERVICE)
3175 reply_cmd = IPVS_CMD_NEW_SERVICE;
3176 else if (cmd == IPVS_CMD_GET_INFO)
3177 reply_cmd = IPVS_CMD_SET_INFO;
3178 else if (cmd == IPVS_CMD_GET_CONFIG)
3179 reply_cmd = IPVS_CMD_SET_CONFIG;
3180 else {
3181 IP_VS_ERR("unknown Generic Netlink command\n");
3182 return -EINVAL;
3183 }
3184
3185 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3186 if (!msg)
3187 return -ENOMEM;
3188
3189 mutex_lock(&__ip_vs_mutex);
3190
3191 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3192 if (reply == NULL)
3193 goto nla_put_failure;
3194
3195 switch (cmd) {
3196 case IPVS_CMD_GET_SERVICE:
3197 {
3198 struct ip_vs_service *svc;
3199
3200 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3201 if (IS_ERR(svc)) {
3202 ret = PTR_ERR(svc);
3203 goto out_err;
3204 } else if (svc) {
3205 ret = ip_vs_genl_fill_service(msg, svc);
3206 ip_vs_service_put(svc);
3207 if (ret)
3208 goto nla_put_failure;
3209 } else {
3210 ret = -ESRCH;
3211 goto out_err;
3212 }
3213
3214 break;
3215 }
3216
3217 case IPVS_CMD_GET_CONFIG:
3218 {
3219 struct ip_vs_timeout_user t;
3220
3221 __ip_vs_get_timeouts(&t);
3222#ifdef CONFIG_IP_VS_PROTO_TCP
3223 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3224 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3225 t.tcp_fin_timeout);
3226#endif
3227#ifdef CONFIG_IP_VS_PROTO_UDP
3228 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3229#endif
3230
3231 break;
3232 }
3233
3234 case IPVS_CMD_GET_INFO:
3235 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3236 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3237 IP_VS_CONN_TAB_SIZE);
3238 break;
3239 }
3240
3241 genlmsg_end(msg, reply);
3242 ret = genlmsg_unicast(msg, info->snd_pid);
3243 goto out;
3244
3245nla_put_failure:
3246 IP_VS_ERR("not enough space in Netlink message\n");
3247 ret = -EMSGSIZE;
3248
3249out_err:
3250 nlmsg_free(msg);
3251out:
3252 mutex_unlock(&__ip_vs_mutex);
3253
3254 return ret;
3255}
3256
3257
3258static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3259 {
3260 .cmd = IPVS_CMD_NEW_SERVICE,
3261 .flags = GENL_ADMIN_PERM,
3262 .policy = ip_vs_cmd_policy,
3263 .doit = ip_vs_genl_set_cmd,
3264 },
3265 {
3266 .cmd = IPVS_CMD_SET_SERVICE,
3267 .flags = GENL_ADMIN_PERM,
3268 .policy = ip_vs_cmd_policy,
3269 .doit = ip_vs_genl_set_cmd,
3270 },
3271 {
3272 .cmd = IPVS_CMD_DEL_SERVICE,
3273 .flags = GENL_ADMIN_PERM,
3274 .policy = ip_vs_cmd_policy,
3275 .doit = ip_vs_genl_set_cmd,
3276 },
3277 {
3278 .cmd = IPVS_CMD_GET_SERVICE,
3279 .flags = GENL_ADMIN_PERM,
3280 .doit = ip_vs_genl_get_cmd,
3281 .dumpit = ip_vs_genl_dump_services,
3282 .policy = ip_vs_cmd_policy,
3283 },
3284 {
3285 .cmd = IPVS_CMD_NEW_DEST,
3286 .flags = GENL_ADMIN_PERM,
3287 .policy = ip_vs_cmd_policy,
3288 .doit = ip_vs_genl_set_cmd,
3289 },
3290 {
3291 .cmd = IPVS_CMD_SET_DEST,
3292 .flags = GENL_ADMIN_PERM,
3293 .policy = ip_vs_cmd_policy,
3294 .doit = ip_vs_genl_set_cmd,
3295 },
3296 {
3297 .cmd = IPVS_CMD_DEL_DEST,
3298 .flags = GENL_ADMIN_PERM,
3299 .policy = ip_vs_cmd_policy,
3300 .doit = ip_vs_genl_set_cmd,
3301 },
3302 {
3303 .cmd = IPVS_CMD_GET_DEST,
3304 .flags = GENL_ADMIN_PERM,
3305 .policy = ip_vs_cmd_policy,
3306 .dumpit = ip_vs_genl_dump_dests,
3307 },
3308 {
3309 .cmd = IPVS_CMD_NEW_DAEMON,
3310 .flags = GENL_ADMIN_PERM,
3311 .policy = ip_vs_cmd_policy,
3312 .doit = ip_vs_genl_set_cmd,
3313 },
3314 {
3315 .cmd = IPVS_CMD_DEL_DAEMON,
3316 .flags = GENL_ADMIN_PERM,
3317 .policy = ip_vs_cmd_policy,
3318 .doit = ip_vs_genl_set_cmd,
3319 },
3320 {
3321 .cmd = IPVS_CMD_GET_DAEMON,
3322 .flags = GENL_ADMIN_PERM,
3323 .dumpit = ip_vs_genl_dump_daemons,
3324 },
3325 {
3326 .cmd = IPVS_CMD_SET_CONFIG,
3327 .flags = GENL_ADMIN_PERM,
3328 .policy = ip_vs_cmd_policy,
3329 .doit = ip_vs_genl_set_cmd,
3330 },
3331 {
3332 .cmd = IPVS_CMD_GET_CONFIG,
3333 .flags = GENL_ADMIN_PERM,
3334 .doit = ip_vs_genl_get_cmd,
3335 },
3336 {
3337 .cmd = IPVS_CMD_GET_INFO,
3338 .flags = GENL_ADMIN_PERM,
3339 .doit = ip_vs_genl_get_cmd,
3340 },
3341 {
3342 .cmd = IPVS_CMD_ZERO,
3343 .flags = GENL_ADMIN_PERM,
3344 .policy = ip_vs_cmd_policy,
3345 .doit = ip_vs_genl_set_cmd,
3346 },
3347 {
3348 .cmd = IPVS_CMD_FLUSH,
3349 .flags = GENL_ADMIN_PERM,
3350 .doit = ip_vs_genl_set_cmd,
3351 },
3352};
3353
3354static int __init ip_vs_genl_register(void)
3355{
3356 int ret, i;
3357
3358 ret = genl_register_family(&ip_vs_genl_family);
3359 if (ret)
3360 return ret;
3361
3362 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3363 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3364 if (ret)
3365 goto err_out;
3366 }
3367 return 0;
3368
3369err_out:
3370 genl_unregister_family(&ip_vs_genl_family);
3371 return ret;
3372}
3373
3374static void ip_vs_genl_unregister(void)
3375{
3376 genl_unregister_family(&ip_vs_genl_family);
3377}
3378
3379/* End of Generic Netlink interface definitions */
3380
Linus Torvalds1da177e2005-04-16 15:20:36 -07003381
Sven Wegener048cf482008-08-10 18:24:35 +00003382int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003383{
3384 int ret;
3385 int idx;
3386
3387 EnterFunction(2);
3388
3389 ret = nf_register_sockopt(&ip_vs_sockopts);
3390 if (ret) {
3391 IP_VS_ERR("cannot register sockopt.\n");
3392 return ret;
3393 }
3394
Julius Volz9a812192008-08-14 14:08:44 +02003395 ret = ip_vs_genl_register();
3396 if (ret) {
3397 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3398 nf_unregister_sockopt(&ip_vs_sockopts);
3399 return ret;
3400 }
3401
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003402 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3403 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003404
Pavel Emelyanov90754f82008-01-12 02:33:50 -08003405 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003406
3407 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3408 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3409 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3410 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3411 }
3412 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3413 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3414 }
3415
Linus Torvalds1da177e2005-04-16 15:20:36 -07003416 ip_vs_new_estimator(&ip_vs_stats);
3417
3418 /* Hook the defense timer */
3419 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3420
3421 LeaveFunction(2);
3422 return 0;
3423}
3424
3425
3426void ip_vs_control_cleanup(void)
3427{
3428 EnterFunction(2);
3429 ip_vs_trash_cleanup();
3430 cancel_rearming_delayed_work(&defense_work);
Oleg Nesterov28e53bd2007-05-09 02:34:22 -07003431 cancel_work_sync(&defense_work.work);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003432 ip_vs_kill_estimator(&ip_vs_stats);
3433 unregister_sysctl_table(sysctl_header);
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003434 proc_net_remove(&init_net, "ip_vs_stats");
3435 proc_net_remove(&init_net, "ip_vs");
Julius Volz9a812192008-08-14 14:08:44 +02003436 ip_vs_genl_unregister();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003437 nf_unregister_sockopt(&ip_vs_sockopts);
3438 LeaveFunction(2);
3439}