blob: e53efe41f01dd29740e857778b1cdc4a62f806e6 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080024#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080034#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020036#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include <net/ip.h>
Vince Busam09571c72008-09-02 15:55:52 +020038#ifdef CONFIG_IP_VS_IPV6
39#include <net/ipv6.h>
40#include <net/ip6_route.h>
41#endif
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020042#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070043#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020044#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045
46#include <asm/uaccess.h>
47
48#include <net/ip_vs.h>
49
50/* mutex (DEFINE_MUTEX, not a semaphore) for IPVS sockopts. And, [gs]etsockopt may sleep. */
Ingo Molnar14cc3e22006-03-26 01:37:14 -080051static DEFINE_MUTEX(__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -070052
53/* rwlock for the service tables (ip_vs_svc_table and ip_vs_svc_fwm_table) */
54static DEFINE_RWLOCK(__ip_vs_svc_lock);
55
56/* rwlock for the table with the real services (ip_vs_rtable) */
57static DEFINE_RWLOCK(__ip_vs_rs_lock);
58
59/* lock for state and timeout tables; held by update_defense_level() around secure_tcp handling */
60static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
61
62/* lock for drop entry handling (sysctl_ip_vs_drop_entry / ip_vs_dropentry) */
63static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
64
65/* lock for drop packet handling (sysctl_ip_vs_drop_packet / ip_vs_drop_rate) */
66static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
67
68/* 1/rate drop and drop-entry variables, recomputed by update_defense_level() */
69int ip_vs_drop_rate = 0;
70int ip_vs_drop_counter = 0;
71static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
72
73/* number of virtual services */
74static int ip_vs_num_services = 0;
75
76/*
 * sysctl variables
 *
 * drop_entry, drop_packet and secure_tcp are mode selectors in the range
 * 0..3 as handled by update_defense_level(); that function itself flips
 * them between 1 and 2 depending on whether available memory is below
 * sysctl_ip_vs_amemthresh (counted in pages).
 */
77static int sysctl_ip_vs_drop_entry = 0;
78static int sysctl_ip_vs_drop_packet = 0;
79static int sysctl_ip_vs_secure_tcp = 0;
80static int sysctl_ip_vs_amemthresh = 1024;
81static int sysctl_ip_vs_am_droprate = 10;
82int sysctl_ip_vs_cache_bypass = 0;
83int sysctl_ip_vs_expire_nodest_conn = 0;
84int sysctl_ip_vs_expire_quiescent_template = 0;
85int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
86int sysctl_ip_vs_nat_icmp_send = 0;
87
88
89#ifdef CONFIG_IP_VS_DEBUG
90static int sysctl_ip_vs_debug_level = 0;
91
92int ip_vs_get_debug_level(void)
93{
94 return sysctl_ip_vs_debug_level;
95}
96#endif
97
Vince Busam09571c72008-09-02 15:55:52 +020098#ifdef CONFIG_IP_VS_IPV6
99/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
100static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
101{
102 struct rt6_info *rt;
103 struct flowi fl = {
104 .oif = 0,
105 .nl_u = {
106 .ip6_u = {
107 .daddr = *addr,
108 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
109 };
110
111 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
112 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
113 return 1;
114
115 return 0;
116}
117#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700119 * update_defense_level is called from keventd and from sysctl,
120 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121 */
122static void update_defense_level(void)
123{
124 struct sysinfo i;
125 static int old_secure_tcp = 0;
126 int availmem;
127 int nomem;
128 int to_change = -1;
129
130 /* we only count free and buffered memory (in pages) */
131 si_meminfo(&i);
132 availmem = i.freeram + i.bufferram;
133 /* however in linux 2.5 the i.bufferram is total page cache size,
134 we need adjust it */
135 /* si_swapinfo(&i); */
136 /* availmem = availmem - (i.totalswap - i.freeswap); */
137
138 nomem = (availmem < sysctl_ip_vs_amemthresh);
139
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700140 local_bh_disable();
141
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142 /* drop_entry */
143 spin_lock(&__ip_vs_dropentry_lock);
144 switch (sysctl_ip_vs_drop_entry) {
145 case 0:
146 atomic_set(&ip_vs_dropentry, 0);
147 break;
148 case 1:
149 if (nomem) {
150 atomic_set(&ip_vs_dropentry, 1);
151 sysctl_ip_vs_drop_entry = 2;
152 } else {
153 atomic_set(&ip_vs_dropentry, 0);
154 }
155 break;
156 case 2:
157 if (nomem) {
158 atomic_set(&ip_vs_dropentry, 1);
159 } else {
160 atomic_set(&ip_vs_dropentry, 0);
161 sysctl_ip_vs_drop_entry = 1;
162 };
163 break;
164 case 3:
165 atomic_set(&ip_vs_dropentry, 1);
166 break;
167 }
168 spin_unlock(&__ip_vs_dropentry_lock);
169
170 /* drop_packet */
171 spin_lock(&__ip_vs_droppacket_lock);
172 switch (sysctl_ip_vs_drop_packet) {
173 case 0:
174 ip_vs_drop_rate = 0;
175 break;
176 case 1:
177 if (nomem) {
178 ip_vs_drop_rate = ip_vs_drop_counter
179 = sysctl_ip_vs_amemthresh /
180 (sysctl_ip_vs_amemthresh-availmem);
181 sysctl_ip_vs_drop_packet = 2;
182 } else {
183 ip_vs_drop_rate = 0;
184 }
185 break;
186 case 2:
187 if (nomem) {
188 ip_vs_drop_rate = ip_vs_drop_counter
189 = sysctl_ip_vs_amemthresh /
190 (sysctl_ip_vs_amemthresh-availmem);
191 } else {
192 ip_vs_drop_rate = 0;
193 sysctl_ip_vs_drop_packet = 1;
194 }
195 break;
196 case 3:
197 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
198 break;
199 }
200 spin_unlock(&__ip_vs_droppacket_lock);
201
202 /* secure_tcp */
203 write_lock(&__ip_vs_securetcp_lock);
204 switch (sysctl_ip_vs_secure_tcp) {
205 case 0:
206 if (old_secure_tcp >= 2)
207 to_change = 0;
208 break;
209 case 1:
210 if (nomem) {
211 if (old_secure_tcp < 2)
212 to_change = 1;
213 sysctl_ip_vs_secure_tcp = 2;
214 } else {
215 if (old_secure_tcp >= 2)
216 to_change = 0;
217 }
218 break;
219 case 2:
220 if (nomem) {
221 if (old_secure_tcp < 2)
222 to_change = 1;
223 } else {
224 if (old_secure_tcp >= 2)
225 to_change = 0;
226 sysctl_ip_vs_secure_tcp = 1;
227 }
228 break;
229 case 3:
230 if (old_secure_tcp < 2)
231 to_change = 1;
232 break;
233 }
234 old_secure_tcp = sysctl_ip_vs_secure_tcp;
235 if (to_change >= 0)
236 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
237 write_unlock(&__ip_vs_securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700238
239 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240}
241
242
/*
 *	Timer for checking the defense
 *
 *	defense_work is a delayed work item whose handler re-arms itself
 *	every DEFENSE_TIMER_PERIOD jiffies.  The macro expansion is
 *	parenthesized so that it stays a single operand wherever it is used.
 */
#define DEFENSE_TIMER_PERIOD	(1*HZ)
static void defense_work_handler(struct work_struct *work);
static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249
David Howellsc4028952006-11-22 14:57:56 +0000250static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251{
252 update_defense_level();
253 if (atomic_read(&ip_vs_dropentry))
254 ip_vs_random_dropentry();
255
256 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
257}
258
259int
260ip_vs_use_count_inc(void)
261{
262 return try_module_get(THIS_MODULE);
263}
264
265void
266ip_vs_use_count_dec(void)
267{
268 module_put(THIS_MODULE);
269}
270
271
272/*
273 * Hash table: for virtual service lookups
 * (1 << IP_VS_SVC_TAB_BITS buckets; IP_VS_SVC_TAB_MASK reduces a hash
 * value to a bucket index)
274 */
275#define IP_VS_SVC_TAB_BITS 8
276#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
277#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
278
279/* the service table hashed by <protocol, addr, port>; services are linked through s_list */
280static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
281/* the service table hashed by fwmark; services are linked through f_list */
282static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
283
284/*
285 * Hash table: for real service lookups
 * (1 << IP_VS_RTAB_BITS buckets, keyed by ip_vs_rs_hashkey(); destinations
 * are linked through d_list)
286 */
287#define IP_VS_RTAB_BITS 4
288#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
289#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
290
291static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
292
293/*
294 * Trash for destinations
 * (destinations are linked through n_list; see ip_vs_trash_get_dest())
295 */
296static LIST_HEAD(ip_vs_dest_trash);
297
298/*
299 * FTP & NULL virtual service counters
 * ip_vs_service_get() only attempts its FTP-port and port-zero fallback
 * lookups while the corresponding counter is non-zero.
300 */
301static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
302static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
303
304
305/*
306 * Returns hash value for virtual service
307 */
308static __inline__ unsigned
Julius Volzb18610d2008-09-02 15:55:37 +0200309ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
310 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311{
312 register unsigned porth = ntohs(port);
Julius Volzb18610d2008-09-02 15:55:37 +0200313 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314
Julius Volzb18610d2008-09-02 15:55:37 +0200315#ifdef CONFIG_IP_VS_IPV6
316 if (af == AF_INET6)
317 addr_fold = addr->ip6[0]^addr->ip6[1]^
318 addr->ip6[2]^addr->ip6[3];
319#endif
320
321 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322 & IP_VS_SVC_TAB_MASK;
323}
324
325/*
326 * Returns hash value of fwmark for virtual service lookup
327 */
328static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
329{
330 return fwmark & IP_VS_SVC_TAB_MASK;
331}
332
333/*
334 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
335 * or in the ip_vs_svc_fwm_table by fwmark.
336 * Should be called with locked tables.
337 */
338static int ip_vs_svc_hash(struct ip_vs_service *svc)
339{
340 unsigned hash;
341
342 if (svc->flags & IP_VS_SVC_F_HASHED) {
343 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
344 "called from %p\n", __builtin_return_address(0));
345 return 0;
346 }
347
348 if (svc->fwmark == 0) {
349 /*
350 * Hash it by <protocol,addr,port> in ip_vs_svc_table
351 */
Julius Volzb18610d2008-09-02 15:55:37 +0200352 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
Julius Volze7ade462008-09-02 15:55:33 +0200353 svc->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
355 } else {
356 /*
357 * Hash it by fwmark in ip_vs_svc_fwm_table
358 */
359 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
360 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
361 }
362
363 svc->flags |= IP_VS_SVC_F_HASHED;
364 /* increase its refcnt because it is referenced by the svc table */
365 atomic_inc(&svc->refcnt);
366 return 1;
367}
368
369
370/*
371 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
372 * Should be called with locked tables.
373 */
374static int ip_vs_svc_unhash(struct ip_vs_service *svc)
375{
376 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
377 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
378 "called from %p\n", __builtin_return_address(0));
379 return 0;
380 }
381
382 if (svc->fwmark == 0) {
383 /* Remove it from the ip_vs_svc_table table */
384 list_del(&svc->s_list);
385 } else {
386 /* Remove it from the ip_vs_svc_fwm_table table */
387 list_del(&svc->f_list);
388 }
389
390 svc->flags &= ~IP_VS_SVC_F_HASHED;
391 atomic_dec(&svc->refcnt);
392 return 1;
393}
394
395
396/*
397 * Get service by {proto,addr,port} in the service table.
398 */
Julius Volzb18610d2008-09-02 15:55:37 +0200399static inline struct ip_vs_service *
400__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
401 __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402{
403 unsigned hash;
404 struct ip_vs_service *svc;
405
406 /* Check for "full" addressed entries */
Julius Volzb18610d2008-09-02 15:55:37 +0200407 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408
409 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
Julius Volzb18610d2008-09-02 15:55:37 +0200410 if ((svc->af == af)
411 && ip_vs_addr_equal(af, &svc->addr, vaddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 && (svc->port == vport)
413 && (svc->protocol == protocol)) {
414 /* HIT */
415 atomic_inc(&svc->usecnt);
416 return svc;
417 }
418 }
419
420 return NULL;
421}
422
423
424/*
425 * Get service by {fwmark} in the service table.
426 */
Julius Volzb18610d2008-09-02 15:55:37 +0200427static inline struct ip_vs_service *
428__ip_vs_svc_fwm_get(int af, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429{
430 unsigned hash;
431 struct ip_vs_service *svc;
432
433 /* Check for fwmark addressed entries */
434 hash = ip_vs_svc_fwm_hashkey(fwmark);
435
436 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
Julius Volzb18610d2008-09-02 15:55:37 +0200437 if (svc->fwmark == fwmark && svc->af == af) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 /* HIT */
439 atomic_inc(&svc->usecnt);
440 return svc;
441 }
442 }
443
444 return NULL;
445}
446
447struct ip_vs_service *
Julius Volz3c2e0502008-09-02 15:55:38 +0200448ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
449 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450{
451 struct ip_vs_service *svc;
Julius Volz3c2e0502008-09-02 15:55:38 +0200452
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453 read_lock(&__ip_vs_svc_lock);
454
455 /*
456 * Check the table hashed by fwmark first
457 */
Julius Volz3c2e0502008-09-02 15:55:38 +0200458 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 goto out;
460
461 /*
462 * Check the table hashed by <protocol,addr,port>
463 * for "full" addressed entries
464 */
Julius Volz3c2e0502008-09-02 15:55:38 +0200465 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466
467 if (svc == NULL
468 && protocol == IPPROTO_TCP
469 && atomic_read(&ip_vs_ftpsvc_counter)
470 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
471 /*
472 * Check if ftp service entry exists, the packet
473 * might belong to FTP data connections.
474 */
Julius Volz3c2e0502008-09-02 15:55:38 +0200475 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 }
477
478 if (svc == NULL
479 && atomic_read(&ip_vs_nullsvc_counter)) {
480 /*
481 * Check if the catch-all port (port zero) exists
482 */
Julius Volz3c2e0502008-09-02 15:55:38 +0200483 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484 }
485
486 out:
487 read_unlock(&__ip_vs_svc_lock);
488
Julius Volz3c2e0502008-09-02 15:55:38 +0200489 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
490 fwmark, ip_vs_proto_name(protocol),
491 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
492 svc ? "hit" : "not hit");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493
494 return svc;
495}
496
497
498static inline void
499__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
500{
501 atomic_inc(&svc->refcnt);
502 dest->svc = svc;
503}
504
505static inline void
506__ip_vs_unbind_svc(struct ip_vs_dest *dest)
507{
508 struct ip_vs_service *svc = dest->svc;
509
510 dest->svc = NULL;
511 if (atomic_dec_and_test(&svc->refcnt))
512 kfree(svc);
513}
514
515
516/*
517 * Returns hash value for real service
518 */
Julius Volz7937df12008-09-02 15:55:48 +0200519static inline unsigned ip_vs_rs_hashkey(int af,
520 const union nf_inet_addr *addr,
521 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522{
523 register unsigned porth = ntohs(port);
Julius Volz7937df12008-09-02 15:55:48 +0200524 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525
Julius Volz7937df12008-09-02 15:55:48 +0200526#ifdef CONFIG_IP_VS_IPV6
527 if (af == AF_INET6)
528 addr_fold = addr->ip6[0]^addr->ip6[1]^
529 addr->ip6[2]^addr->ip6[3];
530#endif
531
532 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533 & IP_VS_RTAB_MASK;
534}
535
536/*
537 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
538 * should be called with locked tables.
539 */
540static int ip_vs_rs_hash(struct ip_vs_dest *dest)
541{
542 unsigned hash;
543
544 if (!list_empty(&dest->d_list)) {
545 return 0;
546 }
547
548 /*
549 * Hash by proto,addr,port,
550 * which are the parameters of the real service.
551 */
Julius Volz7937df12008-09-02 15:55:48 +0200552 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
553
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554 list_add(&dest->d_list, &ip_vs_rtable[hash]);
555
556 return 1;
557}
558
559/*
560 * UNhashes ip_vs_dest from ip_vs_rtable.
561 * should be called with locked tables.
562 */
563static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
564{
565 /*
566 * Remove it from the ip_vs_rtable table.
567 */
568 if (!list_empty(&dest->d_list)) {
569 list_del(&dest->d_list);
570 INIT_LIST_HEAD(&dest->d_list);
571 }
572
573 return 1;
574}
575
576/*
577 * Lookup real service by <proto,addr,port> in the real service table.
578 */
579struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200580ip_vs_lookup_real_service(int af, __u16 protocol,
581 const union nf_inet_addr *daddr,
582 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583{
584 unsigned hash;
585 struct ip_vs_dest *dest;
586
587 /*
588 * Check for "full" addressed entries
589 * Return the first found entry
590 */
Julius Volz7937df12008-09-02 15:55:48 +0200591 hash = ip_vs_rs_hashkey(af, daddr, dport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592
593 read_lock(&__ip_vs_rs_lock);
594 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200595 if ((dest->af == af)
596 && ip_vs_addr_equal(af, &dest->addr, daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 && (dest->port == dport)
598 && ((dest->protocol == protocol) ||
599 dest->vfwmark)) {
600 /* HIT */
601 read_unlock(&__ip_vs_rs_lock);
602 return dest;
603 }
604 }
605 read_unlock(&__ip_vs_rs_lock);
606
607 return NULL;
608}
609
610/*
611 * Lookup destination by {addr,port} in the given service
612 */
613static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200614ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
615 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616{
617 struct ip_vs_dest *dest;
618
619 /*
620 * Find the destination for the given service
621 */
622 list_for_each_entry(dest, &svc->destinations, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200623 if ((dest->af == svc->af)
624 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
625 && (dest->port == dport)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626 /* HIT */
627 return dest;
628 }
629 }
630
631 return NULL;
632}
633
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800634/*
635 * Find destination by {daddr,dport,vaddr,protocol}
636 * Cretaed to be used in ip_vs_process_message() in
637 * the backup synchronization daemon. It finds the
638 * destination to be bound to the received connection
639 * on the backup.
640 *
641 * ip_vs_lookup_real_service() looked promissing, but
642 * seems not working as expected.
643 */
Julius Volz7937df12008-09-02 15:55:48 +0200644struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
645 __be16 dport,
646 const union nf_inet_addr *vaddr,
647 __be16 vport, __u16 protocol)
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800648{
649 struct ip_vs_dest *dest;
650 struct ip_vs_service *svc;
651
Julius Volz7937df12008-09-02 15:55:48 +0200652 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800653 if (!svc)
654 return NULL;
655 dest = ip_vs_lookup_dest(svc, daddr, dport);
656 if (dest)
657 atomic_inc(&dest->refcnt);
658 ip_vs_service_put(svc);
659 return dest;
660}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661
662/*
663 * Lookup dest by {svc,addr,port} in the destination trash.
664 * The destination trash is used to hold the destinations that are removed
665 * from the service table but are still referenced by some conn entries.
666 * The reason to add the destination trash is when the dest is temporary
667 * down (either by administrator or by monitor program), the dest can be
668 * picked back from the trash, the remaining connections to the dest can
669 * continue, and the counting information of the dest is also useful for
670 * scheduling.
671 */
672static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200673ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
674 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675{
676 struct ip_vs_dest *dest, *nxt;
677
678 /*
679 * Find the destination in trash
680 */
681 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200682 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
683 "dest->refcnt=%d\n",
684 dest->vfwmark,
685 IP_VS_DBG_ADDR(svc->af, &dest->addr),
686 ntohs(dest->port),
687 atomic_read(&dest->refcnt));
688 if (dest->af == svc->af &&
689 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 dest->port == dport &&
691 dest->vfwmark == svc->fwmark &&
692 dest->protocol == svc->protocol &&
693 (svc->fwmark ||
Julius Volz7937df12008-09-02 15:55:48 +0200694 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 dest->vport == svc->port))) {
696 /* HIT */
697 return dest;
698 }
699
700 /*
701 * Try to purge the destination from trash if not referenced
702 */
703 if (atomic_read(&dest->refcnt) == 1) {
Julius Volz7937df12008-09-02 15:55:48 +0200704 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
705 "from trash\n",
706 dest->vfwmark,
707 IP_VS_DBG_ADDR(svc->af, &dest->addr),
708 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 list_del(&dest->n_list);
710 ip_vs_dst_reset(dest);
711 __ip_vs_unbind_svc(dest);
712 kfree(dest);
713 }
714 }
715
716 return NULL;
717}
718
719
720/*
721 * Clean up all the destinations in the trash
722 * Called by the ip_vs_control_cleanup()
723 *
724 * When the ip_vs_control_clearup is activated by ipvs module exit,
725 * the service tables must have been flushed and all the connections
726 * are expired, and the refcnt of each destination in the trash must
727 * be 1, so we simply release them here.
728 */
729static void ip_vs_trash_cleanup(void)
730{
731 struct ip_vs_dest *dest, *nxt;
732
733 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
734 list_del(&dest->n_list);
735 ip_vs_dst_reset(dest);
736 __ip_vs_unbind_svc(dest);
737 kfree(dest);
738 }
739}
740
741
742static void
743ip_vs_zero_stats(struct ip_vs_stats *stats)
744{
745 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000746
747 stats->conns = 0;
748 stats->inpkts = 0;
749 stats->outpkts = 0;
750 stats->inbytes = 0;
751 stats->outbytes = 0;
752
753 stats->cps = 0;
754 stats->inpps = 0;
755 stats->outpps = 0;
756 stats->inbps = 0;
757 stats->outbps = 0;
758
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000760
Sven Wegener3a14a3132008-08-10 18:24:41 +0000761 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762}
763
764/*
765 * Update a destination in the given service
766 */
767static void
768__ip_vs_update_dest(struct ip_vs_service *svc,
Julius Volzc860c6b2008-09-02 15:55:36 +0200769 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770{
771 int conn_flags;
772
773 /* set the weight and the flags */
774 atomic_set(&dest->weight, udest->weight);
775 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
776
777 /* check if local node and update the flags */
Vince Busam09571c72008-09-02 15:55:52 +0200778#ifdef CONFIG_IP_VS_IPV6
779 if (svc->af == AF_INET6) {
780 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
781 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
782 | IP_VS_CONN_F_LOCALNODE;
783 }
784 } else
785#endif
786 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
787 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
788 | IP_VS_CONN_F_LOCALNODE;
789 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790
791 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
792 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
793 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
794 } else {
795 /*
796 * Put the real service in ip_vs_rtable if not present.
797 * For now only for NAT!
798 */
799 write_lock_bh(&__ip_vs_rs_lock);
800 ip_vs_rs_hash(dest);
801 write_unlock_bh(&__ip_vs_rs_lock);
802 }
803 atomic_set(&dest->conn_flags, conn_flags);
804
805 /* bind the service */
806 if (!dest->svc) {
807 __ip_vs_bind_svc(dest, svc);
808 } else {
809 if (dest->svc != svc) {
810 __ip_vs_unbind_svc(dest);
811 ip_vs_zero_stats(&dest->stats);
812 __ip_vs_bind_svc(dest, svc);
813 }
814 }
815
816 /* set the dest status flags */
817 dest->flags |= IP_VS_DEST_F_AVAILABLE;
818
819 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
820 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
821 dest->u_threshold = udest->u_threshold;
822 dest->l_threshold = udest->l_threshold;
823}
824
825
826/*
827 * Create a destination for the given service
828 */
829static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200830ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831 struct ip_vs_dest **dest_p)
832{
833 struct ip_vs_dest *dest;
834 unsigned atype;
835
836 EnterFunction(2);
837
Vince Busam09571c72008-09-02 15:55:52 +0200838#ifdef CONFIG_IP_VS_IPV6
839 if (svc->af == AF_INET6) {
840 atype = ipv6_addr_type(&udest->addr.in6);
Sven Wegener3bfb92f2008-09-05 16:53:49 +0200841 if ((!(atype & IPV6_ADDR_UNICAST) ||
842 atype & IPV6_ADDR_LINKLOCAL) &&
Vince Busam09571c72008-09-02 15:55:52 +0200843 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
844 return -EINVAL;
845 } else
846#endif
847 {
848 atype = inet_addr_type(&init_net, udest->addr.ip);
849 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
850 return -EINVAL;
851 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700852
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700853 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854 if (dest == NULL) {
855 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
856 return -ENOMEM;
857 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858
Julius Volzc860c6b2008-09-02 15:55:36 +0200859 dest->af = svc->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860 dest->protocol = svc->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +0200861 dest->vaddr = svc->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862 dest->vport = svc->port;
863 dest->vfwmark = svc->fwmark;
Julius Volzc860c6b2008-09-02 15:55:36 +0200864 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865 dest->port = udest->port;
866
867 atomic_set(&dest->activeconns, 0);
868 atomic_set(&dest->inactconns, 0);
869 atomic_set(&dest->persistconns, 0);
870 atomic_set(&dest->refcnt, 0);
871
872 INIT_LIST_HEAD(&dest->d_list);
873 spin_lock_init(&dest->dst_lock);
874 spin_lock_init(&dest->stats.lock);
875 __ip_vs_update_dest(svc, dest, udest);
876 ip_vs_new_estimator(&dest->stats);
877
878 *dest_p = dest;
879
880 LeaveFunction(2);
881 return 0;
882}
883
884
885/*
886 * Add a destination into an existing service
887 */
888static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200889ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890{
891 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200892 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700893 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700894 int ret;
895
896 EnterFunction(2);
897
898 if (udest->weight < 0) {
899 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
900 return -ERANGE;
901 }
902
903 if (udest->l_threshold > udest->u_threshold) {
904 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
905 "upper threshold\n");
906 return -ERANGE;
907 }
908
Julius Volzc860c6b2008-09-02 15:55:36 +0200909 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
910
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911 /*
912 * Check if the dest already exists in the list
913 */
Julius Volz7937df12008-09-02 15:55:48 +0200914 dest = ip_vs_lookup_dest(svc, &daddr, dport);
915
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916 if (dest != NULL) {
917 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
918 return -EEXIST;
919 }
920
921 /*
922 * Check if the dest already exists in the trash and
923 * is from the same service
924 */
Julius Volz7937df12008-09-02 15:55:48 +0200925 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
926
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927 if (dest != NULL) {
Julius Volzcfc78c52008-09-02 15:55:53 +0200928 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
929 "dest->refcnt=%d, service %u/%s:%u\n",
930 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
931 atomic_read(&dest->refcnt),
932 dest->vfwmark,
933 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
934 ntohs(dest->vport));
935
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 __ip_vs_update_dest(svc, dest, udest);
937
938 /*
939 * Get the destination from the trash
940 */
941 list_del(&dest->n_list);
942
943 ip_vs_new_estimator(&dest->stats);
944
945 write_lock_bh(&__ip_vs_svc_lock);
946
947 /*
948 * Wait until all other svc users go away.
949 */
950 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
951
952 list_add(&dest->n_list, &svc->destinations);
953 svc->num_dests++;
954
955 /* call the update_service function of its scheduler */
Sven Wegener82dfb6f2008-08-11 19:36:06 +0000956 if (svc->scheduler->update_service)
957 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958
959 write_unlock_bh(&__ip_vs_svc_lock);
960 return 0;
961 }
962
963 /*
964 * Allocate and initialize the dest structure
965 */
966 ret = ip_vs_new_dest(svc, udest, &dest);
967 if (ret) {
968 return ret;
969 }
970
971 /*
972 * Add the dest entry into the list
973 */
974 atomic_inc(&dest->refcnt);
975
976 write_lock_bh(&__ip_vs_svc_lock);
977
978 /*
979 * Wait until all other svc users go away.
980 */
981 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
982
983 list_add(&dest->n_list, &svc->destinations);
984 svc->num_dests++;
985
986 /* call the update_service function of its scheduler */
Sven Wegener82dfb6f2008-08-11 19:36:06 +0000987 if (svc->scheduler->update_service)
988 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989
990 write_unlock_bh(&__ip_vs_svc_lock);
991
992 LeaveFunction(2);
993
994 return 0;
995}
996
997
998/*
999 * Edit a destination in the given service
1000 */
1001static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001002ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003{
1004 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +02001005 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -07001006 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007
1008 EnterFunction(2);
1009
1010 if (udest->weight < 0) {
1011 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
1012 return -ERANGE;
1013 }
1014
1015 if (udest->l_threshold > udest->u_threshold) {
1016 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
1017 "upper threshold\n");
1018 return -ERANGE;
1019 }
1020
Julius Volzc860c6b2008-09-02 15:55:36 +02001021 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1022
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023 /*
1024 * Lookup the destination list
1025 */
Julius Volz7937df12008-09-02 15:55:48 +02001026 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1027
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028 if (dest == NULL) {
1029 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
1030 return -ENOENT;
1031 }
1032
1033 __ip_vs_update_dest(svc, dest, udest);
1034
1035 write_lock_bh(&__ip_vs_svc_lock);
1036
1037 /* Wait until all other svc users go away */
Heiko Carstenscae7ca32007-08-10 15:50:30 -07001038 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039
1040 /* call the update_service, because server weight may be changed */
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001041 if (svc->scheduler->update_service)
1042 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043
1044 write_unlock_bh(&__ip_vs_svc_lock);
1045
1046 LeaveFunction(2);
1047
1048 return 0;
1049}
1050
1051
1052/*
1053 * Delete a destination (must be already unlinked from the service)
1054 */
1055static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1056{
1057 ip_vs_kill_estimator(&dest->stats);
1058
1059 /*
1060 * Remove it from the d-linked list with the real services.
1061 */
1062 write_lock_bh(&__ip_vs_rs_lock);
1063 ip_vs_rs_unhash(dest);
1064 write_unlock_bh(&__ip_vs_rs_lock);
1065
1066 /*
1067 * Decrease the refcnt of the dest, and free the dest
1068 * if nobody refers to it (refcnt=0). Otherwise, throw
1069 * the destination into the trash.
1070 */
1071 if (atomic_dec_and_test(&dest->refcnt)) {
1072 ip_vs_dst_reset(dest);
1073 /* simply decrease svc->refcnt here, let the caller check
1074 and release the service if nobody refers to it.
1075 Only user context can release destination and service,
1076 and only one user context can update virtual service at a
1077 time, so the operation here is OK */
1078 atomic_dec(&dest->svc->refcnt);
1079 kfree(dest);
1080 } else {
Julius Volzcfc78c52008-09-02 15:55:53 +02001081 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1082 "dest->refcnt=%d\n",
1083 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1084 ntohs(dest->port),
1085 atomic_read(&dest->refcnt));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086 list_add(&dest->n_list, &ip_vs_dest_trash);
1087 atomic_inc(&dest->refcnt);
1088 }
1089}
1090
1091
1092/*
1093 * Unlink a destination from the given service
1094 */
1095static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1096 struct ip_vs_dest *dest,
1097 int svcupd)
1098{
1099 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1100
1101 /*
1102 * Remove it from the d-linked destination list.
1103 */
1104 list_del(&dest->n_list);
1105 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001106
1107 /*
1108 * Call the update_service function of its scheduler
1109 */
1110 if (svcupd && svc->scheduler->update_service)
1111 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112}
1113
1114
1115/*
1116 * Delete a destination server in the given service
1117 */
1118static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001119ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120{
1121 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -07001122 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123
1124 EnterFunction(2);
1125
Julius Volz7937df12008-09-02 15:55:48 +02001126 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
Julius Volzc860c6b2008-09-02 15:55:36 +02001127
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128 if (dest == NULL) {
1129 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1130 return -ENOENT;
1131 }
1132
1133 write_lock_bh(&__ip_vs_svc_lock);
1134
1135 /*
1136 * Wait until all other svc users go away.
1137 */
1138 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1139
1140 /*
1141 * Unlink dest from the service
1142 */
1143 __ip_vs_unlink_dest(svc, dest, 1);
1144
1145 write_unlock_bh(&__ip_vs_svc_lock);
1146
1147 /*
1148 * Delete the destination
1149 */
1150 __ip_vs_del_dest(dest);
1151
1152 LeaveFunction(2);
1153
1154 return 0;
1155}
1156
1157
1158/*
1159 * Add a service into the service hash table
1160 */
1161static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001162ip_vs_add_service(struct ip_vs_service_user_kern *u,
1163 struct ip_vs_service **svc_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164{
1165 int ret = 0;
1166 struct ip_vs_scheduler *sched = NULL;
1167 struct ip_vs_service *svc = NULL;
1168
1169 /* increase the module use count */
1170 ip_vs_use_count_inc();
1171
1172 /* Lookup the scheduler by 'u->sched_name' */
1173 sched = ip_vs_scheduler_get(u->sched_name);
1174 if (sched == NULL) {
1175 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1176 u->sched_name);
1177 ret = -ENOENT;
1178 goto out_mod_dec;
1179 }
1180
Julius Volzf94fd042008-09-02 15:55:55 +02001181#ifdef CONFIG_IP_VS_IPV6
1182 if (u->af == AF_INET6) {
1183 if (!sched->supports_ipv6) {
1184 ret = -EAFNOSUPPORT;
1185 goto out_err;
1186 }
1187 if ((u->netmask < 1) || (u->netmask > 128)) {
1188 ret = -EINVAL;
1189 goto out_err;
1190 }
1191 }
1192#endif
1193
Panagiotis Issaris0da974f2006-07-21 14:51:30 -07001194 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 if (svc == NULL) {
1196 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1197 ret = -ENOMEM;
1198 goto out_err;
1199 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200
1201 /* I'm the first user of the service */
1202 atomic_set(&svc->usecnt, 1);
1203 atomic_set(&svc->refcnt, 0);
1204
Julius Volzc860c6b2008-09-02 15:55:36 +02001205 svc->af = u->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206 svc->protocol = u->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +02001207 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208 svc->port = u->port;
1209 svc->fwmark = u->fwmark;
1210 svc->flags = u->flags;
1211 svc->timeout = u->timeout * HZ;
1212 svc->netmask = u->netmask;
1213
1214 INIT_LIST_HEAD(&svc->destinations);
1215 rwlock_init(&svc->sched_lock);
1216 spin_lock_init(&svc->stats.lock);
1217
1218 /* Bind the scheduler */
1219 ret = ip_vs_bind_scheduler(svc, sched);
1220 if (ret)
1221 goto out_err;
1222 sched = NULL;
1223
1224 /* Update the virtual service counters */
1225 if (svc->port == FTPPORT)
1226 atomic_inc(&ip_vs_ftpsvc_counter);
1227 else if (svc->port == 0)
1228 atomic_inc(&ip_vs_nullsvc_counter);
1229
1230 ip_vs_new_estimator(&svc->stats);
Julius Volzf94fd042008-09-02 15:55:55 +02001231
1232 /* Count only IPv4 services for old get/setsockopt interface */
1233 if (svc->af == AF_INET)
1234 ip_vs_num_services++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235
1236 /* Hash the service into the service table */
1237 write_lock_bh(&__ip_vs_svc_lock);
1238 ip_vs_svc_hash(svc);
1239 write_unlock_bh(&__ip_vs_svc_lock);
1240
1241 *svc_p = svc;
1242 return 0;
1243
1244 out_err:
1245 if (svc != NULL) {
1246 if (svc->scheduler)
1247 ip_vs_unbind_scheduler(svc);
1248 if (svc->inc) {
1249 local_bh_disable();
1250 ip_vs_app_inc_put(svc->inc);
1251 local_bh_enable();
1252 }
1253 kfree(svc);
1254 }
1255 ip_vs_scheduler_put(sched);
1256
1257 out_mod_dec:
1258 /* decrease the module use count */
1259 ip_vs_use_count_dec();
1260
1261 return ret;
1262}
1263
1264
1265/*
1266 * Edit a service and bind it with a new scheduler
1267 */
1268static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001269ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270{
1271 struct ip_vs_scheduler *sched, *old_sched;
1272 int ret = 0;
1273
1274 /*
1275 * Lookup the scheduler, by 'u->sched_name'
1276 */
1277 sched = ip_vs_scheduler_get(u->sched_name);
1278 if (sched == NULL) {
1279 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1280 u->sched_name);
1281 return -ENOENT;
1282 }
1283 old_sched = sched;
1284
Julius Volzf94fd042008-09-02 15:55:55 +02001285#ifdef CONFIG_IP_VS_IPV6
1286 if (u->af == AF_INET6) {
1287 if (!sched->supports_ipv6) {
Sven Wegenera5ba4bf2008-09-05 13:47:37 +02001288 ret = -EAFNOSUPPORT;
Julius Volzf94fd042008-09-02 15:55:55 +02001289 goto out;
1290 }
1291 if ((u->netmask < 1) || (u->netmask > 128)) {
Sven Wegenera5ba4bf2008-09-05 13:47:37 +02001292 ret = -EINVAL;
Julius Volzf94fd042008-09-02 15:55:55 +02001293 goto out;
1294 }
1295 }
1296#endif
1297
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298 write_lock_bh(&__ip_vs_svc_lock);
1299
1300 /*
1301 * Wait until all other svc users go away.
1302 */
1303 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1304
1305 /*
1306 * Set the flags and timeout value
1307 */
1308 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1309 svc->timeout = u->timeout * HZ;
1310 svc->netmask = u->netmask;
1311
1312 old_sched = svc->scheduler;
1313 if (sched != old_sched) {
1314 /*
1315 * Unbind the old scheduler
1316 */
1317 if ((ret = ip_vs_unbind_scheduler(svc))) {
1318 old_sched = sched;
1319 goto out;
1320 }
1321
1322 /*
1323 * Bind the new scheduler
1324 */
1325 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1326 /*
1327 * If ip_vs_bind_scheduler fails, restore the old
1328 * scheduler.
1329 * The main reason of failure is out of memory.
1330 *
1331 * The question is if the old scheduler can be
1332 * restored all the time. TODO: if it cannot be
1333 * restored some time, we must delete the service,
1334 * otherwise the system may crash.
1335 */
1336 ip_vs_bind_scheduler(svc, old_sched);
1337 old_sched = sched;
1338 goto out;
1339 }
1340 }
1341
1342 out:
1343 write_unlock_bh(&__ip_vs_svc_lock);
1344
1345 if (old_sched)
1346 ip_vs_scheduler_put(old_sched);
1347
1348 return ret;
1349}
1350
1351
1352/*
1353 * Delete a service from the service list
1354 * - The service must be unlinked, unlocked and not referenced!
1355 * - We are called under _bh lock
1356 */
1357static void __ip_vs_del_service(struct ip_vs_service *svc)
1358{
1359 struct ip_vs_dest *dest, *nxt;
1360 struct ip_vs_scheduler *old_sched;
1361
Julius Volzf94fd042008-09-02 15:55:55 +02001362 /* Count only IPv4 services for old get/setsockopt interface */
1363 if (svc->af == AF_INET)
1364 ip_vs_num_services--;
1365
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366 ip_vs_kill_estimator(&svc->stats);
1367
1368 /* Unbind scheduler */
1369 old_sched = svc->scheduler;
1370 ip_vs_unbind_scheduler(svc);
1371 if (old_sched)
1372 ip_vs_scheduler_put(old_sched);
1373
1374 /* Unbind app inc */
1375 if (svc->inc) {
1376 ip_vs_app_inc_put(svc->inc);
1377 svc->inc = NULL;
1378 }
1379
1380 /*
1381 * Unlink the whole destination list
1382 */
1383 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1384 __ip_vs_unlink_dest(svc, dest, 0);
1385 __ip_vs_del_dest(dest);
1386 }
1387
1388 /*
1389 * Update the virtual service counters
1390 */
1391 if (svc->port == FTPPORT)
1392 atomic_dec(&ip_vs_ftpsvc_counter);
1393 else if (svc->port == 0)
1394 atomic_dec(&ip_vs_nullsvc_counter);
1395
1396 /*
1397 * Free the service if nobody refers to it
1398 */
1399 if (atomic_read(&svc->refcnt) == 0)
1400 kfree(svc);
1401
1402 /* decrease the module use count */
1403 ip_vs_use_count_dec();
1404}
1405
1406/*
1407 * Delete a service from the service list
1408 */
1409static int ip_vs_del_service(struct ip_vs_service *svc)
1410{
1411 if (svc == NULL)
1412 return -EEXIST;
1413
1414 /*
1415 * Unhash it from the service table
1416 */
1417 write_lock_bh(&__ip_vs_svc_lock);
1418
1419 ip_vs_svc_unhash(svc);
1420
1421 /*
1422 * Wait until all the svc users go away.
1423 */
1424 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1425
1426 __ip_vs_del_service(svc);
1427
1428 write_unlock_bh(&__ip_vs_svc_lock);
1429
1430 return 0;
1431}
1432
1433
1434/*
1435 * Flush all the virtual services
1436 */
1437static int ip_vs_flush(void)
1438{
1439 int idx;
1440 struct ip_vs_service *svc, *nxt;
1441
1442 /*
1443 * Flush the service table hashed by <protocol,addr,port>
1444 */
1445 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1446 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1447 write_lock_bh(&__ip_vs_svc_lock);
1448 ip_vs_svc_unhash(svc);
1449 /*
1450 * Wait until all the svc users go away.
1451 */
1452 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1453 __ip_vs_del_service(svc);
1454 write_unlock_bh(&__ip_vs_svc_lock);
1455 }
1456 }
1457
1458 /*
1459 * Flush the service table hashed by fwmark
1460 */
1461 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1462 list_for_each_entry_safe(svc, nxt,
1463 &ip_vs_svc_fwm_table[idx], f_list) {
1464 write_lock_bh(&__ip_vs_svc_lock);
1465 ip_vs_svc_unhash(svc);
1466 /*
1467 * Wait until all the svc users go away.
1468 */
1469 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1470 __ip_vs_del_service(svc);
1471 write_unlock_bh(&__ip_vs_svc_lock);
1472 }
1473 }
1474
1475 return 0;
1476}
1477
1478
1479/*
1480 * Zero counters in a service or all services
1481 */
1482static int ip_vs_zero_service(struct ip_vs_service *svc)
1483{
1484 struct ip_vs_dest *dest;
1485
1486 write_lock_bh(&__ip_vs_svc_lock);
1487 list_for_each_entry(dest, &svc->destinations, n_list) {
1488 ip_vs_zero_stats(&dest->stats);
1489 }
1490 ip_vs_zero_stats(&svc->stats);
1491 write_unlock_bh(&__ip_vs_svc_lock);
1492 return 0;
1493}
1494
1495static int ip_vs_zero_all(void)
1496{
1497 int idx;
1498 struct ip_vs_service *svc;
1499
1500 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1501 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1502 ip_vs_zero_service(svc);
1503 }
1504 }
1505
1506 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1507 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1508 ip_vs_zero_service(svc);
1509 }
1510 }
1511
1512 ip_vs_zero_stats(&ip_vs_stats);
1513 return 0;
1514}
1515
1516
1517static int
1518proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1519 void __user *buffer, size_t *lenp, loff_t *ppos)
1520{
1521 int *valp = table->data;
1522 int val = *valp;
1523 int rc;
1524
1525 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1526 if (write && (*valp != val)) {
1527 if ((*valp < 0) || (*valp > 3)) {
1528 /* Restore the correct value */
1529 *valp = val;
1530 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001531 update_defense_level();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 }
1533 }
1534 return rc;
1535}
1536
1537
1538static int
1539proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1540 void __user *buffer, size_t *lenp, loff_t *ppos)
1541{
1542 int *valp = table->data;
1543 int val[2];
1544 int rc;
1545
1546 /* backup the value first */
1547 memcpy(val, valp, sizeof(val));
1548
1549 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1550 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1551 /* Restore the correct value */
1552 memcpy(valp, val, sizeof(val));
1553 }
1554 return rc;
1555}
1556
1557
1558/*
1559 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1560 */
1561
1562static struct ctl_table vs_vars[] = {
1563 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564 .procname = "amemthresh",
1565 .data = &sysctl_ip_vs_amemthresh,
1566 .maxlen = sizeof(int),
1567 .mode = 0644,
1568 .proc_handler = &proc_dointvec,
1569 },
1570#ifdef CONFIG_IP_VS_DEBUG
1571 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 .procname = "debug_level",
1573 .data = &sysctl_ip_vs_debug_level,
1574 .maxlen = sizeof(int),
1575 .mode = 0644,
1576 .proc_handler = &proc_dointvec,
1577 },
1578#endif
1579 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580 .procname = "am_droprate",
1581 .data = &sysctl_ip_vs_am_droprate,
1582 .maxlen = sizeof(int),
1583 .mode = 0644,
1584 .proc_handler = &proc_dointvec,
1585 },
1586 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001587 .procname = "drop_entry",
1588 .data = &sysctl_ip_vs_drop_entry,
1589 .maxlen = sizeof(int),
1590 .mode = 0644,
1591 .proc_handler = &proc_do_defense_mode,
1592 },
1593 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 .procname = "drop_packet",
1595 .data = &sysctl_ip_vs_drop_packet,
1596 .maxlen = sizeof(int),
1597 .mode = 0644,
1598 .proc_handler = &proc_do_defense_mode,
1599 },
1600 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001601 .procname = "secure_tcp",
1602 .data = &sysctl_ip_vs_secure_tcp,
1603 .maxlen = sizeof(int),
1604 .mode = 0644,
1605 .proc_handler = &proc_do_defense_mode,
1606 },
1607#if 0
1608 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 .procname = "timeout_established",
1610 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1611 .maxlen = sizeof(int),
1612 .mode = 0644,
1613 .proc_handler = &proc_dointvec_jiffies,
1614 },
1615 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616 .procname = "timeout_synsent",
1617 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1618 .maxlen = sizeof(int),
1619 .mode = 0644,
1620 .proc_handler = &proc_dointvec_jiffies,
1621 },
1622 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623 .procname = "timeout_synrecv",
1624 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1625 .maxlen = sizeof(int),
1626 .mode = 0644,
1627 .proc_handler = &proc_dointvec_jiffies,
1628 },
1629 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630 .procname = "timeout_finwait",
1631 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1632 .maxlen = sizeof(int),
1633 .mode = 0644,
1634 .proc_handler = &proc_dointvec_jiffies,
1635 },
1636 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001637 .procname = "timeout_timewait",
1638 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1639 .maxlen = sizeof(int),
1640 .mode = 0644,
1641 .proc_handler = &proc_dointvec_jiffies,
1642 },
1643 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644 .procname = "timeout_close",
1645 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1646 .maxlen = sizeof(int),
1647 .mode = 0644,
1648 .proc_handler = &proc_dointvec_jiffies,
1649 },
1650 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651 .procname = "timeout_closewait",
1652 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1653 .maxlen = sizeof(int),
1654 .mode = 0644,
1655 .proc_handler = &proc_dointvec_jiffies,
1656 },
1657 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658 .procname = "timeout_lastack",
1659 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1660 .maxlen = sizeof(int),
1661 .mode = 0644,
1662 .proc_handler = &proc_dointvec_jiffies,
1663 },
1664 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665 .procname = "timeout_listen",
1666 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1667 .maxlen = sizeof(int),
1668 .mode = 0644,
1669 .proc_handler = &proc_dointvec_jiffies,
1670 },
1671 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672 .procname = "timeout_synack",
1673 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1674 .maxlen = sizeof(int),
1675 .mode = 0644,
1676 .proc_handler = &proc_dointvec_jiffies,
1677 },
1678 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 .procname = "timeout_udp",
1680 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1681 .maxlen = sizeof(int),
1682 .mode = 0644,
1683 .proc_handler = &proc_dointvec_jiffies,
1684 },
1685 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 .procname = "timeout_icmp",
1687 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1688 .maxlen = sizeof(int),
1689 .mode = 0644,
1690 .proc_handler = &proc_dointvec_jiffies,
1691 },
1692#endif
1693 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694 .procname = "cache_bypass",
1695 .data = &sysctl_ip_vs_cache_bypass,
1696 .maxlen = sizeof(int),
1697 .mode = 0644,
1698 .proc_handler = &proc_dointvec,
1699 },
1700 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701 .procname = "expire_nodest_conn",
1702 .data = &sysctl_ip_vs_expire_nodest_conn,
1703 .maxlen = sizeof(int),
1704 .mode = 0644,
1705 .proc_handler = &proc_dointvec,
1706 },
1707 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 .procname = "expire_quiescent_template",
1709 .data = &sysctl_ip_vs_expire_quiescent_template,
1710 .maxlen = sizeof(int),
1711 .mode = 0644,
1712 .proc_handler = &proc_dointvec,
1713 },
1714 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715 .procname = "sync_threshold",
1716 .data = &sysctl_ip_vs_sync_threshold,
1717 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1718 .mode = 0644,
1719 .proc_handler = &proc_do_sync_threshold,
1720 },
1721 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001722 .procname = "nat_icmp_send",
1723 .data = &sysctl_ip_vs_nat_icmp_send,
1724 .maxlen = sizeof(int),
1725 .mode = 0644,
1726 .proc_handler = &proc_dointvec,
1727 },
1728 { .ctl_name = 0 }
1729};
1730
Sven Wegener5587da52008-08-10 18:24:40 +00001731const struct ctl_path net_vs_ctl_path[] = {
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001732 { .procname = "net", .ctl_name = CTL_NET, },
1733 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1734 { .procname = "vs", },
1735 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001737EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738
1739static struct ctl_table_header * sysctl_header;
1740
1741#ifdef CONFIG_PROC_FS
1742
/* Per-open iteration state of the service listing seq_file */
struct ip_vs_iter {
	/* table being walked: ip_vs_svc_table or ip_vs_svc_fwm_table */
	struct list_head	*table;
	/* index of the current hash bucket within that table */
	int			bucket;
};
1747
1748/*
1749 * Write the contents of the VS rule table to a PROCfs file.
1750 * (It is kept just for backward compatibility)
1751 */
1752static inline const char *ip_vs_fwd_name(unsigned flags)
1753{
1754 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1755 case IP_VS_CONN_F_LOCALNODE:
1756 return "Local";
1757 case IP_VS_CONN_F_TUNNEL:
1758 return "Tunnel";
1759 case IP_VS_CONN_F_DROUTE:
1760 return "Route";
1761 default:
1762 return "Masq";
1763 }
1764}
1765
1766
1767/* Get the Nth entry in the two lists */
1768static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1769{
1770 struct ip_vs_iter *iter = seq->private;
1771 int idx;
1772 struct ip_vs_service *svc;
1773
1774 /* look in hash by protocol */
1775 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1776 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1777 if (pos-- == 0){
1778 iter->table = ip_vs_svc_table;
1779 iter->bucket = idx;
1780 return svc;
1781 }
1782 }
1783 }
1784
1785 /* keep looking in fwmark */
1786 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1787 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1788 if (pos-- == 0) {
1789 iter->table = ip_vs_svc_fwm_table;
1790 iter->bucket = idx;
1791 return svc;
1792 }
1793 }
1794 }
1795
1796 return NULL;
1797}
1798
1799static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1800{
1801
1802 read_lock_bh(&__ip_vs_svc_lock);
1803 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1804}
1805
1806
1807static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1808{
1809 struct list_head *e;
1810 struct ip_vs_iter *iter;
1811 struct ip_vs_service *svc;
1812
1813 ++*pos;
1814 if (v == SEQ_START_TOKEN)
1815 return ip_vs_info_array(seq,0);
1816
1817 svc = v;
1818 iter = seq->private;
1819
1820 if (iter->table == ip_vs_svc_table) {
1821 /* next service in table hashed by protocol */
1822 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1823 return list_entry(e, struct ip_vs_service, s_list);
1824
1825
1826 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1827 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1828 s_list) {
1829 return svc;
1830 }
1831 }
1832
1833 iter->table = ip_vs_svc_fwm_table;
1834 iter->bucket = -1;
1835 goto scan_fwmark;
1836 }
1837
1838 /* next service in hashed by fwmark */
1839 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1840 return list_entry(e, struct ip_vs_service, f_list);
1841
1842 scan_fwmark:
1843 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1844 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1845 f_list)
1846 return svc;
1847 }
1848
1849 return NULL;
1850}
1851
1852static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1853{
1854 read_unlock_bh(&__ip_vs_svc_lock);
1855}
1856
1857
1858static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1859{
1860 if (v == SEQ_START_TOKEN) {
1861 seq_printf(seq,
1862 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1863 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1864 seq_puts(seq,
1865 "Prot LocalAddress:Port Scheduler Flags\n");
1866 seq_puts(seq,
1867 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1868 } else {
1869 const struct ip_vs_service *svc = v;
1870 const struct ip_vs_iter *iter = seq->private;
1871 const struct ip_vs_dest *dest;
1872
Vince Busam667a5f12008-09-02 15:55:49 +02001873 if (iter->table == ip_vs_svc_table) {
1874#ifdef CONFIG_IP_VS_IPV6
1875 if (svc->af == AF_INET6)
1876 seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
1877 ip_vs_proto_name(svc->protocol),
1878 NIP6(svc->addr.in6),
1879 ntohs(svc->port),
1880 svc->scheduler->name);
1881 else
1882#endif
1883 seq_printf(seq, "%s %08X:%04X %s ",
1884 ip_vs_proto_name(svc->protocol),
1885 ntohl(svc->addr.ip),
1886 ntohs(svc->port),
1887 svc->scheduler->name);
1888 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001889 seq_printf(seq, "FWM %08X %s ",
1890 svc->fwmark, svc->scheduler->name);
Vince Busam667a5f12008-09-02 15:55:49 +02001891 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001892
1893 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1894 seq_printf(seq, "persistent %d %08X\n",
1895 svc->timeout,
1896 ntohl(svc->netmask));
1897 else
1898 seq_putc(seq, '\n');
1899
1900 list_for_each_entry(dest, &svc->destinations, n_list) {
Vince Busam667a5f12008-09-02 15:55:49 +02001901#ifdef CONFIG_IP_VS_IPV6
1902 if (dest->af == AF_INET6)
1903 seq_printf(seq,
1904 " -> [" NIP6_FMT "]:%04X"
1905 " %-7s %-6d %-10d %-10d\n",
1906 NIP6(dest->addr.in6),
1907 ntohs(dest->port),
1908 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1909 atomic_read(&dest->weight),
1910 atomic_read(&dest->activeconns),
1911 atomic_read(&dest->inactconns));
1912 else
1913#endif
1914 seq_printf(seq,
1915 " -> %08X:%04X "
1916 "%-7s %-6d %-10d %-10d\n",
1917 ntohl(dest->addr.ip),
1918 ntohs(dest->port),
1919 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1920 atomic_read(&dest->weight),
1921 atomic_read(&dest->activeconns),
1922 atomic_read(&dest->inactconns));
1923
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924 }
1925 }
1926 return 0;
1927}
1928
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001929static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001930 .start = ip_vs_info_seq_start,
1931 .next = ip_vs_info_seq_next,
1932 .stop = ip_vs_info_seq_stop,
1933 .show = ip_vs_info_seq_show,
1934};
1935
1936static int ip_vs_info_open(struct inode *inode, struct file *file)
1937{
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001938 return seq_open_private(file, &ip_vs_info_seq_ops,
1939 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940}
1941
Arjan van de Ven9a321442007-02-12 00:55:35 -08001942static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001943 .owner = THIS_MODULE,
1944 .open = ip_vs_info_open,
1945 .read = seq_read,
1946 .llseek = seq_lseek,
1947 .release = seq_release_private,
1948};
1949
1950#endif
1951
Sven Wegener519e49e2008-08-10 18:24:41 +00001952struct ip_vs_stats ip_vs_stats = {
1953 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1954};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955
1956#ifdef CONFIG_PROC_FS
1957static int ip_vs_stats_show(struct seq_file *seq, void *v)
1958{
1959
1960/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1961 seq_puts(seq,
1962 " Total Incoming Outgoing Incoming Outgoing\n");
1963 seq_printf(seq,
1964 " Conns Packets Packets Bytes Bytes\n");
1965
1966 spin_lock_bh(&ip_vs_stats.lock);
1967 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1968 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1969 (unsigned long long) ip_vs_stats.inbytes,
1970 (unsigned long long) ip_vs_stats.outbytes);
1971
1972/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1973 seq_puts(seq,
1974 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1975 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1976 ip_vs_stats.cps,
1977 ip_vs_stats.inpps,
1978 ip_vs_stats.outpps,
1979 ip_vs_stats.inbps,
1980 ip_vs_stats.outbps);
1981 spin_unlock_bh(&ip_vs_stats.lock);
1982
1983 return 0;
1984}
1985
1986static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1987{
1988 return single_open(file, ip_vs_stats_show, NULL);
1989}
1990
Arjan van de Ven9a321442007-02-12 00:55:35 -08001991static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992 .owner = THIS_MODULE,
1993 .open = ip_vs_stats_seq_open,
1994 .read = seq_read,
1995 .llseek = seq_lseek,
1996 .release = single_release,
1997};
1998
1999#endif
2000
2001/*
2002 * Set timeout values for tcp tcpfin udp in the timeout_table.
2003 */
2004static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2005{
2006 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2007 u->tcp_timeout,
2008 u->tcp_fin_timeout,
2009 u->udp_timeout);
2010
2011#ifdef CONFIG_IP_VS_PROTO_TCP
2012 if (u->tcp_timeout) {
2013 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2014 = u->tcp_timeout * HZ;
2015 }
2016
2017 if (u->tcp_fin_timeout) {
2018 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2019 = u->tcp_fin_timeout * HZ;
2020 }
2021#endif
2022
2023#ifdef CONFIG_IP_VS_PROTO_UDP
2024 if (u->udp_timeout) {
2025 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2026 = u->udp_timeout * HZ;
2027 }
2028#endif
2029 return 0;
2030}
2031
2032
2033#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2034#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2035#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2036 sizeof(struct ip_vs_dest_user))
2037#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2038#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2039#define MAX_ARG_LEN SVCDEST_ARG_LEN
2040
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002041static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2043 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2044 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2045 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2046 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2047 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2048 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2049 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2050 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2051 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2052 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2053};
2054
Julius Volzc860c6b2008-09-02 15:55:36 +02002055static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2056 struct ip_vs_service_user *usvc_compat)
2057{
2058 usvc->af = AF_INET;
2059 usvc->protocol = usvc_compat->protocol;
2060 usvc->addr.ip = usvc_compat->addr;
2061 usvc->port = usvc_compat->port;
2062 usvc->fwmark = usvc_compat->fwmark;
2063
2064 /* Deep copy of sched_name is not needed here */
2065 usvc->sched_name = usvc_compat->sched_name;
2066
2067 usvc->flags = usvc_compat->flags;
2068 usvc->timeout = usvc_compat->timeout;
2069 usvc->netmask = usvc_compat->netmask;
2070}
2071
2072static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2073 struct ip_vs_dest_user *udest_compat)
2074{
2075 udest->addr.ip = udest_compat->addr;
2076 udest->port = udest_compat->port;
2077 udest->conn_flags = udest_compat->conn_flags;
2078 udest->weight = udest_compat->weight;
2079 udest->u_threshold = udest_compat->u_threshold;
2080 udest->l_threshold = udest_compat->l_threshold;
2081}
2082
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083static int
2084do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2085{
2086 int ret;
2087 unsigned char arg[MAX_ARG_LEN];
Julius Volzc860c6b2008-09-02 15:55:36 +02002088 struct ip_vs_service_user *usvc_compat;
2089 struct ip_vs_service_user_kern usvc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090 struct ip_vs_service *svc;
Julius Volzc860c6b2008-09-02 15:55:36 +02002091 struct ip_vs_dest_user *udest_compat;
2092 struct ip_vs_dest_user_kern udest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093
2094 if (!capable(CAP_NET_ADMIN))
2095 return -EPERM;
2096
2097 if (len != set_arglen[SET_CMDID(cmd)]) {
2098 IP_VS_ERR("set_ctl: len %u != %u\n",
2099 len, set_arglen[SET_CMDID(cmd)]);
2100 return -EINVAL;
2101 }
2102
2103 if (copy_from_user(arg, user, len) != 0)
2104 return -EFAULT;
2105
2106 /* increase the module use count */
2107 ip_vs_use_count_inc();
2108
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002109 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002110 ret = -ERESTARTSYS;
2111 goto out_dec;
2112 }
2113
2114 if (cmd == IP_VS_SO_SET_FLUSH) {
2115 /* Flush the virtual service */
2116 ret = ip_vs_flush();
2117 goto out_unlock;
2118 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2119 /* Set timeout values for (tcp tcpfin udp) */
2120 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2121 goto out_unlock;
2122 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2123 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2124 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2125 goto out_unlock;
2126 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2127 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2128 ret = stop_sync_thread(dm->state);
2129 goto out_unlock;
2130 }
2131
Julius Volzc860c6b2008-09-02 15:55:36 +02002132 usvc_compat = (struct ip_vs_service_user *)arg;
2133 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2134
2135 /* We only use the new structs internally, so copy userspace compat
2136 * structs to extended internal versions */
2137 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2138 ip_vs_copy_udest_compat(&udest, udest_compat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139
2140 if (cmd == IP_VS_SO_SET_ZERO) {
2141 /* if no service address is set, zero counters in all */
Julius Volzc860c6b2008-09-02 15:55:36 +02002142 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002143 ret = ip_vs_zero_all();
2144 goto out_unlock;
2145 }
2146 }
2147
2148 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
Julius Volzc860c6b2008-09-02 15:55:36 +02002149 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002150 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
Julius Volzc860c6b2008-09-02 15:55:36 +02002151 usvc.protocol, NIPQUAD(usvc.addr.ip),
2152 ntohs(usvc.port), usvc.sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153 ret = -EFAULT;
2154 goto out_unlock;
2155 }
2156
2157 /* Lookup the exact service by <protocol, addr, port> or fwmark */
Julius Volzc860c6b2008-09-02 15:55:36 +02002158 if (usvc.fwmark == 0)
Julius Volzb18610d2008-09-02 15:55:37 +02002159 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2160 &usvc.addr, usvc.port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002161 else
Julius Volzb18610d2008-09-02 15:55:37 +02002162 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002163
2164 if (cmd != IP_VS_SO_SET_ADD
Julius Volzc860c6b2008-09-02 15:55:36 +02002165 && (svc == NULL || svc->protocol != usvc.protocol)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002166 ret = -ESRCH;
2167 goto out_unlock;
2168 }
2169
2170 switch (cmd) {
2171 case IP_VS_SO_SET_ADD:
2172 if (svc != NULL)
2173 ret = -EEXIST;
2174 else
Julius Volzc860c6b2008-09-02 15:55:36 +02002175 ret = ip_vs_add_service(&usvc, &svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002176 break;
2177 case IP_VS_SO_SET_EDIT:
Julius Volzc860c6b2008-09-02 15:55:36 +02002178 ret = ip_vs_edit_service(svc, &usvc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179 break;
2180 case IP_VS_SO_SET_DEL:
2181 ret = ip_vs_del_service(svc);
2182 if (!ret)
2183 goto out_unlock;
2184 break;
2185 case IP_VS_SO_SET_ZERO:
2186 ret = ip_vs_zero_service(svc);
2187 break;
2188 case IP_VS_SO_SET_ADDDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002189 ret = ip_vs_add_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190 break;
2191 case IP_VS_SO_SET_EDITDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002192 ret = ip_vs_edit_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193 break;
2194 case IP_VS_SO_SET_DELDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002195 ret = ip_vs_del_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002196 break;
2197 default:
2198 ret = -EINVAL;
2199 }
2200
2201 if (svc)
2202 ip_vs_service_put(svc);
2203
2204 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002205 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206 out_dec:
2207 /* decrease the module use count */
2208 ip_vs_use_count_dec();
2209
2210 return ret;
2211}
2212
2213
2214static void
2215ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2216{
2217 spin_lock_bh(&src->lock);
2218 memcpy(dst, src, (char*)&src->lock - (char*)src);
2219 spin_unlock_bh(&src->lock);
2220}
2221
2222static void
2223ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2224{
2225 dst->protocol = src->protocol;
Julius Volze7ade462008-09-02 15:55:33 +02002226 dst->addr = src->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002227 dst->port = src->port;
2228 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002229 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 dst->flags = src->flags;
2231 dst->timeout = src->timeout / HZ;
2232 dst->netmask = src->netmask;
2233 dst->num_dests = src->num_dests;
2234 ip_vs_copy_stats(&dst->stats, &src->stats);
2235}
2236
2237static inline int
2238__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2239 struct ip_vs_get_services __user *uptr)
2240{
2241 int idx, count=0;
2242 struct ip_vs_service *svc;
2243 struct ip_vs_service_entry entry;
2244 int ret = 0;
2245
2246 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2247 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002248 /* Only expose IPv4 entries to old interface */
2249 if (svc->af != AF_INET)
2250 continue;
2251
Linus Torvalds1da177e2005-04-16 15:20:36 -07002252 if (count >= get->num_services)
2253 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002254 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002255 ip_vs_copy_service(&entry, svc);
2256 if (copy_to_user(&uptr->entrytable[count],
2257 &entry, sizeof(entry))) {
2258 ret = -EFAULT;
2259 goto out;
2260 }
2261 count++;
2262 }
2263 }
2264
2265 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2266 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002267 /* Only expose IPv4 entries to old interface */
2268 if (svc->af != AF_INET)
2269 continue;
2270
Linus Torvalds1da177e2005-04-16 15:20:36 -07002271 if (count >= get->num_services)
2272 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002273 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274 ip_vs_copy_service(&entry, svc);
2275 if (copy_to_user(&uptr->entrytable[count],
2276 &entry, sizeof(entry))) {
2277 ret = -EFAULT;
2278 goto out;
2279 }
2280 count++;
2281 }
2282 }
2283 out:
2284 return ret;
2285}
2286
2287static inline int
2288__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2289 struct ip_vs_get_dests __user *uptr)
2290{
2291 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002292 union nf_inet_addr addr = { .ip = get->addr };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002293 int ret = 0;
2294
2295 if (get->fwmark)
Julius Volzb18610d2008-09-02 15:55:37 +02002296 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002297 else
Julius Volzb18610d2008-09-02 15:55:37 +02002298 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2299 get->port);
2300
Linus Torvalds1da177e2005-04-16 15:20:36 -07002301 if (svc) {
2302 int count = 0;
2303 struct ip_vs_dest *dest;
2304 struct ip_vs_dest_entry entry;
2305
2306 list_for_each_entry(dest, &svc->destinations, n_list) {
2307 if (count >= get->num_dests)
2308 break;
2309
Julius Volze7ade462008-09-02 15:55:33 +02002310 entry.addr = dest->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311 entry.port = dest->port;
2312 entry.conn_flags = atomic_read(&dest->conn_flags);
2313 entry.weight = atomic_read(&dest->weight);
2314 entry.u_threshold = dest->u_threshold;
2315 entry.l_threshold = dest->l_threshold;
2316 entry.activeconns = atomic_read(&dest->activeconns);
2317 entry.inactconns = atomic_read(&dest->inactconns);
2318 entry.persistconns = atomic_read(&dest->persistconns);
2319 ip_vs_copy_stats(&entry.stats, &dest->stats);
2320 if (copy_to_user(&uptr->entrytable[count],
2321 &entry, sizeof(entry))) {
2322 ret = -EFAULT;
2323 break;
2324 }
2325 count++;
2326 }
2327 ip_vs_service_put(svc);
2328 } else
2329 ret = -ESRCH;
2330 return ret;
2331}
2332
2333static inline void
2334__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2335{
2336#ifdef CONFIG_IP_VS_PROTO_TCP
2337 u->tcp_timeout =
2338 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2339 u->tcp_fin_timeout =
2340 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2341#endif
2342#ifdef CONFIG_IP_VS_PROTO_UDP
2343 u->udp_timeout =
2344 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2345#endif
2346}
2347
2348
/*
 * Argument-length bookkeeping for the getsockopt() interface.
 *
 * GET_CMDID() turns a sockopt command number into a zero-based index
 * (relative to IP_VS_BASE_CTL); the *_ARG_LEN macros give the size of the
 * fixed part of each command's argument.  The macro argument is
 * parenthesized so that any expression can be passed.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
/* two entries: one for the master and one for the backup daemon */
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2356
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002357static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002358 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2359 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2360 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2361 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2362 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2363 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2364 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2365};
2366
2367static int
2368do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2369{
2370 unsigned char arg[128];
2371 int ret = 0;
2372
2373 if (!capable(CAP_NET_ADMIN))
2374 return -EPERM;
2375
2376 if (*len < get_arglen[GET_CMDID(cmd)]) {
2377 IP_VS_ERR("get_ctl: len %u < %u\n",
2378 *len, get_arglen[GET_CMDID(cmd)]);
2379 return -EINVAL;
2380 }
2381
2382 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2383 return -EFAULT;
2384
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002385 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002386 return -ERESTARTSYS;
2387
2388 switch (cmd) {
2389 case IP_VS_SO_GET_VERSION:
2390 {
2391 char buf[64];
2392
2393 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2394 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2395 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2396 ret = -EFAULT;
2397 goto out;
2398 }
2399 *len = strlen(buf)+1;
2400 }
2401 break;
2402
2403 case IP_VS_SO_GET_INFO:
2404 {
2405 struct ip_vs_getinfo info;
2406 info.version = IP_VS_VERSION_CODE;
2407 info.size = IP_VS_CONN_TAB_SIZE;
2408 info.num_services = ip_vs_num_services;
2409 if (copy_to_user(user, &info, sizeof(info)) != 0)
2410 ret = -EFAULT;
2411 }
2412 break;
2413
2414 case IP_VS_SO_GET_SERVICES:
2415 {
2416 struct ip_vs_get_services *get;
2417 int size;
2418
2419 get = (struct ip_vs_get_services *)arg;
2420 size = sizeof(*get) +
2421 sizeof(struct ip_vs_service_entry) * get->num_services;
2422 if (*len != size) {
2423 IP_VS_ERR("length: %u != %u\n", *len, size);
2424 ret = -EINVAL;
2425 goto out;
2426 }
2427 ret = __ip_vs_get_service_entries(get, user);
2428 }
2429 break;
2430
2431 case IP_VS_SO_GET_SERVICE:
2432 {
2433 struct ip_vs_service_entry *entry;
2434 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002435 union nf_inet_addr addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436
2437 entry = (struct ip_vs_service_entry *)arg;
Julius Volzb18610d2008-09-02 15:55:37 +02002438 addr.ip = entry->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002439 if (entry->fwmark)
Julius Volzb18610d2008-09-02 15:55:37 +02002440 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441 else
Julius Volzb18610d2008-09-02 15:55:37 +02002442 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2443 &addr, entry->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002444 if (svc) {
2445 ip_vs_copy_service(entry, svc);
2446 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2447 ret = -EFAULT;
2448 ip_vs_service_put(svc);
2449 } else
2450 ret = -ESRCH;
2451 }
2452 break;
2453
2454 case IP_VS_SO_GET_DESTS:
2455 {
2456 struct ip_vs_get_dests *get;
2457 int size;
2458
2459 get = (struct ip_vs_get_dests *)arg;
2460 size = sizeof(*get) +
2461 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2462 if (*len != size) {
2463 IP_VS_ERR("length: %u != %u\n", *len, size);
2464 ret = -EINVAL;
2465 goto out;
2466 }
2467 ret = __ip_vs_get_dest_entries(get, user);
2468 }
2469 break;
2470
2471 case IP_VS_SO_GET_TIMEOUT:
2472 {
2473 struct ip_vs_timeout_user t;
2474
2475 __ip_vs_get_timeouts(&t);
2476 if (copy_to_user(user, &t, sizeof(t)) != 0)
2477 ret = -EFAULT;
2478 }
2479 break;
2480
2481 case IP_VS_SO_GET_DAEMON:
2482 {
2483 struct ip_vs_daemon_user d[2];
2484
2485 memset(&d, 0, sizeof(d));
2486 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2487 d[0].state = IP_VS_STATE_MASTER;
pageexec4da62fc2005-06-26 16:00:19 -07002488 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489 d[0].syncid = ip_vs_master_syncid;
2490 }
2491 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2492 d[1].state = IP_VS_STATE_BACKUP;
pageexec4da62fc2005-06-26 16:00:19 -07002493 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002494 d[1].syncid = ip_vs_backup_syncid;
2495 }
2496 if (copy_to_user(user, &d, sizeof(d)) != 0)
2497 ret = -EFAULT;
2498 }
2499 break;
2500
2501 default:
2502 ret = -EINVAL;
2503 }
2504
2505 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002506 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002507 return ret;
2508}
2509
2510
2511static struct nf_sockopt_ops ip_vs_sockopts = {
2512 .pf = PF_INET,
2513 .set_optmin = IP_VS_BASE_CTL,
2514 .set_optmax = IP_VS_SO_SET_MAX+1,
2515 .set = do_ip_vs_set_ctl,
2516 .get_optmin = IP_VS_BASE_CTL,
2517 .get_optmax = IP_VS_SO_GET_MAX+1,
2518 .get = do_ip_vs_get_ctl,
Neil Horman16fcec32007-09-11 11:28:26 +02002519 .owner = THIS_MODULE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002520};
2521
/*
 *	Generic Netlink interface
 */
2525
2526/* IPVS genetlink family */
2527static struct genl_family ip_vs_genl_family = {
2528 .id = GENL_ID_GENERATE,
2529 .hdrsize = 0,
2530 .name = IPVS_GENL_NAME,
2531 .version = IPVS_GENL_VERSION,
2532 .maxattr = IPVS_CMD_MAX,
2533};
2534
2535/* Policy used for first-level command attributes */
2536static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2537 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2538 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2539 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2540 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2541 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2542 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2543};
2544
2545/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2546static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2547 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2548 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2549 .len = IP_VS_IFNAME_MAXLEN },
2550 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2551};
2552
2553/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2554static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2555 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2556 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2557 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2558 .len = sizeof(union nf_inet_addr) },
2559 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2560 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2561 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2562 .len = IP_VS_SCHEDNAME_MAXLEN },
2563 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2564 .len = sizeof(struct ip_vs_flags) },
2565 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2566 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2567 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2568};
2569
2570/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2571static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2572 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2573 .len = sizeof(union nf_inet_addr) },
2574 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2575 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2576 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2577 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2578 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2579 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2580 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2581 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2582 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2583};
2584
2585static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2586 struct ip_vs_stats *stats)
2587{
2588 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2589 if (!nl_stats)
2590 return -EMSGSIZE;
2591
2592 spin_lock_bh(&stats->lock);
2593
2594 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2595 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2596 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2597 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2598 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2599 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2600 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2601 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2602 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2603 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2604
2605 spin_unlock_bh(&stats->lock);
2606
2607 nla_nest_end(skb, nl_stats);
2608
2609 return 0;
2610
2611nla_put_failure:
2612 spin_unlock_bh(&stats->lock);
2613 nla_nest_cancel(skb, nl_stats);
2614 return -EMSGSIZE;
2615}
2616
2617static int ip_vs_genl_fill_service(struct sk_buff *skb,
2618 struct ip_vs_service *svc)
2619{
2620 struct nlattr *nl_service;
2621 struct ip_vs_flags flags = { .flags = svc->flags,
2622 .mask = ~0 };
2623
2624 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2625 if (!nl_service)
2626 return -EMSGSIZE;
2627
Julius Volzf94fd042008-09-02 15:55:55 +02002628 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
Julius Volz9a812192008-08-14 14:08:44 +02002629
2630 if (svc->fwmark) {
2631 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2632 } else {
2633 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2634 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2635 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2636 }
2637
2638 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2639 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2640 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2641 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2642
2643 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2644 goto nla_put_failure;
2645
2646 nla_nest_end(skb, nl_service);
2647
2648 return 0;
2649
2650nla_put_failure:
2651 nla_nest_cancel(skb, nl_service);
2652 return -EMSGSIZE;
2653}
2654
2655static int ip_vs_genl_dump_service(struct sk_buff *skb,
2656 struct ip_vs_service *svc,
2657 struct netlink_callback *cb)
2658{
2659 void *hdr;
2660
2661 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2662 &ip_vs_genl_family, NLM_F_MULTI,
2663 IPVS_CMD_NEW_SERVICE);
2664 if (!hdr)
2665 return -EMSGSIZE;
2666
2667 if (ip_vs_genl_fill_service(skb, svc) < 0)
2668 goto nla_put_failure;
2669
2670 return genlmsg_end(skb, hdr);
2671
2672nla_put_failure:
2673 genlmsg_cancel(skb, hdr);
2674 return -EMSGSIZE;
2675}
2676
2677static int ip_vs_genl_dump_services(struct sk_buff *skb,
2678 struct netlink_callback *cb)
2679{
2680 int idx = 0, i;
2681 int start = cb->args[0];
2682 struct ip_vs_service *svc;
2683
2684 mutex_lock(&__ip_vs_mutex);
2685 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2686 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2687 if (++idx <= start)
2688 continue;
2689 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2690 idx--;
2691 goto nla_put_failure;
2692 }
2693 }
2694 }
2695
2696 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2697 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2698 if (++idx <= start)
2699 continue;
2700 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2701 idx--;
2702 goto nla_put_failure;
2703 }
2704 }
2705 }
2706
2707nla_put_failure:
2708 mutex_unlock(&__ip_vs_mutex);
2709 cb->args[0] = idx;
2710
2711 return skb->len;
2712}
2713
Julius Volzc860c6b2008-09-02 15:55:36 +02002714static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
Julius Volz9a812192008-08-14 14:08:44 +02002715 struct nlattr *nla, int full_entry)
2716{
2717 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2718 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2719
2720 /* Parse mandatory identifying service fields first */
2721 if (nla == NULL ||
2722 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2723 return -EINVAL;
2724
2725 nla_af = attrs[IPVS_SVC_ATTR_AF];
2726 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2727 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2728 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2729 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2730
2731 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2732 return -EINVAL;
2733
Julius Volzc860c6b2008-09-02 15:55:36 +02002734 usvc->af = nla_get_u16(nla_af);
Julius Volzf94fd042008-09-02 15:55:55 +02002735#ifdef CONFIG_IP_VS_IPV6
2736 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2737#else
2738 if (usvc->af != AF_INET)
2739#endif
Julius Volz9a812192008-08-14 14:08:44 +02002740 return -EAFNOSUPPORT;
2741
2742 if (nla_fwmark) {
2743 usvc->protocol = IPPROTO_TCP;
2744 usvc->fwmark = nla_get_u32(nla_fwmark);
2745 } else {
2746 usvc->protocol = nla_get_u16(nla_protocol);
2747 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2748 usvc->port = nla_get_u16(nla_port);
2749 usvc->fwmark = 0;
2750 }
2751
2752 /* If a full entry was requested, check for the additional fields */
2753 if (full_entry) {
2754 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2755 *nla_netmask;
2756 struct ip_vs_flags flags;
2757 struct ip_vs_service *svc;
2758
2759 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2760 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2761 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2762 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2763
2764 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2765 return -EINVAL;
2766
2767 nla_memcpy(&flags, nla_flags, sizeof(flags));
2768
2769 /* prefill flags from service if it already exists */
2770 if (usvc->fwmark)
Julius Volzb18610d2008-09-02 15:55:37 +02002771 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
Julius Volz9a812192008-08-14 14:08:44 +02002772 else
Julius Volzb18610d2008-09-02 15:55:37 +02002773 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2774 &usvc->addr, usvc->port);
Julius Volz9a812192008-08-14 14:08:44 +02002775 if (svc) {
2776 usvc->flags = svc->flags;
2777 ip_vs_service_put(svc);
2778 } else
2779 usvc->flags = 0;
2780
2781 /* set new flags from userland */
2782 usvc->flags = (usvc->flags & ~flags.mask) |
2783 (flags.flags & flags.mask);
Julius Volzc860c6b2008-09-02 15:55:36 +02002784 usvc->sched_name = nla_data(nla_sched);
Julius Volz9a812192008-08-14 14:08:44 +02002785 usvc->timeout = nla_get_u32(nla_timeout);
2786 usvc->netmask = nla_get_u32(nla_netmask);
2787 }
2788
2789 return 0;
2790}
2791
2792static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2793{
Julius Volzc860c6b2008-09-02 15:55:36 +02002794 struct ip_vs_service_user_kern usvc;
Julius Volz9a812192008-08-14 14:08:44 +02002795 int ret;
2796
2797 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2798 if (ret)
2799 return ERR_PTR(ret);
2800
2801 if (usvc.fwmark)
Julius Volzb18610d2008-09-02 15:55:37 +02002802 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
Julius Volz9a812192008-08-14 14:08:44 +02002803 else
Julius Volzb18610d2008-09-02 15:55:37 +02002804 return __ip_vs_service_get(usvc.af, usvc.protocol,
2805 &usvc.addr, usvc.port);
Julius Volz9a812192008-08-14 14:08:44 +02002806}
2807
2808static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2809{
2810 struct nlattr *nl_dest;
2811
2812 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2813 if (!nl_dest)
2814 return -EMSGSIZE;
2815
2816 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2817 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2818
2819 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2820 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2821 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2822 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2823 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2824 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2825 atomic_read(&dest->activeconns));
2826 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2827 atomic_read(&dest->inactconns));
2828 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2829 atomic_read(&dest->persistconns));
2830
2831 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2832 goto nla_put_failure;
2833
2834 nla_nest_end(skb, nl_dest);
2835
2836 return 0;
2837
2838nla_put_failure:
2839 nla_nest_cancel(skb, nl_dest);
2840 return -EMSGSIZE;
2841}
2842
2843static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2844 struct netlink_callback *cb)
2845{
2846 void *hdr;
2847
2848 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2849 &ip_vs_genl_family, NLM_F_MULTI,
2850 IPVS_CMD_NEW_DEST);
2851 if (!hdr)
2852 return -EMSGSIZE;
2853
2854 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2855 goto nla_put_failure;
2856
2857 return genlmsg_end(skb, hdr);
2858
2859nla_put_failure:
2860 genlmsg_cancel(skb, hdr);
2861 return -EMSGSIZE;
2862}
2863
2864static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2865 struct netlink_callback *cb)
2866{
2867 int idx = 0;
2868 int start = cb->args[0];
2869 struct ip_vs_service *svc;
2870 struct ip_vs_dest *dest;
2871 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2872
2873 mutex_lock(&__ip_vs_mutex);
2874
2875 /* Try to find the service for which to dump destinations */
2876 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2877 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2878 goto out_err;
2879
2880 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2881 if (IS_ERR(svc) || svc == NULL)
2882 goto out_err;
2883
2884 /* Dump the destinations */
2885 list_for_each_entry(dest, &svc->destinations, n_list) {
2886 if (++idx <= start)
2887 continue;
2888 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2889 idx--;
2890 goto nla_put_failure;
2891 }
2892 }
2893
2894nla_put_failure:
2895 cb->args[0] = idx;
2896 ip_vs_service_put(svc);
2897
2898out_err:
2899 mutex_unlock(&__ip_vs_mutex);
2900
2901 return skb->len;
2902}
2903
Julius Volzc860c6b2008-09-02 15:55:36 +02002904static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
Julius Volz9a812192008-08-14 14:08:44 +02002905 struct nlattr *nla, int full_entry)
2906{
2907 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2908 struct nlattr *nla_addr, *nla_port;
2909
2910 /* Parse mandatory identifying destination fields first */
2911 if (nla == NULL ||
2912 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2913 return -EINVAL;
2914
2915 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2916 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2917
2918 if (!(nla_addr && nla_port))
2919 return -EINVAL;
2920
2921 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2922 udest->port = nla_get_u16(nla_port);
2923
2924 /* If a full entry was requested, check for the additional fields */
2925 if (full_entry) {
2926 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2927 *nla_l_thresh;
2928
2929 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2930 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2931 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2932 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2933
2934 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2935 return -EINVAL;
2936
2937 udest->conn_flags = nla_get_u32(nla_fwd)
2938 & IP_VS_CONN_F_FWD_MASK;
2939 udest->weight = nla_get_u32(nla_weight);
2940 udest->u_threshold = nla_get_u32(nla_u_thresh);
2941 udest->l_threshold = nla_get_u32(nla_l_thresh);
2942 }
2943
2944 return 0;
2945}
2946
2947static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2948 const char *mcast_ifn, __be32 syncid)
2949{
2950 struct nlattr *nl_daemon;
2951
2952 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2953 if (!nl_daemon)
2954 return -EMSGSIZE;
2955
2956 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2957 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2958 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2959
2960 nla_nest_end(skb, nl_daemon);
2961
2962 return 0;
2963
2964nla_put_failure:
2965 nla_nest_cancel(skb, nl_daemon);
2966 return -EMSGSIZE;
2967}
2968
2969static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2970 const char *mcast_ifn, __be32 syncid,
2971 struct netlink_callback *cb)
2972{
2973 void *hdr;
2974 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2975 &ip_vs_genl_family, NLM_F_MULTI,
2976 IPVS_CMD_NEW_DAEMON);
2977 if (!hdr)
2978 return -EMSGSIZE;
2979
2980 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2981 goto nla_put_failure;
2982
2983 return genlmsg_end(skb, hdr);
2984
2985nla_put_failure:
2986 genlmsg_cancel(skb, hdr);
2987 return -EMSGSIZE;
2988}
2989
2990static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2991 struct netlink_callback *cb)
2992{
2993 mutex_lock(&__ip_vs_mutex);
2994 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2995 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2996 ip_vs_master_mcast_ifn,
2997 ip_vs_master_syncid, cb) < 0)
2998 goto nla_put_failure;
2999
3000 cb->args[0] = 1;
3001 }
3002
3003 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3004 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3005 ip_vs_backup_mcast_ifn,
3006 ip_vs_backup_syncid, cb) < 0)
3007 goto nla_put_failure;
3008
3009 cb->args[1] = 1;
3010 }
3011
3012nla_put_failure:
3013 mutex_unlock(&__ip_vs_mutex);
3014
3015 return skb->len;
3016}
3017
3018static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3019{
3020 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3021 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3022 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3023 return -EINVAL;
3024
3025 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3026 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3027 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3028}
3029
3030static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3031{
3032 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3033 return -EINVAL;
3034
3035 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3036}
3037
3038static int ip_vs_genl_set_config(struct nlattr **attrs)
3039{
3040 struct ip_vs_timeout_user t;
3041
3042 __ip_vs_get_timeouts(&t);
3043
3044 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3045 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3046
3047 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3048 t.tcp_fin_timeout =
3049 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3050
3051 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3052 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3053
3054 return ip_vs_set_timeout(&t);
3055}
3056
3057static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3058{
3059 struct ip_vs_service *svc = NULL;
Julius Volzc860c6b2008-09-02 15:55:36 +02003060 struct ip_vs_service_user_kern usvc;
3061 struct ip_vs_dest_user_kern udest;
Julius Volz9a812192008-08-14 14:08:44 +02003062 int ret = 0, cmd;
3063 int need_full_svc = 0, need_full_dest = 0;
3064
3065 cmd = info->genlhdr->cmd;
3066
3067 mutex_lock(&__ip_vs_mutex);
3068
3069 if (cmd == IPVS_CMD_FLUSH) {
3070 ret = ip_vs_flush();
3071 goto out;
3072 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3073 ret = ip_vs_genl_set_config(info->attrs);
3074 goto out;
3075 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3076 cmd == IPVS_CMD_DEL_DAEMON) {
3077
3078 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3079
3080 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3081 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3082 info->attrs[IPVS_CMD_ATTR_DAEMON],
3083 ip_vs_daemon_policy)) {
3084 ret = -EINVAL;
3085 goto out;
3086 }
3087
3088 if (cmd == IPVS_CMD_NEW_DAEMON)
3089 ret = ip_vs_genl_new_daemon(daemon_attrs);
3090 else
3091 ret = ip_vs_genl_del_daemon(daemon_attrs);
3092 goto out;
3093 } else if (cmd == IPVS_CMD_ZERO &&
3094 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3095 ret = ip_vs_zero_all();
3096 goto out;
3097 }
3098
3099 /* All following commands require a service argument, so check if we
3100 * received a valid one. We need a full service specification when
3101 * adding / editing a service. Only identifying members otherwise. */
3102 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3103 need_full_svc = 1;
3104
3105 ret = ip_vs_genl_parse_service(&usvc,
3106 info->attrs[IPVS_CMD_ATTR_SERVICE],
3107 need_full_svc);
3108 if (ret)
3109 goto out;
3110
3111 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3112 if (usvc.fwmark == 0)
Julius Volzb18610d2008-09-02 15:55:37 +02003113 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3114 &usvc.addr, usvc.port);
Julius Volz9a812192008-08-14 14:08:44 +02003115 else
Julius Volzb18610d2008-09-02 15:55:37 +02003116 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
Julius Volz9a812192008-08-14 14:08:44 +02003117
3118 /* Unless we're adding a new service, the service must already exist */
3119 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3120 ret = -ESRCH;
3121 goto out;
3122 }
3123
3124 /* Destination commands require a valid destination argument. For
3125 * adding / editing a destination, we need a full destination
3126 * specification. */
3127 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3128 cmd == IPVS_CMD_DEL_DEST) {
3129 if (cmd != IPVS_CMD_DEL_DEST)
3130 need_full_dest = 1;
3131
3132 ret = ip_vs_genl_parse_dest(&udest,
3133 info->attrs[IPVS_CMD_ATTR_DEST],
3134 need_full_dest);
3135 if (ret)
3136 goto out;
3137 }
3138
3139 switch (cmd) {
3140 case IPVS_CMD_NEW_SERVICE:
3141 if (svc == NULL)
3142 ret = ip_vs_add_service(&usvc, &svc);
3143 else
3144 ret = -EEXIST;
3145 break;
3146 case IPVS_CMD_SET_SERVICE:
3147 ret = ip_vs_edit_service(svc, &usvc);
3148 break;
3149 case IPVS_CMD_DEL_SERVICE:
3150 ret = ip_vs_del_service(svc);
3151 break;
3152 case IPVS_CMD_NEW_DEST:
3153 ret = ip_vs_add_dest(svc, &udest);
3154 break;
3155 case IPVS_CMD_SET_DEST:
3156 ret = ip_vs_edit_dest(svc, &udest);
3157 break;
3158 case IPVS_CMD_DEL_DEST:
3159 ret = ip_vs_del_dest(svc, &udest);
3160 break;
3161 case IPVS_CMD_ZERO:
3162 ret = ip_vs_zero_service(svc);
3163 break;
3164 default:
3165 ret = -EINVAL;
3166 }
3167
3168out:
3169 if (svc)
3170 ip_vs_service_put(svc);
3171 mutex_unlock(&__ip_vs_mutex);
3172
3173 return ret;
3174}
3175
3176static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3177{
3178 struct sk_buff *msg;
3179 void *reply;
3180 int ret, cmd, reply_cmd;
3181
3182 cmd = info->genlhdr->cmd;
3183
3184 if (cmd == IPVS_CMD_GET_SERVICE)
3185 reply_cmd = IPVS_CMD_NEW_SERVICE;
3186 else if (cmd == IPVS_CMD_GET_INFO)
3187 reply_cmd = IPVS_CMD_SET_INFO;
3188 else if (cmd == IPVS_CMD_GET_CONFIG)
3189 reply_cmd = IPVS_CMD_SET_CONFIG;
3190 else {
3191 IP_VS_ERR("unknown Generic Netlink command\n");
3192 return -EINVAL;
3193 }
3194
3195 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3196 if (!msg)
3197 return -ENOMEM;
3198
3199 mutex_lock(&__ip_vs_mutex);
3200
3201 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3202 if (reply == NULL)
3203 goto nla_put_failure;
3204
3205 switch (cmd) {
3206 case IPVS_CMD_GET_SERVICE:
3207 {
3208 struct ip_vs_service *svc;
3209
3210 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3211 if (IS_ERR(svc)) {
3212 ret = PTR_ERR(svc);
3213 goto out_err;
3214 } else if (svc) {
3215 ret = ip_vs_genl_fill_service(msg, svc);
3216 ip_vs_service_put(svc);
3217 if (ret)
3218 goto nla_put_failure;
3219 } else {
3220 ret = -ESRCH;
3221 goto out_err;
3222 }
3223
3224 break;
3225 }
3226
3227 case IPVS_CMD_GET_CONFIG:
3228 {
3229 struct ip_vs_timeout_user t;
3230
3231 __ip_vs_get_timeouts(&t);
3232#ifdef CONFIG_IP_VS_PROTO_TCP
3233 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3234 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3235 t.tcp_fin_timeout);
3236#endif
3237#ifdef CONFIG_IP_VS_PROTO_UDP
3238 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3239#endif
3240
3241 break;
3242 }
3243
3244 case IPVS_CMD_GET_INFO:
3245 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3246 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3247 IP_VS_CONN_TAB_SIZE);
3248 break;
3249 }
3250
3251 genlmsg_end(msg, reply);
3252 ret = genlmsg_unicast(msg, info->snd_pid);
3253 goto out;
3254
3255nla_put_failure:
3256 IP_VS_ERR("not enough space in Netlink message\n");
3257 ret = -EMSGSIZE;
3258
3259out_err:
3260 nlmsg_free(msg);
3261out:
3262 mutex_unlock(&__ip_vs_mutex);
3263
3264 return ret;
3265}
3266
3267
3268static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3269 {
3270 .cmd = IPVS_CMD_NEW_SERVICE,
3271 .flags = GENL_ADMIN_PERM,
3272 .policy = ip_vs_cmd_policy,
3273 .doit = ip_vs_genl_set_cmd,
3274 },
3275 {
3276 .cmd = IPVS_CMD_SET_SERVICE,
3277 .flags = GENL_ADMIN_PERM,
3278 .policy = ip_vs_cmd_policy,
3279 .doit = ip_vs_genl_set_cmd,
3280 },
3281 {
3282 .cmd = IPVS_CMD_DEL_SERVICE,
3283 .flags = GENL_ADMIN_PERM,
3284 .policy = ip_vs_cmd_policy,
3285 .doit = ip_vs_genl_set_cmd,
3286 },
3287 {
3288 .cmd = IPVS_CMD_GET_SERVICE,
3289 .flags = GENL_ADMIN_PERM,
3290 .doit = ip_vs_genl_get_cmd,
3291 .dumpit = ip_vs_genl_dump_services,
3292 .policy = ip_vs_cmd_policy,
3293 },
3294 {
3295 .cmd = IPVS_CMD_NEW_DEST,
3296 .flags = GENL_ADMIN_PERM,
3297 .policy = ip_vs_cmd_policy,
3298 .doit = ip_vs_genl_set_cmd,
3299 },
3300 {
3301 .cmd = IPVS_CMD_SET_DEST,
3302 .flags = GENL_ADMIN_PERM,
3303 .policy = ip_vs_cmd_policy,
3304 .doit = ip_vs_genl_set_cmd,
3305 },
3306 {
3307 .cmd = IPVS_CMD_DEL_DEST,
3308 .flags = GENL_ADMIN_PERM,
3309 .policy = ip_vs_cmd_policy,
3310 .doit = ip_vs_genl_set_cmd,
3311 },
3312 {
3313 .cmd = IPVS_CMD_GET_DEST,
3314 .flags = GENL_ADMIN_PERM,
3315 .policy = ip_vs_cmd_policy,
3316 .dumpit = ip_vs_genl_dump_dests,
3317 },
3318 {
3319 .cmd = IPVS_CMD_NEW_DAEMON,
3320 .flags = GENL_ADMIN_PERM,
3321 .policy = ip_vs_cmd_policy,
3322 .doit = ip_vs_genl_set_cmd,
3323 },
3324 {
3325 .cmd = IPVS_CMD_DEL_DAEMON,
3326 .flags = GENL_ADMIN_PERM,
3327 .policy = ip_vs_cmd_policy,
3328 .doit = ip_vs_genl_set_cmd,
3329 },
3330 {
3331 .cmd = IPVS_CMD_GET_DAEMON,
3332 .flags = GENL_ADMIN_PERM,
3333 .dumpit = ip_vs_genl_dump_daemons,
3334 },
3335 {
3336 .cmd = IPVS_CMD_SET_CONFIG,
3337 .flags = GENL_ADMIN_PERM,
3338 .policy = ip_vs_cmd_policy,
3339 .doit = ip_vs_genl_set_cmd,
3340 },
3341 {
3342 .cmd = IPVS_CMD_GET_CONFIG,
3343 .flags = GENL_ADMIN_PERM,
3344 .doit = ip_vs_genl_get_cmd,
3345 },
3346 {
3347 .cmd = IPVS_CMD_GET_INFO,
3348 .flags = GENL_ADMIN_PERM,
3349 .doit = ip_vs_genl_get_cmd,
3350 },
3351 {
3352 .cmd = IPVS_CMD_ZERO,
3353 .flags = GENL_ADMIN_PERM,
3354 .policy = ip_vs_cmd_policy,
3355 .doit = ip_vs_genl_set_cmd,
3356 },
3357 {
3358 .cmd = IPVS_CMD_FLUSH,
3359 .flags = GENL_ADMIN_PERM,
3360 .doit = ip_vs_genl_set_cmd,
3361 },
3362};
3363
3364static int __init ip_vs_genl_register(void)
3365{
3366 int ret, i;
3367
3368 ret = genl_register_family(&ip_vs_genl_family);
3369 if (ret)
3370 return ret;
3371
3372 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3373 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3374 if (ret)
3375 goto err_out;
3376 }
3377 return 0;
3378
3379err_out:
3380 genl_unregister_family(&ip_vs_genl_family);
3381 return ret;
3382}
3383
3384static void ip_vs_genl_unregister(void)
3385{
3386 genl_unregister_family(&ip_vs_genl_family);
3387}
3388
3389/* End of Generic Netlink interface definitions */
3390
Linus Torvalds1da177e2005-04-16 15:20:36 -07003391
Sven Wegener048cf482008-08-10 18:24:35 +00003392int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003393{
3394 int ret;
3395 int idx;
3396
3397 EnterFunction(2);
3398
3399 ret = nf_register_sockopt(&ip_vs_sockopts);
3400 if (ret) {
3401 IP_VS_ERR("cannot register sockopt.\n");
3402 return ret;
3403 }
3404
Julius Volz9a812192008-08-14 14:08:44 +02003405 ret = ip_vs_genl_register();
3406 if (ret) {
3407 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3408 nf_unregister_sockopt(&ip_vs_sockopts);
3409 return ret;
3410 }
3411
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003412 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3413 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003414
Pavel Emelyanov90754f82008-01-12 02:33:50 -08003415 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003416
3417 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3418 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3419 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3420 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3421 }
3422 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3423 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3424 }
3425
Linus Torvalds1da177e2005-04-16 15:20:36 -07003426 ip_vs_new_estimator(&ip_vs_stats);
3427
3428 /* Hook the defense timer */
3429 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3430
3431 LeaveFunction(2);
3432 return 0;
3433}
3434
3435
3436void ip_vs_control_cleanup(void)
3437{
3438 EnterFunction(2);
3439 ip_vs_trash_cleanup();
3440 cancel_rearming_delayed_work(&defense_work);
Oleg Nesterov28e53bd2007-05-09 02:34:22 -07003441 cancel_work_sync(&defense_work.work);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003442 ip_vs_kill_estimator(&ip_vs_stats);
3443 unregister_sysctl_table(sysctl_header);
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003444 proc_net_remove(&init_net, "ip_vs_stats");
3445 proc_net_remove(&init_net, "ip_vs");
Julius Volz9a812192008-08-14 14:08:44 +02003446 ip_vs_genl_unregister();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003447 nf_unregister_sockopt(&ip_vs_sockopts);
3448 LeaveFunction(2);
3449}