blob: ede101eeec17d86b5bdc05903aecfed3e9ff3329 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080024#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080034#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020036#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include <net/ip.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020038#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020040#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041
42#include <asm/uaccess.h>
43
44#include <net/ip_vs.h>
45
/*
 * Serializes the IPVS [gs]etsockopt handlers.  A mutex is used because
 * those handlers may sleep.
 */
static DEFINE_MUTEX(__ip_vs_mutex);

/* Guards the virtual service tables. */
static DEFINE_RWLOCK(__ip_vs_svc_lock);

/* Guards the table holding the real services. */
static DEFINE_RWLOCK(__ip_vs_rs_lock);

/* Guards the state and timeout tables (secure_tcp handling). */
static DEFINE_RWLOCK(__ip_vs_securetcp_lock);

/* Guards the drop-entry defense state. */
static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);

/* Guards the drop-packet defense state. */
static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
63
64/* 1/rate drop and drop-entry variables */
65int ip_vs_drop_rate = 0;
66int ip_vs_drop_counter = 0;
67static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
68
69/* number of virtual services */
70static int ip_vs_num_services = 0;
71
72/* sysctl variables */
73static int sysctl_ip_vs_drop_entry = 0;
74static int sysctl_ip_vs_drop_packet = 0;
75static int sysctl_ip_vs_secure_tcp = 0;
76static int sysctl_ip_vs_amemthresh = 1024;
77static int sysctl_ip_vs_am_droprate = 10;
78int sysctl_ip_vs_cache_bypass = 0;
79int sysctl_ip_vs_expire_nodest_conn = 0;
80int sysctl_ip_vs_expire_quiescent_template = 0;
81int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
82int sysctl_ip_vs_nat_icmp_send = 0;
83
84
#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; only present when IPVS debugging is compiled in. */
static int sysctl_ip_vs_debug_level = 0;

/*
 *	Report the current IPVS debug level.
 */
int ip_vs_get_debug_level(void)
{
	int level = sysctl_ip_vs_debug_level;

	return level;
}
#endif /* CONFIG_IP_VS_DEBUG */
93
94/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -070095 * update_defense_level is called from keventd and from sysctl,
96 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -070097 */
98static void update_defense_level(void)
99{
100 struct sysinfo i;
101 static int old_secure_tcp = 0;
102 int availmem;
103 int nomem;
104 int to_change = -1;
105
106 /* we only count free and buffered memory (in pages) */
107 si_meminfo(&i);
108 availmem = i.freeram + i.bufferram;
109 /* however in linux 2.5 the i.bufferram is total page cache size,
110 we need adjust it */
111 /* si_swapinfo(&i); */
112 /* availmem = availmem - (i.totalswap - i.freeswap); */
113
114 nomem = (availmem < sysctl_ip_vs_amemthresh);
115
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700116 local_bh_disable();
117
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118 /* drop_entry */
119 spin_lock(&__ip_vs_dropentry_lock);
120 switch (sysctl_ip_vs_drop_entry) {
121 case 0:
122 atomic_set(&ip_vs_dropentry, 0);
123 break;
124 case 1:
125 if (nomem) {
126 atomic_set(&ip_vs_dropentry, 1);
127 sysctl_ip_vs_drop_entry = 2;
128 } else {
129 atomic_set(&ip_vs_dropentry, 0);
130 }
131 break;
132 case 2:
133 if (nomem) {
134 atomic_set(&ip_vs_dropentry, 1);
135 } else {
136 atomic_set(&ip_vs_dropentry, 0);
137 sysctl_ip_vs_drop_entry = 1;
138 };
139 break;
140 case 3:
141 atomic_set(&ip_vs_dropentry, 1);
142 break;
143 }
144 spin_unlock(&__ip_vs_dropentry_lock);
145
146 /* drop_packet */
147 spin_lock(&__ip_vs_droppacket_lock);
148 switch (sysctl_ip_vs_drop_packet) {
149 case 0:
150 ip_vs_drop_rate = 0;
151 break;
152 case 1:
153 if (nomem) {
154 ip_vs_drop_rate = ip_vs_drop_counter
155 = sysctl_ip_vs_amemthresh /
156 (sysctl_ip_vs_amemthresh-availmem);
157 sysctl_ip_vs_drop_packet = 2;
158 } else {
159 ip_vs_drop_rate = 0;
160 }
161 break;
162 case 2:
163 if (nomem) {
164 ip_vs_drop_rate = ip_vs_drop_counter
165 = sysctl_ip_vs_amemthresh /
166 (sysctl_ip_vs_amemthresh-availmem);
167 } else {
168 ip_vs_drop_rate = 0;
169 sysctl_ip_vs_drop_packet = 1;
170 }
171 break;
172 case 3:
173 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
174 break;
175 }
176 spin_unlock(&__ip_vs_droppacket_lock);
177
178 /* secure_tcp */
179 write_lock(&__ip_vs_securetcp_lock);
180 switch (sysctl_ip_vs_secure_tcp) {
181 case 0:
182 if (old_secure_tcp >= 2)
183 to_change = 0;
184 break;
185 case 1:
186 if (nomem) {
187 if (old_secure_tcp < 2)
188 to_change = 1;
189 sysctl_ip_vs_secure_tcp = 2;
190 } else {
191 if (old_secure_tcp >= 2)
192 to_change = 0;
193 }
194 break;
195 case 2:
196 if (nomem) {
197 if (old_secure_tcp < 2)
198 to_change = 1;
199 } else {
200 if (old_secure_tcp >= 2)
201 to_change = 0;
202 sysctl_ip_vs_secure_tcp = 1;
203 }
204 break;
205 case 3:
206 if (old_secure_tcp < 2)
207 to_change = 1;
208 break;
209 }
210 old_secure_tcp = sysctl_ip_vs_secure_tcp;
211 if (to_change >= 0)
212 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
213 write_unlock(&__ip_vs_securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700214
215 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216}
217
218
219/*
220 * Timer for checking the defense
221 */
222#define DEFENSE_TIMER_PERIOD 1*HZ
David Howellsc4028952006-11-22 14:57:56 +0000223static void defense_work_handler(struct work_struct *work);
224static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225
David Howellsc4028952006-11-22 14:57:56 +0000226static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227{
228 update_defense_level();
229 if (atomic_read(&ip_vs_dropentry))
230 ip_vs_random_dropentry();
231
232 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
233}
234
235int
236ip_vs_use_count_inc(void)
237{
238 return try_module_get(THIS_MODULE);
239}
240
241void
242ip_vs_use_count_dec(void)
243{
244 module_put(THIS_MODULE);
245}
246
247
248/*
249 * Hash table: for virtual service lookups
250 */
251#define IP_VS_SVC_TAB_BITS 8
252#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254
255/* the service table hashed by <protocol, addr, port> */
256static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
257/* the service table hashed by fwmark */
258static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
259
260/*
261 * Hash table: for real service lookups
262 */
263#define IP_VS_RTAB_BITS 4
264#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
266
267static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
268
269/*
270 * Trash for destinations
271 */
272static LIST_HEAD(ip_vs_dest_trash);
273
274/*
275 * FTP & NULL virtual service counters
276 */
277static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
278static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
279
280
281/*
282 * Returns hash value for virtual service
283 */
284static __inline__ unsigned
Al Viro014d7302006-09-28 14:29:52 -0700285ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286{
287 register unsigned porth = ntohs(port);
288
289 return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
290 & IP_VS_SVC_TAB_MASK;
291}
292
293/*
294 * Returns hash value of fwmark for virtual service lookup
295 */
296static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
297{
298 return fwmark & IP_VS_SVC_TAB_MASK;
299}
300
301/*
302 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
303 * or in the ip_vs_svc_fwm_table by fwmark.
304 * Should be called with locked tables.
305 */
306static int ip_vs_svc_hash(struct ip_vs_service *svc)
307{
308 unsigned hash;
309
310 if (svc->flags & IP_VS_SVC_F_HASHED) {
311 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
312 "called from %p\n", __builtin_return_address(0));
313 return 0;
314 }
315
316 if (svc->fwmark == 0) {
317 /*
318 * Hash it by <protocol,addr,port> in ip_vs_svc_table
319 */
320 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
321 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
322 } else {
323 /*
324 * Hash it by fwmark in ip_vs_svc_fwm_table
325 */
326 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
327 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
328 }
329
330 svc->flags |= IP_VS_SVC_F_HASHED;
331 /* increase its refcnt because it is referenced by the svc table */
332 atomic_inc(&svc->refcnt);
333 return 1;
334}
335
336
337/*
338 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
339 * Should be called with locked tables.
340 */
341static int ip_vs_svc_unhash(struct ip_vs_service *svc)
342{
343 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
344 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
345 "called from %p\n", __builtin_return_address(0));
346 return 0;
347 }
348
349 if (svc->fwmark == 0) {
350 /* Remove it from the ip_vs_svc_table table */
351 list_del(&svc->s_list);
352 } else {
353 /* Remove it from the ip_vs_svc_fwm_table table */
354 list_del(&svc->f_list);
355 }
356
357 svc->flags &= ~IP_VS_SVC_F_HASHED;
358 atomic_dec(&svc->refcnt);
359 return 1;
360}
361
362
363/*
364 * Get service by {proto,addr,port} in the service table.
365 */
366static __inline__ struct ip_vs_service *
Al Viro014d7302006-09-28 14:29:52 -0700367__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368{
369 unsigned hash;
370 struct ip_vs_service *svc;
371
372 /* Check for "full" addressed entries */
373 hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
374
375 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
376 if ((svc->addr == vaddr)
377 && (svc->port == vport)
378 && (svc->protocol == protocol)) {
379 /* HIT */
380 atomic_inc(&svc->usecnt);
381 return svc;
382 }
383 }
384
385 return NULL;
386}
387
388
389/*
390 * Get service by {fwmark} in the service table.
391 */
392static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
393{
394 unsigned hash;
395 struct ip_vs_service *svc;
396
397 /* Check for fwmark addressed entries */
398 hash = ip_vs_svc_fwm_hashkey(fwmark);
399
400 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
401 if (svc->fwmark == fwmark) {
402 /* HIT */
403 atomic_inc(&svc->usecnt);
404 return svc;
405 }
406 }
407
408 return NULL;
409}
410
411struct ip_vs_service *
Al Viro014d7302006-09-28 14:29:52 -0700412ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413{
414 struct ip_vs_service *svc;
415
416 read_lock(&__ip_vs_svc_lock);
417
418 /*
419 * Check the table hashed by fwmark first
420 */
421 if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
422 goto out;
423
424 /*
425 * Check the table hashed by <protocol,addr,port>
426 * for "full" addressed entries
427 */
428 svc = __ip_vs_service_get(protocol, vaddr, vport);
429
430 if (svc == NULL
431 && protocol == IPPROTO_TCP
432 && atomic_read(&ip_vs_ftpsvc_counter)
433 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
434 /*
435 * Check if ftp service entry exists, the packet
436 * might belong to FTP data connections.
437 */
438 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
439 }
440
441 if (svc == NULL
442 && atomic_read(&ip_vs_nullsvc_counter)) {
443 /*
444 * Check if the catch-all port (port zero) exists
445 */
446 svc = __ip_vs_service_get(protocol, vaddr, 0);
447 }
448
449 out:
450 read_unlock(&__ip_vs_svc_lock);
451
Roberto Nibali4b5bdf52006-01-03 14:22:59 -0800452 IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453 fwmark, ip_vs_proto_name(protocol),
454 NIPQUAD(vaddr), ntohs(vport),
455 svc?"hit":"not hit");
456
457 return svc;
458}
459
460
461static inline void
462__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
463{
464 atomic_inc(&svc->refcnt);
465 dest->svc = svc;
466}
467
468static inline void
469__ip_vs_unbind_svc(struct ip_vs_dest *dest)
470{
471 struct ip_vs_service *svc = dest->svc;
472
473 dest->svc = NULL;
474 if (atomic_dec_and_test(&svc->refcnt))
475 kfree(svc);
476}
477
478
479/*
480 * Returns hash value for real service
481 */
Al Viro014d7302006-09-28 14:29:52 -0700482static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483{
484 register unsigned porth = ntohs(port);
485
486 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
487 & IP_VS_RTAB_MASK;
488}
489
490/*
491 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
492 * should be called with locked tables.
493 */
494static int ip_vs_rs_hash(struct ip_vs_dest *dest)
495{
496 unsigned hash;
497
498 if (!list_empty(&dest->d_list)) {
499 return 0;
500 }
501
502 /*
503 * Hash by proto,addr,port,
504 * which are the parameters of the real service.
505 */
506 hash = ip_vs_rs_hashkey(dest->addr, dest->port);
507 list_add(&dest->d_list, &ip_vs_rtable[hash]);
508
509 return 1;
510}
511
512/*
513 * UNhashes ip_vs_dest from ip_vs_rtable.
514 * should be called with locked tables.
515 */
516static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
517{
518 /*
519 * Remove it from the ip_vs_rtable table.
520 */
521 if (!list_empty(&dest->d_list)) {
522 list_del(&dest->d_list);
523 INIT_LIST_HEAD(&dest->d_list);
524 }
525
526 return 1;
527}
528
529/*
530 * Lookup real service by <proto,addr,port> in the real service table.
531 */
532struct ip_vs_dest *
Al Viro014d7302006-09-28 14:29:52 -0700533ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534{
535 unsigned hash;
536 struct ip_vs_dest *dest;
537
538 /*
539 * Check for "full" addressed entries
540 * Return the first found entry
541 */
542 hash = ip_vs_rs_hashkey(daddr, dport);
543
544 read_lock(&__ip_vs_rs_lock);
545 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
546 if ((dest->addr == daddr)
547 && (dest->port == dport)
548 && ((dest->protocol == protocol) ||
549 dest->vfwmark)) {
550 /* HIT */
551 read_unlock(&__ip_vs_rs_lock);
552 return dest;
553 }
554 }
555 read_unlock(&__ip_vs_rs_lock);
556
557 return NULL;
558}
559
560/*
561 * Lookup destination by {addr,port} in the given service
562 */
563static struct ip_vs_dest *
Al Viro014d7302006-09-28 14:29:52 -0700564ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565{
566 struct ip_vs_dest *dest;
567
568 /*
569 * Find the destination for the given service
570 */
571 list_for_each_entry(dest, &svc->destinations, n_list) {
572 if ((dest->addr == daddr) && (dest->port == dport)) {
573 /* HIT */
574 return dest;
575 }
576 }
577
578 return NULL;
579}
580
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800581/*
582 * Find destination by {daddr,dport,vaddr,protocol}
583 * Cretaed to be used in ip_vs_process_message() in
584 * the backup synchronization daemon. It finds the
585 * destination to be bound to the received connection
586 * on the backup.
587 *
588 * ip_vs_lookup_real_service() looked promissing, but
589 * seems not working as expected.
590 */
591struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
592 __be32 vaddr, __be16 vport, __u16 protocol)
593{
594 struct ip_vs_dest *dest;
595 struct ip_vs_service *svc;
596
597 svc = ip_vs_service_get(0, protocol, vaddr, vport);
598 if (!svc)
599 return NULL;
600 dest = ip_vs_lookup_dest(svc, daddr, dport);
601 if (dest)
602 atomic_inc(&dest->refcnt);
603 ip_vs_service_put(svc);
604 return dest;
605}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606
607/*
608 * Lookup dest by {svc,addr,port} in the destination trash.
609 * The destination trash is used to hold the destinations that are removed
610 * from the service table but are still referenced by some conn entries.
611 * The reason to add the destination trash is when the dest is temporary
612 * down (either by administrator or by monitor program), the dest can be
613 * picked back from the trash, the remaining connections to the dest can
614 * continue, and the counting information of the dest is also useful for
615 * scheduling.
616 */
617static struct ip_vs_dest *
Al Viro014d7302006-09-28 14:29:52 -0700618ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619{
620 struct ip_vs_dest *dest, *nxt;
621
622 /*
623 * Find the destination in trash
624 */
625 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
626 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
Roberto Nibali4b5bdf52006-01-03 14:22:59 -0800627 "dest->refcnt=%d\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628 dest->vfwmark,
629 NIPQUAD(dest->addr), ntohs(dest->port),
630 atomic_read(&dest->refcnt));
631 if (dest->addr == daddr &&
632 dest->port == dport &&
633 dest->vfwmark == svc->fwmark &&
634 dest->protocol == svc->protocol &&
635 (svc->fwmark ||
636 (dest->vaddr == svc->addr &&
637 dest->vport == svc->port))) {
638 /* HIT */
639 return dest;
640 }
641
642 /*
643 * Try to purge the destination from trash if not referenced
644 */
645 if (atomic_read(&dest->refcnt) == 1) {
646 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
647 "from trash\n",
648 dest->vfwmark,
649 NIPQUAD(dest->addr), ntohs(dest->port));
650 list_del(&dest->n_list);
651 ip_vs_dst_reset(dest);
652 __ip_vs_unbind_svc(dest);
653 kfree(dest);
654 }
655 }
656
657 return NULL;
658}
659
660
661/*
662 * Clean up all the destinations in the trash
663 * Called by the ip_vs_control_cleanup()
664 *
665 * When the ip_vs_control_clearup is activated by ipvs module exit,
666 * the service tables must have been flushed and all the connections
667 * are expired, and the refcnt of each destination in the trash must
668 * be 1, so we simply release them here.
669 */
670static void ip_vs_trash_cleanup(void)
671{
672 struct ip_vs_dest *dest, *nxt;
673
674 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
675 list_del(&dest->n_list);
676 ip_vs_dst_reset(dest);
677 __ip_vs_unbind_svc(dest);
678 kfree(dest);
679 }
680}
681
682
683static void
684ip_vs_zero_stats(struct ip_vs_stats *stats)
685{
686 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000687
688 stats->conns = 0;
689 stats->inpkts = 0;
690 stats->outpkts = 0;
691 stats->inbytes = 0;
692 stats->outbytes = 0;
693
694 stats->cps = 0;
695 stats->inpps = 0;
696 stats->outpps = 0;
697 stats->inbps = 0;
698 stats->outbps = 0;
699
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000701
Sven Wegener3a14a312008-08-10 18:24:41 +0000702 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700703}
704
705/*
706 * Update a destination in the given service
707 */
708static void
709__ip_vs_update_dest(struct ip_vs_service *svc,
710 struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
711{
712 int conn_flags;
713
714 /* set the weight and the flags */
715 atomic_set(&dest->weight, udest->weight);
716 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
717
718 /* check if local node and update the flags */
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800719 if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
721 | IP_VS_CONN_F_LOCALNODE;
722 }
723
724 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
725 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
726 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
727 } else {
728 /*
729 * Put the real service in ip_vs_rtable if not present.
730 * For now only for NAT!
731 */
732 write_lock_bh(&__ip_vs_rs_lock);
733 ip_vs_rs_hash(dest);
734 write_unlock_bh(&__ip_vs_rs_lock);
735 }
736 atomic_set(&dest->conn_flags, conn_flags);
737
738 /* bind the service */
739 if (!dest->svc) {
740 __ip_vs_bind_svc(dest, svc);
741 } else {
742 if (dest->svc != svc) {
743 __ip_vs_unbind_svc(dest);
744 ip_vs_zero_stats(&dest->stats);
745 __ip_vs_bind_svc(dest, svc);
746 }
747 }
748
749 /* set the dest status flags */
750 dest->flags |= IP_VS_DEST_F_AVAILABLE;
751
752 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
753 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
754 dest->u_threshold = udest->u_threshold;
755 dest->l_threshold = udest->l_threshold;
756}
757
758
759/*
760 * Create a destination for the given service
761 */
762static int
763ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
764 struct ip_vs_dest **dest_p)
765{
766 struct ip_vs_dest *dest;
767 unsigned atype;
768
769 EnterFunction(2);
770
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800771 atype = inet_addr_type(&init_net, udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
773 return -EINVAL;
774
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700775 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 if (dest == NULL) {
777 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
778 return -ENOMEM;
779 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780
781 dest->protocol = svc->protocol;
782 dest->vaddr = svc->addr;
783 dest->vport = svc->port;
784 dest->vfwmark = svc->fwmark;
785 dest->addr = udest->addr;
786 dest->port = udest->port;
787
788 atomic_set(&dest->activeconns, 0);
789 atomic_set(&dest->inactconns, 0);
790 atomic_set(&dest->persistconns, 0);
791 atomic_set(&dest->refcnt, 0);
792
793 INIT_LIST_HEAD(&dest->d_list);
794 spin_lock_init(&dest->dst_lock);
795 spin_lock_init(&dest->stats.lock);
796 __ip_vs_update_dest(svc, dest, udest);
797 ip_vs_new_estimator(&dest->stats);
798
799 *dest_p = dest;
800
801 LeaveFunction(2);
802 return 0;
803}
804
805
806/*
807 * Add a destination into an existing service
808 */
809static int
810ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
811{
812 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -0700813 __be32 daddr = udest->addr;
814 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 int ret;
816
817 EnterFunction(2);
818
819 if (udest->weight < 0) {
820 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
821 return -ERANGE;
822 }
823
824 if (udest->l_threshold > udest->u_threshold) {
825 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
826 "upper threshold\n");
827 return -ERANGE;
828 }
829
830 /*
831 * Check if the dest already exists in the list
832 */
833 dest = ip_vs_lookup_dest(svc, daddr, dport);
834 if (dest != NULL) {
835 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
836 return -EEXIST;
837 }
838
839 /*
840 * Check if the dest already exists in the trash and
841 * is from the same service
842 */
843 dest = ip_vs_trash_get_dest(svc, daddr, dport);
844 if (dest != NULL) {
845 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
Roberto Nibali4b5bdf52006-01-03 14:22:59 -0800846 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 NIPQUAD(daddr), ntohs(dport),
848 atomic_read(&dest->refcnt),
849 dest->vfwmark,
850 NIPQUAD(dest->vaddr),
851 ntohs(dest->vport));
852 __ip_vs_update_dest(svc, dest, udest);
853
854 /*
855 * Get the destination from the trash
856 */
857 list_del(&dest->n_list);
858
859 ip_vs_new_estimator(&dest->stats);
860
861 write_lock_bh(&__ip_vs_svc_lock);
862
863 /*
864 * Wait until all other svc users go away.
865 */
866 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
867
868 list_add(&dest->n_list, &svc->destinations);
869 svc->num_dests++;
870
871 /* call the update_service function of its scheduler */
Sven Wegener82dfb6f2008-08-11 19:36:06 +0000872 if (svc->scheduler->update_service)
873 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874
875 write_unlock_bh(&__ip_vs_svc_lock);
876 return 0;
877 }
878
879 /*
880 * Allocate and initialize the dest structure
881 */
882 ret = ip_vs_new_dest(svc, udest, &dest);
883 if (ret) {
884 return ret;
885 }
886
887 /*
888 * Add the dest entry into the list
889 */
890 atomic_inc(&dest->refcnt);
891
892 write_lock_bh(&__ip_vs_svc_lock);
893
894 /*
895 * Wait until all other svc users go away.
896 */
897 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
898
899 list_add(&dest->n_list, &svc->destinations);
900 svc->num_dests++;
901
902 /* call the update_service function of its scheduler */
Sven Wegener82dfb6f2008-08-11 19:36:06 +0000903 if (svc->scheduler->update_service)
904 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905
906 write_unlock_bh(&__ip_vs_svc_lock);
907
908 LeaveFunction(2);
909
910 return 0;
911}
912
913
914/*
915 * Edit a destination in the given service
916 */
917static int
918ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
919{
920 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -0700921 __be32 daddr = udest->addr;
922 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923
924 EnterFunction(2);
925
926 if (udest->weight < 0) {
927 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
928 return -ERANGE;
929 }
930
931 if (udest->l_threshold > udest->u_threshold) {
932 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
933 "upper threshold\n");
934 return -ERANGE;
935 }
936
937 /*
938 * Lookup the destination list
939 */
940 dest = ip_vs_lookup_dest(svc, daddr, dport);
941 if (dest == NULL) {
942 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
943 return -ENOENT;
944 }
945
946 __ip_vs_update_dest(svc, dest, udest);
947
948 write_lock_bh(&__ip_vs_svc_lock);
949
950 /* Wait until all other svc users go away */
Heiko Carstenscae7ca32007-08-10 15:50:30 -0700951 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952
953 /* call the update_service, because server weight may be changed */
Sven Wegener82dfb6f2008-08-11 19:36:06 +0000954 if (svc->scheduler->update_service)
955 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956
957 write_unlock_bh(&__ip_vs_svc_lock);
958
959 LeaveFunction(2);
960
961 return 0;
962}
963
964
965/*
966 * Delete a destination (must be already unlinked from the service)
967 */
968static void __ip_vs_del_dest(struct ip_vs_dest *dest)
969{
970 ip_vs_kill_estimator(&dest->stats);
971
972 /*
973 * Remove it from the d-linked list with the real services.
974 */
975 write_lock_bh(&__ip_vs_rs_lock);
976 ip_vs_rs_unhash(dest);
977 write_unlock_bh(&__ip_vs_rs_lock);
978
979 /*
980 * Decrease the refcnt of the dest, and free the dest
981 * if nobody refers to it (refcnt=0). Otherwise, throw
982 * the destination into the trash.
983 */
984 if (atomic_dec_and_test(&dest->refcnt)) {
985 ip_vs_dst_reset(dest);
986 /* simply decrease svc->refcnt here, let the caller check
987 and release the service if nobody refers to it.
988 Only user context can release destination and service,
989 and only one user context can update virtual service at a
990 time, so the operation here is OK */
991 atomic_dec(&dest->svc->refcnt);
992 kfree(dest);
993 } else {
Roberto Nibali4b5bdf52006-01-03 14:22:59 -0800994 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
995 "dest->refcnt=%d\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996 NIPQUAD(dest->addr), ntohs(dest->port),
997 atomic_read(&dest->refcnt));
998 list_add(&dest->n_list, &ip_vs_dest_trash);
999 atomic_inc(&dest->refcnt);
1000 }
1001}
1002
1003
1004/*
1005 * Unlink a destination from the given service
1006 */
1007static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1008 struct ip_vs_dest *dest,
1009 int svcupd)
1010{
1011 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1012
1013 /*
1014 * Remove it from the d-linked destination list.
1015 */
1016 list_del(&dest->n_list);
1017 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001018
1019 /*
1020 * Call the update_service function of its scheduler
1021 */
1022 if (svcupd && svc->scheduler->update_service)
1023 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024}
1025
1026
1027/*
1028 * Delete a destination server in the given service
1029 */
1030static int
1031ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
1032{
1033 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -07001034 __be32 daddr = udest->addr;
1035 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036
1037 EnterFunction(2);
1038
1039 dest = ip_vs_lookup_dest(svc, daddr, dport);
1040 if (dest == NULL) {
1041 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1042 return -ENOENT;
1043 }
1044
1045 write_lock_bh(&__ip_vs_svc_lock);
1046
1047 /*
1048 * Wait until all other svc users go away.
1049 */
1050 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1051
1052 /*
1053 * Unlink dest from the service
1054 */
1055 __ip_vs_unlink_dest(svc, dest, 1);
1056
1057 write_unlock_bh(&__ip_vs_svc_lock);
1058
1059 /*
1060 * Delete the destination
1061 */
1062 __ip_vs_del_dest(dest);
1063
1064 LeaveFunction(2);
1065
1066 return 0;
1067}
1068
1069
1070/*
1071 * Add a service into the service hash table
1072 */
1073static int
1074ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
1075{
1076 int ret = 0;
1077 struct ip_vs_scheduler *sched = NULL;
1078 struct ip_vs_service *svc = NULL;
1079
1080 /* increase the module use count */
1081 ip_vs_use_count_inc();
1082
1083 /* Lookup the scheduler by 'u->sched_name' */
1084 sched = ip_vs_scheduler_get(u->sched_name);
1085 if (sched == NULL) {
1086 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1087 u->sched_name);
1088 ret = -ENOENT;
1089 goto out_mod_dec;
1090 }
1091
Panagiotis Issaris0da974f2006-07-21 14:51:30 -07001092 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 if (svc == NULL) {
1094 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1095 ret = -ENOMEM;
1096 goto out_err;
1097 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098
1099 /* I'm the first user of the service */
1100 atomic_set(&svc->usecnt, 1);
1101 atomic_set(&svc->refcnt, 0);
1102
1103 svc->protocol = u->protocol;
1104 svc->addr = u->addr;
1105 svc->port = u->port;
1106 svc->fwmark = u->fwmark;
1107 svc->flags = u->flags;
1108 svc->timeout = u->timeout * HZ;
1109 svc->netmask = u->netmask;
1110
1111 INIT_LIST_HEAD(&svc->destinations);
1112 rwlock_init(&svc->sched_lock);
1113 spin_lock_init(&svc->stats.lock);
1114
1115 /* Bind the scheduler */
1116 ret = ip_vs_bind_scheduler(svc, sched);
1117 if (ret)
1118 goto out_err;
1119 sched = NULL;
1120
1121 /* Update the virtual service counters */
1122 if (svc->port == FTPPORT)
1123 atomic_inc(&ip_vs_ftpsvc_counter);
1124 else if (svc->port == 0)
1125 atomic_inc(&ip_vs_nullsvc_counter);
1126
1127 ip_vs_new_estimator(&svc->stats);
1128 ip_vs_num_services++;
1129
1130 /* Hash the service into the service table */
1131 write_lock_bh(&__ip_vs_svc_lock);
1132 ip_vs_svc_hash(svc);
1133 write_unlock_bh(&__ip_vs_svc_lock);
1134
1135 *svc_p = svc;
1136 return 0;
1137
1138 out_err:
1139 if (svc != NULL) {
1140 if (svc->scheduler)
1141 ip_vs_unbind_scheduler(svc);
1142 if (svc->inc) {
1143 local_bh_disable();
1144 ip_vs_app_inc_put(svc->inc);
1145 local_bh_enable();
1146 }
1147 kfree(svc);
1148 }
1149 ip_vs_scheduler_put(sched);
1150
1151 out_mod_dec:
1152 /* decrease the module use count */
1153 ip_vs_use_count_dec();
1154
1155 return ret;
1156}
1157
1158
1159/*
1160 * Edit a service and bind it with a new scheduler
1161 */
1162static int
1163ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
1164{
1165 struct ip_vs_scheduler *sched, *old_sched;
1166 int ret = 0;
1167
1168 /*
1169 * Lookup the scheduler, by 'u->sched_name'
1170 */
1171 sched = ip_vs_scheduler_get(u->sched_name);
1172 if (sched == NULL) {
1173 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1174 u->sched_name);
1175 return -ENOENT;
1176 }
1177 old_sched = sched;
1178
1179 write_lock_bh(&__ip_vs_svc_lock);
1180
1181 /*
1182 * Wait until all other svc users go away.
1183 */
1184 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1185
1186 /*
1187 * Set the flags and timeout value
1188 */
1189 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1190 svc->timeout = u->timeout * HZ;
1191 svc->netmask = u->netmask;
1192
1193 old_sched = svc->scheduler;
1194 if (sched != old_sched) {
1195 /*
1196 * Unbind the old scheduler
1197 */
1198 if ((ret = ip_vs_unbind_scheduler(svc))) {
1199 old_sched = sched;
1200 goto out;
1201 }
1202
1203 /*
1204 * Bind the new scheduler
1205 */
1206 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1207 /*
1208 * If ip_vs_bind_scheduler fails, restore the old
1209 * scheduler.
1210 * The main reason of failure is out of memory.
1211 *
1212 * The question is if the old scheduler can be
1213 * restored all the time. TODO: if it cannot be
1214 * restored some time, we must delete the service,
1215 * otherwise the system may crash.
1216 */
1217 ip_vs_bind_scheduler(svc, old_sched);
1218 old_sched = sched;
1219 goto out;
1220 }
1221 }
1222
1223 out:
1224 write_unlock_bh(&__ip_vs_svc_lock);
1225
1226 if (old_sched)
1227 ip_vs_scheduler_put(old_sched);
1228
1229 return ret;
1230}
1231
1232
1233/*
1234 * Delete a service from the service list
1235 * - The service must be unlinked, unlocked and not referenced!
1236 * - We are called under _bh lock
1237 */
1238static void __ip_vs_del_service(struct ip_vs_service *svc)
1239{
1240 struct ip_vs_dest *dest, *nxt;
1241 struct ip_vs_scheduler *old_sched;
1242
1243 ip_vs_num_services--;
1244 ip_vs_kill_estimator(&svc->stats);
1245
1246 /* Unbind scheduler */
1247 old_sched = svc->scheduler;
1248 ip_vs_unbind_scheduler(svc);
1249 if (old_sched)
1250 ip_vs_scheduler_put(old_sched);
1251
1252 /* Unbind app inc */
1253 if (svc->inc) {
1254 ip_vs_app_inc_put(svc->inc);
1255 svc->inc = NULL;
1256 }
1257
1258 /*
1259 * Unlink the whole destination list
1260 */
1261 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1262 __ip_vs_unlink_dest(svc, dest, 0);
1263 __ip_vs_del_dest(dest);
1264 }
1265
1266 /*
1267 * Update the virtual service counters
1268 */
1269 if (svc->port == FTPPORT)
1270 atomic_dec(&ip_vs_ftpsvc_counter);
1271 else if (svc->port == 0)
1272 atomic_dec(&ip_vs_nullsvc_counter);
1273
1274 /*
1275 * Free the service if nobody refers to it
1276 */
1277 if (atomic_read(&svc->refcnt) == 0)
1278 kfree(svc);
1279
1280 /* decrease the module use count */
1281 ip_vs_use_count_dec();
1282}
1283
1284/*
1285 * Delete a service from the service list
1286 */
1287static int ip_vs_del_service(struct ip_vs_service *svc)
1288{
1289 if (svc == NULL)
1290 return -EEXIST;
1291
1292 /*
1293 * Unhash it from the service table
1294 */
1295 write_lock_bh(&__ip_vs_svc_lock);
1296
1297 ip_vs_svc_unhash(svc);
1298
1299 /*
1300 * Wait until all the svc users go away.
1301 */
1302 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1303
1304 __ip_vs_del_service(svc);
1305
1306 write_unlock_bh(&__ip_vs_svc_lock);
1307
1308 return 0;
1309}
1310
1311
1312/*
1313 * Flush all the virtual services
1314 */
1315static int ip_vs_flush(void)
1316{
1317 int idx;
1318 struct ip_vs_service *svc, *nxt;
1319
1320 /*
1321 * Flush the service table hashed by <protocol,addr,port>
1322 */
1323 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1324 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1325 write_lock_bh(&__ip_vs_svc_lock);
1326 ip_vs_svc_unhash(svc);
1327 /*
1328 * Wait until all the svc users go away.
1329 */
1330 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1331 __ip_vs_del_service(svc);
1332 write_unlock_bh(&__ip_vs_svc_lock);
1333 }
1334 }
1335
1336 /*
1337 * Flush the service table hashed by fwmark
1338 */
1339 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1340 list_for_each_entry_safe(svc, nxt,
1341 &ip_vs_svc_fwm_table[idx], f_list) {
1342 write_lock_bh(&__ip_vs_svc_lock);
1343 ip_vs_svc_unhash(svc);
1344 /*
1345 * Wait until all the svc users go away.
1346 */
1347 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1348 __ip_vs_del_service(svc);
1349 write_unlock_bh(&__ip_vs_svc_lock);
1350 }
1351 }
1352
1353 return 0;
1354}
1355
1356
1357/*
1358 * Zero counters in a service or all services
1359 */
1360static int ip_vs_zero_service(struct ip_vs_service *svc)
1361{
1362 struct ip_vs_dest *dest;
1363
1364 write_lock_bh(&__ip_vs_svc_lock);
1365 list_for_each_entry(dest, &svc->destinations, n_list) {
1366 ip_vs_zero_stats(&dest->stats);
1367 }
1368 ip_vs_zero_stats(&svc->stats);
1369 write_unlock_bh(&__ip_vs_svc_lock);
1370 return 0;
1371}
1372
1373static int ip_vs_zero_all(void)
1374{
1375 int idx;
1376 struct ip_vs_service *svc;
1377
1378 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1379 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1380 ip_vs_zero_service(svc);
1381 }
1382 }
1383
1384 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1385 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1386 ip_vs_zero_service(svc);
1387 }
1388 }
1389
1390 ip_vs_zero_stats(&ip_vs_stats);
1391 return 0;
1392}
1393
1394
1395static int
1396proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1397 void __user *buffer, size_t *lenp, loff_t *ppos)
1398{
1399 int *valp = table->data;
1400 int val = *valp;
1401 int rc;
1402
1403 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1404 if (write && (*valp != val)) {
1405 if ((*valp < 0) || (*valp > 3)) {
1406 /* Restore the correct value */
1407 *valp = val;
1408 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409 update_defense_level();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410 }
1411 }
1412 return rc;
1413}
1414
1415
1416static int
1417proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1418 void __user *buffer, size_t *lenp, loff_t *ppos)
1419{
1420 int *valp = table->data;
1421 int val[2];
1422 int rc;
1423
1424 /* backup the value first */
1425 memcpy(val, valp, sizeof(val));
1426
1427 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1428 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1429 /* Restore the correct value */
1430 memcpy(valp, val, sizeof(val));
1431 }
1432 return rc;
1433}
1434
1435
1436/*
1437 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1438 */
1439
1440static struct ctl_table vs_vars[] = {
1441 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001442 .procname = "amemthresh",
1443 .data = &sysctl_ip_vs_amemthresh,
1444 .maxlen = sizeof(int),
1445 .mode = 0644,
1446 .proc_handler = &proc_dointvec,
1447 },
1448#ifdef CONFIG_IP_VS_DEBUG
1449 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450 .procname = "debug_level",
1451 .data = &sysctl_ip_vs_debug_level,
1452 .maxlen = sizeof(int),
1453 .mode = 0644,
1454 .proc_handler = &proc_dointvec,
1455 },
1456#endif
1457 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458 .procname = "am_droprate",
1459 .data = &sysctl_ip_vs_am_droprate,
1460 .maxlen = sizeof(int),
1461 .mode = 0644,
1462 .proc_handler = &proc_dointvec,
1463 },
1464 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001465 .procname = "drop_entry",
1466 .data = &sysctl_ip_vs_drop_entry,
1467 .maxlen = sizeof(int),
1468 .mode = 0644,
1469 .proc_handler = &proc_do_defense_mode,
1470 },
1471 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472 .procname = "drop_packet",
1473 .data = &sysctl_ip_vs_drop_packet,
1474 .maxlen = sizeof(int),
1475 .mode = 0644,
1476 .proc_handler = &proc_do_defense_mode,
1477 },
1478 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 .procname = "secure_tcp",
1480 .data = &sysctl_ip_vs_secure_tcp,
1481 .maxlen = sizeof(int),
1482 .mode = 0644,
1483 .proc_handler = &proc_do_defense_mode,
1484 },
1485#if 0
1486 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487 .procname = "timeout_established",
1488 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1489 .maxlen = sizeof(int),
1490 .mode = 0644,
1491 .proc_handler = &proc_dointvec_jiffies,
1492 },
1493 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494 .procname = "timeout_synsent",
1495 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1496 .maxlen = sizeof(int),
1497 .mode = 0644,
1498 .proc_handler = &proc_dointvec_jiffies,
1499 },
1500 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 .procname = "timeout_synrecv",
1502 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1503 .maxlen = sizeof(int),
1504 .mode = 0644,
1505 .proc_handler = &proc_dointvec_jiffies,
1506 },
1507 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508 .procname = "timeout_finwait",
1509 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1510 .maxlen = sizeof(int),
1511 .mode = 0644,
1512 .proc_handler = &proc_dointvec_jiffies,
1513 },
1514 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001515 .procname = "timeout_timewait",
1516 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1517 .maxlen = sizeof(int),
1518 .mode = 0644,
1519 .proc_handler = &proc_dointvec_jiffies,
1520 },
1521 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522 .procname = "timeout_close",
1523 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1524 .maxlen = sizeof(int),
1525 .mode = 0644,
1526 .proc_handler = &proc_dointvec_jiffies,
1527 },
1528 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529 .procname = "timeout_closewait",
1530 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1531 .maxlen = sizeof(int),
1532 .mode = 0644,
1533 .proc_handler = &proc_dointvec_jiffies,
1534 },
1535 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001536 .procname = "timeout_lastack",
1537 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1538 .maxlen = sizeof(int),
1539 .mode = 0644,
1540 .proc_handler = &proc_dointvec_jiffies,
1541 },
1542 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543 .procname = "timeout_listen",
1544 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1545 .maxlen = sizeof(int),
1546 .mode = 0644,
1547 .proc_handler = &proc_dointvec_jiffies,
1548 },
1549 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550 .procname = "timeout_synack",
1551 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1552 .maxlen = sizeof(int),
1553 .mode = 0644,
1554 .proc_handler = &proc_dointvec_jiffies,
1555 },
1556 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001557 .procname = "timeout_udp",
1558 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1559 .maxlen = sizeof(int),
1560 .mode = 0644,
1561 .proc_handler = &proc_dointvec_jiffies,
1562 },
1563 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564 .procname = "timeout_icmp",
1565 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1566 .maxlen = sizeof(int),
1567 .mode = 0644,
1568 .proc_handler = &proc_dointvec_jiffies,
1569 },
1570#endif
1571 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 .procname = "cache_bypass",
1573 .data = &sysctl_ip_vs_cache_bypass,
1574 .maxlen = sizeof(int),
1575 .mode = 0644,
1576 .proc_handler = &proc_dointvec,
1577 },
1578 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001579 .procname = "expire_nodest_conn",
1580 .data = &sysctl_ip_vs_expire_nodest_conn,
1581 .maxlen = sizeof(int),
1582 .mode = 0644,
1583 .proc_handler = &proc_dointvec,
1584 },
1585 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001586 .procname = "expire_quiescent_template",
1587 .data = &sysctl_ip_vs_expire_quiescent_template,
1588 .maxlen = sizeof(int),
1589 .mode = 0644,
1590 .proc_handler = &proc_dointvec,
1591 },
1592 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001593 .procname = "sync_threshold",
1594 .data = &sysctl_ip_vs_sync_threshold,
1595 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1596 .mode = 0644,
1597 .proc_handler = &proc_do_sync_threshold,
1598 },
1599 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600 .procname = "nat_icmp_send",
1601 .data = &sysctl_ip_vs_nat_icmp_send,
1602 .maxlen = sizeof(int),
1603 .mode = 0644,
1604 .proc_handler = &proc_dointvec,
1605 },
1606 { .ctl_name = 0 }
1607};
1608
Sven Wegener5587da52008-08-10 18:24:40 +00001609const struct ctl_path net_vs_ctl_path[] = {
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001610 { .procname = "net", .ctl_name = CTL_NET, },
1611 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1612 { .procname = "vs", },
1613 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001614};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001615EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616
1617static struct ctl_table_header * sysctl_header;
1618
1619#ifdef CONFIG_PROC_FS
1620
/*
 *	Cursor kept in seq->private by the service listing below: which
 *	hash table the current service came from and which bucket of it.
 */
struct ip_vs_iter {
	struct list_head	*table;		/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int			bucket;		/* index into that table */
};
1625
1626/*
1627 * Write the contents of the VS rule table to a PROCfs file.
1628 * (It is kept just for backward compatibility)
1629 */
1630static inline const char *ip_vs_fwd_name(unsigned flags)
1631{
1632 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1633 case IP_VS_CONN_F_LOCALNODE:
1634 return "Local";
1635 case IP_VS_CONN_F_TUNNEL:
1636 return "Tunnel";
1637 case IP_VS_CONN_F_DROUTE:
1638 return "Route";
1639 default:
1640 return "Masq";
1641 }
1642}
1643
1644
1645/* Get the Nth entry in the two lists */
1646static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1647{
1648 struct ip_vs_iter *iter = seq->private;
1649 int idx;
1650 struct ip_vs_service *svc;
1651
1652 /* look in hash by protocol */
1653 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1654 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1655 if (pos-- == 0){
1656 iter->table = ip_vs_svc_table;
1657 iter->bucket = idx;
1658 return svc;
1659 }
1660 }
1661 }
1662
1663 /* keep looking in fwmark */
1664 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1665 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1666 if (pos-- == 0) {
1667 iter->table = ip_vs_svc_fwm_table;
1668 iter->bucket = idx;
1669 return svc;
1670 }
1671 }
1672 }
1673
1674 return NULL;
1675}
1676
1677static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1678{
1679
1680 read_lock_bh(&__ip_vs_svc_lock);
1681 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1682}
1683
1684
1685static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1686{
1687 struct list_head *e;
1688 struct ip_vs_iter *iter;
1689 struct ip_vs_service *svc;
1690
1691 ++*pos;
1692 if (v == SEQ_START_TOKEN)
1693 return ip_vs_info_array(seq,0);
1694
1695 svc = v;
1696 iter = seq->private;
1697
1698 if (iter->table == ip_vs_svc_table) {
1699 /* next service in table hashed by protocol */
1700 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1701 return list_entry(e, struct ip_vs_service, s_list);
1702
1703
1704 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1705 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1706 s_list) {
1707 return svc;
1708 }
1709 }
1710
1711 iter->table = ip_vs_svc_fwm_table;
1712 iter->bucket = -1;
1713 goto scan_fwmark;
1714 }
1715
1716 /* next service in hashed by fwmark */
1717 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1718 return list_entry(e, struct ip_vs_service, f_list);
1719
1720 scan_fwmark:
1721 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1722 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1723 f_list)
1724 return svc;
1725 }
1726
1727 return NULL;
1728}
1729
1730static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1731{
1732 read_unlock_bh(&__ip_vs_svc_lock);
1733}
1734
1735
1736static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1737{
1738 if (v == SEQ_START_TOKEN) {
1739 seq_printf(seq,
1740 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1741 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1742 seq_puts(seq,
1743 "Prot LocalAddress:Port Scheduler Flags\n");
1744 seq_puts(seq,
1745 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1746 } else {
1747 const struct ip_vs_service *svc = v;
1748 const struct ip_vs_iter *iter = seq->private;
1749 const struct ip_vs_dest *dest;
1750
1751 if (iter->table == ip_vs_svc_table)
1752 seq_printf(seq, "%s %08X:%04X %s ",
1753 ip_vs_proto_name(svc->protocol),
1754 ntohl(svc->addr),
1755 ntohs(svc->port),
1756 svc->scheduler->name);
1757 else
1758 seq_printf(seq, "FWM %08X %s ",
1759 svc->fwmark, svc->scheduler->name);
1760
1761 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1762 seq_printf(seq, "persistent %d %08X\n",
1763 svc->timeout,
1764 ntohl(svc->netmask));
1765 else
1766 seq_putc(seq, '\n');
1767
1768 list_for_each_entry(dest, &svc->destinations, n_list) {
1769 seq_printf(seq,
1770 " -> %08X:%04X %-7s %-6d %-10d %-10d\n",
1771 ntohl(dest->addr), ntohs(dest->port),
1772 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1773 atomic_read(&dest->weight),
1774 atomic_read(&dest->activeconns),
1775 atomic_read(&dest->inactconns));
1776 }
1777 }
1778 return 0;
1779}
1780
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001781static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782 .start = ip_vs_info_seq_start,
1783 .next = ip_vs_info_seq_next,
1784 .stop = ip_vs_info_seq_stop,
1785 .show = ip_vs_info_seq_show,
1786};
1787
1788static int ip_vs_info_open(struct inode *inode, struct file *file)
1789{
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001790 return seq_open_private(file, &ip_vs_info_seq_ops,
1791 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792}
1793
Arjan van de Ven9a321442007-02-12 00:55:35 -08001794static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001795 .owner = THIS_MODULE,
1796 .open = ip_vs_info_open,
1797 .read = seq_read,
1798 .llseek = seq_lseek,
1799 .release = seq_release_private,
1800};
1801
1802#endif
1803
Sven Wegener519e49e2008-08-10 18:24:41 +00001804struct ip_vs_stats ip_vs_stats = {
1805 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1806};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001807
1808#ifdef CONFIG_PROC_FS
1809static int ip_vs_stats_show(struct seq_file *seq, void *v)
1810{
1811
1812/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1813 seq_puts(seq,
1814 " Total Incoming Outgoing Incoming Outgoing\n");
1815 seq_printf(seq,
1816 " Conns Packets Packets Bytes Bytes\n");
1817
1818 spin_lock_bh(&ip_vs_stats.lock);
1819 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1820 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1821 (unsigned long long) ip_vs_stats.inbytes,
1822 (unsigned long long) ip_vs_stats.outbytes);
1823
1824/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1825 seq_puts(seq,
1826 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1827 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1828 ip_vs_stats.cps,
1829 ip_vs_stats.inpps,
1830 ip_vs_stats.outpps,
1831 ip_vs_stats.inbps,
1832 ip_vs_stats.outbps);
1833 spin_unlock_bh(&ip_vs_stats.lock);
1834
1835 return 0;
1836}
1837
1838static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1839{
1840 return single_open(file, ip_vs_stats_show, NULL);
1841}
1842
Arjan van de Ven9a321442007-02-12 00:55:35 -08001843static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001844 .owner = THIS_MODULE,
1845 .open = ip_vs_stats_seq_open,
1846 .read = seq_read,
1847 .llseek = seq_lseek,
1848 .release = single_release,
1849};
1850
1851#endif
1852
1853/*
1854 * Set timeout values for tcp tcpfin udp in the timeout_table.
1855 */
1856static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1857{
1858 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1859 u->tcp_timeout,
1860 u->tcp_fin_timeout,
1861 u->udp_timeout);
1862
1863#ifdef CONFIG_IP_VS_PROTO_TCP
1864 if (u->tcp_timeout) {
1865 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1866 = u->tcp_timeout * HZ;
1867 }
1868
1869 if (u->tcp_fin_timeout) {
1870 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1871 = u->tcp_fin_timeout * HZ;
1872 }
1873#endif
1874
1875#ifdef CONFIG_IP_VS_PROTO_UDP
1876 if (u->udp_timeout) {
1877 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1878 = u->udp_timeout * HZ;
1879 }
1880#endif
1881 return 0;
1882}
1883
1884
1885#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
1886#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
1887#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
1888 sizeof(struct ip_vs_dest_user))
1889#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
1890#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
1891#define MAX_ARG_LEN SVCDEST_ARG_LEN
1892
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08001893static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
1895 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
1896 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
1897 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
1898 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
1899 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
1900 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
1901 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
1902 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
1903 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
1904 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
1905};
1906
1907static int
1908do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1909{
1910 int ret;
1911 unsigned char arg[MAX_ARG_LEN];
1912 struct ip_vs_service_user *usvc;
1913 struct ip_vs_service *svc;
1914 struct ip_vs_dest_user *udest;
1915
1916 if (!capable(CAP_NET_ADMIN))
1917 return -EPERM;
1918
1919 if (len != set_arglen[SET_CMDID(cmd)]) {
1920 IP_VS_ERR("set_ctl: len %u != %u\n",
1921 len, set_arglen[SET_CMDID(cmd)]);
1922 return -EINVAL;
1923 }
1924
1925 if (copy_from_user(arg, user, len) != 0)
1926 return -EFAULT;
1927
1928 /* increase the module use count */
1929 ip_vs_use_count_inc();
1930
Ingo Molnar14cc3e22006-03-26 01:37:14 -08001931 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001932 ret = -ERESTARTSYS;
1933 goto out_dec;
1934 }
1935
1936 if (cmd == IP_VS_SO_SET_FLUSH) {
1937 /* Flush the virtual service */
1938 ret = ip_vs_flush();
1939 goto out_unlock;
1940 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1941 /* Set timeout values for (tcp tcpfin udp) */
1942 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1943 goto out_unlock;
1944 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1945 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1946 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1947 goto out_unlock;
1948 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1949 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1950 ret = stop_sync_thread(dm->state);
1951 goto out_unlock;
1952 }
1953
1954 usvc = (struct ip_vs_service_user *)arg;
1955 udest = (struct ip_vs_dest_user *)(usvc + 1);
1956
1957 if (cmd == IP_VS_SO_SET_ZERO) {
1958 /* if no service address is set, zero counters in all */
1959 if (!usvc->fwmark && !usvc->addr && !usvc->port) {
1960 ret = ip_vs_zero_all();
1961 goto out_unlock;
1962 }
1963 }
1964
1965 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
1966 if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
1967 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
1968 usvc->protocol, NIPQUAD(usvc->addr),
1969 ntohs(usvc->port), usvc->sched_name);
1970 ret = -EFAULT;
1971 goto out_unlock;
1972 }
1973
1974 /* Lookup the exact service by <protocol, addr, port> or fwmark */
1975 if (usvc->fwmark == 0)
1976 svc = __ip_vs_service_get(usvc->protocol,
1977 usvc->addr, usvc->port);
1978 else
1979 svc = __ip_vs_svc_fwm_get(usvc->fwmark);
1980
1981 if (cmd != IP_VS_SO_SET_ADD
1982 && (svc == NULL || svc->protocol != usvc->protocol)) {
1983 ret = -ESRCH;
1984 goto out_unlock;
1985 }
1986
1987 switch (cmd) {
1988 case IP_VS_SO_SET_ADD:
1989 if (svc != NULL)
1990 ret = -EEXIST;
1991 else
1992 ret = ip_vs_add_service(usvc, &svc);
1993 break;
1994 case IP_VS_SO_SET_EDIT:
1995 ret = ip_vs_edit_service(svc, usvc);
1996 break;
1997 case IP_VS_SO_SET_DEL:
1998 ret = ip_vs_del_service(svc);
1999 if (!ret)
2000 goto out_unlock;
2001 break;
2002 case IP_VS_SO_SET_ZERO:
2003 ret = ip_vs_zero_service(svc);
2004 break;
2005 case IP_VS_SO_SET_ADDDEST:
2006 ret = ip_vs_add_dest(svc, udest);
2007 break;
2008 case IP_VS_SO_SET_EDITDEST:
2009 ret = ip_vs_edit_dest(svc, udest);
2010 break;
2011 case IP_VS_SO_SET_DELDEST:
2012 ret = ip_vs_del_dest(svc, udest);
2013 break;
2014 default:
2015 ret = -EINVAL;
2016 }
2017
2018 if (svc)
2019 ip_vs_service_put(svc);
2020
2021 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002022 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023 out_dec:
2024 /* decrease the module use count */
2025 ip_vs_use_count_dec();
2026
2027 return ret;
2028}
2029
2030
2031static void
2032ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2033{
2034 spin_lock_bh(&src->lock);
2035 memcpy(dst, src, (char*)&src->lock - (char*)src);
2036 spin_unlock_bh(&src->lock);
2037}
2038
2039static void
2040ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2041{
2042 dst->protocol = src->protocol;
2043 dst->addr = src->addr;
2044 dst->port = src->port;
2045 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002046 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002047 dst->flags = src->flags;
2048 dst->timeout = src->timeout / HZ;
2049 dst->netmask = src->netmask;
2050 dst->num_dests = src->num_dests;
2051 ip_vs_copy_stats(&dst->stats, &src->stats);
2052}
2053
2054static inline int
2055__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2056 struct ip_vs_get_services __user *uptr)
2057{
2058 int idx, count=0;
2059 struct ip_vs_service *svc;
2060 struct ip_vs_service_entry entry;
2061 int ret = 0;
2062
2063 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2064 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2065 if (count >= get->num_services)
2066 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002067 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002068 ip_vs_copy_service(&entry, svc);
2069 if (copy_to_user(&uptr->entrytable[count],
2070 &entry, sizeof(entry))) {
2071 ret = -EFAULT;
2072 goto out;
2073 }
2074 count++;
2075 }
2076 }
2077
2078 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2079 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2080 if (count >= get->num_services)
2081 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002082 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083 ip_vs_copy_service(&entry, svc);
2084 if (copy_to_user(&uptr->entrytable[count],
2085 &entry, sizeof(entry))) {
2086 ret = -EFAULT;
2087 goto out;
2088 }
2089 count++;
2090 }
2091 }
2092 out:
2093 return ret;
2094}
2095
2096static inline int
2097__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2098 struct ip_vs_get_dests __user *uptr)
2099{
2100 struct ip_vs_service *svc;
2101 int ret = 0;
2102
2103 if (get->fwmark)
2104 svc = __ip_vs_svc_fwm_get(get->fwmark);
2105 else
2106 svc = __ip_vs_service_get(get->protocol,
2107 get->addr, get->port);
2108 if (svc) {
2109 int count = 0;
2110 struct ip_vs_dest *dest;
2111 struct ip_vs_dest_entry entry;
2112
2113 list_for_each_entry(dest, &svc->destinations, n_list) {
2114 if (count >= get->num_dests)
2115 break;
2116
2117 entry.addr = dest->addr;
2118 entry.port = dest->port;
2119 entry.conn_flags = atomic_read(&dest->conn_flags);
2120 entry.weight = atomic_read(&dest->weight);
2121 entry.u_threshold = dest->u_threshold;
2122 entry.l_threshold = dest->l_threshold;
2123 entry.activeconns = atomic_read(&dest->activeconns);
2124 entry.inactconns = atomic_read(&dest->inactconns);
2125 entry.persistconns = atomic_read(&dest->persistconns);
2126 ip_vs_copy_stats(&entry.stats, &dest->stats);
2127 if (copy_to_user(&uptr->entrytable[count],
2128 &entry, sizeof(entry))) {
2129 ret = -EFAULT;
2130 break;
2131 }
2132 count++;
2133 }
2134 ip_vs_service_put(svc);
2135 } else
2136 ret = -ESRCH;
2137 return ret;
2138}
2139
2140static inline void
2141__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2142{
2143#ifdef CONFIG_IP_VS_PROTO_TCP
2144 u->tcp_timeout =
2145 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2146 u->tcp_fin_timeout =
2147 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2148#endif
2149#ifdef CONFIG_IP_VS_PROTO_UDP
2150 u->udp_timeout =
2151 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2152#endif
2153}
2154
2155
2156#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2157#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2158#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2159#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2160#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2161#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2162#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2163
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002164static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002165 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2166 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2167 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2168 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2169 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2170 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2171 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2172};
2173
2174static int
2175do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2176{
2177 unsigned char arg[128];
2178 int ret = 0;
2179
2180 if (!capable(CAP_NET_ADMIN))
2181 return -EPERM;
2182
2183 if (*len < get_arglen[GET_CMDID(cmd)]) {
2184 IP_VS_ERR("get_ctl: len %u < %u\n",
2185 *len, get_arglen[GET_CMDID(cmd)]);
2186 return -EINVAL;
2187 }
2188
2189 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2190 return -EFAULT;
2191
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002192 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193 return -ERESTARTSYS;
2194
2195 switch (cmd) {
2196 case IP_VS_SO_GET_VERSION:
2197 {
2198 char buf[64];
2199
2200 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2201 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2202 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2203 ret = -EFAULT;
2204 goto out;
2205 }
2206 *len = strlen(buf)+1;
2207 }
2208 break;
2209
2210 case IP_VS_SO_GET_INFO:
2211 {
2212 struct ip_vs_getinfo info;
2213 info.version = IP_VS_VERSION_CODE;
2214 info.size = IP_VS_CONN_TAB_SIZE;
2215 info.num_services = ip_vs_num_services;
2216 if (copy_to_user(user, &info, sizeof(info)) != 0)
2217 ret = -EFAULT;
2218 }
2219 break;
2220
2221 case IP_VS_SO_GET_SERVICES:
2222 {
2223 struct ip_vs_get_services *get;
2224 int size;
2225
2226 get = (struct ip_vs_get_services *)arg;
2227 size = sizeof(*get) +
2228 sizeof(struct ip_vs_service_entry) * get->num_services;
2229 if (*len != size) {
2230 IP_VS_ERR("length: %u != %u\n", *len, size);
2231 ret = -EINVAL;
2232 goto out;
2233 }
2234 ret = __ip_vs_get_service_entries(get, user);
2235 }
2236 break;
2237
2238 case IP_VS_SO_GET_SERVICE:
2239 {
2240 struct ip_vs_service_entry *entry;
2241 struct ip_vs_service *svc;
2242
2243 entry = (struct ip_vs_service_entry *)arg;
2244 if (entry->fwmark)
2245 svc = __ip_vs_svc_fwm_get(entry->fwmark);
2246 else
2247 svc = __ip_vs_service_get(entry->protocol,
2248 entry->addr, entry->port);
2249 if (svc) {
2250 ip_vs_copy_service(entry, svc);
2251 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2252 ret = -EFAULT;
2253 ip_vs_service_put(svc);
2254 } else
2255 ret = -ESRCH;
2256 }
2257 break;
2258
2259 case IP_VS_SO_GET_DESTS:
2260 {
2261 struct ip_vs_get_dests *get;
2262 int size;
2263
2264 get = (struct ip_vs_get_dests *)arg;
2265 size = sizeof(*get) +
2266 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2267 if (*len != size) {
2268 IP_VS_ERR("length: %u != %u\n", *len, size);
2269 ret = -EINVAL;
2270 goto out;
2271 }
2272 ret = __ip_vs_get_dest_entries(get, user);
2273 }
2274 break;
2275
2276 case IP_VS_SO_GET_TIMEOUT:
2277 {
2278 struct ip_vs_timeout_user t;
2279
2280 __ip_vs_get_timeouts(&t);
2281 if (copy_to_user(user, &t, sizeof(t)) != 0)
2282 ret = -EFAULT;
2283 }
2284 break;
2285
2286 case IP_VS_SO_GET_DAEMON:
2287 {
2288 struct ip_vs_daemon_user d[2];
2289
2290 memset(&d, 0, sizeof(d));
2291 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2292 d[0].state = IP_VS_STATE_MASTER;
pageexec4da62fc2005-06-26 16:00:19 -07002293 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002294 d[0].syncid = ip_vs_master_syncid;
2295 }
2296 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2297 d[1].state = IP_VS_STATE_BACKUP;
pageexec4da62fc2005-06-26 16:00:19 -07002298 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002299 d[1].syncid = ip_vs_backup_syncid;
2300 }
2301 if (copy_to_user(user, &d, sizeof(d)) != 0)
2302 ret = -EFAULT;
2303 }
2304 break;
2305
2306 default:
2307 ret = -EINVAL;
2308 }
2309
2310 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002311 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312 return ret;
2313}
2314
2315
/*
 * Socket option registration for PF_INET: the set range is handled by
 * do_ip_vs_set_ctl(), the get range by do_ip_vs_get_ctl().  Both ranges
 * start at IP_VS_BASE_CTL; the upper bounds are given as "max + 1".
 */
static struct nf_sockopt_ops ip_vs_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IP_VS_BASE_CTL,
	.set_optmax	= IP_VS_SO_SET_MAX+1,
	.set		= do_ip_vs_set_ctl,
	.get_optmin	= IP_VS_BASE_CTL,
	.get_optmax	= IP_VS_SO_GET_MAX+1,
	.get		= do_ip_vs_get_ctl,
	.owner		= THIS_MODULE,
};
2326
Julius Volz9a812192008-08-14 14:08:44 +02002327/*
2328 * Generic Netlink interface
2329 */
2330
2331/* IPVS genetlink family */
2332static struct genl_family ip_vs_genl_family = {
2333 .id = GENL_ID_GENERATE,
2334 .hdrsize = 0,
2335 .name = IPVS_GENL_NAME,
2336 .version = IPVS_GENL_VERSION,
2337 .maxattr = IPVS_CMD_MAX,
2338};
2339
/*
 * Policy used for first-level command attributes: service, destination
 * and daemon descriptions are nested attribute sets, the three timeout
 * values are plain 32-bit integers.
 */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
};
2349
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON.
 * The multicast interface name is a NUL-terminated string limited to
 * IP_VS_IFNAME_MAXLEN.
 */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_IFNAME_MAXLEN },
	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
};
2357
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE.
 * The address and flags are binary blobs bounded by the size of
 * union nf_inet_addr and struct ip_vs_flags respectively; the scheduler
 * name is a NUL-terminated string limited to IP_VS_SCHEDNAME_MAXLEN.
 */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_SCHEDNAME_MAXLEN },
	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
					    .len = sizeof(struct ip_vs_flags) },
	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
};
2374
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST.
 * The address is a binary blob bounded by the size of union nf_inet_addr;
 * the statistics are carried as a nested attribute set.
 */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
};
2389
/*
 * Append the counters and rates of @stats to @skb, wrapped in one nested
 * attribute of type @container_type.
 *
 * All values are read with stats->lock held (bottom halves disabled), and
 * the lock is dropped on both the success and the failure path.
 *
 * Returns 0 on success or -EMSGSIZE when the nest cannot be opened or an
 * attribute cannot be added; in the latter case the partly written nest
 * is cancelled again.
 */
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
				 struct ip_vs_stats *stats)
{
	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
	if (!nl_stats)
		return -EMSGSIZE;

	spin_lock_bh(&stats->lock);

	/* the NLA_PUT_* macros branch to nla_put_failure when a put fails */
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
	/* byte counters are the only 64-bit values */
	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);

	spin_unlock_bh(&stats->lock);

	nla_nest_end(skb, nl_stats);

	return 0;

nla_put_failure:
	/* still holding stats->lock here */
	spin_unlock_bh(&stats->lock);
	nla_nest_cancel(skb, nl_stats);
	return -EMSGSIZE;
}
2421
2422static int ip_vs_genl_fill_service(struct sk_buff *skb,
2423 struct ip_vs_service *svc)
2424{
2425 struct nlattr *nl_service;
2426 struct ip_vs_flags flags = { .flags = svc->flags,
2427 .mask = ~0 };
2428
2429 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2430 if (!nl_service)
2431 return -EMSGSIZE;
2432
2433 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2434
2435 if (svc->fwmark) {
2436 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2437 } else {
2438 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2439 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2440 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2441 }
2442
2443 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2444 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2445 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2446 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2447
2448 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2449 goto nla_put_failure;
2450
2451 nla_nest_end(skb, nl_service);
2452
2453 return 0;
2454
2455nla_put_failure:
2456 nla_nest_cancel(skb, nl_service);
2457 return -EMSGSIZE;
2458}
2459
2460static int ip_vs_genl_dump_service(struct sk_buff *skb,
2461 struct ip_vs_service *svc,
2462 struct netlink_callback *cb)
2463{
2464 void *hdr;
2465
2466 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2467 &ip_vs_genl_family, NLM_F_MULTI,
2468 IPVS_CMD_NEW_SERVICE);
2469 if (!hdr)
2470 return -EMSGSIZE;
2471
2472 if (ip_vs_genl_fill_service(skb, svc) < 0)
2473 goto nla_put_failure;
2474
2475 return genlmsg_end(skb, hdr);
2476
2477nla_put_failure:
2478 genlmsg_cancel(skb, hdr);
2479 return -EMSGSIZE;
2480}
2481
2482static int ip_vs_genl_dump_services(struct sk_buff *skb,
2483 struct netlink_callback *cb)
2484{
2485 int idx = 0, i;
2486 int start = cb->args[0];
2487 struct ip_vs_service *svc;
2488
2489 mutex_lock(&__ip_vs_mutex);
2490 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2491 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2492 if (++idx <= start)
2493 continue;
2494 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2495 idx--;
2496 goto nla_put_failure;
2497 }
2498 }
2499 }
2500
2501 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2502 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2503 if (++idx <= start)
2504 continue;
2505 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2506 idx--;
2507 goto nla_put_failure;
2508 }
2509 }
2510 }
2511
2512nla_put_failure:
2513 mutex_unlock(&__ip_vs_mutex);
2514 cb->args[0] = idx;
2515
2516 return skb->len;
2517}
2518
2519static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
2520 struct nlattr *nla, int full_entry)
2521{
2522 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2523 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2524
2525 /* Parse mandatory identifying service fields first */
2526 if (nla == NULL ||
2527 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2528 return -EINVAL;
2529
2530 nla_af = attrs[IPVS_SVC_ATTR_AF];
2531 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2532 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2533 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2534 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2535
2536 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2537 return -EINVAL;
2538
2539 /* For now, only support IPv4 */
2540 if (nla_get_u16(nla_af) != AF_INET)
2541 return -EAFNOSUPPORT;
2542
2543 if (nla_fwmark) {
2544 usvc->protocol = IPPROTO_TCP;
2545 usvc->fwmark = nla_get_u32(nla_fwmark);
2546 } else {
2547 usvc->protocol = nla_get_u16(nla_protocol);
2548 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2549 usvc->port = nla_get_u16(nla_port);
2550 usvc->fwmark = 0;
2551 }
2552
2553 /* If a full entry was requested, check for the additional fields */
2554 if (full_entry) {
2555 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2556 *nla_netmask;
2557 struct ip_vs_flags flags;
2558 struct ip_vs_service *svc;
2559
2560 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2561 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2562 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2563 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2564
2565 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2566 return -EINVAL;
2567
2568 nla_memcpy(&flags, nla_flags, sizeof(flags));
2569
2570 /* prefill flags from service if it already exists */
2571 if (usvc->fwmark)
2572 svc = __ip_vs_svc_fwm_get(usvc->fwmark);
2573 else
2574 svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
2575 usvc->port);
2576 if (svc) {
2577 usvc->flags = svc->flags;
2578 ip_vs_service_put(svc);
2579 } else
2580 usvc->flags = 0;
2581
2582 /* set new flags from userland */
2583 usvc->flags = (usvc->flags & ~flags.mask) |
2584 (flags.flags & flags.mask);
2585
2586 strlcpy(usvc->sched_name, nla_data(nla_sched),
2587 sizeof(usvc->sched_name));
2588 usvc->timeout = nla_get_u32(nla_timeout);
2589 usvc->netmask = nla_get_u32(nla_netmask);
2590 }
2591
2592 return 0;
2593}
2594
2595static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2596{
2597 struct ip_vs_service_user usvc;
2598 int ret;
2599
2600 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2601 if (ret)
2602 return ERR_PTR(ret);
2603
2604 if (usvc.fwmark)
2605 return __ip_vs_svc_fwm_get(usvc.fwmark);
2606 else
2607 return __ip_vs_service_get(usvc.protocol, usvc.addr,
2608 usvc.port);
2609}
2610
2611static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2612{
2613 struct nlattr *nl_dest;
2614
2615 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2616 if (!nl_dest)
2617 return -EMSGSIZE;
2618
2619 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2620 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2621
2622 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2623 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2624 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2625 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2626 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2627 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2628 atomic_read(&dest->activeconns));
2629 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2630 atomic_read(&dest->inactconns));
2631 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2632 atomic_read(&dest->persistconns));
2633
2634 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2635 goto nla_put_failure;
2636
2637 nla_nest_end(skb, nl_dest);
2638
2639 return 0;
2640
2641nla_put_failure:
2642 nla_nest_cancel(skb, nl_dest);
2643 return -EMSGSIZE;
2644}
2645
2646static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2647 struct netlink_callback *cb)
2648{
2649 void *hdr;
2650
2651 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2652 &ip_vs_genl_family, NLM_F_MULTI,
2653 IPVS_CMD_NEW_DEST);
2654 if (!hdr)
2655 return -EMSGSIZE;
2656
2657 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2658 goto nla_put_failure;
2659
2660 return genlmsg_end(skb, hdr);
2661
2662nla_put_failure:
2663 genlmsg_cancel(skb, hdr);
2664 return -EMSGSIZE;
2665}
2666
2667static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2668 struct netlink_callback *cb)
2669{
2670 int idx = 0;
2671 int start = cb->args[0];
2672 struct ip_vs_service *svc;
2673 struct ip_vs_dest *dest;
2674 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2675
2676 mutex_lock(&__ip_vs_mutex);
2677
2678 /* Try to find the service for which to dump destinations */
2679 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2680 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2681 goto out_err;
2682
2683 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2684 if (IS_ERR(svc) || svc == NULL)
2685 goto out_err;
2686
2687 /* Dump the destinations */
2688 list_for_each_entry(dest, &svc->destinations, n_list) {
2689 if (++idx <= start)
2690 continue;
2691 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2692 idx--;
2693 goto nla_put_failure;
2694 }
2695 }
2696
2697nla_put_failure:
2698 cb->args[0] = idx;
2699 ip_vs_service_put(svc);
2700
2701out_err:
2702 mutex_unlock(&__ip_vs_mutex);
2703
2704 return skb->len;
2705}
2706
2707static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
2708 struct nlattr *nla, int full_entry)
2709{
2710 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2711 struct nlattr *nla_addr, *nla_port;
2712
2713 /* Parse mandatory identifying destination fields first */
2714 if (nla == NULL ||
2715 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2716 return -EINVAL;
2717
2718 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2719 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2720
2721 if (!(nla_addr && nla_port))
2722 return -EINVAL;
2723
2724 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2725 udest->port = nla_get_u16(nla_port);
2726
2727 /* If a full entry was requested, check for the additional fields */
2728 if (full_entry) {
2729 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2730 *nla_l_thresh;
2731
2732 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2733 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2734 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2735 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2736
2737 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2738 return -EINVAL;
2739
2740 udest->conn_flags = nla_get_u32(nla_fwd)
2741 & IP_VS_CONN_F_FWD_MASK;
2742 udest->weight = nla_get_u32(nla_weight);
2743 udest->u_threshold = nla_get_u32(nla_u_thresh);
2744 udest->l_threshold = nla_get_u32(nla_l_thresh);
2745 }
2746
2747 return 0;
2748}
2749
2750static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2751 const char *mcast_ifn, __be32 syncid)
2752{
2753 struct nlattr *nl_daemon;
2754
2755 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2756 if (!nl_daemon)
2757 return -EMSGSIZE;
2758
2759 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2760 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2761 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2762
2763 nla_nest_end(skb, nl_daemon);
2764
2765 return 0;
2766
2767nla_put_failure:
2768 nla_nest_cancel(skb, nl_daemon);
2769 return -EMSGSIZE;
2770}
2771
2772static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2773 const char *mcast_ifn, __be32 syncid,
2774 struct netlink_callback *cb)
2775{
2776 void *hdr;
2777 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2778 &ip_vs_genl_family, NLM_F_MULTI,
2779 IPVS_CMD_NEW_DAEMON);
2780 if (!hdr)
2781 return -EMSGSIZE;
2782
2783 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2784 goto nla_put_failure;
2785
2786 return genlmsg_end(skb, hdr);
2787
2788nla_put_failure:
2789 genlmsg_cancel(skb, hdr);
2790 return -EMSGSIZE;
2791}
2792
2793static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2794 struct netlink_callback *cb)
2795{
2796 mutex_lock(&__ip_vs_mutex);
2797 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2798 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2799 ip_vs_master_mcast_ifn,
2800 ip_vs_master_syncid, cb) < 0)
2801 goto nla_put_failure;
2802
2803 cb->args[0] = 1;
2804 }
2805
2806 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2807 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2808 ip_vs_backup_mcast_ifn,
2809 ip_vs_backup_syncid, cb) < 0)
2810 goto nla_put_failure;
2811
2812 cb->args[1] = 1;
2813 }
2814
2815nla_put_failure:
2816 mutex_unlock(&__ip_vs_mutex);
2817
2818 return skb->len;
2819}
2820
2821static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2822{
2823 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2824 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2825 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2826 return -EINVAL;
2827
2828 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2829 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2830 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2831}
2832
2833static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2834{
2835 if (!attrs[IPVS_DAEMON_ATTR_STATE])
2836 return -EINVAL;
2837
2838 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2839}
2840
2841static int ip_vs_genl_set_config(struct nlattr **attrs)
2842{
2843 struct ip_vs_timeout_user t;
2844
2845 __ip_vs_get_timeouts(&t);
2846
2847 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2848 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
2849
2850 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
2851 t.tcp_fin_timeout =
2852 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
2853
2854 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
2855 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
2856
2857 return ip_vs_set_timeout(&t);
2858}
2859
2860static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
2861{
2862 struct ip_vs_service *svc = NULL;
2863 struct ip_vs_service_user usvc;
2864 struct ip_vs_dest_user udest;
2865 int ret = 0, cmd;
2866 int need_full_svc = 0, need_full_dest = 0;
2867
2868 cmd = info->genlhdr->cmd;
2869
2870 mutex_lock(&__ip_vs_mutex);
2871
2872 if (cmd == IPVS_CMD_FLUSH) {
2873 ret = ip_vs_flush();
2874 goto out;
2875 } else if (cmd == IPVS_CMD_SET_CONFIG) {
2876 ret = ip_vs_genl_set_config(info->attrs);
2877 goto out;
2878 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
2879 cmd == IPVS_CMD_DEL_DAEMON) {
2880
2881 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
2882
2883 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
2884 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
2885 info->attrs[IPVS_CMD_ATTR_DAEMON],
2886 ip_vs_daemon_policy)) {
2887 ret = -EINVAL;
2888 goto out;
2889 }
2890
2891 if (cmd == IPVS_CMD_NEW_DAEMON)
2892 ret = ip_vs_genl_new_daemon(daemon_attrs);
2893 else
2894 ret = ip_vs_genl_del_daemon(daemon_attrs);
2895 goto out;
2896 } else if (cmd == IPVS_CMD_ZERO &&
2897 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
2898 ret = ip_vs_zero_all();
2899 goto out;
2900 }
2901
2902 /* All following commands require a service argument, so check if we
2903 * received a valid one. We need a full service specification when
2904 * adding / editing a service. Only identifying members otherwise. */
2905 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
2906 need_full_svc = 1;
2907
2908 ret = ip_vs_genl_parse_service(&usvc,
2909 info->attrs[IPVS_CMD_ATTR_SERVICE],
2910 need_full_svc);
2911 if (ret)
2912 goto out;
2913
2914 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2915 if (usvc.fwmark == 0)
2916 svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
2917 else
2918 svc = __ip_vs_svc_fwm_get(usvc.fwmark);
2919
2920 /* Unless we're adding a new service, the service must already exist */
2921 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
2922 ret = -ESRCH;
2923 goto out;
2924 }
2925
2926 /* Destination commands require a valid destination argument. For
2927 * adding / editing a destination, we need a full destination
2928 * specification. */
2929 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
2930 cmd == IPVS_CMD_DEL_DEST) {
2931 if (cmd != IPVS_CMD_DEL_DEST)
2932 need_full_dest = 1;
2933
2934 ret = ip_vs_genl_parse_dest(&udest,
2935 info->attrs[IPVS_CMD_ATTR_DEST],
2936 need_full_dest);
2937 if (ret)
2938 goto out;
2939 }
2940
2941 switch (cmd) {
2942 case IPVS_CMD_NEW_SERVICE:
2943 if (svc == NULL)
2944 ret = ip_vs_add_service(&usvc, &svc);
2945 else
2946 ret = -EEXIST;
2947 break;
2948 case IPVS_CMD_SET_SERVICE:
2949 ret = ip_vs_edit_service(svc, &usvc);
2950 break;
2951 case IPVS_CMD_DEL_SERVICE:
2952 ret = ip_vs_del_service(svc);
2953 break;
2954 case IPVS_CMD_NEW_DEST:
2955 ret = ip_vs_add_dest(svc, &udest);
2956 break;
2957 case IPVS_CMD_SET_DEST:
2958 ret = ip_vs_edit_dest(svc, &udest);
2959 break;
2960 case IPVS_CMD_DEL_DEST:
2961 ret = ip_vs_del_dest(svc, &udest);
2962 break;
2963 case IPVS_CMD_ZERO:
2964 ret = ip_vs_zero_service(svc);
2965 break;
2966 default:
2967 ret = -EINVAL;
2968 }
2969
2970out:
2971 if (svc)
2972 ip_vs_service_put(svc);
2973 mutex_unlock(&__ip_vs_mutex);
2974
2975 return ret;
2976}
2977
2978static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
2979{
2980 struct sk_buff *msg;
2981 void *reply;
2982 int ret, cmd, reply_cmd;
2983
2984 cmd = info->genlhdr->cmd;
2985
2986 if (cmd == IPVS_CMD_GET_SERVICE)
2987 reply_cmd = IPVS_CMD_NEW_SERVICE;
2988 else if (cmd == IPVS_CMD_GET_INFO)
2989 reply_cmd = IPVS_CMD_SET_INFO;
2990 else if (cmd == IPVS_CMD_GET_CONFIG)
2991 reply_cmd = IPVS_CMD_SET_CONFIG;
2992 else {
2993 IP_VS_ERR("unknown Generic Netlink command\n");
2994 return -EINVAL;
2995 }
2996
2997 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2998 if (!msg)
2999 return -ENOMEM;
3000
3001 mutex_lock(&__ip_vs_mutex);
3002
3003 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3004 if (reply == NULL)
3005 goto nla_put_failure;
3006
3007 switch (cmd) {
3008 case IPVS_CMD_GET_SERVICE:
3009 {
3010 struct ip_vs_service *svc;
3011
3012 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3013 if (IS_ERR(svc)) {
3014 ret = PTR_ERR(svc);
3015 goto out_err;
3016 } else if (svc) {
3017 ret = ip_vs_genl_fill_service(msg, svc);
3018 ip_vs_service_put(svc);
3019 if (ret)
3020 goto nla_put_failure;
3021 } else {
3022 ret = -ESRCH;
3023 goto out_err;
3024 }
3025
3026 break;
3027 }
3028
3029 case IPVS_CMD_GET_CONFIG:
3030 {
3031 struct ip_vs_timeout_user t;
3032
3033 __ip_vs_get_timeouts(&t);
3034#ifdef CONFIG_IP_VS_PROTO_TCP
3035 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3036 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3037 t.tcp_fin_timeout);
3038#endif
3039#ifdef CONFIG_IP_VS_PROTO_UDP
3040 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3041#endif
3042
3043 break;
3044 }
3045
3046 case IPVS_CMD_GET_INFO:
3047 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3048 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3049 IP_VS_CONN_TAB_SIZE);
3050 break;
3051 }
3052
3053 genlmsg_end(msg, reply);
3054 ret = genlmsg_unicast(msg, info->snd_pid);
3055 goto out;
3056
3057nla_put_failure:
3058 IP_VS_ERR("not enough space in Netlink message\n");
3059 ret = -EMSGSIZE;
3060
3061out_err:
3062 nlmsg_free(msg);
3063out:
3064 mutex_unlock(&__ip_vs_mutex);
3065
3066 return ret;
3067}
3068
3069
/*
 * Generic Netlink operations of the IPVS family.
 *
 * Every command carries GENL_ADMIN_PERM.  Modifying commands are routed
 * to ip_vs_genl_set_cmd(), single-object queries to ip_vs_genl_get_cmd(),
 * and list queries are served by the dumpit callbacks.
 */
static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
	{
		.cmd	= IPVS_CMD_NEW_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	/* the only command with both a doit and a dumpit handler */
	{
		.cmd	= IPVS_CMD_GET_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
		.dumpit	= ip_vs_genl_dump_services,
		.policy	= ip_vs_cmd_policy,
	},
	{
		.cmd	= IPVS_CMD_NEW_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	/* destinations can only be listed (dump), not queried singly */
	{
		.cmd	= IPVS_CMD_GET_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.dumpit	= ip_vs_genl_dump_dests,
	},
	{
		.cmd	= IPVS_CMD_NEW_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.dumpit	= ip_vs_genl_dump_daemons,
	},
	{
		.cmd	= IPVS_CMD_SET_CONFIG,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_CONFIG,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_INFO,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		.cmd	= IPVS_CMD_ZERO,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_FLUSH,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
};
3165
3166static int __init ip_vs_genl_register(void)
3167{
3168 int ret, i;
3169
3170 ret = genl_register_family(&ip_vs_genl_family);
3171 if (ret)
3172 return ret;
3173
3174 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3175 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3176 if (ret)
3177 goto err_out;
3178 }
3179 return 0;
3180
3181err_out:
3182 genl_unregister_family(&ip_vs_genl_family);
3183 return ret;
3184}
3185
/* Unregister the IPVS Generic Netlink family. */
static void ip_vs_genl_unregister(void)
{
	genl_unregister_family(&ip_vs_genl_family);
}
3190
3191/* End of Generic Netlink interface definitions */
3192
Linus Torvalds1da177e2005-04-16 15:20:36 -07003193
Sven Wegener048cf482008-08-10 18:24:35 +00003194int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003195{
3196 int ret;
3197 int idx;
3198
3199 EnterFunction(2);
3200
3201 ret = nf_register_sockopt(&ip_vs_sockopts);
3202 if (ret) {
3203 IP_VS_ERR("cannot register sockopt.\n");
3204 return ret;
3205 }
3206
Julius Volz9a812192008-08-14 14:08:44 +02003207 ret = ip_vs_genl_register();
3208 if (ret) {
3209 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3210 nf_unregister_sockopt(&ip_vs_sockopts);
3211 return ret;
3212 }
3213
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003214 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3215 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003216
Pavel Emelyanov90754f82008-01-12 02:33:50 -08003217 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003218
3219 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3220 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3221 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3222 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3223 }
3224 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3225 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3226 }
3227
Linus Torvalds1da177e2005-04-16 15:20:36 -07003228 ip_vs_new_estimator(&ip_vs_stats);
3229
3230 /* Hook the defense timer */
3231 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3232
3233 LeaveFunction(2);
3234 return 0;
3235}
3236
3237
/*
 * Tear down what ip_vs_control_init() set up: clean the trash, stop the
 * defense work, kill the global statistics estimator, then remove the
 * sysctl table, the proc entries, the Generic Netlink family and finally
 * the sockopt handlers.
 */
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	ip_vs_trash_cleanup();
	/* stop the self-rearming defense work and wait for it to finish */
	cancel_rearming_delayed_work(&defense_work);
	cancel_work_sync(&defense_work.work);
	ip_vs_kill_estimator(&ip_vs_stats);
	unregister_sysctl_table(sysctl_header);
	proc_net_remove(&init_net, "ip_vs_stats");
	proc_net_remove(&init_net, "ip_vs");
	ip_vs_genl_unregister();
	nf_unregister_sockopt(&ip_vs_sockopts);
	LeaveFunction(2);
}