blob: 88474f1e828a160c5a8add518116c9ee2e464671 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
Hannes Eder9aada7a2009-07-30 14:29:44 -070021#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/seq_file.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090034#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080038#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020040#include <net/net_namespace.h>
Hans Schillstrom93304192011-01-03 14:44:51 +010041#include <linux/nsproxy.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <net/ip.h>
Vince Busam09571c72008-09-02 15:55:52 +020043#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020047#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020049#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
Ingo Molnar14cc3e22006-03-26 01:37:14 -080056static DEFINE_MUTEX(__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58/* lock for service table */
59static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61/* lock for table with the real services */
62static DEFINE_RWLOCK(__ip_vs_rs_lock);
63
64/* lock for state and timeout tables */
Simon Horman4f728162010-08-26 02:54:30 +000065static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070066
67/* lock for drop entry handling */
68static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
69
70/* lock for drop packet handling */
71static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
72
73/* 1/rate drop and drop-entry variables */
74int ip_vs_drop_rate = 0;
75int ip_vs_drop_counter = 0;
76static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
77
78/* number of virtual services */
79static int ip_vs_num_services = 0;
80
81/* sysctl variables */
82static int sysctl_ip_vs_drop_entry = 0;
83static int sysctl_ip_vs_drop_packet = 0;
84static int sysctl_ip_vs_secure_tcp = 0;
85static int sysctl_ip_vs_amemthresh = 1024;
86static int sysctl_ip_vs_am_droprate = 10;
87int sysctl_ip_vs_cache_bypass = 0;
88int sysctl_ip_vs_expire_nodest_conn = 0;
89int sysctl_ip_vs_expire_quiescent_template = 0;
90int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
91int sysctl_ip_vs_nat_icmp_send = 0;
Julian Anastasovf4bc17c2010-09-21 17:35:41 +020092#ifdef CONFIG_IP_VS_NFCT
93int sysctl_ip_vs_conntrack;
94#endif
Julian Anastasov8a803042010-09-21 17:38:57 +020095int sysctl_ip_vs_snat_reroute = 1;
Hans Schillstromb880c1f2010-11-19 14:25:14 +010096int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */
Linus Torvalds1da177e2005-04-16 15:20:36 -070097
98#ifdef CONFIG_IP_VS_DEBUG
/* Current debug verbosity; zero until something stores another value. */
static int sysctl_ip_vs_debug_level;

/*
 *	Returns the current value of sysctl_ip_vs_debug_level.
 */
int ip_vs_get_debug_level(void)
{
	const int level = sysctl_ip_vs_debug_level;

	return level;
}
105#endif
106
Vince Busam09571c72008-09-02 15:55:52 +0200107#ifdef CONFIG_IP_VS_IPV6
108/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
109static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
110{
111 struct rt6_info *rt;
112 struct flowi fl = {
113 .oif = 0,
Changli Gao58116622010-11-12 18:43:55 +0000114 .fl6_dst = *addr,
115 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
Vince Busam09571c72008-09-02 15:55:52 +0200116 };
117
118 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
119 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
120 return 1;
121
122 return 0;
123}
124#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700126 * update_defense_level is called from keventd and from sysctl,
127 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128 */
Hans Schillstrom93304192011-01-03 14:44:51 +0100129static void update_defense_level(struct netns_ipvs *ipvs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130{
131 struct sysinfo i;
132 static int old_secure_tcp = 0;
133 int availmem;
134 int nomem;
135 int to_change = -1;
136
137 /* we only count free and buffered memory (in pages) */
138 si_meminfo(&i);
139 availmem = i.freeram + i.bufferram;
140 /* however in linux 2.5 the i.bufferram is total page cache size,
141 we need adjust it */
142 /* si_swapinfo(&i); */
143 /* availmem = availmem - (i.totalswap - i.freeswap); */
144
145 nomem = (availmem < sysctl_ip_vs_amemthresh);
146
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700147 local_bh_disable();
148
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149 /* drop_entry */
150 spin_lock(&__ip_vs_dropentry_lock);
151 switch (sysctl_ip_vs_drop_entry) {
152 case 0:
153 atomic_set(&ip_vs_dropentry, 0);
154 break;
155 case 1:
156 if (nomem) {
157 atomic_set(&ip_vs_dropentry, 1);
158 sysctl_ip_vs_drop_entry = 2;
159 } else {
160 atomic_set(&ip_vs_dropentry, 0);
161 }
162 break;
163 case 2:
164 if (nomem) {
165 atomic_set(&ip_vs_dropentry, 1);
166 } else {
167 atomic_set(&ip_vs_dropentry, 0);
168 sysctl_ip_vs_drop_entry = 1;
169 };
170 break;
171 case 3:
172 atomic_set(&ip_vs_dropentry, 1);
173 break;
174 }
175 spin_unlock(&__ip_vs_dropentry_lock);
176
177 /* drop_packet */
178 spin_lock(&__ip_vs_droppacket_lock);
179 switch (sysctl_ip_vs_drop_packet) {
180 case 0:
181 ip_vs_drop_rate = 0;
182 break;
183 case 1:
184 if (nomem) {
185 ip_vs_drop_rate = ip_vs_drop_counter
186 = sysctl_ip_vs_amemthresh /
187 (sysctl_ip_vs_amemthresh-availmem);
188 sysctl_ip_vs_drop_packet = 2;
189 } else {
190 ip_vs_drop_rate = 0;
191 }
192 break;
193 case 2:
194 if (nomem) {
195 ip_vs_drop_rate = ip_vs_drop_counter
196 = sysctl_ip_vs_amemthresh /
197 (sysctl_ip_vs_amemthresh-availmem);
198 } else {
199 ip_vs_drop_rate = 0;
200 sysctl_ip_vs_drop_packet = 1;
201 }
202 break;
203 case 3:
204 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
205 break;
206 }
207 spin_unlock(&__ip_vs_droppacket_lock);
208
209 /* secure_tcp */
Simon Horman4f728162010-08-26 02:54:30 +0000210 spin_lock(&ip_vs_securetcp_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 switch (sysctl_ip_vs_secure_tcp) {
212 case 0:
213 if (old_secure_tcp >= 2)
214 to_change = 0;
215 break;
216 case 1:
217 if (nomem) {
218 if (old_secure_tcp < 2)
219 to_change = 1;
220 sysctl_ip_vs_secure_tcp = 2;
221 } else {
222 if (old_secure_tcp >= 2)
223 to_change = 0;
224 }
225 break;
226 case 2:
227 if (nomem) {
228 if (old_secure_tcp < 2)
229 to_change = 1;
230 } else {
231 if (old_secure_tcp >= 2)
232 to_change = 0;
233 sysctl_ip_vs_secure_tcp = 1;
234 }
235 break;
236 case 3:
237 if (old_secure_tcp < 2)
238 to_change = 1;
239 break;
240 }
241 old_secure_tcp = sysctl_ip_vs_secure_tcp;
242 if (to_change >= 0)
Hans Schillstrom93304192011-01-03 14:44:51 +0100243 ip_vs_protocol_timeout_change(ipvs,
244 sysctl_ip_vs_secure_tcp > 1);
Simon Horman4f728162010-08-26 02:54:30 +0000245 spin_unlock(&ip_vs_securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700246
247 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248}
249
250
251/*
252 * Timer for checking the defense
253 */
254#define DEFENSE_TIMER_PERIOD 1*HZ
David Howellsc4028952006-11-22 14:57:56 +0000255static void defense_work_handler(struct work_struct *work);
256static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257
David Howellsc4028952006-11-22 14:57:56 +0000258static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259{
Hans Schillstrom93304192011-01-03 14:44:51 +0100260 struct net *net = &init_net;
261 struct netns_ipvs *ipvs = net_ipvs(net);
262
263 update_defense_level(ipvs);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 if (atomic_read(&ip_vs_dropentry))
265 ip_vs_random_dropentry();
266
267 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
268}
269
270int
271ip_vs_use_count_inc(void)
272{
273 return try_module_get(THIS_MODULE);
274}
275
276void
277ip_vs_use_count_dec(void)
278{
279 module_put(THIS_MODULE);
280}
281
282
283/*
284 * Hash table: for virtual service lookups
285 */
286#define IP_VS_SVC_TAB_BITS 8
287#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
288#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
289
290/* the service table hashed by <protocol, addr, port> */
291static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
292/* the service table hashed by fwmark */
293static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
294
295/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296 * Trash for destinations
297 */
298static LIST_HEAD(ip_vs_dest_trash);
299
300/*
301 * FTP & NULL virtual service counters
302 */
303static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
304static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
305
306
307/*
308 * Returns hash value for virtual service
309 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100310static inline unsigned
311ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
312 const union nf_inet_addr *addr, __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313{
314 register unsigned porth = ntohs(port);
Julius Volzb18610d2008-09-02 15:55:37 +0200315 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316
Julius Volzb18610d2008-09-02 15:55:37 +0200317#ifdef CONFIG_IP_VS_IPV6
318 if (af == AF_INET6)
319 addr_fold = addr->ip6[0]^addr->ip6[1]^
320 addr->ip6[2]^addr->ip6[3];
321#endif
Hans Schillstromfc723252011-01-03 14:44:43 +0100322 addr_fold ^= ((size_t)net>>8);
Julius Volzb18610d2008-09-02 15:55:37 +0200323
324 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 & IP_VS_SVC_TAB_MASK;
326}
327
328/*
329 * Returns hash value of fwmark for virtual service lookup
330 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100331static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332{
Hans Schillstromfc723252011-01-03 14:44:43 +0100333 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334}
335
336/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100337 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338 * or in the ip_vs_svc_fwm_table by fwmark.
339 * Should be called with locked tables.
340 */
341static int ip_vs_svc_hash(struct ip_vs_service *svc)
342{
343 unsigned hash;
344
345 if (svc->flags & IP_VS_SVC_F_HASHED) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000346 pr_err("%s(): request for already hashed, called from %pF\n",
347 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348 return 0;
349 }
350
351 if (svc->fwmark == 0) {
352 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100353 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100355 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
356 &svc->addr, svc->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
358 } else {
359 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100360 * Hash it by fwmark in svc_fwm_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100362 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
364 }
365
366 svc->flags |= IP_VS_SVC_F_HASHED;
367 /* increase its refcnt because it is referenced by the svc table */
368 atomic_inc(&svc->refcnt);
369 return 1;
370}
371
372
373/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100374 * Unhashes a service from svc_table / svc_fwm_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375 * Should be called with locked tables.
376 */
377static int ip_vs_svc_unhash(struct ip_vs_service *svc)
378{
379 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000380 pr_err("%s(): request for unhash flagged, called from %pF\n",
381 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 return 0;
383 }
384
385 if (svc->fwmark == 0) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100386 /* Remove it from the svc_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 list_del(&svc->s_list);
388 } else {
Hans Schillstromfc723252011-01-03 14:44:43 +0100389 /* Remove it from the svc_fwm_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 list_del(&svc->f_list);
391 }
392
393 svc->flags &= ~IP_VS_SVC_F_HASHED;
394 atomic_dec(&svc->refcnt);
395 return 1;
396}
397
398
399/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100400 * Get service by {netns, proto,addr,port} in the service table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 */
Julius Volzb18610d2008-09-02 15:55:37 +0200402static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100403__ip_vs_service_find(struct net *net, int af, __u16 protocol,
404 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405{
406 unsigned hash;
407 struct ip_vs_service *svc;
408
409 /* Check for "full" addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100410 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411
412 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
Julius Volzb18610d2008-09-02 15:55:37 +0200413 if ((svc->af == af)
414 && ip_vs_addr_equal(af, &svc->addr, vaddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415 && (svc->port == vport)
Hans Schillstromfc723252011-01-03 14:44:43 +0100416 && (svc->protocol == protocol)
417 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419 return svc;
420 }
421 }
422
423 return NULL;
424}
425
426
427/*
428 * Get service by {fwmark} in the service table.
429 */
Julius Volzb18610d2008-09-02 15:55:37 +0200430static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100431__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432{
433 unsigned hash;
434 struct ip_vs_service *svc;
435
436 /* Check for fwmark addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100437 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438
439 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100440 if (svc->fwmark == fwmark && svc->af == af
441 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 return svc;
444 }
445 }
446
447 return NULL;
448}
449
450struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100451ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
Julius Volz3c2e0502008-09-02 15:55:38 +0200452 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453{
454 struct ip_vs_service *svc;
Julius Volz3c2e0502008-09-02 15:55:38 +0200455
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 read_lock(&__ip_vs_svc_lock);
457
458 /*
459 * Check the table hashed by fwmark first
460 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100461 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
462 if (fwmark && svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 goto out;
464
465 /*
466 * Check the table hashed by <protocol,addr,port>
467 * for "full" addressed entries
468 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100469 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470
471 if (svc == NULL
472 && protocol == IPPROTO_TCP
473 && atomic_read(&ip_vs_ftpsvc_counter)
474 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
475 /*
476 * Check if ftp service entry exists, the packet
477 * might belong to FTP data connections.
478 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100479 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700480 }
481
482 if (svc == NULL
483 && atomic_read(&ip_vs_nullsvc_counter)) {
484 /*
485 * Check if the catch-all port (port zero) exists
486 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100487 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488 }
489
490 out:
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200491 if (svc)
492 atomic_inc(&svc->usecnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 read_unlock(&__ip_vs_svc_lock);
494
Julius Volz3c2e0502008-09-02 15:55:38 +0200495 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
496 fwmark, ip_vs_proto_name(protocol),
497 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
498 svc ? "hit" : "not hit");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499
500 return svc;
501}
502
503
504static inline void
505__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
506{
507 atomic_inc(&svc->refcnt);
508 dest->svc = svc;
509}
510
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200511static void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512__ip_vs_unbind_svc(struct ip_vs_dest *dest)
513{
514 struct ip_vs_service *svc = dest->svc;
515
516 dest->svc = NULL;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200517 if (atomic_dec_and_test(&svc->refcnt)) {
518 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
519 svc->fwmark,
520 IP_VS_DBG_ADDR(svc->af, &svc->addr),
521 ntohs(svc->port), atomic_read(&svc->usecnt));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200523 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524}
525
526
527/*
528 * Returns hash value for real service
529 */
Julius Volz7937df12008-09-02 15:55:48 +0200530static inline unsigned ip_vs_rs_hashkey(int af,
531 const union nf_inet_addr *addr,
532 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533{
534 register unsigned porth = ntohs(port);
Julius Volz7937df12008-09-02 15:55:48 +0200535 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536
Julius Volz7937df12008-09-02 15:55:48 +0200537#ifdef CONFIG_IP_VS_IPV6
538 if (af == AF_INET6)
539 addr_fold = addr->ip6[0]^addr->ip6[1]^
540 addr->ip6[2]^addr->ip6[3];
541#endif
542
543 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544 & IP_VS_RTAB_MASK;
545}
546
547/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100548 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 * should be called with locked tables.
550 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100551static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552{
553 unsigned hash;
554
555 if (!list_empty(&dest->d_list)) {
556 return 0;
557 }
558
559 /*
560 * Hash by proto,addr,port,
561 * which are the parameters of the real service.
562 */
Julius Volz7937df12008-09-02 15:55:48 +0200563 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
564
Hans Schillstromfc723252011-01-03 14:44:43 +0100565 list_add(&dest->d_list, &ipvs->rs_table[hash]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566
567 return 1;
568}
569
570/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100571 * UNhashes ip_vs_dest from rs_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572 * should be called with locked tables.
573 */
574static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
575{
576 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100577 * Remove it from the rs_table table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578 */
579 if (!list_empty(&dest->d_list)) {
580 list_del(&dest->d_list);
581 INIT_LIST_HEAD(&dest->d_list);
582 }
583
584 return 1;
585}
586
587/*
588 * Lookup real service by <proto,addr,port> in the real service table.
589 */
590struct ip_vs_dest *
Hans Schillstromfc723252011-01-03 14:44:43 +0100591ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
Julius Volz7937df12008-09-02 15:55:48 +0200592 const union nf_inet_addr *daddr,
593 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594{
Hans Schillstromfc723252011-01-03 14:44:43 +0100595 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596 unsigned hash;
597 struct ip_vs_dest *dest;
598
599 /*
600 * Check for "full" addressed entries
601 * Return the first found entry
602 */
Julius Volz7937df12008-09-02 15:55:48 +0200603 hash = ip_vs_rs_hashkey(af, daddr, dport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
605 read_lock(&__ip_vs_rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100606 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200607 if ((dest->af == af)
608 && ip_vs_addr_equal(af, &dest->addr, daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 && (dest->port == dport)
610 && ((dest->protocol == protocol) ||
611 dest->vfwmark)) {
612 /* HIT */
613 read_unlock(&__ip_vs_rs_lock);
614 return dest;
615 }
616 }
617 read_unlock(&__ip_vs_rs_lock);
618
619 return NULL;
620}
621
622/*
623 * Lookup destination by {addr,port} in the given service
624 */
625static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200626ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
627 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628{
629 struct ip_vs_dest *dest;
630
631 /*
632 * Find the destination for the given service
633 */
634 list_for_each_entry(dest, &svc->destinations, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200635 if ((dest->af == svc->af)
636 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
637 && (dest->port == dport)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 /* HIT */
639 return dest;
640 }
641 }
642
643 return NULL;
644}
645
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800646/*
647 * Find destination by {daddr,dport,vaddr,protocol}
648 * Cretaed to be used in ip_vs_process_message() in
649 * the backup synchronization daemon. It finds the
650 * destination to be bound to the received connection
651 * on the backup.
652 *
653 * ip_vs_lookup_real_service() looked promissing, but
654 * seems not working as expected.
655 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100656struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
657 const union nf_inet_addr *daddr,
Julius Volz7937df12008-09-02 15:55:48 +0200658 __be16 dport,
659 const union nf_inet_addr *vaddr,
Hans Schillstrom0e051e62010-11-19 14:25:07 +0100660 __be16 vport, __u16 protocol, __u32 fwmark)
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800661{
662 struct ip_vs_dest *dest;
663 struct ip_vs_service *svc;
664
Hans Schillstromfc723252011-01-03 14:44:43 +0100665 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800666 if (!svc)
667 return NULL;
668 dest = ip_vs_lookup_dest(svc, daddr, dport);
669 if (dest)
670 atomic_inc(&dest->refcnt);
671 ip_vs_service_put(svc);
672 return dest;
673}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674
675/*
676 * Lookup dest by {svc,addr,port} in the destination trash.
677 * The destination trash is used to hold the destinations that are removed
678 * from the service table but are still referenced by some conn entries.
679 * The reason to add the destination trash is when the dest is temporary
680 * down (either by administrator or by monitor program), the dest can be
681 * picked back from the trash, the remaining connections to the dest can
682 * continue, and the counting information of the dest is also useful for
683 * scheduling.
684 */
685static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200686ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
687 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688{
689 struct ip_vs_dest *dest, *nxt;
690
691 /*
692 * Find the destination in trash
693 */
694 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200695 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
696 "dest->refcnt=%d\n",
697 dest->vfwmark,
698 IP_VS_DBG_ADDR(svc->af, &dest->addr),
699 ntohs(dest->port),
700 atomic_read(&dest->refcnt));
701 if (dest->af == svc->af &&
702 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700703 dest->port == dport &&
704 dest->vfwmark == svc->fwmark &&
705 dest->protocol == svc->protocol &&
706 (svc->fwmark ||
Julius Volz7937df12008-09-02 15:55:48 +0200707 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708 dest->vport == svc->port))) {
709 /* HIT */
710 return dest;
711 }
712
713 /*
714 * Try to purge the destination from trash if not referenced
715 */
716 if (atomic_read(&dest->refcnt) == 1) {
Julius Volz7937df12008-09-02 15:55:48 +0200717 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
718 "from trash\n",
719 dest->vfwmark,
720 IP_VS_DBG_ADDR(svc->af, &dest->addr),
721 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722 list_del(&dest->n_list);
723 ip_vs_dst_reset(dest);
724 __ip_vs_unbind_svc(dest);
725 kfree(dest);
726 }
727 }
728
729 return NULL;
730}
731
732
733/*
734 * Clean up all the destinations in the trash
735 * Called by the ip_vs_control_cleanup()
736 *
737 * When the ip_vs_control_clearup is activated by ipvs module exit,
738 * the service tables must have been flushed and all the connections
739 * are expired, and the refcnt of each destination in the trash must
740 * be 1, so we simply release them here.
741 */
742static void ip_vs_trash_cleanup(void)
743{
744 struct ip_vs_dest *dest, *nxt;
745
746 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
747 list_del(&dest->n_list);
748 ip_vs_dst_reset(dest);
749 __ip_vs_unbind_svc(dest);
750 kfree(dest);
751 }
752}
753
754
755static void
756ip_vs_zero_stats(struct ip_vs_stats *stats)
757{
758 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000759
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200760 memset(&stats->ustats, 0, sizeof(stats->ustats));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000762
Sven Wegener3a14a3132008-08-10 18:24:41 +0000763 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764}
765
766/*
767 * Update a destination in the given service
768 */
769static void
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200770__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
771 struct ip_vs_dest_user_kern *udest, int add)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772{
Hans Schillstromfc723252011-01-03 14:44:43 +0100773 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774 int conn_flags;
775
776 /* set the weight and the flags */
777 atomic_set(&dest->weight, udest->weight);
Julian Anastasov35757922010-09-17 14:18:16 +0200778 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
779 conn_flags |= IP_VS_CONN_F_INACTIVE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
Julian Anastasov35757922010-09-17 14:18:16 +0200782 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
784 } else {
785 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100786 * Put the real service in rs_table if not present.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787 * For now only for NAT!
788 */
789 write_lock_bh(&__ip_vs_rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100790 ip_vs_rs_hash(ipvs, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791 write_unlock_bh(&__ip_vs_rs_lock);
792 }
793 atomic_set(&dest->conn_flags, conn_flags);
794
795 /* bind the service */
796 if (!dest->svc) {
797 __ip_vs_bind_svc(dest, svc);
798 } else {
799 if (dest->svc != svc) {
800 __ip_vs_unbind_svc(dest);
801 ip_vs_zero_stats(&dest->stats);
802 __ip_vs_bind_svc(dest, svc);
803 }
804 }
805
806 /* set the dest status flags */
807 dest->flags |= IP_VS_DEST_F_AVAILABLE;
808
809 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
810 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
811 dest->u_threshold = udest->u_threshold;
812 dest->l_threshold = udest->l_threshold;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200813
Julian Anastasovfc604762010-10-17 16:38:15 +0300814 spin_lock(&dest->dst_lock);
815 ip_vs_dst_reset(dest);
816 spin_unlock(&dest->dst_lock);
817
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200818 if (add)
819 ip_vs_new_estimator(&dest->stats);
820
821 write_lock_bh(&__ip_vs_svc_lock);
822
823 /* Wait until all other svc users go away */
824 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
825
826 if (add) {
827 list_add(&dest->n_list, &svc->destinations);
828 svc->num_dests++;
829 }
830
831 /* call the update_service, because server weight may be changed */
832 if (svc->scheduler->update_service)
833 svc->scheduler->update_service(svc);
834
835 write_unlock_bh(&__ip_vs_svc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836}
837
838
839/*
840 * Create a destination for the given service
841 */
842static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200843ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844 struct ip_vs_dest **dest_p)
845{
846 struct ip_vs_dest *dest;
847 unsigned atype;
848
849 EnterFunction(2);
850
Vince Busam09571c72008-09-02 15:55:52 +0200851#ifdef CONFIG_IP_VS_IPV6
852 if (svc->af == AF_INET6) {
853 atype = ipv6_addr_type(&udest->addr.in6);
Sven Wegener3bfb92f2008-09-05 16:53:49 +0200854 if ((!(atype & IPV6_ADDR_UNICAST) ||
855 atype & IPV6_ADDR_LINKLOCAL) &&
Vince Busam09571c72008-09-02 15:55:52 +0200856 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
857 return -EINVAL;
858 } else
859#endif
860 {
861 atype = inet_addr_type(&init_net, udest->addr.ip);
862 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
863 return -EINVAL;
864 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865
Simon Hormandee06e42010-08-26 02:54:31 +0000866 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000868 pr_err("%s(): no memory.\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 return -ENOMEM;
870 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871
Julius Volzc860c6b2008-09-02 15:55:36 +0200872 dest->af = svc->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873 dest->protocol = svc->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +0200874 dest->vaddr = svc->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700875 dest->vport = svc->port;
876 dest->vfwmark = svc->fwmark;
Julius Volzc860c6b2008-09-02 15:55:36 +0200877 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878 dest->port = udest->port;
879
880 atomic_set(&dest->activeconns, 0);
881 atomic_set(&dest->inactconns, 0);
882 atomic_set(&dest->persistconns, 0);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200883 atomic_set(&dest->refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884
885 INIT_LIST_HEAD(&dest->d_list);
886 spin_lock_init(&dest->dst_lock);
887 spin_lock_init(&dest->stats.lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200888 __ip_vs_update_dest(svc, dest, udest, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889
890 *dest_p = dest;
891
892 LeaveFunction(2);
893 return 0;
894}
895
896
897/*
898 * Add a destination into an existing service
899 */
900static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200901ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902{
903 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200904 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700905 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906 int ret;
907
908 EnterFunction(2);
909
910 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000911 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912 return -ERANGE;
913 }
914
915 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000916 pr_err("%s(): lower threshold is higher than upper threshold\n",
917 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 return -ERANGE;
919 }
920
Julius Volzc860c6b2008-09-02 15:55:36 +0200921 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
922
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 /*
924 * Check if the dest already exists in the list
925 */
Julius Volz7937df12008-09-02 15:55:48 +0200926 dest = ip_vs_lookup_dest(svc, &daddr, dport);
927
Linus Torvalds1da177e2005-04-16 15:20:36 -0700928 if (dest != NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000929 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 return -EEXIST;
931 }
932
933 /*
934 * Check if the dest already exists in the trash and
935 * is from the same service
936 */
Julius Volz7937df12008-09-02 15:55:48 +0200937 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
938
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939 if (dest != NULL) {
Julius Volzcfc78c52008-09-02 15:55:53 +0200940 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
941 "dest->refcnt=%d, service %u/%s:%u\n",
942 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
943 atomic_read(&dest->refcnt),
944 dest->vfwmark,
945 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
946 ntohs(dest->vport));
947
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948 /*
949 * Get the destination from the trash
950 */
951 list_del(&dest->n_list);
952
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200953 __ip_vs_update_dest(svc, dest, udest, 1);
954 ret = 0;
955 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 /*
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200957 * Allocate and initialize the dest structure
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200959 ret = ip_vs_new_dest(svc, udest, &dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961 LeaveFunction(2);
962
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200963 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964}
965
966
967/*
968 * Edit a destination in the given service
969 */
970static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200971ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972{
973 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200974 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700975 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976
977 EnterFunction(2);
978
979 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000980 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 return -ERANGE;
982 }
983
984 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000985 pr_err("%s(): lower threshold is higher than upper threshold\n",
986 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 return -ERANGE;
988 }
989
Julius Volzc860c6b2008-09-02 15:55:36 +0200990 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
991
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992 /*
993 * Lookup the destination list
994 */
Julius Volz7937df12008-09-02 15:55:48 +0200995 dest = ip_vs_lookup_dest(svc, &daddr, dport);
996
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000998 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999 return -ENOENT;
1000 }
1001
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001002 __ip_vs_update_dest(svc, dest, udest, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003 LeaveFunction(2);
1004
1005 return 0;
1006}
1007
1008
1009/*
1010 * Delete a destination (must be already unlinked from the service)
1011 */
1012static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1013{
1014 ip_vs_kill_estimator(&dest->stats);
1015
1016 /*
1017 * Remove it from the d-linked list with the real services.
1018 */
1019 write_lock_bh(&__ip_vs_rs_lock);
1020 ip_vs_rs_unhash(dest);
1021 write_unlock_bh(&__ip_vs_rs_lock);
1022
1023 /*
1024 * Decrease the refcnt of the dest, and free the dest
1025 * if nobody refers to it (refcnt=0). Otherwise, throw
1026 * the destination into the trash.
1027 */
1028 if (atomic_dec_and_test(&dest->refcnt)) {
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001029 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1030 dest->vfwmark,
1031 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1032 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033 ip_vs_dst_reset(dest);
1034 /* simply decrease svc->refcnt here, let the caller check
1035 and release the service if nobody refers to it.
1036 Only user context can release destination and service,
1037 and only one user context can update virtual service at a
1038 time, so the operation here is OK */
1039 atomic_dec(&dest->svc->refcnt);
1040 kfree(dest);
1041 } else {
Julius Volzcfc78c52008-09-02 15:55:53 +02001042 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1043 "dest->refcnt=%d\n",
1044 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1045 ntohs(dest->port),
1046 atomic_read(&dest->refcnt));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047 list_add(&dest->n_list, &ip_vs_dest_trash);
1048 atomic_inc(&dest->refcnt);
1049 }
1050}
1051
1052
1053/*
1054 * Unlink a destination from the given service
1055 */
1056static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1057 struct ip_vs_dest *dest,
1058 int svcupd)
1059{
1060 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1061
1062 /*
1063 * Remove it from the d-linked destination list.
1064 */
1065 list_del(&dest->n_list);
1066 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001067
1068 /*
1069 * Call the update_service function of its scheduler
1070 */
1071 if (svcupd && svc->scheduler->update_service)
1072 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073}
1074
1075
1076/*
1077 * Delete a destination server in the given service
1078 */
1079static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001080ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081{
1082 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -07001083 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084
1085 EnterFunction(2);
1086
Julius Volz7937df12008-09-02 15:55:48 +02001087 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
Julius Volzc860c6b2008-09-02 15:55:36 +02001088
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001090 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091 return -ENOENT;
1092 }
1093
1094 write_lock_bh(&__ip_vs_svc_lock);
1095
1096 /*
1097 * Wait until all other svc users go away.
1098 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001099 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100
1101 /*
1102 * Unlink dest from the service
1103 */
1104 __ip_vs_unlink_dest(svc, dest, 1);
1105
1106 write_unlock_bh(&__ip_vs_svc_lock);
1107
1108 /*
1109 * Delete the destination
1110 */
1111 __ip_vs_del_dest(dest);
1112
1113 LeaveFunction(2);
1114
1115 return 0;
1116}
1117
1118
1119/*
1120 * Add a service into the service hash table
1121 */
1122static int
Hans Schillstromfc723252011-01-03 14:44:43 +01001123ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
Julius Volzc860c6b2008-09-02 15:55:36 +02001124 struct ip_vs_service **svc_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125{
1126 int ret = 0;
1127 struct ip_vs_scheduler *sched = NULL;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001128 struct ip_vs_pe *pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 struct ip_vs_service *svc = NULL;
1130
1131 /* increase the module use count */
1132 ip_vs_use_count_inc();
1133
1134 /* Lookup the scheduler by 'u->sched_name' */
1135 sched = ip_vs_scheduler_get(u->sched_name);
1136 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001137 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 ret = -ENOENT;
Simon Horman6e08bfb2010-08-22 21:37:52 +09001139 goto out_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 }
1141
Simon Horman0d1e71b2010-08-22 21:37:54 +09001142 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001143 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001144 if (pe == NULL) {
1145 pr_info("persistence engine module ip_vs_pe_%s "
1146 "not found\n", u->pe_name);
1147 ret = -ENOENT;
1148 goto out_err;
1149 }
1150 }
1151
Julius Volzf94fd042008-09-02 15:55:55 +02001152#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001153 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1154 ret = -EINVAL;
1155 goto out_err;
Julius Volzf94fd042008-09-02 15:55:55 +02001156 }
1157#endif
1158
Simon Hormandee06e42010-08-26 02:54:31 +00001159 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160 if (svc == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001161 IP_VS_DBG(1, "%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162 ret = -ENOMEM;
1163 goto out_err;
1164 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165
1166 /* I'm the first user of the service */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001167 atomic_set(&svc->usecnt, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168 atomic_set(&svc->refcnt, 0);
1169
Julius Volzc860c6b2008-09-02 15:55:36 +02001170 svc->af = u->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171 svc->protocol = u->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +02001172 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173 svc->port = u->port;
1174 svc->fwmark = u->fwmark;
1175 svc->flags = u->flags;
1176 svc->timeout = u->timeout * HZ;
1177 svc->netmask = u->netmask;
Hans Schillstromfc723252011-01-03 14:44:43 +01001178 svc->net = net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179
1180 INIT_LIST_HEAD(&svc->destinations);
1181 rwlock_init(&svc->sched_lock);
1182 spin_lock_init(&svc->stats.lock);
1183
1184 /* Bind the scheduler */
1185 ret = ip_vs_bind_scheduler(svc, sched);
1186 if (ret)
1187 goto out_err;
1188 sched = NULL;
1189
Simon Horman0d1e71b2010-08-22 21:37:54 +09001190 /* Bind the ct retriever */
1191 ip_vs_bind_pe(svc, pe);
1192 pe = NULL;
1193
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 /* Update the virtual service counters */
1195 if (svc->port == FTPPORT)
1196 atomic_inc(&ip_vs_ftpsvc_counter);
1197 else if (svc->port == 0)
1198 atomic_inc(&ip_vs_nullsvc_counter);
1199
1200 ip_vs_new_estimator(&svc->stats);
Julius Volzf94fd042008-09-02 15:55:55 +02001201
1202 /* Count only IPv4 services for old get/setsockopt interface */
1203 if (svc->af == AF_INET)
1204 ip_vs_num_services++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205
1206 /* Hash the service into the service table */
1207 write_lock_bh(&__ip_vs_svc_lock);
1208 ip_vs_svc_hash(svc);
1209 write_unlock_bh(&__ip_vs_svc_lock);
1210
1211 *svc_p = svc;
1212 return 0;
1213
Simon Horman6e08bfb2010-08-22 21:37:52 +09001214 out_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 if (svc != NULL) {
Simon Horman2fabf352010-08-22 21:37:52 +09001216 ip_vs_unbind_scheduler(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217 if (svc->inc) {
1218 local_bh_disable();
1219 ip_vs_app_inc_put(svc->inc);
1220 local_bh_enable();
1221 }
1222 kfree(svc);
1223 }
1224 ip_vs_scheduler_put(sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001225 ip_vs_pe_put(pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001226
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227 /* decrease the module use count */
1228 ip_vs_use_count_dec();
1229
1230 return ret;
1231}
1232
1233
1234/*
1235 * Edit a service and bind it with a new scheduler
1236 */
1237static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001238ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239{
1240 struct ip_vs_scheduler *sched, *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001241 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 int ret = 0;
1243
1244 /*
1245 * Lookup the scheduler, by 'u->sched_name'
1246 */
1247 sched = ip_vs_scheduler_get(u->sched_name);
1248 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001249 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250 return -ENOENT;
1251 }
1252 old_sched = sched;
1253
Simon Horman0d1e71b2010-08-22 21:37:54 +09001254 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001255 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001256 if (pe == NULL) {
1257 pr_info("persistence engine module ip_vs_pe_%s "
1258 "not found\n", u->pe_name);
1259 ret = -ENOENT;
1260 goto out;
1261 }
1262 old_pe = pe;
1263 }
1264
Julius Volzf94fd042008-09-02 15:55:55 +02001265#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001266 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1267 ret = -EINVAL;
1268 goto out;
Julius Volzf94fd042008-09-02 15:55:55 +02001269 }
1270#endif
1271
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272 write_lock_bh(&__ip_vs_svc_lock);
1273
1274 /*
1275 * Wait until all other svc users go away.
1276 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001277 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278
1279 /*
1280 * Set the flags and timeout value
1281 */
1282 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1283 svc->timeout = u->timeout * HZ;
1284 svc->netmask = u->netmask;
1285
1286 old_sched = svc->scheduler;
1287 if (sched != old_sched) {
1288 /*
1289 * Unbind the old scheduler
1290 */
1291 if ((ret = ip_vs_unbind_scheduler(svc))) {
1292 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001293 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294 }
1295
1296 /*
1297 * Bind the new scheduler
1298 */
1299 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1300 /*
1301 * If ip_vs_bind_scheduler fails, restore the old
1302 * scheduler.
1303 * The main reason of failure is out of memory.
1304 *
1305 * The question is if the old scheduler can be
1306 * restored all the time. TODO: if it cannot be
1307 * restored some time, we must delete the service,
1308 * otherwise the system may crash.
1309 */
1310 ip_vs_bind_scheduler(svc, old_sched);
1311 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001312 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 }
1314 }
1315
Simon Horman0d1e71b2010-08-22 21:37:54 +09001316 old_pe = svc->pe;
1317 if (pe != old_pe) {
1318 ip_vs_unbind_pe(svc);
1319 ip_vs_bind_pe(svc, pe);
1320 }
1321
Simon Horman9e691ed2008-09-17 10:10:41 +10001322 out_unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001323 write_unlock_bh(&__ip_vs_svc_lock);
Simon Horman9e691ed2008-09-17 10:10:41 +10001324 out:
Simon Horman6e08bfb2010-08-22 21:37:52 +09001325 ip_vs_scheduler_put(old_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001326 ip_vs_pe_put(old_pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001327 return ret;
1328}
1329
1330
1331/*
1332 * Delete a service from the service list
1333 * - The service must be unlinked, unlocked and not referenced!
1334 * - We are called under _bh lock
1335 */
1336static void __ip_vs_del_service(struct ip_vs_service *svc)
1337{
1338 struct ip_vs_dest *dest, *nxt;
1339 struct ip_vs_scheduler *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001340 struct ip_vs_pe *old_pe;
1341
1342 pr_info("%s: enter\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343
Julius Volzf94fd042008-09-02 15:55:55 +02001344 /* Count only IPv4 services for old get/setsockopt interface */
1345 if (svc->af == AF_INET)
1346 ip_vs_num_services--;
1347
Linus Torvalds1da177e2005-04-16 15:20:36 -07001348 ip_vs_kill_estimator(&svc->stats);
1349
1350 /* Unbind scheduler */
1351 old_sched = svc->scheduler;
1352 ip_vs_unbind_scheduler(svc);
Simon Horman6e08bfb2010-08-22 21:37:52 +09001353 ip_vs_scheduler_put(old_sched);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354
Simon Horman0d1e71b2010-08-22 21:37:54 +09001355 /* Unbind persistence engine */
1356 old_pe = svc->pe;
1357 ip_vs_unbind_pe(svc);
1358 ip_vs_pe_put(old_pe);
1359
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 /* Unbind app inc */
1361 if (svc->inc) {
1362 ip_vs_app_inc_put(svc->inc);
1363 svc->inc = NULL;
1364 }
1365
1366 /*
1367 * Unlink the whole destination list
1368 */
1369 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1370 __ip_vs_unlink_dest(svc, dest, 0);
1371 __ip_vs_del_dest(dest);
1372 }
1373
1374 /*
1375 * Update the virtual service counters
1376 */
1377 if (svc->port == FTPPORT)
1378 atomic_dec(&ip_vs_ftpsvc_counter);
1379 else if (svc->port == 0)
1380 atomic_dec(&ip_vs_nullsvc_counter);
1381
1382 /*
1383 * Free the service if nobody refers to it
1384 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001385 if (atomic_read(&svc->refcnt) == 0) {
1386 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1387 svc->fwmark,
1388 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1389 ntohs(svc->port), atomic_read(&svc->usecnt));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001391 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392
1393 /* decrease the module use count */
1394 ip_vs_use_count_dec();
1395}
1396
1397/*
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001398 * Unlink a service from list and try to delete it if its refcnt reached 0
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001400static void ip_vs_unlink_service(struct ip_vs_service *svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001401{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 /*
1403 * Unhash it from the service table
1404 */
1405 write_lock_bh(&__ip_vs_svc_lock);
1406
1407 ip_vs_svc_unhash(svc);
1408
1409 /*
1410 * Wait until all the svc users go away.
1411 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001412 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413
1414 __ip_vs_del_service(svc);
1415
1416 write_unlock_bh(&__ip_vs_svc_lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001417}
1418
1419/*
1420 * Delete a service from the service list
1421 */
1422static int ip_vs_del_service(struct ip_vs_service *svc)
1423{
1424 if (svc == NULL)
1425 return -EEXIST;
1426 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427
1428 return 0;
1429}
1430
1431
1432/*
1433 * Flush all the virtual services
1434 */
Hans Schillstromfc723252011-01-03 14:44:43 +01001435static int ip_vs_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001436{
1437 int idx;
1438 struct ip_vs_service *svc, *nxt;
1439
1440 /*
Hans Schillstromfc723252011-01-03 14:44:43 +01001441 * Flush the service table hashed by <netns,protocol,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -07001442 */
1443 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001444 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1445 s_list) {
1446 if (net_eq(svc->net, net))
1447 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001448 }
1449 }
1450
1451 /*
1452 * Flush the service table hashed by fwmark
1453 */
1454 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1455 list_for_each_entry_safe(svc, nxt,
1456 &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001457 if (net_eq(svc->net, net))
1458 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001459 }
1460 }
1461
1462 return 0;
1463}
1464
1465
1466/*
1467 * Zero counters in a service or all services
1468 */
1469static int ip_vs_zero_service(struct ip_vs_service *svc)
1470{
1471 struct ip_vs_dest *dest;
1472
1473 write_lock_bh(&__ip_vs_svc_lock);
1474 list_for_each_entry(dest, &svc->destinations, n_list) {
1475 ip_vs_zero_stats(&dest->stats);
1476 }
1477 ip_vs_zero_stats(&svc->stats);
1478 write_unlock_bh(&__ip_vs_svc_lock);
1479 return 0;
1480}
1481
Hans Schillstromfc723252011-01-03 14:44:43 +01001482static int ip_vs_zero_all(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483{
1484 int idx;
1485 struct ip_vs_service *svc;
1486
1487 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1488 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001489 if (net_eq(svc->net, net))
1490 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001491 }
1492 }
1493
1494 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1495 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001496 if (net_eq(svc->net, net))
1497 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498 }
1499 }
1500
1501 ip_vs_zero_stats(&ip_vs_stats);
1502 return 0;
1503}
1504
1505
1506static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001507proc_do_defense_mode(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508 void __user *buffer, size_t *lenp, loff_t *ppos)
1509{
Hans Schillstrom93304192011-01-03 14:44:51 +01001510 struct net *net = current->nsproxy->net_ns;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001511 int *valp = table->data;
1512 int val = *valp;
1513 int rc;
1514
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001515 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516 if (write && (*valp != val)) {
1517 if ((*valp < 0) || (*valp > 3)) {
1518 /* Restore the correct value */
1519 *valp = val;
1520 } else {
Hans Schillstrom93304192011-01-03 14:44:51 +01001521 update_defense_level(net_ipvs(net));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522 }
1523 }
1524 return rc;
1525}
1526
1527
1528static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001529proc_do_sync_threshold(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001530 void __user *buffer, size_t *lenp, loff_t *ppos)
1531{
1532 int *valp = table->data;
1533 int val[2];
1534 int rc;
1535
1536 /* backup the value first */
1537 memcpy(val, valp, sizeof(val));
1538
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001539 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001540 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1541 /* Restore the correct value */
1542 memcpy(valp, val, sizeof(val));
1543 }
1544 return rc;
1545}
1546
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001547static int
1548proc_do_sync_mode(ctl_table *table, int write,
1549 void __user *buffer, size_t *lenp, loff_t *ppos)
1550{
1551 int *valp = table->data;
1552 int val = *valp;
1553 int rc;
1554
1555 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1556 if (write && (*valp != val)) {
1557 if ((*valp < 0) || (*valp > 1)) {
1558 /* Restore the correct value */
1559 *valp = val;
1560 } else {
1561 ip_vs_sync_switch_mode(val);
1562 }
1563 }
1564 return rc;
1565}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001566
1567/*
1568 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1569 */
1570
1571static struct ctl_table vs_vars[] = {
1572 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573 .procname = "amemthresh",
1574 .data = &sysctl_ip_vs_amemthresh,
1575 .maxlen = sizeof(int),
1576 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001577 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001578 },
1579#ifdef CONFIG_IP_VS_DEBUG
1580 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581 .procname = "debug_level",
1582 .data = &sysctl_ip_vs_debug_level,
1583 .maxlen = sizeof(int),
1584 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001585 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001586 },
1587#endif
1588 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001589 .procname = "am_droprate",
1590 .data = &sysctl_ip_vs_am_droprate,
1591 .maxlen = sizeof(int),
1592 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001593 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 },
1595 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596 .procname = "drop_entry",
1597 .data = &sysctl_ip_vs_drop_entry,
1598 .maxlen = sizeof(int),
1599 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001600 .proc_handler = proc_do_defense_mode,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001601 },
1602 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603 .procname = "drop_packet",
1604 .data = &sysctl_ip_vs_drop_packet,
1605 .maxlen = sizeof(int),
1606 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001607 .proc_handler = proc_do_defense_mode,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608 },
Julian Anastasovf4bc17c2010-09-21 17:35:41 +02001609#ifdef CONFIG_IP_VS_NFCT
1610 {
1611 .procname = "conntrack",
1612 .data = &sysctl_ip_vs_conntrack,
1613 .maxlen = sizeof(int),
1614 .mode = 0644,
1615 .proc_handler = &proc_dointvec,
1616 },
1617#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001619 .procname = "secure_tcp",
1620 .data = &sysctl_ip_vs_secure_tcp,
1621 .maxlen = sizeof(int),
1622 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001623 .proc_handler = proc_do_defense_mode,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001624 },
Julian Anastasov8a803042010-09-21 17:38:57 +02001625 {
1626 .procname = "snat_reroute",
1627 .data = &sysctl_ip_vs_snat_reroute,
1628 .maxlen = sizeof(int),
1629 .mode = 0644,
1630 .proc_handler = &proc_dointvec,
1631 },
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001632 {
1633 .procname = "sync_version",
1634 .data = &sysctl_ip_vs_sync_ver,
1635 .maxlen = sizeof(int),
1636 .mode = 0644,
1637 .proc_handler = &proc_do_sync_mode,
1638 },
Linus Torvalds1da177e2005-04-16 15:20:36 -07001639#if 0
1640 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641 .procname = "timeout_established",
1642 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1643 .maxlen = sizeof(int),
1644 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001645 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001646 },
1647 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648 .procname = "timeout_synsent",
1649 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1650 .maxlen = sizeof(int),
1651 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001652 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653 },
1654 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655 .procname = "timeout_synrecv",
1656 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1657 .maxlen = sizeof(int),
1658 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001659 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660 },
1661 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662 .procname = "timeout_finwait",
1663 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1664 .maxlen = sizeof(int),
1665 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001666 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667 },
1668 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 .procname = "timeout_timewait",
1670 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1671 .maxlen = sizeof(int),
1672 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001673 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674 },
1675 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 .procname = "timeout_close",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1678 .maxlen = sizeof(int),
1679 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001680 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 },
1682 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 .procname = "timeout_closewait",
1684 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1685 .maxlen = sizeof(int),
1686 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001687 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 },
1689 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690 .procname = "timeout_lastack",
1691 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1692 .maxlen = sizeof(int),
1693 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001694 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 },
1696 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 .procname = "timeout_listen",
1698 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1699 .maxlen = sizeof(int),
1700 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001701 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 },
1703 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704 .procname = "timeout_synack",
1705 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1706 .maxlen = sizeof(int),
1707 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001708 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 },
1710 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 .procname = "timeout_udp",
1712 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1713 .maxlen = sizeof(int),
1714 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001715 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716 },
1717 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718 .procname = "timeout_icmp",
1719 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1720 .maxlen = sizeof(int),
1721 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001722 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 },
1724#endif
1725 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001726 .procname = "cache_bypass",
1727 .data = &sysctl_ip_vs_cache_bypass,
1728 .maxlen = sizeof(int),
1729 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001730 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731 },
1732 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733 .procname = "expire_nodest_conn",
1734 .data = &sysctl_ip_vs_expire_nodest_conn,
1735 .maxlen = sizeof(int),
1736 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001737 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738 },
1739 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740 .procname = "expire_quiescent_template",
1741 .data = &sysctl_ip_vs_expire_quiescent_template,
1742 .maxlen = sizeof(int),
1743 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001744 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745 },
1746 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747 .procname = "sync_threshold",
1748 .data = &sysctl_ip_vs_sync_threshold,
1749 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1750 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001751 .proc_handler = proc_do_sync_threshold,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752 },
1753 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001754 .procname = "nat_icmp_send",
1755 .data = &sysctl_ip_vs_nat_icmp_send,
1756 .maxlen = sizeof(int),
1757 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001758 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001759 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001760 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761};
1762
Sven Wegener5587da52008-08-10 18:24:40 +00001763const struct ctl_path net_vs_ctl_path[] = {
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001764 { .procname = "net", },
1765 { .procname = "ipv4", },
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001766 { .procname = "vs", },
1767 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001769EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770
1771static struct ctl_table_header * sysctl_header;
1772
1773#ifdef CONFIG_PROC_FS
1774
1775struct ip_vs_iter {
Hans Schillstromfc723252011-01-03 14:44:43 +01001776 struct seq_net_private p; /* Do not move this, netns depends upon it*/
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 struct list_head *table;
1778 int bucket;
1779};
1780
1781/*
1782 * Write the contents of the VS rule table to a PROCfs file.
1783 * (It is kept just for backward compatibility)
1784 */
1785static inline const char *ip_vs_fwd_name(unsigned flags)
1786{
1787 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1788 case IP_VS_CONN_F_LOCALNODE:
1789 return "Local";
1790 case IP_VS_CONN_F_TUNNEL:
1791 return "Tunnel";
1792 case IP_VS_CONN_F_DROUTE:
1793 return "Route";
1794 default:
1795 return "Masq";
1796 }
1797}
1798
1799
1800/* Get the Nth entry in the two lists */
1801static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1802{
Hans Schillstromfc723252011-01-03 14:44:43 +01001803 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804 struct ip_vs_iter *iter = seq->private;
1805 int idx;
1806 struct ip_vs_service *svc;
1807
1808 /* look in hash by protocol */
1809 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1810 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001811 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001812 iter->table = ip_vs_svc_table;
1813 iter->bucket = idx;
1814 return svc;
1815 }
1816 }
1817 }
1818
1819 /* keep looking in fwmark */
1820 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1821 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001822 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823 iter->table = ip_vs_svc_fwm_table;
1824 iter->bucket = idx;
1825 return svc;
1826 }
1827 }
1828 }
1829
1830 return NULL;
1831}
1832
1833static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
Simon Horman563e94f2008-09-17 10:10:42 +10001834__acquires(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001835{
1836
1837 read_lock_bh(&__ip_vs_svc_lock);
1838 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1839}
1840
1841
1842static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1843{
1844 struct list_head *e;
1845 struct ip_vs_iter *iter;
1846 struct ip_vs_service *svc;
1847
1848 ++*pos;
1849 if (v == SEQ_START_TOKEN)
1850 return ip_vs_info_array(seq,0);
1851
1852 svc = v;
1853 iter = seq->private;
1854
1855 if (iter->table == ip_vs_svc_table) {
1856 /* next service in table hashed by protocol */
1857 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1858 return list_entry(e, struct ip_vs_service, s_list);
1859
1860
1861 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1862 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1863 s_list) {
1864 return svc;
1865 }
1866 }
1867
1868 iter->table = ip_vs_svc_fwm_table;
1869 iter->bucket = -1;
1870 goto scan_fwmark;
1871 }
1872
1873 /* next service in hashed by fwmark */
1874 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1875 return list_entry(e, struct ip_vs_service, f_list);
1876
1877 scan_fwmark:
1878 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1879 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1880 f_list)
1881 return svc;
1882 }
1883
1884 return NULL;
1885}
1886
1887static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
Simon Horman563e94f2008-09-17 10:10:42 +10001888__releases(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001889{
1890 read_unlock_bh(&__ip_vs_svc_lock);
1891}
1892
1893
1894static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1895{
1896 if (v == SEQ_START_TOKEN) {
1897 seq_printf(seq,
1898 "IP Virtual Server version %d.%d.%d (size=%d)\n",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01001899 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900 seq_puts(seq,
1901 "Prot LocalAddress:Port Scheduler Flags\n");
1902 seq_puts(seq,
1903 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1904 } else {
1905 const struct ip_vs_service *svc = v;
1906 const struct ip_vs_iter *iter = seq->private;
1907 const struct ip_vs_dest *dest;
1908
Vince Busam667a5f12008-09-02 15:55:49 +02001909 if (iter->table == ip_vs_svc_table) {
1910#ifdef CONFIG_IP_VS_IPV6
1911 if (svc->af == AF_INET6)
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001912 seq_printf(seq, "%s [%pI6]:%04X %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001913 ip_vs_proto_name(svc->protocol),
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001914 &svc->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001915 ntohs(svc->port),
1916 svc->scheduler->name);
1917 else
1918#endif
Nick Chalk26ec0372010-06-22 08:07:01 +02001919 seq_printf(seq, "%s %08X:%04X %s %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001920 ip_vs_proto_name(svc->protocol),
1921 ntohl(svc->addr.ip),
1922 ntohs(svc->port),
Nick Chalk26ec0372010-06-22 08:07:01 +02001923 svc->scheduler->name,
1924 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001925 } else {
Nick Chalk26ec0372010-06-22 08:07:01 +02001926 seq_printf(seq, "FWM %08X %s %s",
1927 svc->fwmark, svc->scheduler->name,
1928 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001929 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001930
1931 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1932 seq_printf(seq, "persistent %d %08X\n",
1933 svc->timeout,
1934 ntohl(svc->netmask));
1935 else
1936 seq_putc(seq, '\n');
1937
1938 list_for_each_entry(dest, &svc->destinations, n_list) {
Vince Busam667a5f12008-09-02 15:55:49 +02001939#ifdef CONFIG_IP_VS_IPV6
1940 if (dest->af == AF_INET6)
1941 seq_printf(seq,
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001942 " -> [%pI6]:%04X"
Vince Busam667a5f12008-09-02 15:55:49 +02001943 " %-7s %-6d %-10d %-10d\n",
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001944 &dest->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001945 ntohs(dest->port),
1946 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1947 atomic_read(&dest->weight),
1948 atomic_read(&dest->activeconns),
1949 atomic_read(&dest->inactconns));
1950 else
1951#endif
1952 seq_printf(seq,
1953 " -> %08X:%04X "
1954 "%-7s %-6d %-10d %-10d\n",
1955 ntohl(dest->addr.ip),
1956 ntohs(dest->port),
1957 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1958 atomic_read(&dest->weight),
1959 atomic_read(&dest->activeconns),
1960 atomic_read(&dest->inactconns));
1961
Linus Torvalds1da177e2005-04-16 15:20:36 -07001962 }
1963 }
1964 return 0;
1965}
1966
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001967static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001968 .start = ip_vs_info_seq_start,
1969 .next = ip_vs_info_seq_next,
1970 .stop = ip_vs_info_seq_stop,
1971 .show = ip_vs_info_seq_show,
1972};
1973
1974static int ip_vs_info_open(struct inode *inode, struct file *file)
1975{
Hans Schillstromfc723252011-01-03 14:44:43 +01001976 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001977 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978}
1979
Arjan van de Ven9a321442007-02-12 00:55:35 -08001980static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001981 .owner = THIS_MODULE,
1982 .open = ip_vs_info_open,
1983 .read = seq_read,
1984 .llseek = seq_lseek,
1985 .release = seq_release_private,
1986};
1987
1988#endif
1989
Sven Wegener519e49e2008-08-10 18:24:41 +00001990struct ip_vs_stats ip_vs_stats = {
1991 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1992};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993
1994#ifdef CONFIG_PROC_FS
1995static int ip_vs_stats_show(struct seq_file *seq, void *v)
1996{
1997
1998/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1999 seq_puts(seq,
2000 " Total Incoming Outgoing Incoming Outgoing\n");
2001 seq_printf(seq,
2002 " Conns Packets Packets Bytes Bytes\n");
2003
2004 spin_lock_bh(&ip_vs_stats.lock);
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002005 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
2006 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
2007 (unsigned long long) ip_vs_stats.ustats.inbytes,
2008 (unsigned long long) ip_vs_stats.ustats.outbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002009
2010/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2011 seq_puts(seq,
2012 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2013 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002014 ip_vs_stats.ustats.cps,
2015 ip_vs_stats.ustats.inpps,
2016 ip_vs_stats.ustats.outpps,
2017 ip_vs_stats.ustats.inbps,
2018 ip_vs_stats.ustats.outbps);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019 spin_unlock_bh(&ip_vs_stats.lock);
2020
2021 return 0;
2022}
2023
2024static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2025{
Hans Schillstromfc723252011-01-03 14:44:43 +01002026 return single_open_net(inode, file, ip_vs_stats_show);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027}
2028
Arjan van de Ven9a321442007-02-12 00:55:35 -08002029static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030 .owner = THIS_MODULE,
2031 .open = ip_vs_stats_seq_open,
2032 .read = seq_read,
2033 .llseek = seq_lseek,
2034 .release = single_release,
2035};
2036
2037#endif
2038
2039/*
2040 * Set timeout values for tcp tcpfin udp in the timeout_table.
2041 */
Hans Schillstrom93304192011-01-03 14:44:51 +01002042static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043{
Hans Schillstrom93304192011-01-03 14:44:51 +01002044 struct ip_vs_proto_data *pd;
2045
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2047 u->tcp_timeout,
2048 u->tcp_fin_timeout,
2049 u->udp_timeout);
2050
2051#ifdef CONFIG_IP_VS_PROTO_TCP
2052 if (u->tcp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002053 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2054 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055 = u->tcp_timeout * HZ;
2056 }
2057
2058 if (u->tcp_fin_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002059 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2060 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061 = u->tcp_fin_timeout * HZ;
2062 }
2063#endif
2064
2065#ifdef CONFIG_IP_VS_PROTO_UDP
2066 if (u->udp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002067 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2068 pd->timeout_table[IP_VS_UDP_S_NORMAL]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069 = u->udp_timeout * HZ;
2070 }
2071#endif
2072 return 0;
2073}
2074
2075
2076#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2077#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2078#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2079 sizeof(struct ip_vs_dest_user))
2080#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2081#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2082#define MAX_ARG_LEN SVCDEST_ARG_LEN
2083
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002084static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002085 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2086 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2087 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2088 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2089 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2090 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2091 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2092 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2093 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2094 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2095 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2096};
2097
Julius Volzc860c6b2008-09-02 15:55:36 +02002098static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2099 struct ip_vs_service_user *usvc_compat)
2100{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002101 memset(usvc, 0, sizeof(*usvc));
2102
Julius Volzc860c6b2008-09-02 15:55:36 +02002103 usvc->af = AF_INET;
2104 usvc->protocol = usvc_compat->protocol;
2105 usvc->addr.ip = usvc_compat->addr;
2106 usvc->port = usvc_compat->port;
2107 usvc->fwmark = usvc_compat->fwmark;
2108
2109 /* Deep copy of sched_name is not needed here */
2110 usvc->sched_name = usvc_compat->sched_name;
2111
2112 usvc->flags = usvc_compat->flags;
2113 usvc->timeout = usvc_compat->timeout;
2114 usvc->netmask = usvc_compat->netmask;
2115}
2116
2117static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2118 struct ip_vs_dest_user *udest_compat)
2119{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002120 memset(udest, 0, sizeof(*udest));
2121
Julius Volzc860c6b2008-09-02 15:55:36 +02002122 udest->addr.ip = udest_compat->addr;
2123 udest->port = udest_compat->port;
2124 udest->conn_flags = udest_compat->conn_flags;
2125 udest->weight = udest_compat->weight;
2126 udest->u_threshold = udest_compat->u_threshold;
2127 udest->l_threshold = udest_compat->l_threshold;
2128}
2129
Linus Torvalds1da177e2005-04-16 15:20:36 -07002130static int
2131do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2132{
Hans Schillstromfc723252011-01-03 14:44:43 +01002133 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134 int ret;
2135 unsigned char arg[MAX_ARG_LEN];
Julius Volzc860c6b2008-09-02 15:55:36 +02002136 struct ip_vs_service_user *usvc_compat;
2137 struct ip_vs_service_user_kern usvc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138 struct ip_vs_service *svc;
Julius Volzc860c6b2008-09-02 15:55:36 +02002139 struct ip_vs_dest_user *udest_compat;
2140 struct ip_vs_dest_user_kern udest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002141
2142 if (!capable(CAP_NET_ADMIN))
2143 return -EPERM;
2144
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002145 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2146 return -EINVAL;
2147 if (len < 0 || len > MAX_ARG_LEN)
2148 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002149 if (len != set_arglen[SET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002150 pr_err("set_ctl: len %u != %u\n",
2151 len, set_arglen[SET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152 return -EINVAL;
2153 }
2154
2155 if (copy_from_user(arg, user, len) != 0)
2156 return -EFAULT;
2157
2158 /* increase the module use count */
2159 ip_vs_use_count_inc();
2160
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002161 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162 ret = -ERESTARTSYS;
2163 goto out_dec;
2164 }
2165
2166 if (cmd == IP_VS_SO_SET_FLUSH) {
2167 /* Flush the virtual service */
Hans Schillstromfc723252011-01-03 14:44:43 +01002168 ret = ip_vs_flush(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002169 goto out_unlock;
2170 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2171 /* Set timeout values for (tcp tcpfin udp) */
Hans Schillstrom93304192011-01-03 14:44:51 +01002172 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173 goto out_unlock;
2174 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2175 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2176 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2177 goto out_unlock;
2178 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2179 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2180 ret = stop_sync_thread(dm->state);
2181 goto out_unlock;
2182 }
2183
Julius Volzc860c6b2008-09-02 15:55:36 +02002184 usvc_compat = (struct ip_vs_service_user *)arg;
2185 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2186
2187 /* We only use the new structs internally, so copy userspace compat
2188 * structs to extended internal versions */
2189 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2190 ip_vs_copy_udest_compat(&udest, udest_compat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002191
2192 if (cmd == IP_VS_SO_SET_ZERO) {
2193 /* if no service address is set, zero counters in all */
Julius Volzc860c6b2008-09-02 15:55:36 +02002194 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002195 ret = ip_vs_zero_all(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002196 goto out_unlock;
2197 }
2198 }
2199
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +01002200 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2201 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2202 usvc.protocol != IPPROTO_SCTP) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002203 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2204 usvc.protocol, &usvc.addr.ip,
2205 ntohs(usvc.port), usvc.sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206 ret = -EFAULT;
2207 goto out_unlock;
2208 }
2209
2210 /* Lookup the exact service by <protocol, addr, port> or fwmark */
Julius Volzc860c6b2008-09-02 15:55:36 +02002211 if (usvc.fwmark == 0)
Hans Schillstromfc723252011-01-03 14:44:43 +01002212 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002213 &usvc.addr, usvc.port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002215 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002216
2217 if (cmd != IP_VS_SO_SET_ADD
Julius Volzc860c6b2008-09-02 15:55:36 +02002218 && (svc == NULL || svc->protocol != usvc.protocol)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219 ret = -ESRCH;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002220 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002221 }
2222
2223 switch (cmd) {
2224 case IP_VS_SO_SET_ADD:
2225 if (svc != NULL)
2226 ret = -EEXIST;
2227 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002228 ret = ip_vs_add_service(net, &usvc, &svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002229 break;
2230 case IP_VS_SO_SET_EDIT:
Julius Volzc860c6b2008-09-02 15:55:36 +02002231 ret = ip_vs_edit_service(svc, &usvc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232 break;
2233 case IP_VS_SO_SET_DEL:
2234 ret = ip_vs_del_service(svc);
2235 if (!ret)
2236 goto out_unlock;
2237 break;
2238 case IP_VS_SO_SET_ZERO:
2239 ret = ip_vs_zero_service(svc);
2240 break;
2241 case IP_VS_SO_SET_ADDDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002242 ret = ip_vs_add_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002243 break;
2244 case IP_VS_SO_SET_EDITDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002245 ret = ip_vs_edit_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002246 break;
2247 case IP_VS_SO_SET_DELDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002248 ret = ip_vs_del_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002249 break;
2250 default:
2251 ret = -EINVAL;
2252 }
2253
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002255 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256 out_dec:
2257 /* decrease the module use count */
2258 ip_vs_use_count_dec();
2259
2260 return ret;
2261}
2262
2263
2264static void
2265ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2266{
2267 spin_lock_bh(&src->lock);
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002268 memcpy(dst, &src->ustats, sizeof(*dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269 spin_unlock_bh(&src->lock);
2270}
2271
2272static void
2273ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2274{
2275 dst->protocol = src->protocol;
Julius Volze7ade462008-09-02 15:55:33 +02002276 dst->addr = src->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002277 dst->port = src->port;
2278 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002279 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002280 dst->flags = src->flags;
2281 dst->timeout = src->timeout / HZ;
2282 dst->netmask = src->netmask;
2283 dst->num_dests = src->num_dests;
2284 ip_vs_copy_stats(&dst->stats, &src->stats);
2285}
2286
2287static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002288__ip_vs_get_service_entries(struct net *net,
2289 const struct ip_vs_get_services *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290 struct ip_vs_get_services __user *uptr)
2291{
2292 int idx, count=0;
2293 struct ip_vs_service *svc;
2294 struct ip_vs_service_entry entry;
2295 int ret = 0;
2296
2297 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2298 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002299 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002300 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002301 continue;
2302
Linus Torvalds1da177e2005-04-16 15:20:36 -07002303 if (count >= get->num_services)
2304 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002305 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306 ip_vs_copy_service(&entry, svc);
2307 if (copy_to_user(&uptr->entrytable[count],
2308 &entry, sizeof(entry))) {
2309 ret = -EFAULT;
2310 goto out;
2311 }
2312 count++;
2313 }
2314 }
2315
2316 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2317 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002318 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002319 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002320 continue;
2321
Linus Torvalds1da177e2005-04-16 15:20:36 -07002322 if (count >= get->num_services)
2323 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002324 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002325 ip_vs_copy_service(&entry, svc);
2326 if (copy_to_user(&uptr->entrytable[count],
2327 &entry, sizeof(entry))) {
2328 ret = -EFAULT;
2329 goto out;
2330 }
2331 count++;
2332 }
2333 }
2334 out:
2335 return ret;
2336}
2337
2338static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002339__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002340 struct ip_vs_get_dests __user *uptr)
2341{
2342 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002343 union nf_inet_addr addr = { .ip = get->addr };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002344 int ret = 0;
2345
2346 if (get->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002347 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002349 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002350 get->port);
Julius Volzb18610d2008-09-02 15:55:37 +02002351
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352 if (svc) {
2353 int count = 0;
2354 struct ip_vs_dest *dest;
2355 struct ip_vs_dest_entry entry;
2356
2357 list_for_each_entry(dest, &svc->destinations, n_list) {
2358 if (count >= get->num_dests)
2359 break;
2360
Julius Volze7ade462008-09-02 15:55:33 +02002361 entry.addr = dest->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002362 entry.port = dest->port;
2363 entry.conn_flags = atomic_read(&dest->conn_flags);
2364 entry.weight = atomic_read(&dest->weight);
2365 entry.u_threshold = dest->u_threshold;
2366 entry.l_threshold = dest->l_threshold;
2367 entry.activeconns = atomic_read(&dest->activeconns);
2368 entry.inactconns = atomic_read(&dest->inactconns);
2369 entry.persistconns = atomic_read(&dest->persistconns);
2370 ip_vs_copy_stats(&entry.stats, &dest->stats);
2371 if (copy_to_user(&uptr->entrytable[count],
2372 &entry, sizeof(entry))) {
2373 ret = -EFAULT;
2374 break;
2375 }
2376 count++;
2377 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378 } else
2379 ret = -ESRCH;
2380 return ret;
2381}
2382
2383static inline void
Hans Schillstrom93304192011-01-03 14:44:51 +01002384__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002385{
Hans Schillstrom93304192011-01-03 14:44:51 +01002386 struct ip_vs_proto_data *pd;
2387
Linus Torvalds1da177e2005-04-16 15:20:36 -07002388#ifdef CONFIG_IP_VS_PROTO_TCP
Hans Schillstrom93304192011-01-03 14:44:51 +01002389 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2390 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2391 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002392#endif
2393#ifdef CONFIG_IP_VS_PROTO_UDP
Hans Schillstrom93304192011-01-03 14:44:51 +01002394 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002395 u->udp_timeout =
Hans Schillstrom93304192011-01-03 14:44:51 +01002396 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002397#endif
2398}
2399
2400
2401#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2402#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2403#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2404#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2405#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2406#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2407#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2408
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002409static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2411 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2412 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2413 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2414 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2415 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2416 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2417};
2418
2419static int
2420do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2421{
2422 unsigned char arg[128];
2423 int ret = 0;
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002424 unsigned int copylen;
Hans Schillstromfc723252011-01-03 14:44:43 +01002425 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002426
Hans Schillstromfc723252011-01-03 14:44:43 +01002427 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002428 if (!capable(CAP_NET_ADMIN))
2429 return -EPERM;
2430
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002431 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2432 return -EINVAL;
2433
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434 if (*len < get_arglen[GET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002435 pr_err("get_ctl: len %u < %u\n",
2436 *len, get_arglen[GET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002437 return -EINVAL;
2438 }
2439
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002440 copylen = get_arglen[GET_CMDID(cmd)];
2441 if (copylen > 128)
2442 return -EINVAL;
2443
2444 if (copy_from_user(arg, user, copylen) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002445 return -EFAULT;
2446
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002447 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002448 return -ERESTARTSYS;
2449
2450 switch (cmd) {
2451 case IP_VS_SO_GET_VERSION:
2452 {
2453 char buf[64];
2454
2455 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002456 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002457 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2458 ret = -EFAULT;
2459 goto out;
2460 }
2461 *len = strlen(buf)+1;
2462 }
2463 break;
2464
2465 case IP_VS_SO_GET_INFO:
2466 {
2467 struct ip_vs_getinfo info;
2468 info.version = IP_VS_VERSION_CODE;
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002469 info.size = ip_vs_conn_tab_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470 info.num_services = ip_vs_num_services;
2471 if (copy_to_user(user, &info, sizeof(info)) != 0)
2472 ret = -EFAULT;
2473 }
2474 break;
2475
2476 case IP_VS_SO_GET_SERVICES:
2477 {
2478 struct ip_vs_get_services *get;
2479 int size;
2480
2481 get = (struct ip_vs_get_services *)arg;
2482 size = sizeof(*get) +
2483 sizeof(struct ip_vs_service_entry) * get->num_services;
2484 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002485 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002486 ret = -EINVAL;
2487 goto out;
2488 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002489 ret = __ip_vs_get_service_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002490 }
2491 break;
2492
2493 case IP_VS_SO_GET_SERVICE:
2494 {
2495 struct ip_vs_service_entry *entry;
2496 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002497 union nf_inet_addr addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498
2499 entry = (struct ip_vs_service_entry *)arg;
Julius Volzb18610d2008-09-02 15:55:37 +02002500 addr.ip = entry->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002501 if (entry->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002502 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002503 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002504 svc = __ip_vs_service_find(net, AF_INET,
2505 entry->protocol, &addr,
2506 entry->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002507 if (svc) {
2508 ip_vs_copy_service(entry, svc);
2509 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2510 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511 } else
2512 ret = -ESRCH;
2513 }
2514 break;
2515
2516 case IP_VS_SO_GET_DESTS:
2517 {
2518 struct ip_vs_get_dests *get;
2519 int size;
2520
2521 get = (struct ip_vs_get_dests *)arg;
2522 size = sizeof(*get) +
2523 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2524 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002525 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002526 ret = -EINVAL;
2527 goto out;
2528 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002529 ret = __ip_vs_get_dest_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530 }
2531 break;
2532
2533 case IP_VS_SO_GET_TIMEOUT:
2534 {
2535 struct ip_vs_timeout_user t;
2536
Hans Schillstrom93304192011-01-03 14:44:51 +01002537 __ip_vs_get_timeouts(net, &t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002538 if (copy_to_user(user, &t, sizeof(t)) != 0)
2539 ret = -EFAULT;
2540 }
2541 break;
2542
2543 case IP_VS_SO_GET_DAEMON:
2544 {
2545 struct ip_vs_daemon_user d[2];
2546
2547 memset(&d, 0, sizeof(d));
2548 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2549 d[0].state = IP_VS_STATE_MASTER;
pageexec4da62fc2005-06-26 16:00:19 -07002550 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551 d[0].syncid = ip_vs_master_syncid;
2552 }
2553 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2554 d[1].state = IP_VS_STATE_BACKUP;
pageexec4da62fc2005-06-26 16:00:19 -07002555 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556 d[1].syncid = ip_vs_backup_syncid;
2557 }
2558 if (copy_to_user(user, &d, sizeof(d)) != 0)
2559 ret = -EFAULT;
2560 }
2561 break;
2562
2563 default:
2564 ret = -EINVAL;
2565 }
2566
2567 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002568 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569 return ret;
2570}
2571
2572
2573static struct nf_sockopt_ops ip_vs_sockopts = {
2574 .pf = PF_INET,
2575 .set_optmin = IP_VS_BASE_CTL,
2576 .set_optmax = IP_VS_SO_SET_MAX+1,
2577 .set = do_ip_vs_set_ctl,
2578 .get_optmin = IP_VS_BASE_CTL,
2579 .get_optmax = IP_VS_SO_GET_MAX+1,
2580 .get = do_ip_vs_get_ctl,
Neil Horman16fcec32007-09-11 11:28:26 +02002581 .owner = THIS_MODULE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002582};
2583
Julius Volz9a812192008-08-14 14:08:44 +02002584/*
2585 * Generic Netlink interface
2586 */
2587
2588/* IPVS genetlink family */
2589static struct genl_family ip_vs_genl_family = {
2590 .id = GENL_ID_GENERATE,
2591 .hdrsize = 0,
2592 .name = IPVS_GENL_NAME,
2593 .version = IPVS_GENL_VERSION,
2594 .maxattr = IPVS_CMD_MAX,
2595};
2596
2597/* Policy used for first-level command attributes */
2598static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2599 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2600 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2601 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2602 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2603 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2604 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2605};
2606
2607/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2608static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2609 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2610 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2611 .len = IP_VS_IFNAME_MAXLEN },
2612 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2613};
2614
2615/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2616static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2617 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2618 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2619 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2620 .len = sizeof(union nf_inet_addr) },
2621 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2622 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2623 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2624 .len = IP_VS_SCHEDNAME_MAXLEN },
Simon Horman0d1e71b2010-08-22 21:37:54 +09002625 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2626 .len = IP_VS_PENAME_MAXLEN },
Julius Volz9a812192008-08-14 14:08:44 +02002627 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2628 .len = sizeof(struct ip_vs_flags) },
2629 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2630 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2631 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2632};
2633
2634/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2635static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2636 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2637 .len = sizeof(union nf_inet_addr) },
2638 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2639 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2640 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2641 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2642 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2643 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2644 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2645 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2646 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2647};
2648
2649static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2650 struct ip_vs_stats *stats)
2651{
2652 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2653 if (!nl_stats)
2654 return -EMSGSIZE;
2655
2656 spin_lock_bh(&stats->lock);
2657
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002658 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2659 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2660 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2661 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2662 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2663 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2664 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2665 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2666 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2667 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
Julius Volz9a812192008-08-14 14:08:44 +02002668
2669 spin_unlock_bh(&stats->lock);
2670
2671 nla_nest_end(skb, nl_stats);
2672
2673 return 0;
2674
2675nla_put_failure:
2676 spin_unlock_bh(&stats->lock);
2677 nla_nest_cancel(skb, nl_stats);
2678 return -EMSGSIZE;
2679}
2680
2681static int ip_vs_genl_fill_service(struct sk_buff *skb,
2682 struct ip_vs_service *svc)
2683{
2684 struct nlattr *nl_service;
2685 struct ip_vs_flags flags = { .flags = svc->flags,
2686 .mask = ~0 };
2687
2688 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2689 if (!nl_service)
2690 return -EMSGSIZE;
2691
Julius Volzf94fd042008-09-02 15:55:55 +02002692 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
Julius Volz9a812192008-08-14 14:08:44 +02002693
2694 if (svc->fwmark) {
2695 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2696 } else {
2697 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2698 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2699 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2700 }
2701
2702 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002703 if (svc->pe)
2704 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
Julius Volz9a812192008-08-14 14:08:44 +02002705 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2706 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2707 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2708
2709 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2710 goto nla_put_failure;
2711
2712 nla_nest_end(skb, nl_service);
2713
2714 return 0;
2715
2716nla_put_failure:
2717 nla_nest_cancel(skb, nl_service);
2718 return -EMSGSIZE;
2719}
2720
2721static int ip_vs_genl_dump_service(struct sk_buff *skb,
2722 struct ip_vs_service *svc,
2723 struct netlink_callback *cb)
2724{
2725 void *hdr;
2726
2727 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2728 &ip_vs_genl_family, NLM_F_MULTI,
2729 IPVS_CMD_NEW_SERVICE);
2730 if (!hdr)
2731 return -EMSGSIZE;
2732
2733 if (ip_vs_genl_fill_service(skb, svc) < 0)
2734 goto nla_put_failure;
2735
2736 return genlmsg_end(skb, hdr);
2737
2738nla_put_failure:
2739 genlmsg_cancel(skb, hdr);
2740 return -EMSGSIZE;
2741}
2742
2743static int ip_vs_genl_dump_services(struct sk_buff *skb,
2744 struct netlink_callback *cb)
2745{
2746 int idx = 0, i;
2747 int start = cb->args[0];
2748 struct ip_vs_service *svc;
Hans Schillstromfc723252011-01-03 14:44:43 +01002749 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002750
2751 mutex_lock(&__ip_vs_mutex);
2752 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2753 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002754 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002755 continue;
2756 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2757 idx--;
2758 goto nla_put_failure;
2759 }
2760 }
2761 }
2762
2763 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2764 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002765 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002766 continue;
2767 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2768 idx--;
2769 goto nla_put_failure;
2770 }
2771 }
2772 }
2773
2774nla_put_failure:
2775 mutex_unlock(&__ip_vs_mutex);
2776 cb->args[0] = idx;
2777
2778 return skb->len;
2779}
2780
Hans Schillstromfc723252011-01-03 14:44:43 +01002781static int ip_vs_genl_parse_service(struct net *net,
2782 struct ip_vs_service_user_kern *usvc,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002783 struct nlattr *nla, int full_entry,
2784 struct ip_vs_service **ret_svc)
Julius Volz9a812192008-08-14 14:08:44 +02002785{
2786 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2787 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002788 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002789
2790 /* Parse mandatory identifying service fields first */
2791 if (nla == NULL ||
2792 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2793 return -EINVAL;
2794
2795 nla_af = attrs[IPVS_SVC_ATTR_AF];
2796 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2797 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2798 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2799 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2800
2801 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2802 return -EINVAL;
2803
Simon Horman258c8892009-12-15 17:01:25 +01002804 memset(usvc, 0, sizeof(*usvc));
2805
Julius Volzc860c6b2008-09-02 15:55:36 +02002806 usvc->af = nla_get_u16(nla_af);
Julius Volzf94fd042008-09-02 15:55:55 +02002807#ifdef CONFIG_IP_VS_IPV6
2808 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2809#else
2810 if (usvc->af != AF_INET)
2811#endif
Julius Volz9a812192008-08-14 14:08:44 +02002812 return -EAFNOSUPPORT;
2813
2814 if (nla_fwmark) {
2815 usvc->protocol = IPPROTO_TCP;
2816 usvc->fwmark = nla_get_u32(nla_fwmark);
2817 } else {
2818 usvc->protocol = nla_get_u16(nla_protocol);
2819 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2820 usvc->port = nla_get_u16(nla_port);
2821 usvc->fwmark = 0;
2822 }
2823
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002824 if (usvc->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002825 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002826 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002827 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002828 &usvc->addr, usvc->port);
2829 *ret_svc = svc;
2830
Julius Volz9a812192008-08-14 14:08:44 +02002831 /* If a full entry was requested, check for the additional fields */
2832 if (full_entry) {
Simon Horman0d1e71b2010-08-22 21:37:54 +09002833 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
Julius Volz9a812192008-08-14 14:08:44 +02002834 *nla_netmask;
2835 struct ip_vs_flags flags;
Julius Volz9a812192008-08-14 14:08:44 +02002836
2837 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
Simon Horman0d1e71b2010-08-22 21:37:54 +09002838 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
Julius Volz9a812192008-08-14 14:08:44 +02002839 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2840 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2841 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2842
2843 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2844 return -EINVAL;
2845
2846 nla_memcpy(&flags, nla_flags, sizeof(flags));
2847
2848 /* prefill flags from service if it already exists */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002849 if (svc)
Julius Volz9a812192008-08-14 14:08:44 +02002850 usvc->flags = svc->flags;
Julius Volz9a812192008-08-14 14:08:44 +02002851
2852 /* set new flags from userland */
2853 usvc->flags = (usvc->flags & ~flags.mask) |
2854 (flags.flags & flags.mask);
Julius Volzc860c6b2008-09-02 15:55:36 +02002855 usvc->sched_name = nla_data(nla_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002856 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
Julius Volz9a812192008-08-14 14:08:44 +02002857 usvc->timeout = nla_get_u32(nla_timeout);
2858 usvc->netmask = nla_get_u32(nla_netmask);
2859 }
2860
2861 return 0;
2862}
2863
Hans Schillstromfc723252011-01-03 14:44:43 +01002864static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2865 struct nlattr *nla)
Julius Volz9a812192008-08-14 14:08:44 +02002866{
Julius Volzc860c6b2008-09-02 15:55:36 +02002867 struct ip_vs_service_user_kern usvc;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002868 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002869 int ret;
2870
Hans Schillstromfc723252011-01-03 14:44:43 +01002871 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002872 return ret ? ERR_PTR(ret) : svc;
Julius Volz9a812192008-08-14 14:08:44 +02002873}
2874
2875static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2876{
2877 struct nlattr *nl_dest;
2878
2879 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2880 if (!nl_dest)
2881 return -EMSGSIZE;
2882
2883 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2884 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2885
2886 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2887 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2888 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2889 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2890 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2891 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2892 atomic_read(&dest->activeconns));
2893 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2894 atomic_read(&dest->inactconns));
2895 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2896 atomic_read(&dest->persistconns));
2897
2898 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2899 goto nla_put_failure;
2900
2901 nla_nest_end(skb, nl_dest);
2902
2903 return 0;
2904
2905nla_put_failure:
2906 nla_nest_cancel(skb, nl_dest);
2907 return -EMSGSIZE;
2908}
2909
2910static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2911 struct netlink_callback *cb)
2912{
2913 void *hdr;
2914
2915 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2916 &ip_vs_genl_family, NLM_F_MULTI,
2917 IPVS_CMD_NEW_DEST);
2918 if (!hdr)
2919 return -EMSGSIZE;
2920
2921 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2922 goto nla_put_failure;
2923
2924 return genlmsg_end(skb, hdr);
2925
2926nla_put_failure:
2927 genlmsg_cancel(skb, hdr);
2928 return -EMSGSIZE;
2929}
2930
2931static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2932 struct netlink_callback *cb)
2933{
2934 int idx = 0;
2935 int start = cb->args[0];
2936 struct ip_vs_service *svc;
2937 struct ip_vs_dest *dest;
2938 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
Hans Schillstromfc723252011-01-03 14:44:43 +01002939 struct net *net;
Julius Volz9a812192008-08-14 14:08:44 +02002940
2941 mutex_lock(&__ip_vs_mutex);
2942
2943 /* Try to find the service for which to dump destinations */
2944 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2945 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2946 goto out_err;
2947
Hans Schillstromfc723252011-01-03 14:44:43 +01002948 net = skb_sknet(skb);
2949 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02002950 if (IS_ERR(svc) || svc == NULL)
2951 goto out_err;
2952
2953 /* Dump the destinations */
2954 list_for_each_entry(dest, &svc->destinations, n_list) {
2955 if (++idx <= start)
2956 continue;
2957 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2958 idx--;
2959 goto nla_put_failure;
2960 }
2961 }
2962
2963nla_put_failure:
2964 cb->args[0] = idx;
Julius Volz9a812192008-08-14 14:08:44 +02002965
2966out_err:
2967 mutex_unlock(&__ip_vs_mutex);
2968
2969 return skb->len;
2970}
2971
Julius Volzc860c6b2008-09-02 15:55:36 +02002972static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
Julius Volz9a812192008-08-14 14:08:44 +02002973 struct nlattr *nla, int full_entry)
2974{
2975 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2976 struct nlattr *nla_addr, *nla_port;
2977
2978 /* Parse mandatory identifying destination fields first */
2979 if (nla == NULL ||
2980 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2981 return -EINVAL;
2982
2983 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2984 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2985
2986 if (!(nla_addr && nla_port))
2987 return -EINVAL;
2988
Simon Horman258c8892009-12-15 17:01:25 +01002989 memset(udest, 0, sizeof(*udest));
2990
Julius Volz9a812192008-08-14 14:08:44 +02002991 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2992 udest->port = nla_get_u16(nla_port);
2993
2994 /* If a full entry was requested, check for the additional fields */
2995 if (full_entry) {
2996 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2997 *nla_l_thresh;
2998
2999 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3000 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3001 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3002 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3003
3004 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3005 return -EINVAL;
3006
3007 udest->conn_flags = nla_get_u32(nla_fwd)
3008 & IP_VS_CONN_F_FWD_MASK;
3009 udest->weight = nla_get_u32(nla_weight);
3010 udest->u_threshold = nla_get_u32(nla_u_thresh);
3011 udest->l_threshold = nla_get_u32(nla_l_thresh);
3012 }
3013
3014 return 0;
3015}
3016
3017static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3018 const char *mcast_ifn, __be32 syncid)
3019{
3020 struct nlattr *nl_daemon;
3021
3022 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3023 if (!nl_daemon)
3024 return -EMSGSIZE;
3025
3026 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3027 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3028 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3029
3030 nla_nest_end(skb, nl_daemon);
3031
3032 return 0;
3033
3034nla_put_failure:
3035 nla_nest_cancel(skb, nl_daemon);
3036 return -EMSGSIZE;
3037}
3038
3039static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3040 const char *mcast_ifn, __be32 syncid,
3041 struct netlink_callback *cb)
3042{
3043 void *hdr;
3044 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3045 &ip_vs_genl_family, NLM_F_MULTI,
3046 IPVS_CMD_NEW_DAEMON);
3047 if (!hdr)
3048 return -EMSGSIZE;
3049
3050 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3051 goto nla_put_failure;
3052
3053 return genlmsg_end(skb, hdr);
3054
3055nla_put_failure:
3056 genlmsg_cancel(skb, hdr);
3057 return -EMSGSIZE;
3058}
3059
3060static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3061 struct netlink_callback *cb)
3062{
3063 mutex_lock(&__ip_vs_mutex);
3064 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3065 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3066 ip_vs_master_mcast_ifn,
3067 ip_vs_master_syncid, cb) < 0)
3068 goto nla_put_failure;
3069
3070 cb->args[0] = 1;
3071 }
3072
3073 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3074 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3075 ip_vs_backup_mcast_ifn,
3076 ip_vs_backup_syncid, cb) < 0)
3077 goto nla_put_failure;
3078
3079 cb->args[1] = 1;
3080 }
3081
3082nla_put_failure:
3083 mutex_unlock(&__ip_vs_mutex);
3084
3085 return skb->len;
3086}
3087
3088static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3089{
3090 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3091 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3092 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3093 return -EINVAL;
3094
3095 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3096 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3097 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3098}
3099
3100static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3101{
3102 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3103 return -EINVAL;
3104
3105 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3106}
3107
Hans Schillstrom93304192011-01-03 14:44:51 +01003108static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003109{
3110 struct ip_vs_timeout_user t;
3111
Hans Schillstrom93304192011-01-03 14:44:51 +01003112 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003113
3114 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3115 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3116
3117 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3118 t.tcp_fin_timeout =
3119 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3120
3121 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3122 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3123
Hans Schillstrom93304192011-01-03 14:44:51 +01003124 return ip_vs_set_timeout(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003125}
3126
3127static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3128{
3129 struct ip_vs_service *svc = NULL;
Julius Volzc860c6b2008-09-02 15:55:36 +02003130 struct ip_vs_service_user_kern usvc;
3131 struct ip_vs_dest_user_kern udest;
Julius Volz9a812192008-08-14 14:08:44 +02003132 int ret = 0, cmd;
3133 int need_full_svc = 0, need_full_dest = 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003134 struct net *net;
Julius Volz9a812192008-08-14 14:08:44 +02003135
Hans Schillstromfc723252011-01-03 14:44:43 +01003136 net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02003137 cmd = info->genlhdr->cmd;
3138
3139 mutex_lock(&__ip_vs_mutex);
3140
3141 if (cmd == IPVS_CMD_FLUSH) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003142 ret = ip_vs_flush(net);
Julius Volz9a812192008-08-14 14:08:44 +02003143 goto out;
3144 } else if (cmd == IPVS_CMD_SET_CONFIG) {
Hans Schillstrom93304192011-01-03 14:44:51 +01003145 ret = ip_vs_genl_set_config(net, info->attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003146 goto out;
3147 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3148 cmd == IPVS_CMD_DEL_DAEMON) {
3149
3150 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3151
3152 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3153 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3154 info->attrs[IPVS_CMD_ATTR_DAEMON],
3155 ip_vs_daemon_policy)) {
3156 ret = -EINVAL;
3157 goto out;
3158 }
3159
3160 if (cmd == IPVS_CMD_NEW_DAEMON)
3161 ret = ip_vs_genl_new_daemon(daemon_attrs);
3162 else
3163 ret = ip_vs_genl_del_daemon(daemon_attrs);
3164 goto out;
3165 } else if (cmd == IPVS_CMD_ZERO &&
3166 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003167 ret = ip_vs_zero_all(net);
Julius Volz9a812192008-08-14 14:08:44 +02003168 goto out;
3169 }
3170
3171 /* All following commands require a service argument, so check if we
3172 * received a valid one. We need a full service specification when
3173 * adding / editing a service. Only identifying members otherwise. */
3174 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3175 need_full_svc = 1;
3176
Hans Schillstromfc723252011-01-03 14:44:43 +01003177 ret = ip_vs_genl_parse_service(net, &usvc,
Julius Volz9a812192008-08-14 14:08:44 +02003178 info->attrs[IPVS_CMD_ATTR_SERVICE],
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003179 need_full_svc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003180 if (ret)
3181 goto out;
3182
Julius Volz9a812192008-08-14 14:08:44 +02003183 /* Unless we're adding a new service, the service must already exist */
3184 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3185 ret = -ESRCH;
3186 goto out;
3187 }
3188
3189 /* Destination commands require a valid destination argument. For
3190 * adding / editing a destination, we need a full destination
3191 * specification. */
3192 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3193 cmd == IPVS_CMD_DEL_DEST) {
3194 if (cmd != IPVS_CMD_DEL_DEST)
3195 need_full_dest = 1;
3196
3197 ret = ip_vs_genl_parse_dest(&udest,
3198 info->attrs[IPVS_CMD_ATTR_DEST],
3199 need_full_dest);
3200 if (ret)
3201 goto out;
3202 }
3203
3204 switch (cmd) {
3205 case IPVS_CMD_NEW_SERVICE:
3206 if (svc == NULL)
Hans Schillstromfc723252011-01-03 14:44:43 +01003207 ret = ip_vs_add_service(net, &usvc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003208 else
3209 ret = -EEXIST;
3210 break;
3211 case IPVS_CMD_SET_SERVICE:
3212 ret = ip_vs_edit_service(svc, &usvc);
3213 break;
3214 case IPVS_CMD_DEL_SERVICE:
3215 ret = ip_vs_del_service(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003216 /* do not use svc, it can be freed */
Julius Volz9a812192008-08-14 14:08:44 +02003217 break;
3218 case IPVS_CMD_NEW_DEST:
3219 ret = ip_vs_add_dest(svc, &udest);
3220 break;
3221 case IPVS_CMD_SET_DEST:
3222 ret = ip_vs_edit_dest(svc, &udest);
3223 break;
3224 case IPVS_CMD_DEL_DEST:
3225 ret = ip_vs_del_dest(svc, &udest);
3226 break;
3227 case IPVS_CMD_ZERO:
3228 ret = ip_vs_zero_service(svc);
3229 break;
3230 default:
3231 ret = -EINVAL;
3232 }
3233
3234out:
Julius Volz9a812192008-08-14 14:08:44 +02003235 mutex_unlock(&__ip_vs_mutex);
3236
3237 return ret;
3238}
3239
3240static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3241{
3242 struct sk_buff *msg;
3243 void *reply;
3244 int ret, cmd, reply_cmd;
Hans Schillstromfc723252011-01-03 14:44:43 +01003245 struct net *net;
Julius Volz9a812192008-08-14 14:08:44 +02003246
Hans Schillstromfc723252011-01-03 14:44:43 +01003247 net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02003248 cmd = info->genlhdr->cmd;
3249
3250 if (cmd == IPVS_CMD_GET_SERVICE)
3251 reply_cmd = IPVS_CMD_NEW_SERVICE;
3252 else if (cmd == IPVS_CMD_GET_INFO)
3253 reply_cmd = IPVS_CMD_SET_INFO;
3254 else if (cmd == IPVS_CMD_GET_CONFIG)
3255 reply_cmd = IPVS_CMD_SET_CONFIG;
3256 else {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003257 pr_err("unknown Generic Netlink command\n");
Julius Volz9a812192008-08-14 14:08:44 +02003258 return -EINVAL;
3259 }
3260
3261 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3262 if (!msg)
3263 return -ENOMEM;
3264
3265 mutex_lock(&__ip_vs_mutex);
3266
3267 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3268 if (reply == NULL)
3269 goto nla_put_failure;
3270
3271 switch (cmd) {
3272 case IPVS_CMD_GET_SERVICE:
3273 {
3274 struct ip_vs_service *svc;
3275
Hans Schillstromfc723252011-01-03 14:44:43 +01003276 svc = ip_vs_genl_find_service(net,
3277 info->attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003278 if (IS_ERR(svc)) {
3279 ret = PTR_ERR(svc);
3280 goto out_err;
3281 } else if (svc) {
3282 ret = ip_vs_genl_fill_service(msg, svc);
Julius Volz9a812192008-08-14 14:08:44 +02003283 if (ret)
3284 goto nla_put_failure;
3285 } else {
3286 ret = -ESRCH;
3287 goto out_err;
3288 }
3289
3290 break;
3291 }
3292
3293 case IPVS_CMD_GET_CONFIG:
3294 {
3295 struct ip_vs_timeout_user t;
3296
Hans Schillstrom93304192011-01-03 14:44:51 +01003297 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003298#ifdef CONFIG_IP_VS_PROTO_TCP
3299 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3300 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3301 t.tcp_fin_timeout);
3302#endif
3303#ifdef CONFIG_IP_VS_PROTO_UDP
3304 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3305#endif
3306
3307 break;
3308 }
3309
3310 case IPVS_CMD_GET_INFO:
3311 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3312 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01003313 ip_vs_conn_tab_size);
Julius Volz9a812192008-08-14 14:08:44 +02003314 break;
3315 }
3316
3317 genlmsg_end(msg, reply);
Johannes Berg134e6372009-07-10 09:51:34 +00003318 ret = genlmsg_reply(msg, info);
Julius Volz9a812192008-08-14 14:08:44 +02003319 goto out;
3320
3321nla_put_failure:
Hannes Eder1e3e2382009-08-02 11:05:41 +00003322 pr_err("not enough space in Netlink message\n");
Julius Volz9a812192008-08-14 14:08:44 +02003323 ret = -EMSGSIZE;
3324
3325out_err:
3326 nlmsg_free(msg);
3327out:
3328 mutex_unlock(&__ip_vs_mutex);
3329
3330 return ret;
3331}
3332
3333
3334static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3335 {
3336 .cmd = IPVS_CMD_NEW_SERVICE,
3337 .flags = GENL_ADMIN_PERM,
3338 .policy = ip_vs_cmd_policy,
3339 .doit = ip_vs_genl_set_cmd,
3340 },
3341 {
3342 .cmd = IPVS_CMD_SET_SERVICE,
3343 .flags = GENL_ADMIN_PERM,
3344 .policy = ip_vs_cmd_policy,
3345 .doit = ip_vs_genl_set_cmd,
3346 },
3347 {
3348 .cmd = IPVS_CMD_DEL_SERVICE,
3349 .flags = GENL_ADMIN_PERM,
3350 .policy = ip_vs_cmd_policy,
3351 .doit = ip_vs_genl_set_cmd,
3352 },
3353 {
3354 .cmd = IPVS_CMD_GET_SERVICE,
3355 .flags = GENL_ADMIN_PERM,
3356 .doit = ip_vs_genl_get_cmd,
3357 .dumpit = ip_vs_genl_dump_services,
3358 .policy = ip_vs_cmd_policy,
3359 },
3360 {
3361 .cmd = IPVS_CMD_NEW_DEST,
3362 .flags = GENL_ADMIN_PERM,
3363 .policy = ip_vs_cmd_policy,
3364 .doit = ip_vs_genl_set_cmd,
3365 },
3366 {
3367 .cmd = IPVS_CMD_SET_DEST,
3368 .flags = GENL_ADMIN_PERM,
3369 .policy = ip_vs_cmd_policy,
3370 .doit = ip_vs_genl_set_cmd,
3371 },
3372 {
3373 .cmd = IPVS_CMD_DEL_DEST,
3374 .flags = GENL_ADMIN_PERM,
3375 .policy = ip_vs_cmd_policy,
3376 .doit = ip_vs_genl_set_cmd,
3377 },
3378 {
3379 .cmd = IPVS_CMD_GET_DEST,
3380 .flags = GENL_ADMIN_PERM,
3381 .policy = ip_vs_cmd_policy,
3382 .dumpit = ip_vs_genl_dump_dests,
3383 },
3384 {
3385 .cmd = IPVS_CMD_NEW_DAEMON,
3386 .flags = GENL_ADMIN_PERM,
3387 .policy = ip_vs_cmd_policy,
3388 .doit = ip_vs_genl_set_cmd,
3389 },
3390 {
3391 .cmd = IPVS_CMD_DEL_DAEMON,
3392 .flags = GENL_ADMIN_PERM,
3393 .policy = ip_vs_cmd_policy,
3394 .doit = ip_vs_genl_set_cmd,
3395 },
3396 {
3397 .cmd = IPVS_CMD_GET_DAEMON,
3398 .flags = GENL_ADMIN_PERM,
3399 .dumpit = ip_vs_genl_dump_daemons,
3400 },
3401 {
3402 .cmd = IPVS_CMD_SET_CONFIG,
3403 .flags = GENL_ADMIN_PERM,
3404 .policy = ip_vs_cmd_policy,
3405 .doit = ip_vs_genl_set_cmd,
3406 },
3407 {
3408 .cmd = IPVS_CMD_GET_CONFIG,
3409 .flags = GENL_ADMIN_PERM,
3410 .doit = ip_vs_genl_get_cmd,
3411 },
3412 {
3413 .cmd = IPVS_CMD_GET_INFO,
3414 .flags = GENL_ADMIN_PERM,
3415 .doit = ip_vs_genl_get_cmd,
3416 },
3417 {
3418 .cmd = IPVS_CMD_ZERO,
3419 .flags = GENL_ADMIN_PERM,
3420 .policy = ip_vs_cmd_policy,
3421 .doit = ip_vs_genl_set_cmd,
3422 },
3423 {
3424 .cmd = IPVS_CMD_FLUSH,
3425 .flags = GENL_ADMIN_PERM,
3426 .doit = ip_vs_genl_set_cmd,
3427 },
3428};
3429
3430static int __init ip_vs_genl_register(void)
3431{
Michał Mirosław8f698d52009-05-21 10:34:05 +00003432 return genl_register_family_with_ops(&ip_vs_genl_family,
3433 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
Julius Volz9a812192008-08-14 14:08:44 +02003434}
3435
3436static void ip_vs_genl_unregister(void)
3437{
3438 genl_unregister_family(&ip_vs_genl_family);
3439}
3440
3441/* End of Generic Netlink interface definitions */
3442
/*
 *	per netns init/exit func.
 */
3446int __net_init __ip_vs_control_init(struct net *net)
3447{
Hans Schillstromfc723252011-01-03 14:44:43 +01003448 int idx;
3449 struct netns_ipvs *ipvs = net_ipvs(net);
3450
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003451 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3452 return -EPERM;
3453
Hans Schillstromfc723252011-01-03 14:44:43 +01003454 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3455 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3456
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003457 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3458 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3459 sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path,
3460 vs_vars);
3461 if (sysctl_header == NULL)
3462 goto err_reg;
3463 ip_vs_new_estimator(&ip_vs_stats);
3464 return 0;
3465
3466err_reg:
3467 return -ENOMEM;
3468}
3469
3470static void __net_exit __ip_vs_control_cleanup(struct net *net)
3471{
3472 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3473 return;
3474
3475 ip_vs_kill_estimator(&ip_vs_stats);
3476 unregister_net_sysctl_table(sysctl_header);
3477 proc_net_remove(net, "ip_vs_stats");
3478 proc_net_remove(net, "ip_vs");
3479}
3480
3481static struct pernet_operations ipvs_control_ops = {
3482 .init = __ip_vs_control_init,
3483 .exit = __ip_vs_control_cleanup,
3484};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003485
Sven Wegener048cf482008-08-10 18:24:35 +00003486int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003487{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003488 int idx;
Hans Schillstromfc723252011-01-03 14:44:43 +01003489 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003490
3491 EnterFunction(2);
3492
Hans Schillstromfc723252011-01-03 14:44:43 +01003493 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003494 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3495 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3496 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3497 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003498
3499 ret = register_pernet_subsys(&ipvs_control_ops);
3500 if (ret) {
3501 pr_err("cannot register namespace.\n");
3502 goto err;
Eduardo Blancod86bef72010-10-19 10:26:47 +01003503 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003504
3505 smp_wmb(); /* Do we really need it now ? */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003506
Linus Torvalds1da177e2005-04-16 15:20:36 -07003507 ret = nf_register_sockopt(&ip_vs_sockopts);
3508 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003509 pr_err("cannot register sockopt.\n");
Hans Schillstromfc723252011-01-03 14:44:43 +01003510 goto err_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003511 }
3512
Julius Volz9a812192008-08-14 14:08:44 +02003513 ret = ip_vs_genl_register();
3514 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003515 pr_err("cannot register Generic Netlink interface.\n");
Julius Volz9a812192008-08-14 14:08:44 +02003516 nf_unregister_sockopt(&ip_vs_sockopts);
Hans Schillstromfc723252011-01-03 14:44:43 +01003517 goto err_net;
Julius Volz9a812192008-08-14 14:08:44 +02003518 }
3519
Linus Torvalds1da177e2005-04-16 15:20:36 -07003520 /* Hook the defense timer */
3521 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3522
3523 LeaveFunction(2);
3524 return 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003525
3526err_net:
3527 unregister_pernet_subsys(&ipvs_control_ops);
3528err:
3529 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003530}
3531
3532
3533void ip_vs_control_cleanup(void)
3534{
3535 EnterFunction(2);
3536 ip_vs_trash_cleanup();
Tejun Heoafe2c512010-12-14 16:21:17 +01003537 cancel_delayed_work_sync(&defense_work);
Oleg Nesterov28e53bd2007-05-09 02:34:22 -07003538 cancel_work_sync(&defense_work.work);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003539 ip_vs_kill_estimator(&ip_vs_stats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003540 unregister_pernet_subsys(&ipvs_control_ops);
Julius Volz9a812192008-08-14 14:08:44 +02003541 ip_vs_genl_unregister();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003542 nf_unregister_sockopt(&ip_vs_sockopts);
3543 LeaveFunction(2);
3544}