blob: 03f86312b4bbb6bdaaad592e11354cf56446f36b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
Hannes Eder9aada7a2009-07-30 14:29:44 -070021#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/seq_file.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090034#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080038#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020040#include <net/net_namespace.h>
Hans Schillstrom93304192011-01-03 14:44:51 +010041#include <linux/nsproxy.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <net/ip.h>
Vince Busam09571c72008-09-02 15:55:52 +020043#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020047#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020049#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
Ingo Molnar14cc3e22006-03-26 01:37:14 -080056static DEFINE_MUTEX(__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58/* lock for service table */
59static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61/* lock for table with the real services */
62static DEFINE_RWLOCK(__ip_vs_rs_lock);
63
64/* lock for state and timeout tables */
Simon Horman4f728162010-08-26 02:54:30 +000065static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070066
67/* lock for drop entry handling */
68static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
69
70/* lock for drop packet handling */
71static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
72
73/* 1/rate drop and drop-entry variables */
74int ip_vs_drop_rate = 0;
75int ip_vs_drop_counter = 0;
76static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
77
78/* number of virtual services */
79static int ip_vs_num_services = 0;
80
81/* sysctl variables */
82static int sysctl_ip_vs_drop_entry = 0;
83static int sysctl_ip_vs_drop_packet = 0;
84static int sysctl_ip_vs_secure_tcp = 0;
85static int sysctl_ip_vs_amemthresh = 1024;
86static int sysctl_ip_vs_am_droprate = 10;
87int sysctl_ip_vs_cache_bypass = 0;
88int sysctl_ip_vs_expire_nodest_conn = 0;
89int sysctl_ip_vs_expire_quiescent_template = 0;
90int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
91int sysctl_ip_vs_nat_icmp_send = 0;
Julian Anastasovf4bc17c2010-09-21 17:35:41 +020092#ifdef CONFIG_IP_VS_NFCT
93int sysctl_ip_vs_conntrack;
94#endif
Julian Anastasov8a803042010-09-21 17:38:57 +020095int sysctl_ip_vs_snat_reroute = 1;
Hans Schillstromb880c1f2010-11-19 14:25:14 +010096int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */
Linus Torvalds1da177e2005-04-16 15:20:36 -070097
#ifdef CONFIG_IP_VS_DEBUG
/* Current debug verbosity; zero until changed. */
static int sysctl_ip_vs_debug_level;

/* Accessor returning the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
106
#ifdef CONFIG_IP_VS_IPV6
/*
 * Report whether the IPv6 address @addr is routed through a loopback
 * device.  Modelled on rt6_fill_node() in net/ipv6/route.c.
 *
 * A route lookup for @addr is done in init_net with an all-zero source
 * address and no output interface.
 *
 * Returns 1 when the route's device has IFF_LOOPBACK set, 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct flowi fl = {
		.oif = 0,
		.fl6_dst = *addr,
		.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
	};
	int is_local;

	dst = ip6_route_output(&init_net, NULL, &fl);
	rt = (struct rt6_info *)dst;
	is_local = rt && rt->rt6i_dev &&
		   (rt->rt6i_dev->flags & IFF_LOOPBACK);

	/*
	 * ip6_route_output() returns a referenced dst entry.  Drop that
	 * reference on every path; without this each call leaks a route
	 * reference.  dst_release() tolerates a NULL pointer.
	 */
	dst_release(dst);

	return is_local;
}
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700126 * update_defense_level is called from keventd and from sysctl,
127 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128 */
Hans Schillstrom93304192011-01-03 14:44:51 +0100129static void update_defense_level(struct netns_ipvs *ipvs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130{
131 struct sysinfo i;
132 static int old_secure_tcp = 0;
133 int availmem;
134 int nomem;
135 int to_change = -1;
136
137 /* we only count free and buffered memory (in pages) */
138 si_meminfo(&i);
139 availmem = i.freeram + i.bufferram;
140 /* however in linux 2.5 the i.bufferram is total page cache size,
141 we need adjust it */
142 /* si_swapinfo(&i); */
143 /* availmem = availmem - (i.totalswap - i.freeswap); */
144
145 nomem = (availmem < sysctl_ip_vs_amemthresh);
146
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700147 local_bh_disable();
148
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149 /* drop_entry */
150 spin_lock(&__ip_vs_dropentry_lock);
151 switch (sysctl_ip_vs_drop_entry) {
152 case 0:
153 atomic_set(&ip_vs_dropentry, 0);
154 break;
155 case 1:
156 if (nomem) {
157 atomic_set(&ip_vs_dropentry, 1);
158 sysctl_ip_vs_drop_entry = 2;
159 } else {
160 atomic_set(&ip_vs_dropentry, 0);
161 }
162 break;
163 case 2:
164 if (nomem) {
165 atomic_set(&ip_vs_dropentry, 1);
166 } else {
167 atomic_set(&ip_vs_dropentry, 0);
168 sysctl_ip_vs_drop_entry = 1;
169 };
170 break;
171 case 3:
172 atomic_set(&ip_vs_dropentry, 1);
173 break;
174 }
175 spin_unlock(&__ip_vs_dropentry_lock);
176
177 /* drop_packet */
178 spin_lock(&__ip_vs_droppacket_lock);
179 switch (sysctl_ip_vs_drop_packet) {
180 case 0:
181 ip_vs_drop_rate = 0;
182 break;
183 case 1:
184 if (nomem) {
185 ip_vs_drop_rate = ip_vs_drop_counter
186 = sysctl_ip_vs_amemthresh /
187 (sysctl_ip_vs_amemthresh-availmem);
188 sysctl_ip_vs_drop_packet = 2;
189 } else {
190 ip_vs_drop_rate = 0;
191 }
192 break;
193 case 2:
194 if (nomem) {
195 ip_vs_drop_rate = ip_vs_drop_counter
196 = sysctl_ip_vs_amemthresh /
197 (sysctl_ip_vs_amemthresh-availmem);
198 } else {
199 ip_vs_drop_rate = 0;
200 sysctl_ip_vs_drop_packet = 1;
201 }
202 break;
203 case 3:
204 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
205 break;
206 }
207 spin_unlock(&__ip_vs_droppacket_lock);
208
209 /* secure_tcp */
Simon Horman4f728162010-08-26 02:54:30 +0000210 spin_lock(&ip_vs_securetcp_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 switch (sysctl_ip_vs_secure_tcp) {
212 case 0:
213 if (old_secure_tcp >= 2)
214 to_change = 0;
215 break;
216 case 1:
217 if (nomem) {
218 if (old_secure_tcp < 2)
219 to_change = 1;
220 sysctl_ip_vs_secure_tcp = 2;
221 } else {
222 if (old_secure_tcp >= 2)
223 to_change = 0;
224 }
225 break;
226 case 2:
227 if (nomem) {
228 if (old_secure_tcp < 2)
229 to_change = 1;
230 } else {
231 if (old_secure_tcp >= 2)
232 to_change = 0;
233 sysctl_ip_vs_secure_tcp = 1;
234 }
235 break;
236 case 3:
237 if (old_secure_tcp < 2)
238 to_change = 1;
239 break;
240 }
241 old_secure_tcp = sysctl_ip_vs_secure_tcp;
242 if (to_change >= 0)
Hans Schillstrom93304192011-01-03 14:44:51 +0100243 ip_vs_protocol_timeout_change(ipvs,
244 sysctl_ip_vs_secure_tcp > 1);
Simon Horman4f728162010-08-26 02:54:30 +0000245 spin_unlock(&ip_vs_securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700246
247 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248}
249
250
/*
 * Timer for checking the defense: delay between two runs of the defense
 * work.  The expansion is parenthesized so that the macro stays correct
 * when it is used inside a larger expression (e.g. as a divisor).
 */
#define DEFENSE_TIMER_PERIOD	(1*HZ)
David Howellsc4028952006-11-22 14:57:56 +0000255static void defense_work_handler(struct work_struct *work);
256static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257
David Howellsc4028952006-11-22 14:57:56 +0000258static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259{
Hans Schillstrom93304192011-01-03 14:44:51 +0100260 struct net *net = &init_net;
261 struct netns_ipvs *ipvs = net_ipvs(net);
262
263 update_defense_level(ipvs);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 if (atomic_read(&ip_vs_dropentry))
265 ip_vs_random_dropentry();
266
267 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
268}
269
270int
271ip_vs_use_count_inc(void)
272{
273 return try_module_get(THIS_MODULE);
274}
275
276void
277ip_vs_use_count_dec(void)
278{
279 module_put(THIS_MODULE);
280}
281
282
283/*
284 * Hash table: for virtual service lookups
285 */
286#define IP_VS_SVC_TAB_BITS 8
287#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
288#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
289
290/* the service table hashed by <protocol, addr, port> */
291static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
292/* the service table hashed by fwmark */
293static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
294
295/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296 * Trash for destinations
297 */
298static LIST_HEAD(ip_vs_dest_trash);
299
300/*
301 * FTP & NULL virtual service counters
302 */
303static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
304static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
305
306
307/*
308 * Returns hash value for virtual service
309 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100310static inline unsigned
311ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
312 const union nf_inet_addr *addr, __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313{
314 register unsigned porth = ntohs(port);
Julius Volzb18610d2008-09-02 15:55:37 +0200315 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316
Julius Volzb18610d2008-09-02 15:55:37 +0200317#ifdef CONFIG_IP_VS_IPV6
318 if (af == AF_INET6)
319 addr_fold = addr->ip6[0]^addr->ip6[1]^
320 addr->ip6[2]^addr->ip6[3];
321#endif
Hans Schillstromfc723252011-01-03 14:44:43 +0100322 addr_fold ^= ((size_t)net>>8);
Julius Volzb18610d2008-09-02 15:55:37 +0200323
324 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 & IP_VS_SVC_TAB_MASK;
326}
327
328/*
329 * Returns hash value of fwmark for virtual service lookup
330 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100331static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332{
Hans Schillstromfc723252011-01-03 14:44:43 +0100333 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334}
335
336/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100337 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338 * or in the ip_vs_svc_fwm_table by fwmark.
339 * Should be called with locked tables.
340 */
341static int ip_vs_svc_hash(struct ip_vs_service *svc)
342{
343 unsigned hash;
344
345 if (svc->flags & IP_VS_SVC_F_HASHED) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000346 pr_err("%s(): request for already hashed, called from %pF\n",
347 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348 return 0;
349 }
350
351 if (svc->fwmark == 0) {
352 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100353 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100355 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
356 &svc->addr, svc->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
358 } else {
359 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100360 * Hash it by fwmark in svc_fwm_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100362 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
364 }
365
366 svc->flags |= IP_VS_SVC_F_HASHED;
367 /* increase its refcnt because it is referenced by the svc table */
368 atomic_inc(&svc->refcnt);
369 return 1;
370}
371
372
373/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100374 * Unhashes a service from svc_table / svc_fwm_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375 * Should be called with locked tables.
376 */
377static int ip_vs_svc_unhash(struct ip_vs_service *svc)
378{
379 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000380 pr_err("%s(): request for unhash flagged, called from %pF\n",
381 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 return 0;
383 }
384
385 if (svc->fwmark == 0) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100386 /* Remove it from the svc_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 list_del(&svc->s_list);
388 } else {
Hans Schillstromfc723252011-01-03 14:44:43 +0100389 /* Remove it from the svc_fwm_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 list_del(&svc->f_list);
391 }
392
393 svc->flags &= ~IP_VS_SVC_F_HASHED;
394 atomic_dec(&svc->refcnt);
395 return 1;
396}
397
398
399/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100400 * Get service by {netns, proto,addr,port} in the service table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 */
Julius Volzb18610d2008-09-02 15:55:37 +0200402static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100403__ip_vs_service_find(struct net *net, int af, __u16 protocol,
404 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405{
406 unsigned hash;
407 struct ip_vs_service *svc;
408
409 /* Check for "full" addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100410 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411
412 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
Julius Volzb18610d2008-09-02 15:55:37 +0200413 if ((svc->af == af)
414 && ip_vs_addr_equal(af, &svc->addr, vaddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415 && (svc->port == vport)
Hans Schillstromfc723252011-01-03 14:44:43 +0100416 && (svc->protocol == protocol)
417 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419 return svc;
420 }
421 }
422
423 return NULL;
424}
425
426
427/*
428 * Get service by {fwmark} in the service table.
429 */
Julius Volzb18610d2008-09-02 15:55:37 +0200430static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100431__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432{
433 unsigned hash;
434 struct ip_vs_service *svc;
435
436 /* Check for fwmark addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100437 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438
439 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100440 if (svc->fwmark == fwmark && svc->af == af
441 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 return svc;
444 }
445 }
446
447 return NULL;
448}
449
450struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100451ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
Julius Volz3c2e0502008-09-02 15:55:38 +0200452 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453{
454 struct ip_vs_service *svc;
Julius Volz3c2e0502008-09-02 15:55:38 +0200455
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 read_lock(&__ip_vs_svc_lock);
457
458 /*
459 * Check the table hashed by fwmark first
460 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100461 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
462 if (fwmark && svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 goto out;
464
465 /*
466 * Check the table hashed by <protocol,addr,port>
467 * for "full" addressed entries
468 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100469 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470
471 if (svc == NULL
472 && protocol == IPPROTO_TCP
473 && atomic_read(&ip_vs_ftpsvc_counter)
474 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
475 /*
476 * Check if ftp service entry exists, the packet
477 * might belong to FTP data connections.
478 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100479 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700480 }
481
482 if (svc == NULL
483 && atomic_read(&ip_vs_nullsvc_counter)) {
484 /*
485 * Check if the catch-all port (port zero) exists
486 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100487 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488 }
489
490 out:
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200491 if (svc)
492 atomic_inc(&svc->usecnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 read_unlock(&__ip_vs_svc_lock);
494
Julius Volz3c2e0502008-09-02 15:55:38 +0200495 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
496 fwmark, ip_vs_proto_name(protocol),
497 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
498 svc ? "hit" : "not hit");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499
500 return svc;
501}
502
503
504static inline void
505__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
506{
507 atomic_inc(&svc->refcnt);
508 dest->svc = svc;
509}
510
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200511static void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512__ip_vs_unbind_svc(struct ip_vs_dest *dest)
513{
514 struct ip_vs_service *svc = dest->svc;
515
516 dest->svc = NULL;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200517 if (atomic_dec_and_test(&svc->refcnt)) {
518 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
519 svc->fwmark,
520 IP_VS_DBG_ADDR(svc->af, &svc->addr),
521 ntohs(svc->port), atomic_read(&svc->usecnt));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200523 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524}
525
526
527/*
528 * Returns hash value for real service
529 */
Julius Volz7937df12008-09-02 15:55:48 +0200530static inline unsigned ip_vs_rs_hashkey(int af,
531 const union nf_inet_addr *addr,
532 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533{
534 register unsigned porth = ntohs(port);
Julius Volz7937df12008-09-02 15:55:48 +0200535 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536
Julius Volz7937df12008-09-02 15:55:48 +0200537#ifdef CONFIG_IP_VS_IPV6
538 if (af == AF_INET6)
539 addr_fold = addr->ip6[0]^addr->ip6[1]^
540 addr->ip6[2]^addr->ip6[3];
541#endif
542
543 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544 & IP_VS_RTAB_MASK;
545}
546
547/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100548 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 * should be called with locked tables.
550 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100551static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552{
553 unsigned hash;
554
555 if (!list_empty(&dest->d_list)) {
556 return 0;
557 }
558
559 /*
560 * Hash by proto,addr,port,
561 * which are the parameters of the real service.
562 */
Julius Volz7937df12008-09-02 15:55:48 +0200563 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
564
Hans Schillstromfc723252011-01-03 14:44:43 +0100565 list_add(&dest->d_list, &ipvs->rs_table[hash]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566
567 return 1;
568}
569
570/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100571 * UNhashes ip_vs_dest from rs_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572 * should be called with locked tables.
573 */
574static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
575{
576 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100577 * Remove it from the rs_table table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578 */
579 if (!list_empty(&dest->d_list)) {
580 list_del(&dest->d_list);
581 INIT_LIST_HEAD(&dest->d_list);
582 }
583
584 return 1;
585}
586
587/*
588 * Lookup real service by <proto,addr,port> in the real service table.
589 */
590struct ip_vs_dest *
Hans Schillstromfc723252011-01-03 14:44:43 +0100591ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
Julius Volz7937df12008-09-02 15:55:48 +0200592 const union nf_inet_addr *daddr,
593 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594{
Hans Schillstromfc723252011-01-03 14:44:43 +0100595 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596 unsigned hash;
597 struct ip_vs_dest *dest;
598
599 /*
600 * Check for "full" addressed entries
601 * Return the first found entry
602 */
Julius Volz7937df12008-09-02 15:55:48 +0200603 hash = ip_vs_rs_hashkey(af, daddr, dport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
605 read_lock(&__ip_vs_rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100606 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200607 if ((dest->af == af)
608 && ip_vs_addr_equal(af, &dest->addr, daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 && (dest->port == dport)
610 && ((dest->protocol == protocol) ||
611 dest->vfwmark)) {
612 /* HIT */
613 read_unlock(&__ip_vs_rs_lock);
614 return dest;
615 }
616 }
617 read_unlock(&__ip_vs_rs_lock);
618
619 return NULL;
620}
621
622/*
623 * Lookup destination by {addr,port} in the given service
624 */
625static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200626ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
627 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628{
629 struct ip_vs_dest *dest;
630
631 /*
632 * Find the destination for the given service
633 */
634 list_for_each_entry(dest, &svc->destinations, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200635 if ((dest->af == svc->af)
636 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
637 && (dest->port == dport)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 /* HIT */
639 return dest;
640 }
641 }
642
643 return NULL;
644}
645
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800646/*
647 * Find destination by {daddr,dport,vaddr,protocol}
648 * Cretaed to be used in ip_vs_process_message() in
649 * the backup synchronization daemon. It finds the
650 * destination to be bound to the received connection
651 * on the backup.
652 *
653 * ip_vs_lookup_real_service() looked promissing, but
654 * seems not working as expected.
655 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100656struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
657 const union nf_inet_addr *daddr,
Julius Volz7937df12008-09-02 15:55:48 +0200658 __be16 dport,
659 const union nf_inet_addr *vaddr,
Hans Schillstrom0e051e62010-11-19 14:25:07 +0100660 __be16 vport, __u16 protocol, __u32 fwmark)
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800661{
662 struct ip_vs_dest *dest;
663 struct ip_vs_service *svc;
664
Hans Schillstromfc723252011-01-03 14:44:43 +0100665 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800666 if (!svc)
667 return NULL;
668 dest = ip_vs_lookup_dest(svc, daddr, dport);
669 if (dest)
670 atomic_inc(&dest->refcnt);
671 ip_vs_service_put(svc);
672 return dest;
673}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674
675/*
676 * Lookup dest by {svc,addr,port} in the destination trash.
677 * The destination trash is used to hold the destinations that are removed
678 * from the service table but are still referenced by some conn entries.
679 * The reason to add the destination trash is when the dest is temporary
680 * down (either by administrator or by monitor program), the dest can be
681 * picked back from the trash, the remaining connections to the dest can
682 * continue, and the counting information of the dest is also useful for
683 * scheduling.
684 */
685static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200686ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
687 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688{
689 struct ip_vs_dest *dest, *nxt;
690
691 /*
692 * Find the destination in trash
693 */
694 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200695 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
696 "dest->refcnt=%d\n",
697 dest->vfwmark,
698 IP_VS_DBG_ADDR(svc->af, &dest->addr),
699 ntohs(dest->port),
700 atomic_read(&dest->refcnt));
701 if (dest->af == svc->af &&
702 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700703 dest->port == dport &&
704 dest->vfwmark == svc->fwmark &&
705 dest->protocol == svc->protocol &&
706 (svc->fwmark ||
Julius Volz7937df12008-09-02 15:55:48 +0200707 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708 dest->vport == svc->port))) {
709 /* HIT */
710 return dest;
711 }
712
713 /*
714 * Try to purge the destination from trash if not referenced
715 */
716 if (atomic_read(&dest->refcnt) == 1) {
Julius Volz7937df12008-09-02 15:55:48 +0200717 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
718 "from trash\n",
719 dest->vfwmark,
720 IP_VS_DBG_ADDR(svc->af, &dest->addr),
721 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722 list_del(&dest->n_list);
723 ip_vs_dst_reset(dest);
724 __ip_vs_unbind_svc(dest);
725 kfree(dest);
726 }
727 }
728
729 return NULL;
730}
731
732
733/*
734 * Clean up all the destinations in the trash
735 * Called by the ip_vs_control_cleanup()
736 *
737 * When the ip_vs_control_clearup is activated by ipvs module exit,
738 * the service tables must have been flushed and all the connections
739 * are expired, and the refcnt of each destination in the trash must
740 * be 1, so we simply release them here.
741 */
742static void ip_vs_trash_cleanup(void)
743{
744 struct ip_vs_dest *dest, *nxt;
745
746 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
747 list_del(&dest->n_list);
748 ip_vs_dst_reset(dest);
749 __ip_vs_unbind_svc(dest);
750 kfree(dest);
751 }
752}
753
754
755static void
756ip_vs_zero_stats(struct ip_vs_stats *stats)
757{
758 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000759
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200760 memset(&stats->ustats, 0, sizeof(stats->ustats));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000762
Sven Wegener3a14a3132008-08-10 18:24:41 +0000763 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764}
765
766/*
767 * Update a destination in the given service
768 */
769static void
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200770__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
771 struct ip_vs_dest_user_kern *udest, int add)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772{
Hans Schillstromfc723252011-01-03 14:44:43 +0100773 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774 int conn_flags;
775
776 /* set the weight and the flags */
777 atomic_set(&dest->weight, udest->weight);
Julian Anastasov35757922010-09-17 14:18:16 +0200778 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
779 conn_flags |= IP_VS_CONN_F_INACTIVE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
Julian Anastasov35757922010-09-17 14:18:16 +0200782 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
784 } else {
785 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100786 * Put the real service in rs_table if not present.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787 * For now only for NAT!
788 */
789 write_lock_bh(&__ip_vs_rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100790 ip_vs_rs_hash(ipvs, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791 write_unlock_bh(&__ip_vs_rs_lock);
792 }
793 atomic_set(&dest->conn_flags, conn_flags);
794
795 /* bind the service */
796 if (!dest->svc) {
797 __ip_vs_bind_svc(dest, svc);
798 } else {
799 if (dest->svc != svc) {
800 __ip_vs_unbind_svc(dest);
801 ip_vs_zero_stats(&dest->stats);
802 __ip_vs_bind_svc(dest, svc);
803 }
804 }
805
806 /* set the dest status flags */
807 dest->flags |= IP_VS_DEST_F_AVAILABLE;
808
809 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
810 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
811 dest->u_threshold = udest->u_threshold;
812 dest->l_threshold = udest->l_threshold;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200813
Julian Anastasovfc604762010-10-17 16:38:15 +0300814 spin_lock(&dest->dst_lock);
815 ip_vs_dst_reset(dest);
816 spin_unlock(&dest->dst_lock);
817
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200818 if (add)
Hans Schillstrom29c20262011-01-03 14:44:54 +0100819 ip_vs_new_estimator(svc->net, &dest->stats);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200820
821 write_lock_bh(&__ip_vs_svc_lock);
822
823 /* Wait until all other svc users go away */
824 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
825
826 if (add) {
827 list_add(&dest->n_list, &svc->destinations);
828 svc->num_dests++;
829 }
830
831 /* call the update_service, because server weight may be changed */
832 if (svc->scheduler->update_service)
833 svc->scheduler->update_service(svc);
834
835 write_unlock_bh(&__ip_vs_svc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836}
837
838
839/*
840 * Create a destination for the given service
841 */
842static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200843ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844 struct ip_vs_dest **dest_p)
845{
846 struct ip_vs_dest *dest;
847 unsigned atype;
848
849 EnterFunction(2);
850
Vince Busam09571c72008-09-02 15:55:52 +0200851#ifdef CONFIG_IP_VS_IPV6
852 if (svc->af == AF_INET6) {
853 atype = ipv6_addr_type(&udest->addr.in6);
Sven Wegener3bfb92f2008-09-05 16:53:49 +0200854 if ((!(atype & IPV6_ADDR_UNICAST) ||
855 atype & IPV6_ADDR_LINKLOCAL) &&
Vince Busam09571c72008-09-02 15:55:52 +0200856 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
857 return -EINVAL;
858 } else
859#endif
860 {
861 atype = inet_addr_type(&init_net, udest->addr.ip);
862 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
863 return -EINVAL;
864 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865
Simon Hormandee06e42010-08-26 02:54:31 +0000866 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000868 pr_err("%s(): no memory.\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 return -ENOMEM;
870 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871
Julius Volzc860c6b2008-09-02 15:55:36 +0200872 dest->af = svc->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873 dest->protocol = svc->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +0200874 dest->vaddr = svc->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700875 dest->vport = svc->port;
876 dest->vfwmark = svc->fwmark;
Julius Volzc860c6b2008-09-02 15:55:36 +0200877 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878 dest->port = udest->port;
879
880 atomic_set(&dest->activeconns, 0);
881 atomic_set(&dest->inactconns, 0);
882 atomic_set(&dest->persistconns, 0);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200883 atomic_set(&dest->refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884
885 INIT_LIST_HEAD(&dest->d_list);
886 spin_lock_init(&dest->dst_lock);
887 spin_lock_init(&dest->stats.lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200888 __ip_vs_update_dest(svc, dest, udest, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889
890 *dest_p = dest;
891
892 LeaveFunction(2);
893 return 0;
894}
895
896
897/*
898 * Add a destination into an existing service
899 */
900static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200901ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902{
903 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200904 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700905 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906 int ret;
907
908 EnterFunction(2);
909
910 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000911 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912 return -ERANGE;
913 }
914
915 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000916 pr_err("%s(): lower threshold is higher than upper threshold\n",
917 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 return -ERANGE;
919 }
920
Julius Volzc860c6b2008-09-02 15:55:36 +0200921 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
922
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 /*
924 * Check if the dest already exists in the list
925 */
Julius Volz7937df12008-09-02 15:55:48 +0200926 dest = ip_vs_lookup_dest(svc, &daddr, dport);
927
Linus Torvalds1da177e2005-04-16 15:20:36 -0700928 if (dest != NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000929 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 return -EEXIST;
931 }
932
933 /*
934 * Check if the dest already exists in the trash and
935 * is from the same service
936 */
Julius Volz7937df12008-09-02 15:55:48 +0200937 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
938
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939 if (dest != NULL) {
Julius Volzcfc78c52008-09-02 15:55:53 +0200940 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
941 "dest->refcnt=%d, service %u/%s:%u\n",
942 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
943 atomic_read(&dest->refcnt),
944 dest->vfwmark,
945 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
946 ntohs(dest->vport));
947
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948 /*
949 * Get the destination from the trash
950 */
951 list_del(&dest->n_list);
952
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200953 __ip_vs_update_dest(svc, dest, udest, 1);
954 ret = 0;
955 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 /*
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200957 * Allocate and initialize the dest structure
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200959 ret = ip_vs_new_dest(svc, udest, &dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961 LeaveFunction(2);
962
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200963 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964}
965
966
967/*
968 * Edit a destination in the given service
969 */
970static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200971ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972{
973 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200974 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700975 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976
977 EnterFunction(2);
978
979 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000980 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 return -ERANGE;
982 }
983
984 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000985 pr_err("%s(): lower threshold is higher than upper threshold\n",
986 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 return -ERANGE;
988 }
989
Julius Volzc860c6b2008-09-02 15:55:36 +0200990 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
991
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992 /*
993 * Lookup the destination list
994 */
Julius Volz7937df12008-09-02 15:55:48 +0200995 dest = ip_vs_lookup_dest(svc, &daddr, dport);
996
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000998 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999 return -ENOENT;
1000 }
1001
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001002 __ip_vs_update_dest(svc, dest, udest, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003 LeaveFunction(2);
1004
1005 return 0;
1006}
1007
1008
1009/*
1010 * Delete a destination (must be already unlinked from the service)
1011 */
Hans Schillstrom29c20262011-01-03 14:44:54 +01001012static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013{
Hans Schillstrom29c20262011-01-03 14:44:54 +01001014 ip_vs_kill_estimator(net, &dest->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015
1016 /*
1017 * Remove it from the d-linked list with the real services.
1018 */
1019 write_lock_bh(&__ip_vs_rs_lock);
1020 ip_vs_rs_unhash(dest);
1021 write_unlock_bh(&__ip_vs_rs_lock);
1022
1023 /*
1024 * Decrease the refcnt of the dest, and free the dest
1025 * if nobody refers to it (refcnt=0). Otherwise, throw
1026 * the destination into the trash.
1027 */
1028 if (atomic_dec_and_test(&dest->refcnt)) {
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001029 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1030 dest->vfwmark,
1031 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1032 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033 ip_vs_dst_reset(dest);
1034 /* simply decrease svc->refcnt here, let the caller check
1035 and release the service if nobody refers to it.
1036 Only user context can release destination and service,
1037 and only one user context can update virtual service at a
1038 time, so the operation here is OK */
1039 atomic_dec(&dest->svc->refcnt);
1040 kfree(dest);
1041 } else {
Julius Volzcfc78c52008-09-02 15:55:53 +02001042 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1043 "dest->refcnt=%d\n",
1044 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1045 ntohs(dest->port),
1046 atomic_read(&dest->refcnt));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047 list_add(&dest->n_list, &ip_vs_dest_trash);
1048 atomic_inc(&dest->refcnt);
1049 }
1050}
1051
1052
1053/*
1054 * Unlink a destination from the given service
1055 */
1056static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1057 struct ip_vs_dest *dest,
1058 int svcupd)
1059{
1060 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1061
1062 /*
1063 * Remove it from the d-linked destination list.
1064 */
1065 list_del(&dest->n_list);
1066 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001067
1068 /*
1069 * Call the update_service function of its scheduler
1070 */
1071 if (svcupd && svc->scheduler->update_service)
1072 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073}
1074
1075
1076/*
1077 * Delete a destination server in the given service
1078 */
1079static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001080ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081{
1082 struct ip_vs_dest *dest;
Hans Schillstrom29c20262011-01-03 14:44:54 +01001083 struct net *net = svc->net;
Al Viro014d7302006-09-28 14:29:52 -07001084 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085
1086 EnterFunction(2);
1087
Julius Volz7937df12008-09-02 15:55:48 +02001088 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
Julius Volzc860c6b2008-09-02 15:55:36 +02001089
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001091 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092 return -ENOENT;
1093 }
1094
1095 write_lock_bh(&__ip_vs_svc_lock);
1096
1097 /*
1098 * Wait until all other svc users go away.
1099 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001100 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101
1102 /*
1103 * Unlink dest from the service
1104 */
1105 __ip_vs_unlink_dest(svc, dest, 1);
1106
1107 write_unlock_bh(&__ip_vs_svc_lock);
1108
1109 /*
1110 * Delete the destination
1111 */
Hans Schillstrom29c20262011-01-03 14:44:54 +01001112 __ip_vs_del_dest(net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113
1114 LeaveFunction(2);
1115
1116 return 0;
1117}
1118
1119
1120/*
1121 * Add a service into the service hash table
1122 */
1123static int
Hans Schillstromfc723252011-01-03 14:44:43 +01001124ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
Julius Volzc860c6b2008-09-02 15:55:36 +02001125 struct ip_vs_service **svc_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126{
1127 int ret = 0;
1128 struct ip_vs_scheduler *sched = NULL;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001129 struct ip_vs_pe *pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130 struct ip_vs_service *svc = NULL;
1131
1132 /* increase the module use count */
1133 ip_vs_use_count_inc();
1134
1135 /* Lookup the scheduler by 'u->sched_name' */
1136 sched = ip_vs_scheduler_get(u->sched_name);
1137 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001138 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 ret = -ENOENT;
Simon Horman6e08bfb2010-08-22 21:37:52 +09001140 goto out_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141 }
1142
Simon Horman0d1e71b2010-08-22 21:37:54 +09001143 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001144 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001145 if (pe == NULL) {
1146 pr_info("persistence engine module ip_vs_pe_%s "
1147 "not found\n", u->pe_name);
1148 ret = -ENOENT;
1149 goto out_err;
1150 }
1151 }
1152
Julius Volzf94fd042008-09-02 15:55:55 +02001153#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001154 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1155 ret = -EINVAL;
1156 goto out_err;
Julius Volzf94fd042008-09-02 15:55:55 +02001157 }
1158#endif
1159
Simon Hormandee06e42010-08-26 02:54:31 +00001160 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161 if (svc == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001162 IP_VS_DBG(1, "%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163 ret = -ENOMEM;
1164 goto out_err;
1165 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166
1167 /* I'm the first user of the service */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001168 atomic_set(&svc->usecnt, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169 atomic_set(&svc->refcnt, 0);
1170
Julius Volzc860c6b2008-09-02 15:55:36 +02001171 svc->af = u->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172 svc->protocol = u->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +02001173 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174 svc->port = u->port;
1175 svc->fwmark = u->fwmark;
1176 svc->flags = u->flags;
1177 svc->timeout = u->timeout * HZ;
1178 svc->netmask = u->netmask;
Hans Schillstromfc723252011-01-03 14:44:43 +01001179 svc->net = net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180
1181 INIT_LIST_HEAD(&svc->destinations);
1182 rwlock_init(&svc->sched_lock);
1183 spin_lock_init(&svc->stats.lock);
1184
1185 /* Bind the scheduler */
1186 ret = ip_vs_bind_scheduler(svc, sched);
1187 if (ret)
1188 goto out_err;
1189 sched = NULL;
1190
Simon Horman0d1e71b2010-08-22 21:37:54 +09001191 /* Bind the ct retriever */
1192 ip_vs_bind_pe(svc, pe);
1193 pe = NULL;
1194
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 /* Update the virtual service counters */
1196 if (svc->port == FTPPORT)
1197 atomic_inc(&ip_vs_ftpsvc_counter);
1198 else if (svc->port == 0)
1199 atomic_inc(&ip_vs_nullsvc_counter);
1200
Hans Schillstrom29c20262011-01-03 14:44:54 +01001201 ip_vs_new_estimator(net, &svc->stats);
Julius Volzf94fd042008-09-02 15:55:55 +02001202
1203 /* Count only IPv4 services for old get/setsockopt interface */
1204 if (svc->af == AF_INET)
1205 ip_vs_num_services++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206
1207 /* Hash the service into the service table */
1208 write_lock_bh(&__ip_vs_svc_lock);
1209 ip_vs_svc_hash(svc);
1210 write_unlock_bh(&__ip_vs_svc_lock);
1211
1212 *svc_p = svc;
1213 return 0;
1214
Simon Horman6e08bfb2010-08-22 21:37:52 +09001215 out_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216 if (svc != NULL) {
Simon Horman2fabf352010-08-22 21:37:52 +09001217 ip_vs_unbind_scheduler(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218 if (svc->inc) {
1219 local_bh_disable();
1220 ip_vs_app_inc_put(svc->inc);
1221 local_bh_enable();
1222 }
1223 kfree(svc);
1224 }
1225 ip_vs_scheduler_put(sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001226 ip_vs_pe_put(pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228 /* decrease the module use count */
1229 ip_vs_use_count_dec();
1230
1231 return ret;
1232}
1233
1234
1235/*
1236 * Edit a service and bind it with a new scheduler
1237 */
1238static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001239ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240{
1241 struct ip_vs_scheduler *sched, *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001242 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243 int ret = 0;
1244
1245 /*
1246 * Lookup the scheduler, by 'u->sched_name'
1247 */
1248 sched = ip_vs_scheduler_get(u->sched_name);
1249 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001250 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251 return -ENOENT;
1252 }
1253 old_sched = sched;
1254
Simon Horman0d1e71b2010-08-22 21:37:54 +09001255 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001256 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001257 if (pe == NULL) {
1258 pr_info("persistence engine module ip_vs_pe_%s "
1259 "not found\n", u->pe_name);
1260 ret = -ENOENT;
1261 goto out;
1262 }
1263 old_pe = pe;
1264 }
1265
Julius Volzf94fd042008-09-02 15:55:55 +02001266#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001267 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1268 ret = -EINVAL;
1269 goto out;
Julius Volzf94fd042008-09-02 15:55:55 +02001270 }
1271#endif
1272
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273 write_lock_bh(&__ip_vs_svc_lock);
1274
1275 /*
1276 * Wait until all other svc users go away.
1277 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001278 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279
1280 /*
1281 * Set the flags and timeout value
1282 */
1283 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1284 svc->timeout = u->timeout * HZ;
1285 svc->netmask = u->netmask;
1286
1287 old_sched = svc->scheduler;
1288 if (sched != old_sched) {
1289 /*
1290 * Unbind the old scheduler
1291 */
1292 if ((ret = ip_vs_unbind_scheduler(svc))) {
1293 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001294 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 }
1296
1297 /*
1298 * Bind the new scheduler
1299 */
1300 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1301 /*
1302 * If ip_vs_bind_scheduler fails, restore the old
1303 * scheduler.
1304 * The main reason of failure is out of memory.
1305 *
1306 * The question is if the old scheduler can be
1307 * restored all the time. TODO: if it cannot be
1308 * restored some time, we must delete the service,
1309 * otherwise the system may crash.
1310 */
1311 ip_vs_bind_scheduler(svc, old_sched);
1312 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001313 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314 }
1315 }
1316
Simon Horman0d1e71b2010-08-22 21:37:54 +09001317 old_pe = svc->pe;
1318 if (pe != old_pe) {
1319 ip_vs_unbind_pe(svc);
1320 ip_vs_bind_pe(svc, pe);
1321 }
1322
Simon Horman9e691ed2008-09-17 10:10:41 +10001323 out_unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324 write_unlock_bh(&__ip_vs_svc_lock);
Simon Horman9e691ed2008-09-17 10:10:41 +10001325 out:
Simon Horman6e08bfb2010-08-22 21:37:52 +09001326 ip_vs_scheduler_put(old_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001327 ip_vs_pe_put(old_pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328 return ret;
1329}
1330
1331
1332/*
1333 * Delete a service from the service list
1334 * - The service must be unlinked, unlocked and not referenced!
1335 * - We are called under _bh lock
1336 */
1337static void __ip_vs_del_service(struct ip_vs_service *svc)
1338{
1339 struct ip_vs_dest *dest, *nxt;
1340 struct ip_vs_scheduler *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001341 struct ip_vs_pe *old_pe;
1342
1343 pr_info("%s: enter\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001344
Julius Volzf94fd042008-09-02 15:55:55 +02001345 /* Count only IPv4 services for old get/setsockopt interface */
1346 if (svc->af == AF_INET)
1347 ip_vs_num_services--;
1348
Hans Schillstrom29c20262011-01-03 14:44:54 +01001349 ip_vs_kill_estimator(svc->net, &svc->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350
1351 /* Unbind scheduler */
1352 old_sched = svc->scheduler;
1353 ip_vs_unbind_scheduler(svc);
Simon Horman6e08bfb2010-08-22 21:37:52 +09001354 ip_vs_scheduler_put(old_sched);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355
Simon Horman0d1e71b2010-08-22 21:37:54 +09001356 /* Unbind persistence engine */
1357 old_pe = svc->pe;
1358 ip_vs_unbind_pe(svc);
1359 ip_vs_pe_put(old_pe);
1360
Linus Torvalds1da177e2005-04-16 15:20:36 -07001361 /* Unbind app inc */
1362 if (svc->inc) {
1363 ip_vs_app_inc_put(svc->inc);
1364 svc->inc = NULL;
1365 }
1366
1367 /*
1368 * Unlink the whole destination list
1369 */
1370 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1371 __ip_vs_unlink_dest(svc, dest, 0);
Hans Schillstrom29c20262011-01-03 14:44:54 +01001372 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373 }
1374
1375 /*
1376 * Update the virtual service counters
1377 */
1378 if (svc->port == FTPPORT)
1379 atomic_dec(&ip_vs_ftpsvc_counter);
1380 else if (svc->port == 0)
1381 atomic_dec(&ip_vs_nullsvc_counter);
1382
1383 /*
1384 * Free the service if nobody refers to it
1385 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001386 if (atomic_read(&svc->refcnt) == 0) {
1387 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1388 svc->fwmark,
1389 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1390 ntohs(svc->port), atomic_read(&svc->usecnt));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001392 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393
1394 /* decrease the module use count */
1395 ip_vs_use_count_dec();
1396}
1397
1398/*
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001399 * Unlink a service from list and try to delete it if its refcnt reached 0
Linus Torvalds1da177e2005-04-16 15:20:36 -07001400 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001401static void ip_vs_unlink_service(struct ip_vs_service *svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403 /*
1404 * Unhash it from the service table
1405 */
1406 write_lock_bh(&__ip_vs_svc_lock);
1407
1408 ip_vs_svc_unhash(svc);
1409
1410 /*
1411 * Wait until all the svc users go away.
1412 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001413 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414
1415 __ip_vs_del_service(svc);
1416
1417 write_unlock_bh(&__ip_vs_svc_lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001418}
1419
1420/*
1421 * Delete a service from the service list
1422 */
1423static int ip_vs_del_service(struct ip_vs_service *svc)
1424{
1425 if (svc == NULL)
1426 return -EEXIST;
1427 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428
1429 return 0;
1430}
1431
1432
1433/*
1434 * Flush all the virtual services
1435 */
Hans Schillstromfc723252011-01-03 14:44:43 +01001436static int ip_vs_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001437{
1438 int idx;
1439 struct ip_vs_service *svc, *nxt;
1440
1441 /*
Hans Schillstromfc723252011-01-03 14:44:43 +01001442 * Flush the service table hashed by <netns,protocol,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -07001443 */
1444 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001445 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1446 s_list) {
1447 if (net_eq(svc->net, net))
1448 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449 }
1450 }
1451
1452 /*
1453 * Flush the service table hashed by fwmark
1454 */
1455 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1456 list_for_each_entry_safe(svc, nxt,
1457 &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001458 if (net_eq(svc->net, net))
1459 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460 }
1461 }
1462
1463 return 0;
1464}
1465
1466
1467/*
1468 * Zero counters in a service or all services
1469 */
1470static int ip_vs_zero_service(struct ip_vs_service *svc)
1471{
1472 struct ip_vs_dest *dest;
1473
1474 write_lock_bh(&__ip_vs_svc_lock);
1475 list_for_each_entry(dest, &svc->destinations, n_list) {
1476 ip_vs_zero_stats(&dest->stats);
1477 }
1478 ip_vs_zero_stats(&svc->stats);
1479 write_unlock_bh(&__ip_vs_svc_lock);
1480 return 0;
1481}
1482
Hans Schillstromfc723252011-01-03 14:44:43 +01001483static int ip_vs_zero_all(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484{
1485 int idx;
1486 struct ip_vs_service *svc;
1487
1488 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1489 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001490 if (net_eq(svc->net, net))
1491 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492 }
1493 }
1494
1495 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1496 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001497 if (net_eq(svc->net, net))
1498 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499 }
1500 }
1501
1502 ip_vs_zero_stats(&ip_vs_stats);
1503 return 0;
1504}
1505
1506
1507static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001508proc_do_defense_mode(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001509 void __user *buffer, size_t *lenp, loff_t *ppos)
1510{
Hans Schillstrom93304192011-01-03 14:44:51 +01001511 struct net *net = current->nsproxy->net_ns;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512 int *valp = table->data;
1513 int val = *valp;
1514 int rc;
1515
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001516 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001517 if (write && (*valp != val)) {
1518 if ((*valp < 0) || (*valp > 3)) {
1519 /* Restore the correct value */
1520 *valp = val;
1521 } else {
Hans Schillstrom93304192011-01-03 14:44:51 +01001522 update_defense_level(net_ipvs(net));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001523 }
1524 }
1525 return rc;
1526}
1527
1528
1529static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001530proc_do_sync_threshold(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001531 void __user *buffer, size_t *lenp, loff_t *ppos)
1532{
1533 int *valp = table->data;
1534 int val[2];
1535 int rc;
1536
1537 /* backup the value first */
1538 memcpy(val, valp, sizeof(val));
1539
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001540 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001541 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1542 /* Restore the correct value */
1543 memcpy(valp, val, sizeof(val));
1544 }
1545 return rc;
1546}
1547
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001548static int
1549proc_do_sync_mode(ctl_table *table, int write,
1550 void __user *buffer, size_t *lenp, loff_t *ppos)
1551{
1552 int *valp = table->data;
1553 int val = *valp;
1554 int rc;
1555
1556 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1557 if (write && (*valp != val)) {
1558 if ((*valp < 0) || (*valp > 1)) {
1559 /* Restore the correct value */
1560 *valp = val;
1561 } else {
Hans Schillstromf1313152011-01-03 14:44:55 +01001562 struct net *net = current->nsproxy->net_ns;
1563 ip_vs_sync_switch_mode(net, val);
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001564 }
1565 }
1566 return rc;
1567}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568
1569/*
1570 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1571 */
1572
1573static struct ctl_table vs_vars[] = {
1574 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575 .procname = "amemthresh",
1576 .data = &sysctl_ip_vs_amemthresh,
1577 .maxlen = sizeof(int),
1578 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001579 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580 },
1581#ifdef CONFIG_IP_VS_DEBUG
1582 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583 .procname = "debug_level",
1584 .data = &sysctl_ip_vs_debug_level,
1585 .maxlen = sizeof(int),
1586 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001587 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 },
1589#endif
1590 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591 .procname = "am_droprate",
1592 .data = &sysctl_ip_vs_am_droprate,
1593 .maxlen = sizeof(int),
1594 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001595 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596 },
1597 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598 .procname = "drop_entry",
1599 .data = &sysctl_ip_vs_drop_entry,
1600 .maxlen = sizeof(int),
1601 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001602 .proc_handler = proc_do_defense_mode,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603 },
1604 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001605 .procname = "drop_packet",
1606 .data = &sysctl_ip_vs_drop_packet,
1607 .maxlen = sizeof(int),
1608 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001609 .proc_handler = proc_do_defense_mode,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610 },
Julian Anastasovf4bc17c2010-09-21 17:35:41 +02001611#ifdef CONFIG_IP_VS_NFCT
1612 {
1613 .procname = "conntrack",
1614 .data = &sysctl_ip_vs_conntrack,
1615 .maxlen = sizeof(int),
1616 .mode = 0644,
1617 .proc_handler = &proc_dointvec,
1618 },
1619#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621 .procname = "secure_tcp",
1622 .data = &sysctl_ip_vs_secure_tcp,
1623 .maxlen = sizeof(int),
1624 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001625 .proc_handler = proc_do_defense_mode,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626 },
Julian Anastasov8a803042010-09-21 17:38:57 +02001627 {
1628 .procname = "snat_reroute",
1629 .data = &sysctl_ip_vs_snat_reroute,
1630 .maxlen = sizeof(int),
1631 .mode = 0644,
1632 .proc_handler = &proc_dointvec,
1633 },
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001634 {
1635 .procname = "sync_version",
1636 .data = &sysctl_ip_vs_sync_ver,
1637 .maxlen = sizeof(int),
1638 .mode = 0644,
1639 .proc_handler = &proc_do_sync_mode,
1640 },
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641#if 0
1642 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643 .procname = "timeout_established",
1644 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1645 .maxlen = sizeof(int),
1646 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001647 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648 },
1649 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650 .procname = "timeout_synsent",
1651 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1652 .maxlen = sizeof(int),
1653 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001654 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655 },
1656 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001657 .procname = "timeout_synrecv",
1658 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1659 .maxlen = sizeof(int),
1660 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001661 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662 },
1663 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664 .procname = "timeout_finwait",
1665 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1666 .maxlen = sizeof(int),
1667 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001668 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 },
1670 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671 .procname = "timeout_timewait",
1672 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1673 .maxlen = sizeof(int),
1674 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001675 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 },
1677 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678 .procname = "timeout_close",
1679 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1680 .maxlen = sizeof(int),
1681 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001682 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 },
1684 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685 .procname = "timeout_closewait",
1686 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1687 .maxlen = sizeof(int),
1688 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001689 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690 },
1691 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692 .procname = "timeout_lastack",
1693 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1694 .maxlen = sizeof(int),
1695 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001696 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 },
1698 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699 .procname = "timeout_listen",
1700 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1701 .maxlen = sizeof(int),
1702 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001703 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704 },
1705 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706 .procname = "timeout_synack",
1707 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1708 .maxlen = sizeof(int),
1709 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001710 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 },
1712 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713 .procname = "timeout_udp",
1714 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1715 .maxlen = sizeof(int),
1716 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001717 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718 },
1719 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720 .procname = "timeout_icmp",
1721 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1722 .maxlen = sizeof(int),
1723 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001724 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 },
1726#endif
1727 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728 .procname = "cache_bypass",
1729 .data = &sysctl_ip_vs_cache_bypass,
1730 .maxlen = sizeof(int),
1731 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001732 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733 },
1734 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001735 .procname = "expire_nodest_conn",
1736 .data = &sysctl_ip_vs_expire_nodest_conn,
1737 .maxlen = sizeof(int),
1738 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001739 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740 },
1741 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742 .procname = "expire_quiescent_template",
1743 .data = &sysctl_ip_vs_expire_quiescent_template,
1744 .maxlen = sizeof(int),
1745 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001746 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747 },
1748 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749 .procname = "sync_threshold",
1750 .data = &sysctl_ip_vs_sync_threshold,
1751 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1752 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001753 .proc_handler = proc_do_sync_threshold,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001754 },
1755 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756 .procname = "nat_icmp_send",
1757 .data = &sysctl_ip_vs_nat_icmp_send,
1758 .maxlen = sizeof(int),
1759 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001760 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001762 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001763};
1764
Sven Wegener5587da52008-08-10 18:24:40 +00001765const struct ctl_path net_vs_ctl_path[] = {
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001766 { .procname = "net", },
1767 { .procname = "ipv4", },
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001768 { .procname = "vs", },
1769 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001771EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772
/*
 * NOTE(review): presumably the handle obtained when the IPVS sysctl table
 * is registered; the registration code is not part of this excerpt.
 */
static struct ctl_table_header *sysctl_header;
1774
1775#ifdef CONFIG_PROC_FS
1776
1777struct ip_vs_iter {
Hans Schillstromfc723252011-01-03 14:44:43 +01001778 struct seq_net_private p; /* Do not move this, netns depends upon it*/
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779 struct list_head *table;
1780 int bucket;
1781};
1782
1783/*
1784 * Write the contents of the VS rule table to a PROCfs file.
1785 * (It is kept just for backward compatibility)
1786 */
1787static inline const char *ip_vs_fwd_name(unsigned flags)
1788{
1789 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1790 case IP_VS_CONN_F_LOCALNODE:
1791 return "Local";
1792 case IP_VS_CONN_F_TUNNEL:
1793 return "Tunnel";
1794 case IP_VS_CONN_F_DROUTE:
1795 return "Route";
1796 default:
1797 return "Masq";
1798 }
1799}
1800
1801
1802/* Get the Nth entry in the two lists */
1803static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1804{
Hans Schillstromfc723252011-01-03 14:44:43 +01001805 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806 struct ip_vs_iter *iter = seq->private;
1807 int idx;
1808 struct ip_vs_service *svc;
1809
1810 /* look in hash by protocol */
1811 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1812 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001813 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814 iter->table = ip_vs_svc_table;
1815 iter->bucket = idx;
1816 return svc;
1817 }
1818 }
1819 }
1820
1821 /* keep looking in fwmark */
1822 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1823 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001824 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825 iter->table = ip_vs_svc_fwm_table;
1826 iter->bucket = idx;
1827 return svc;
1828 }
1829 }
1830 }
1831
1832 return NULL;
1833}
1834
1835static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
Simon Horman563e94f2008-09-17 10:10:42 +10001836__acquires(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001837{
1838
1839 read_lock_bh(&__ip_vs_svc_lock);
1840 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1841}
1842
1843
1844static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1845{
1846 struct list_head *e;
1847 struct ip_vs_iter *iter;
1848 struct ip_vs_service *svc;
1849
1850 ++*pos;
1851 if (v == SEQ_START_TOKEN)
1852 return ip_vs_info_array(seq,0);
1853
1854 svc = v;
1855 iter = seq->private;
1856
1857 if (iter->table == ip_vs_svc_table) {
1858 /* next service in table hashed by protocol */
1859 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1860 return list_entry(e, struct ip_vs_service, s_list);
1861
1862
1863 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1864 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1865 s_list) {
1866 return svc;
1867 }
1868 }
1869
1870 iter->table = ip_vs_svc_fwm_table;
1871 iter->bucket = -1;
1872 goto scan_fwmark;
1873 }
1874
1875 /* next service in hashed by fwmark */
1876 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1877 return list_entry(e, struct ip_vs_service, f_list);
1878
1879 scan_fwmark:
1880 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1881 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1882 f_list)
1883 return svc;
1884 }
1885
1886 return NULL;
1887}
1888
1889static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
Simon Horman563e94f2008-09-17 10:10:42 +10001890__releases(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891{
1892 read_unlock_bh(&__ip_vs_svc_lock);
1893}
1894
1895
1896static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1897{
1898 if (v == SEQ_START_TOKEN) {
1899 seq_printf(seq,
1900 "IP Virtual Server version %d.%d.%d (size=%d)\n",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01001901 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001902 seq_puts(seq,
1903 "Prot LocalAddress:Port Scheduler Flags\n");
1904 seq_puts(seq,
1905 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1906 } else {
1907 const struct ip_vs_service *svc = v;
1908 const struct ip_vs_iter *iter = seq->private;
1909 const struct ip_vs_dest *dest;
1910
Vince Busam667a5f12008-09-02 15:55:49 +02001911 if (iter->table == ip_vs_svc_table) {
1912#ifdef CONFIG_IP_VS_IPV6
1913 if (svc->af == AF_INET6)
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001914 seq_printf(seq, "%s [%pI6]:%04X %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001915 ip_vs_proto_name(svc->protocol),
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001916 &svc->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001917 ntohs(svc->port),
1918 svc->scheduler->name);
1919 else
1920#endif
Nick Chalk26ec0372010-06-22 08:07:01 +02001921 seq_printf(seq, "%s %08X:%04X %s %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001922 ip_vs_proto_name(svc->protocol),
1923 ntohl(svc->addr.ip),
1924 ntohs(svc->port),
Nick Chalk26ec0372010-06-22 08:07:01 +02001925 svc->scheduler->name,
1926 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001927 } else {
Nick Chalk26ec0372010-06-22 08:07:01 +02001928 seq_printf(seq, "FWM %08X %s %s",
1929 svc->fwmark, svc->scheduler->name,
1930 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001931 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001932
1933 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1934 seq_printf(seq, "persistent %d %08X\n",
1935 svc->timeout,
1936 ntohl(svc->netmask));
1937 else
1938 seq_putc(seq, '\n');
1939
1940 list_for_each_entry(dest, &svc->destinations, n_list) {
Vince Busam667a5f12008-09-02 15:55:49 +02001941#ifdef CONFIG_IP_VS_IPV6
1942 if (dest->af == AF_INET6)
1943 seq_printf(seq,
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001944 " -> [%pI6]:%04X"
Vince Busam667a5f12008-09-02 15:55:49 +02001945 " %-7s %-6d %-10d %-10d\n",
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001946 &dest->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001947 ntohs(dest->port),
1948 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1949 atomic_read(&dest->weight),
1950 atomic_read(&dest->activeconns),
1951 atomic_read(&dest->inactconns));
1952 else
1953#endif
1954 seq_printf(seq,
1955 " -> %08X:%04X "
1956 "%-7s %-6d %-10d %-10d\n",
1957 ntohl(dest->addr.ip),
1958 ntohs(dest->port),
1959 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1960 atomic_read(&dest->weight),
1961 atomic_read(&dest->activeconns),
1962 atomic_read(&dest->inactconns));
1963
Linus Torvalds1da177e2005-04-16 15:20:36 -07001964 }
1965 }
1966 return 0;
1967}
1968
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001969static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970 .start = ip_vs_info_seq_start,
1971 .next = ip_vs_info_seq_next,
1972 .stop = ip_vs_info_seq_stop,
1973 .show = ip_vs_info_seq_show,
1974};
1975
1976static int ip_vs_info_open(struct inode *inode, struct file *file)
1977{
Hans Schillstromfc723252011-01-03 14:44:43 +01001978 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001979 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001980}
1981
Arjan van de Ven9a321442007-02-12 00:55:35 -08001982static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001983 .owner = THIS_MODULE,
1984 .open = ip_vs_info_open,
1985 .read = seq_read,
1986 .llseek = seq_lseek,
1987 .release = seq_release_private,
1988};
1989
1990#endif
1991
Sven Wegener519e49e2008-08-10 18:24:41 +00001992struct ip_vs_stats ip_vs_stats = {
1993 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1994};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001995
1996#ifdef CONFIG_PROC_FS
1997static int ip_vs_stats_show(struct seq_file *seq, void *v)
1998{
1999
2000/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2001 seq_puts(seq,
2002 " Total Incoming Outgoing Incoming Outgoing\n");
2003 seq_printf(seq,
2004 " Conns Packets Packets Bytes Bytes\n");
2005
2006 spin_lock_bh(&ip_vs_stats.lock);
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002007 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
2008 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
2009 (unsigned long long) ip_vs_stats.ustats.inbytes,
2010 (unsigned long long) ip_vs_stats.ustats.outbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002011
2012/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2013 seq_puts(seq,
2014 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2015 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002016 ip_vs_stats.ustats.cps,
2017 ip_vs_stats.ustats.inpps,
2018 ip_vs_stats.ustats.outpps,
2019 ip_vs_stats.ustats.inbps,
2020 ip_vs_stats.ustats.outbps);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002021 spin_unlock_bh(&ip_vs_stats.lock);
2022
2023 return 0;
2024}
2025
2026static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2027{
Hans Schillstromfc723252011-01-03 14:44:43 +01002028 return single_open_net(inode, file, ip_vs_stats_show);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002029}
2030
Arjan van de Ven9a321442007-02-12 00:55:35 -08002031static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002032 .owner = THIS_MODULE,
2033 .open = ip_vs_stats_seq_open,
2034 .read = seq_read,
2035 .llseek = seq_lseek,
2036 .release = single_release,
2037};
2038
2039#endif
2040
2041/*
2042 * Set timeout values for tcp tcpfin udp in the timeout_table.
2043 */
Hans Schillstrom93304192011-01-03 14:44:51 +01002044static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002045{
Hans Schillstrom93304192011-01-03 14:44:51 +01002046 struct ip_vs_proto_data *pd;
2047
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2049 u->tcp_timeout,
2050 u->tcp_fin_timeout,
2051 u->udp_timeout);
2052
2053#ifdef CONFIG_IP_VS_PROTO_TCP
2054 if (u->tcp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002055 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2056 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002057 = u->tcp_timeout * HZ;
2058 }
2059
2060 if (u->tcp_fin_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002061 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2062 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063 = u->tcp_fin_timeout * HZ;
2064 }
2065#endif
2066
2067#ifdef CONFIG_IP_VS_PROTO_UDP
2068 if (u->udp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002069 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2070 pd->timeout_table[IP_VS_UDP_S_NORMAL]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002071 = u->udp_timeout * HZ;
2072 }
2073#endif
2074 return 0;
2075}
2076
2077
2078#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2079#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2080#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2081 sizeof(struct ip_vs_dest_user))
2082#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2083#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2084#define MAX_ARG_LEN SVCDEST_ARG_LEN
2085
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002086static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002087 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2088 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2089 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2090 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2091 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2092 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2093 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2094 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2095 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2096 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2097 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2098};
2099
Julius Volzc860c6b2008-09-02 15:55:36 +02002100static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2101 struct ip_vs_service_user *usvc_compat)
2102{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002103 memset(usvc, 0, sizeof(*usvc));
2104
Julius Volzc860c6b2008-09-02 15:55:36 +02002105 usvc->af = AF_INET;
2106 usvc->protocol = usvc_compat->protocol;
2107 usvc->addr.ip = usvc_compat->addr;
2108 usvc->port = usvc_compat->port;
2109 usvc->fwmark = usvc_compat->fwmark;
2110
2111 /* Deep copy of sched_name is not needed here */
2112 usvc->sched_name = usvc_compat->sched_name;
2113
2114 usvc->flags = usvc_compat->flags;
2115 usvc->timeout = usvc_compat->timeout;
2116 usvc->netmask = usvc_compat->netmask;
2117}
2118
2119static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2120 struct ip_vs_dest_user *udest_compat)
2121{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002122 memset(udest, 0, sizeof(*udest));
2123
Julius Volzc860c6b2008-09-02 15:55:36 +02002124 udest->addr.ip = udest_compat->addr;
2125 udest->port = udest_compat->port;
2126 udest->conn_flags = udest_compat->conn_flags;
2127 udest->weight = udest_compat->weight;
2128 udest->u_threshold = udest_compat->u_threshold;
2129 udest->l_threshold = udest_compat->l_threshold;
2130}
2131
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132static int
2133do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2134{
Hans Schillstromfc723252011-01-03 14:44:43 +01002135 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002136 int ret;
2137 unsigned char arg[MAX_ARG_LEN];
Julius Volzc860c6b2008-09-02 15:55:36 +02002138 struct ip_vs_service_user *usvc_compat;
2139 struct ip_vs_service_user_kern usvc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002140 struct ip_vs_service *svc;
Julius Volzc860c6b2008-09-02 15:55:36 +02002141 struct ip_vs_dest_user *udest_compat;
2142 struct ip_vs_dest_user_kern udest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002143
2144 if (!capable(CAP_NET_ADMIN))
2145 return -EPERM;
2146
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002147 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2148 return -EINVAL;
2149 if (len < 0 || len > MAX_ARG_LEN)
2150 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151 if (len != set_arglen[SET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002152 pr_err("set_ctl: len %u != %u\n",
2153 len, set_arglen[SET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154 return -EINVAL;
2155 }
2156
2157 if (copy_from_user(arg, user, len) != 0)
2158 return -EFAULT;
2159
2160 /* increase the module use count */
2161 ip_vs_use_count_inc();
2162
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002163 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164 ret = -ERESTARTSYS;
2165 goto out_dec;
2166 }
2167
2168 if (cmd == IP_VS_SO_SET_FLUSH) {
2169 /* Flush the virtual service */
Hans Schillstromfc723252011-01-03 14:44:43 +01002170 ret = ip_vs_flush(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171 goto out_unlock;
2172 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2173 /* Set timeout values for (tcp tcpfin udp) */
Hans Schillstrom93304192011-01-03 14:44:51 +01002174 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002175 goto out_unlock;
2176 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2177 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002178 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2179 dm->syncid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180 goto out_unlock;
2181 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2182 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002183 ret = stop_sync_thread(net, dm->state);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002184 goto out_unlock;
2185 }
2186
Julius Volzc860c6b2008-09-02 15:55:36 +02002187 usvc_compat = (struct ip_vs_service_user *)arg;
2188 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2189
2190 /* We only use the new structs internally, so copy userspace compat
2191 * structs to extended internal versions */
2192 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2193 ip_vs_copy_udest_compat(&udest, udest_compat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194
2195 if (cmd == IP_VS_SO_SET_ZERO) {
2196 /* if no service address is set, zero counters in all */
Julius Volzc860c6b2008-09-02 15:55:36 +02002197 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002198 ret = ip_vs_zero_all(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199 goto out_unlock;
2200 }
2201 }
2202
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +01002203 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2204 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2205 usvc.protocol != IPPROTO_SCTP) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002206 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2207 usvc.protocol, &usvc.addr.ip,
2208 ntohs(usvc.port), usvc.sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002209 ret = -EFAULT;
2210 goto out_unlock;
2211 }
2212
2213 /* Lookup the exact service by <protocol, addr, port> or fwmark */
Julius Volzc860c6b2008-09-02 15:55:36 +02002214 if (usvc.fwmark == 0)
Hans Schillstromfc723252011-01-03 14:44:43 +01002215 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002216 &usvc.addr, usvc.port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002218 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219
2220 if (cmd != IP_VS_SO_SET_ADD
Julius Volzc860c6b2008-09-02 15:55:36 +02002221 && (svc == NULL || svc->protocol != usvc.protocol)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222 ret = -ESRCH;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002223 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002224 }
2225
2226 switch (cmd) {
2227 case IP_VS_SO_SET_ADD:
2228 if (svc != NULL)
2229 ret = -EEXIST;
2230 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002231 ret = ip_vs_add_service(net, &usvc, &svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232 break;
2233 case IP_VS_SO_SET_EDIT:
Julius Volzc860c6b2008-09-02 15:55:36 +02002234 ret = ip_vs_edit_service(svc, &usvc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002235 break;
2236 case IP_VS_SO_SET_DEL:
2237 ret = ip_vs_del_service(svc);
2238 if (!ret)
2239 goto out_unlock;
2240 break;
2241 case IP_VS_SO_SET_ZERO:
2242 ret = ip_vs_zero_service(svc);
2243 break;
2244 case IP_VS_SO_SET_ADDDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002245 ret = ip_vs_add_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002246 break;
2247 case IP_VS_SO_SET_EDITDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002248 ret = ip_vs_edit_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002249 break;
2250 case IP_VS_SO_SET_DELDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002251 ret = ip_vs_del_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002252 break;
2253 default:
2254 ret = -EINVAL;
2255 }
2256
Linus Torvalds1da177e2005-04-16 15:20:36 -07002257 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002258 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259 out_dec:
2260 /* decrease the module use count */
2261 ip_vs_use_count_dec();
2262
2263 return ret;
2264}
2265
2266
2267static void
2268ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2269{
2270 spin_lock_bh(&src->lock);
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002271 memcpy(dst, &src->ustats, sizeof(*dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002272 spin_unlock_bh(&src->lock);
2273}
2274
2275static void
2276ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2277{
2278 dst->protocol = src->protocol;
Julius Volze7ade462008-09-02 15:55:33 +02002279 dst->addr = src->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002280 dst->port = src->port;
2281 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002282 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283 dst->flags = src->flags;
2284 dst->timeout = src->timeout / HZ;
2285 dst->netmask = src->netmask;
2286 dst->num_dests = src->num_dests;
2287 ip_vs_copy_stats(&dst->stats, &src->stats);
2288}
2289
2290static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002291__ip_vs_get_service_entries(struct net *net,
2292 const struct ip_vs_get_services *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002293 struct ip_vs_get_services __user *uptr)
2294{
2295 int idx, count=0;
2296 struct ip_vs_service *svc;
2297 struct ip_vs_service_entry entry;
2298 int ret = 0;
2299
2300 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2301 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002302 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002303 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002304 continue;
2305
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306 if (count >= get->num_services)
2307 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002308 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002309 ip_vs_copy_service(&entry, svc);
2310 if (copy_to_user(&uptr->entrytable[count],
2311 &entry, sizeof(entry))) {
2312 ret = -EFAULT;
2313 goto out;
2314 }
2315 count++;
2316 }
2317 }
2318
2319 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2320 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002321 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002322 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002323 continue;
2324
Linus Torvalds1da177e2005-04-16 15:20:36 -07002325 if (count >= get->num_services)
2326 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002327 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328 ip_vs_copy_service(&entry, svc);
2329 if (copy_to_user(&uptr->entrytable[count],
2330 &entry, sizeof(entry))) {
2331 ret = -EFAULT;
2332 goto out;
2333 }
2334 count++;
2335 }
2336 }
2337 out:
2338 return ret;
2339}
2340
2341static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002342__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002343 struct ip_vs_get_dests __user *uptr)
2344{
2345 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002346 union nf_inet_addr addr = { .ip = get->addr };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347 int ret = 0;
2348
2349 if (get->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002350 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002351 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002352 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002353 get->port);
Julius Volzb18610d2008-09-02 15:55:37 +02002354
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355 if (svc) {
2356 int count = 0;
2357 struct ip_vs_dest *dest;
2358 struct ip_vs_dest_entry entry;
2359
2360 list_for_each_entry(dest, &svc->destinations, n_list) {
2361 if (count >= get->num_dests)
2362 break;
2363
Julius Volze7ade462008-09-02 15:55:33 +02002364 entry.addr = dest->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002365 entry.port = dest->port;
2366 entry.conn_flags = atomic_read(&dest->conn_flags);
2367 entry.weight = atomic_read(&dest->weight);
2368 entry.u_threshold = dest->u_threshold;
2369 entry.l_threshold = dest->l_threshold;
2370 entry.activeconns = atomic_read(&dest->activeconns);
2371 entry.inactconns = atomic_read(&dest->inactconns);
2372 entry.persistconns = atomic_read(&dest->persistconns);
2373 ip_vs_copy_stats(&entry.stats, &dest->stats);
2374 if (copy_to_user(&uptr->entrytable[count],
2375 &entry, sizeof(entry))) {
2376 ret = -EFAULT;
2377 break;
2378 }
2379 count++;
2380 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381 } else
2382 ret = -ESRCH;
2383 return ret;
2384}
2385
2386static inline void
Hans Schillstrom93304192011-01-03 14:44:51 +01002387__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002388{
Hans Schillstrom93304192011-01-03 14:44:51 +01002389 struct ip_vs_proto_data *pd;
2390
Linus Torvalds1da177e2005-04-16 15:20:36 -07002391#ifdef CONFIG_IP_VS_PROTO_TCP
Hans Schillstrom93304192011-01-03 14:44:51 +01002392 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2393 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2394 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002395#endif
2396#ifdef CONFIG_IP_VS_PROTO_UDP
Hans Schillstrom93304192011-01-03 14:44:51 +01002397 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398 u->udp_timeout =
Hans Schillstrom93304192011-01-03 14:44:51 +01002399 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400#endif
2401}
2402
2403
2404#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2405#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2406#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2407#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2408#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2409#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2410#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2411
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002412static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2414 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2415 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2416 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2417 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2418 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2419 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2420};
2421
2422static int
2423do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2424{
2425 unsigned char arg[128];
2426 int ret = 0;
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002427 unsigned int copylen;
Hans Schillstromfc723252011-01-03 14:44:43 +01002428 struct net *net = sock_net(sk);
Hans Schillstromf1313152011-01-03 14:44:55 +01002429 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002430
Hans Schillstromfc723252011-01-03 14:44:43 +01002431 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002432 if (!capable(CAP_NET_ADMIN))
2433 return -EPERM;
2434
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002435 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2436 return -EINVAL;
2437
Linus Torvalds1da177e2005-04-16 15:20:36 -07002438 if (*len < get_arglen[GET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002439 pr_err("get_ctl: len %u < %u\n",
2440 *len, get_arglen[GET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441 return -EINVAL;
2442 }
2443
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002444 copylen = get_arglen[GET_CMDID(cmd)];
2445 if (copylen > 128)
2446 return -EINVAL;
2447
2448 if (copy_from_user(arg, user, copylen) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002449 return -EFAULT;
2450
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002451 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002452 return -ERESTARTSYS;
2453
2454 switch (cmd) {
2455 case IP_VS_SO_GET_VERSION:
2456 {
2457 char buf[64];
2458
2459 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002460 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002461 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2462 ret = -EFAULT;
2463 goto out;
2464 }
2465 *len = strlen(buf)+1;
2466 }
2467 break;
2468
2469 case IP_VS_SO_GET_INFO:
2470 {
2471 struct ip_vs_getinfo info;
2472 info.version = IP_VS_VERSION_CODE;
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002473 info.size = ip_vs_conn_tab_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002474 info.num_services = ip_vs_num_services;
2475 if (copy_to_user(user, &info, sizeof(info)) != 0)
2476 ret = -EFAULT;
2477 }
2478 break;
2479
2480 case IP_VS_SO_GET_SERVICES:
2481 {
2482 struct ip_vs_get_services *get;
2483 int size;
2484
2485 get = (struct ip_vs_get_services *)arg;
2486 size = sizeof(*get) +
2487 sizeof(struct ip_vs_service_entry) * get->num_services;
2488 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002489 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002490 ret = -EINVAL;
2491 goto out;
2492 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002493 ret = __ip_vs_get_service_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002494 }
2495 break;
2496
2497 case IP_VS_SO_GET_SERVICE:
2498 {
2499 struct ip_vs_service_entry *entry;
2500 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002501 union nf_inet_addr addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002502
2503 entry = (struct ip_vs_service_entry *)arg;
Julius Volzb18610d2008-09-02 15:55:37 +02002504 addr.ip = entry->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002505 if (entry->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002506 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002507 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002508 svc = __ip_vs_service_find(net, AF_INET,
2509 entry->protocol, &addr,
2510 entry->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511 if (svc) {
2512 ip_vs_copy_service(entry, svc);
2513 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2514 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002515 } else
2516 ret = -ESRCH;
2517 }
2518 break;
2519
2520 case IP_VS_SO_GET_DESTS:
2521 {
2522 struct ip_vs_get_dests *get;
2523 int size;
2524
2525 get = (struct ip_vs_get_dests *)arg;
2526 size = sizeof(*get) +
2527 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2528 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002529 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530 ret = -EINVAL;
2531 goto out;
2532 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002533 ret = __ip_vs_get_dest_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002534 }
2535 break;
2536
2537 case IP_VS_SO_GET_TIMEOUT:
2538 {
2539 struct ip_vs_timeout_user t;
2540
Hans Schillstrom93304192011-01-03 14:44:51 +01002541 __ip_vs_get_timeouts(net, &t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002542 if (copy_to_user(user, &t, sizeof(t)) != 0)
2543 ret = -EFAULT;
2544 }
2545 break;
2546
2547 case IP_VS_SO_GET_DAEMON:
2548 {
2549 struct ip_vs_daemon_user d[2];
2550
2551 memset(&d, 0, sizeof(d));
Hans Schillstromf1313152011-01-03 14:44:55 +01002552 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002553 d[0].state = IP_VS_STATE_MASTER;
Hans Schillstromf1313152011-01-03 14:44:55 +01002554 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2555 sizeof(d[0].mcast_ifn));
2556 d[0].syncid = ipvs->master_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002557 }
Hans Schillstromf1313152011-01-03 14:44:55 +01002558 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559 d[1].state = IP_VS_STATE_BACKUP;
Hans Schillstromf1313152011-01-03 14:44:55 +01002560 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2561 sizeof(d[1].mcast_ifn));
2562 d[1].syncid = ipvs->backup_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002563 }
2564 if (copy_to_user(user, &d, sizeof(d)) != 0)
2565 ret = -EFAULT;
2566 }
2567 break;
2568
2569 default:
2570 ret = -EINVAL;
2571 }
2572
2573 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002574 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002575 return ret;
2576}
2577
2578
2579static struct nf_sockopt_ops ip_vs_sockopts = {
2580 .pf = PF_INET,
2581 .set_optmin = IP_VS_BASE_CTL,
2582 .set_optmax = IP_VS_SO_SET_MAX+1,
2583 .set = do_ip_vs_set_ctl,
2584 .get_optmin = IP_VS_BASE_CTL,
2585 .get_optmax = IP_VS_SO_GET_MAX+1,
2586 .get = do_ip_vs_get_ctl,
Neil Horman16fcec32007-09-11 11:28:26 +02002587 .owner = THIS_MODULE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588};
2589
Julius Volz9a812192008-08-14 14:08:44 +02002590/*
2591 * Generic Netlink interface
2592 */
2593
2594/* IPVS genetlink family */
2595static struct genl_family ip_vs_genl_family = {
2596 .id = GENL_ID_GENERATE,
2597 .hdrsize = 0,
2598 .name = IPVS_GENL_NAME,
2599 .version = IPVS_GENL_VERSION,
2600 .maxattr = IPVS_CMD_MAX,
2601};
2602
2603/* Policy used for first-level command attributes */
2604static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2605 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2606 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2607 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2608 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2609 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2610 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2611};
2612
2613/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2614static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2615 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2616 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2617 .len = IP_VS_IFNAME_MAXLEN },
2618 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2619};
2620
2621/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2622static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2623 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2624 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2625 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2626 .len = sizeof(union nf_inet_addr) },
2627 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2628 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2629 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2630 .len = IP_VS_SCHEDNAME_MAXLEN },
Simon Horman0d1e71b2010-08-22 21:37:54 +09002631 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2632 .len = IP_VS_PENAME_MAXLEN },
Julius Volz9a812192008-08-14 14:08:44 +02002633 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2634 .len = sizeof(struct ip_vs_flags) },
2635 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2636 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2637 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2638};
2639
2640/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2641static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2642 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2643 .len = sizeof(union nf_inet_addr) },
2644 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2645 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2646 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2647 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2648 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2649 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2650 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2651 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2652 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2653};
2654
2655static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2656 struct ip_vs_stats *stats)
2657{
2658 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2659 if (!nl_stats)
2660 return -EMSGSIZE;
2661
2662 spin_lock_bh(&stats->lock);
2663
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002664 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2665 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2666 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2667 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2668 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2669 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2670 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2671 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2672 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2673 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
Julius Volz9a812192008-08-14 14:08:44 +02002674
2675 spin_unlock_bh(&stats->lock);
2676
2677 nla_nest_end(skb, nl_stats);
2678
2679 return 0;
2680
2681nla_put_failure:
2682 spin_unlock_bh(&stats->lock);
2683 nla_nest_cancel(skb, nl_stats);
2684 return -EMSGSIZE;
2685}
2686
2687static int ip_vs_genl_fill_service(struct sk_buff *skb,
2688 struct ip_vs_service *svc)
2689{
2690 struct nlattr *nl_service;
2691 struct ip_vs_flags flags = { .flags = svc->flags,
2692 .mask = ~0 };
2693
2694 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2695 if (!nl_service)
2696 return -EMSGSIZE;
2697
Julius Volzf94fd042008-09-02 15:55:55 +02002698 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
Julius Volz9a812192008-08-14 14:08:44 +02002699
2700 if (svc->fwmark) {
2701 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2702 } else {
2703 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2704 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2705 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2706 }
2707
2708 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002709 if (svc->pe)
2710 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
Julius Volz9a812192008-08-14 14:08:44 +02002711 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2712 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2713 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2714
2715 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2716 goto nla_put_failure;
2717
2718 nla_nest_end(skb, nl_service);
2719
2720 return 0;
2721
2722nla_put_failure:
2723 nla_nest_cancel(skb, nl_service);
2724 return -EMSGSIZE;
2725}
2726
2727static int ip_vs_genl_dump_service(struct sk_buff *skb,
2728 struct ip_vs_service *svc,
2729 struct netlink_callback *cb)
2730{
2731 void *hdr;
2732
2733 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2734 &ip_vs_genl_family, NLM_F_MULTI,
2735 IPVS_CMD_NEW_SERVICE);
2736 if (!hdr)
2737 return -EMSGSIZE;
2738
2739 if (ip_vs_genl_fill_service(skb, svc) < 0)
2740 goto nla_put_failure;
2741
2742 return genlmsg_end(skb, hdr);
2743
2744nla_put_failure:
2745 genlmsg_cancel(skb, hdr);
2746 return -EMSGSIZE;
2747}
2748
2749static int ip_vs_genl_dump_services(struct sk_buff *skb,
2750 struct netlink_callback *cb)
2751{
2752 int idx = 0, i;
2753 int start = cb->args[0];
2754 struct ip_vs_service *svc;
Hans Schillstromfc723252011-01-03 14:44:43 +01002755 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002756
2757 mutex_lock(&__ip_vs_mutex);
2758 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2759 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002760 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002761 continue;
2762 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2763 idx--;
2764 goto nla_put_failure;
2765 }
2766 }
2767 }
2768
2769 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2770 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002771 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002772 continue;
2773 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2774 idx--;
2775 goto nla_put_failure;
2776 }
2777 }
2778 }
2779
2780nla_put_failure:
2781 mutex_unlock(&__ip_vs_mutex);
2782 cb->args[0] = idx;
2783
2784 return skb->len;
2785}
2786
Hans Schillstromfc723252011-01-03 14:44:43 +01002787static int ip_vs_genl_parse_service(struct net *net,
2788 struct ip_vs_service_user_kern *usvc,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002789 struct nlattr *nla, int full_entry,
2790 struct ip_vs_service **ret_svc)
Julius Volz9a812192008-08-14 14:08:44 +02002791{
2792 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2793 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002794 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002795
2796 /* Parse mandatory identifying service fields first */
2797 if (nla == NULL ||
2798 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2799 return -EINVAL;
2800
2801 nla_af = attrs[IPVS_SVC_ATTR_AF];
2802 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2803 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2804 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2805 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2806
2807 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2808 return -EINVAL;
2809
Simon Horman258c8892009-12-15 17:01:25 +01002810 memset(usvc, 0, sizeof(*usvc));
2811
Julius Volzc860c6b2008-09-02 15:55:36 +02002812 usvc->af = nla_get_u16(nla_af);
Julius Volzf94fd042008-09-02 15:55:55 +02002813#ifdef CONFIG_IP_VS_IPV6
2814 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2815#else
2816 if (usvc->af != AF_INET)
2817#endif
Julius Volz9a812192008-08-14 14:08:44 +02002818 return -EAFNOSUPPORT;
2819
2820 if (nla_fwmark) {
2821 usvc->protocol = IPPROTO_TCP;
2822 usvc->fwmark = nla_get_u32(nla_fwmark);
2823 } else {
2824 usvc->protocol = nla_get_u16(nla_protocol);
2825 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2826 usvc->port = nla_get_u16(nla_port);
2827 usvc->fwmark = 0;
2828 }
2829
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002830 if (usvc->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002831 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002832 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002833 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002834 &usvc->addr, usvc->port);
2835 *ret_svc = svc;
2836
Julius Volz9a812192008-08-14 14:08:44 +02002837 /* If a full entry was requested, check for the additional fields */
2838 if (full_entry) {
Simon Horman0d1e71b2010-08-22 21:37:54 +09002839 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
Julius Volz9a812192008-08-14 14:08:44 +02002840 *nla_netmask;
2841 struct ip_vs_flags flags;
Julius Volz9a812192008-08-14 14:08:44 +02002842
2843 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
Simon Horman0d1e71b2010-08-22 21:37:54 +09002844 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
Julius Volz9a812192008-08-14 14:08:44 +02002845 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2846 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2847 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2848
2849 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2850 return -EINVAL;
2851
2852 nla_memcpy(&flags, nla_flags, sizeof(flags));
2853
2854 /* prefill flags from service if it already exists */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002855 if (svc)
Julius Volz9a812192008-08-14 14:08:44 +02002856 usvc->flags = svc->flags;
Julius Volz9a812192008-08-14 14:08:44 +02002857
2858 /* set new flags from userland */
2859 usvc->flags = (usvc->flags & ~flags.mask) |
2860 (flags.flags & flags.mask);
Julius Volzc860c6b2008-09-02 15:55:36 +02002861 usvc->sched_name = nla_data(nla_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002862 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
Julius Volz9a812192008-08-14 14:08:44 +02002863 usvc->timeout = nla_get_u32(nla_timeout);
2864 usvc->netmask = nla_get_u32(nla_netmask);
2865 }
2866
2867 return 0;
2868}
2869
Hans Schillstromfc723252011-01-03 14:44:43 +01002870static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2871 struct nlattr *nla)
Julius Volz9a812192008-08-14 14:08:44 +02002872{
Julius Volzc860c6b2008-09-02 15:55:36 +02002873 struct ip_vs_service_user_kern usvc;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002874 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002875 int ret;
2876
Hans Schillstromfc723252011-01-03 14:44:43 +01002877 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002878 return ret ? ERR_PTR(ret) : svc;
Julius Volz9a812192008-08-14 14:08:44 +02002879}
2880
2881static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2882{
2883 struct nlattr *nl_dest;
2884
2885 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2886 if (!nl_dest)
2887 return -EMSGSIZE;
2888
2889 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2890 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2891
2892 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2893 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2894 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2895 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2896 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2897 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2898 atomic_read(&dest->activeconns));
2899 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2900 atomic_read(&dest->inactconns));
2901 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2902 atomic_read(&dest->persistconns));
2903
2904 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2905 goto nla_put_failure;
2906
2907 nla_nest_end(skb, nl_dest);
2908
2909 return 0;
2910
2911nla_put_failure:
2912 nla_nest_cancel(skb, nl_dest);
2913 return -EMSGSIZE;
2914}
2915
2916static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2917 struct netlink_callback *cb)
2918{
2919 void *hdr;
2920
2921 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2922 &ip_vs_genl_family, NLM_F_MULTI,
2923 IPVS_CMD_NEW_DEST);
2924 if (!hdr)
2925 return -EMSGSIZE;
2926
2927 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2928 goto nla_put_failure;
2929
2930 return genlmsg_end(skb, hdr);
2931
2932nla_put_failure:
2933 genlmsg_cancel(skb, hdr);
2934 return -EMSGSIZE;
2935}
2936
2937static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2938 struct netlink_callback *cb)
2939{
2940 int idx = 0;
2941 int start = cb->args[0];
2942 struct ip_vs_service *svc;
2943 struct ip_vs_dest *dest;
2944 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
Hans Schillstromfc723252011-01-03 14:44:43 +01002945 struct net *net;
Julius Volz9a812192008-08-14 14:08:44 +02002946
2947 mutex_lock(&__ip_vs_mutex);
2948
2949 /* Try to find the service for which to dump destinations */
2950 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2951 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2952 goto out_err;
2953
Hans Schillstromfc723252011-01-03 14:44:43 +01002954 net = skb_sknet(skb);
2955 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02002956 if (IS_ERR(svc) || svc == NULL)
2957 goto out_err;
2958
2959 /* Dump the destinations */
2960 list_for_each_entry(dest, &svc->destinations, n_list) {
2961 if (++idx <= start)
2962 continue;
2963 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2964 idx--;
2965 goto nla_put_failure;
2966 }
2967 }
2968
2969nla_put_failure:
2970 cb->args[0] = idx;
Julius Volz9a812192008-08-14 14:08:44 +02002971
2972out_err:
2973 mutex_unlock(&__ip_vs_mutex);
2974
2975 return skb->len;
2976}
2977
Julius Volzc860c6b2008-09-02 15:55:36 +02002978static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
Julius Volz9a812192008-08-14 14:08:44 +02002979 struct nlattr *nla, int full_entry)
2980{
2981 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2982 struct nlattr *nla_addr, *nla_port;
2983
2984 /* Parse mandatory identifying destination fields first */
2985 if (nla == NULL ||
2986 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2987 return -EINVAL;
2988
2989 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2990 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2991
2992 if (!(nla_addr && nla_port))
2993 return -EINVAL;
2994
Simon Horman258c8892009-12-15 17:01:25 +01002995 memset(udest, 0, sizeof(*udest));
2996
Julius Volz9a812192008-08-14 14:08:44 +02002997 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2998 udest->port = nla_get_u16(nla_port);
2999
3000 /* If a full entry was requested, check for the additional fields */
3001 if (full_entry) {
3002 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3003 *nla_l_thresh;
3004
3005 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3006 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3007 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3008 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3009
3010 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3011 return -EINVAL;
3012
3013 udest->conn_flags = nla_get_u32(nla_fwd)
3014 & IP_VS_CONN_F_FWD_MASK;
3015 udest->weight = nla_get_u32(nla_weight);
3016 udest->u_threshold = nla_get_u32(nla_u_thresh);
3017 udest->l_threshold = nla_get_u32(nla_l_thresh);
3018 }
3019
3020 return 0;
3021}
3022
3023static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3024 const char *mcast_ifn, __be32 syncid)
3025{
3026 struct nlattr *nl_daemon;
3027
3028 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3029 if (!nl_daemon)
3030 return -EMSGSIZE;
3031
3032 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3033 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3034 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3035
3036 nla_nest_end(skb, nl_daemon);
3037
3038 return 0;
3039
3040nla_put_failure:
3041 nla_nest_cancel(skb, nl_daemon);
3042 return -EMSGSIZE;
3043}
3044
3045static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3046 const char *mcast_ifn, __be32 syncid,
3047 struct netlink_callback *cb)
3048{
3049 void *hdr;
3050 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3051 &ip_vs_genl_family, NLM_F_MULTI,
3052 IPVS_CMD_NEW_DAEMON);
3053 if (!hdr)
3054 return -EMSGSIZE;
3055
3056 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3057 goto nla_put_failure;
3058
3059 return genlmsg_end(skb, hdr);
3060
3061nla_put_failure:
3062 genlmsg_cancel(skb, hdr);
3063 return -EMSGSIZE;
3064}
3065
3066static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3067 struct netlink_callback *cb)
3068{
Hans Schillstromf1313152011-01-03 14:44:55 +01003069 struct net *net = skb_net(skb);
3070 struct netns_ipvs *ipvs = net_ipvs(net);
3071
Julius Volz9a812192008-08-14 14:08:44 +02003072 mutex_lock(&__ip_vs_mutex);
Hans Schillstromf1313152011-01-03 14:44:55 +01003073 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
Julius Volz9a812192008-08-14 14:08:44 +02003074 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
Hans Schillstromf1313152011-01-03 14:44:55 +01003075 ipvs->master_mcast_ifn,
3076 ipvs->master_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003077 goto nla_put_failure;
3078
3079 cb->args[0] = 1;
3080 }
3081
Hans Schillstromf1313152011-01-03 14:44:55 +01003082 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
Julius Volz9a812192008-08-14 14:08:44 +02003083 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
Hans Schillstromf1313152011-01-03 14:44:55 +01003084 ipvs->backup_mcast_ifn,
3085 ipvs->backup_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003086 goto nla_put_failure;
3087
3088 cb->args[1] = 1;
3089 }
3090
3091nla_put_failure:
3092 mutex_unlock(&__ip_vs_mutex);
3093
3094 return skb->len;
3095}
3096
Hans Schillstromf1313152011-01-03 14:44:55 +01003097static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003098{
3099 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3100 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3101 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3102 return -EINVAL;
3103
Hans Schillstromf1313152011-01-03 14:44:55 +01003104 return start_sync_thread(net,
3105 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
Julius Volz9a812192008-08-14 14:08:44 +02003106 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3107 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3108}
3109
Hans Schillstromf1313152011-01-03 14:44:55 +01003110static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003111{
3112 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3113 return -EINVAL;
3114
Hans Schillstromf1313152011-01-03 14:44:55 +01003115 return stop_sync_thread(net,
3116 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
Julius Volz9a812192008-08-14 14:08:44 +02003117}
3118
Hans Schillstrom93304192011-01-03 14:44:51 +01003119static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003120{
3121 struct ip_vs_timeout_user t;
3122
Hans Schillstrom93304192011-01-03 14:44:51 +01003123 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003124
3125 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3126 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3127
3128 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3129 t.tcp_fin_timeout =
3130 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3131
3132 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3133 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3134
Hans Schillstrom93304192011-01-03 14:44:51 +01003135 return ip_vs_set_timeout(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003136}
3137
3138static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3139{
3140 struct ip_vs_service *svc = NULL;
Julius Volzc860c6b2008-09-02 15:55:36 +02003141 struct ip_vs_service_user_kern usvc;
3142 struct ip_vs_dest_user_kern udest;
Julius Volz9a812192008-08-14 14:08:44 +02003143 int ret = 0, cmd;
3144 int need_full_svc = 0, need_full_dest = 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003145 struct net *net;
Julius Volz9a812192008-08-14 14:08:44 +02003146
Hans Schillstromfc723252011-01-03 14:44:43 +01003147 net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02003148 cmd = info->genlhdr->cmd;
3149
3150 mutex_lock(&__ip_vs_mutex);
3151
3152 if (cmd == IPVS_CMD_FLUSH) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003153 ret = ip_vs_flush(net);
Julius Volz9a812192008-08-14 14:08:44 +02003154 goto out;
3155 } else if (cmd == IPVS_CMD_SET_CONFIG) {
Hans Schillstrom93304192011-01-03 14:44:51 +01003156 ret = ip_vs_genl_set_config(net, info->attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003157 goto out;
3158 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3159 cmd == IPVS_CMD_DEL_DAEMON) {
3160
3161 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3162
3163 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3164 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3165 info->attrs[IPVS_CMD_ATTR_DAEMON],
3166 ip_vs_daemon_policy)) {
3167 ret = -EINVAL;
3168 goto out;
3169 }
3170
3171 if (cmd == IPVS_CMD_NEW_DAEMON)
Hans Schillstromf1313152011-01-03 14:44:55 +01003172 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003173 else
Hans Schillstromf1313152011-01-03 14:44:55 +01003174 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003175 goto out;
3176 } else if (cmd == IPVS_CMD_ZERO &&
3177 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003178 ret = ip_vs_zero_all(net);
Julius Volz9a812192008-08-14 14:08:44 +02003179 goto out;
3180 }
3181
3182 /* All following commands require a service argument, so check if we
3183 * received a valid one. We need a full service specification when
3184 * adding / editing a service. Only identifying members otherwise. */
3185 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3186 need_full_svc = 1;
3187
Hans Schillstromfc723252011-01-03 14:44:43 +01003188 ret = ip_vs_genl_parse_service(net, &usvc,
Julius Volz9a812192008-08-14 14:08:44 +02003189 info->attrs[IPVS_CMD_ATTR_SERVICE],
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003190 need_full_svc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003191 if (ret)
3192 goto out;
3193
Julius Volz9a812192008-08-14 14:08:44 +02003194 /* Unless we're adding a new service, the service must already exist */
3195 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3196 ret = -ESRCH;
3197 goto out;
3198 }
3199
3200 /* Destination commands require a valid destination argument. For
3201 * adding / editing a destination, we need a full destination
3202 * specification. */
3203 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3204 cmd == IPVS_CMD_DEL_DEST) {
3205 if (cmd != IPVS_CMD_DEL_DEST)
3206 need_full_dest = 1;
3207
3208 ret = ip_vs_genl_parse_dest(&udest,
3209 info->attrs[IPVS_CMD_ATTR_DEST],
3210 need_full_dest);
3211 if (ret)
3212 goto out;
3213 }
3214
3215 switch (cmd) {
3216 case IPVS_CMD_NEW_SERVICE:
3217 if (svc == NULL)
Hans Schillstromfc723252011-01-03 14:44:43 +01003218 ret = ip_vs_add_service(net, &usvc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003219 else
3220 ret = -EEXIST;
3221 break;
3222 case IPVS_CMD_SET_SERVICE:
3223 ret = ip_vs_edit_service(svc, &usvc);
3224 break;
3225 case IPVS_CMD_DEL_SERVICE:
3226 ret = ip_vs_del_service(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003227 /* do not use svc, it can be freed */
Julius Volz9a812192008-08-14 14:08:44 +02003228 break;
3229 case IPVS_CMD_NEW_DEST:
3230 ret = ip_vs_add_dest(svc, &udest);
3231 break;
3232 case IPVS_CMD_SET_DEST:
3233 ret = ip_vs_edit_dest(svc, &udest);
3234 break;
3235 case IPVS_CMD_DEL_DEST:
3236 ret = ip_vs_del_dest(svc, &udest);
3237 break;
3238 case IPVS_CMD_ZERO:
3239 ret = ip_vs_zero_service(svc);
3240 break;
3241 default:
3242 ret = -EINVAL;
3243 }
3244
3245out:
Julius Volz9a812192008-08-14 14:08:44 +02003246 mutex_unlock(&__ip_vs_mutex);
3247
3248 return ret;
3249}
3250
3251static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3252{
3253 struct sk_buff *msg;
3254 void *reply;
3255 int ret, cmd, reply_cmd;
Hans Schillstromfc723252011-01-03 14:44:43 +01003256 struct net *net;
Julius Volz9a812192008-08-14 14:08:44 +02003257
Hans Schillstromfc723252011-01-03 14:44:43 +01003258 net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02003259 cmd = info->genlhdr->cmd;
3260
3261 if (cmd == IPVS_CMD_GET_SERVICE)
3262 reply_cmd = IPVS_CMD_NEW_SERVICE;
3263 else if (cmd == IPVS_CMD_GET_INFO)
3264 reply_cmd = IPVS_CMD_SET_INFO;
3265 else if (cmd == IPVS_CMD_GET_CONFIG)
3266 reply_cmd = IPVS_CMD_SET_CONFIG;
3267 else {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003268 pr_err("unknown Generic Netlink command\n");
Julius Volz9a812192008-08-14 14:08:44 +02003269 return -EINVAL;
3270 }
3271
3272 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3273 if (!msg)
3274 return -ENOMEM;
3275
3276 mutex_lock(&__ip_vs_mutex);
3277
3278 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3279 if (reply == NULL)
3280 goto nla_put_failure;
3281
3282 switch (cmd) {
3283 case IPVS_CMD_GET_SERVICE:
3284 {
3285 struct ip_vs_service *svc;
3286
Hans Schillstromfc723252011-01-03 14:44:43 +01003287 svc = ip_vs_genl_find_service(net,
3288 info->attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003289 if (IS_ERR(svc)) {
3290 ret = PTR_ERR(svc);
3291 goto out_err;
3292 } else if (svc) {
3293 ret = ip_vs_genl_fill_service(msg, svc);
Julius Volz9a812192008-08-14 14:08:44 +02003294 if (ret)
3295 goto nla_put_failure;
3296 } else {
3297 ret = -ESRCH;
3298 goto out_err;
3299 }
3300
3301 break;
3302 }
3303
3304 case IPVS_CMD_GET_CONFIG:
3305 {
3306 struct ip_vs_timeout_user t;
3307
Hans Schillstrom93304192011-01-03 14:44:51 +01003308 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003309#ifdef CONFIG_IP_VS_PROTO_TCP
3310 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3311 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3312 t.tcp_fin_timeout);
3313#endif
3314#ifdef CONFIG_IP_VS_PROTO_UDP
3315 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3316#endif
3317
3318 break;
3319 }
3320
3321 case IPVS_CMD_GET_INFO:
3322 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3323 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01003324 ip_vs_conn_tab_size);
Julius Volz9a812192008-08-14 14:08:44 +02003325 break;
3326 }
3327
3328 genlmsg_end(msg, reply);
Johannes Berg134e6372009-07-10 09:51:34 +00003329 ret = genlmsg_reply(msg, info);
Julius Volz9a812192008-08-14 14:08:44 +02003330 goto out;
3331
3332nla_put_failure:
Hannes Eder1e3e2382009-08-02 11:05:41 +00003333 pr_err("not enough space in Netlink message\n");
Julius Volz9a812192008-08-14 14:08:44 +02003334 ret = -EMSGSIZE;
3335
3336out_err:
3337 nlmsg_free(msg);
3338out:
3339 mutex_unlock(&__ip_vs_mutex);
3340
3341 return ret;
3342}
3343
3344
3345static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3346 {
3347 .cmd = IPVS_CMD_NEW_SERVICE,
3348 .flags = GENL_ADMIN_PERM,
3349 .policy = ip_vs_cmd_policy,
3350 .doit = ip_vs_genl_set_cmd,
3351 },
3352 {
3353 .cmd = IPVS_CMD_SET_SERVICE,
3354 .flags = GENL_ADMIN_PERM,
3355 .policy = ip_vs_cmd_policy,
3356 .doit = ip_vs_genl_set_cmd,
3357 },
3358 {
3359 .cmd = IPVS_CMD_DEL_SERVICE,
3360 .flags = GENL_ADMIN_PERM,
3361 .policy = ip_vs_cmd_policy,
3362 .doit = ip_vs_genl_set_cmd,
3363 },
3364 {
3365 .cmd = IPVS_CMD_GET_SERVICE,
3366 .flags = GENL_ADMIN_PERM,
3367 .doit = ip_vs_genl_get_cmd,
3368 .dumpit = ip_vs_genl_dump_services,
3369 .policy = ip_vs_cmd_policy,
3370 },
3371 {
3372 .cmd = IPVS_CMD_NEW_DEST,
3373 .flags = GENL_ADMIN_PERM,
3374 .policy = ip_vs_cmd_policy,
3375 .doit = ip_vs_genl_set_cmd,
3376 },
3377 {
3378 .cmd = IPVS_CMD_SET_DEST,
3379 .flags = GENL_ADMIN_PERM,
3380 .policy = ip_vs_cmd_policy,
3381 .doit = ip_vs_genl_set_cmd,
3382 },
3383 {
3384 .cmd = IPVS_CMD_DEL_DEST,
3385 .flags = GENL_ADMIN_PERM,
3386 .policy = ip_vs_cmd_policy,
3387 .doit = ip_vs_genl_set_cmd,
3388 },
3389 {
3390 .cmd = IPVS_CMD_GET_DEST,
3391 .flags = GENL_ADMIN_PERM,
3392 .policy = ip_vs_cmd_policy,
3393 .dumpit = ip_vs_genl_dump_dests,
3394 },
3395 {
3396 .cmd = IPVS_CMD_NEW_DAEMON,
3397 .flags = GENL_ADMIN_PERM,
3398 .policy = ip_vs_cmd_policy,
3399 .doit = ip_vs_genl_set_cmd,
3400 },
3401 {
3402 .cmd = IPVS_CMD_DEL_DAEMON,
3403 .flags = GENL_ADMIN_PERM,
3404 .policy = ip_vs_cmd_policy,
3405 .doit = ip_vs_genl_set_cmd,
3406 },
3407 {
3408 .cmd = IPVS_CMD_GET_DAEMON,
3409 .flags = GENL_ADMIN_PERM,
3410 .dumpit = ip_vs_genl_dump_daemons,
3411 },
3412 {
3413 .cmd = IPVS_CMD_SET_CONFIG,
3414 .flags = GENL_ADMIN_PERM,
3415 .policy = ip_vs_cmd_policy,
3416 .doit = ip_vs_genl_set_cmd,
3417 },
3418 {
3419 .cmd = IPVS_CMD_GET_CONFIG,
3420 .flags = GENL_ADMIN_PERM,
3421 .doit = ip_vs_genl_get_cmd,
3422 },
3423 {
3424 .cmd = IPVS_CMD_GET_INFO,
3425 .flags = GENL_ADMIN_PERM,
3426 .doit = ip_vs_genl_get_cmd,
3427 },
3428 {
3429 .cmd = IPVS_CMD_ZERO,
3430 .flags = GENL_ADMIN_PERM,
3431 .policy = ip_vs_cmd_policy,
3432 .doit = ip_vs_genl_set_cmd,
3433 },
3434 {
3435 .cmd = IPVS_CMD_FLUSH,
3436 .flags = GENL_ADMIN_PERM,
3437 .doit = ip_vs_genl_set_cmd,
3438 },
3439};
3440
3441static int __init ip_vs_genl_register(void)
3442{
Michał Mirosław8f698d52009-05-21 10:34:05 +00003443 return genl_register_family_with_ops(&ip_vs_genl_family,
3444 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
Julius Volz9a812192008-08-14 14:08:44 +02003445}
3446
3447static void ip_vs_genl_unregister(void)
3448{
3449 genl_unregister_family(&ip_vs_genl_family);
3450}
3451
3452/* End of Generic Netlink interface definitions */
3453
/*
 * per netns init/exit func.
 */
3457int __net_init __ip_vs_control_init(struct net *net)
3458{
Hans Schillstromfc723252011-01-03 14:44:43 +01003459 int idx;
3460 struct netns_ipvs *ipvs = net_ipvs(net);
3461
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003462 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3463 return -EPERM;
3464
Hans Schillstromfc723252011-01-03 14:44:43 +01003465 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3466 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3467
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003468 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3469 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3470 sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path,
3471 vs_vars);
3472 if (sysctl_header == NULL)
3473 goto err_reg;
Hans Schillstrom29c20262011-01-03 14:44:54 +01003474 ip_vs_new_estimator(net, &ip_vs_stats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003475 return 0;
3476
3477err_reg:
3478 return -ENOMEM;
3479}
3480
3481static void __net_exit __ip_vs_control_cleanup(struct net *net)
3482{
3483 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3484 return;
3485
Hans Schillstrom29c20262011-01-03 14:44:54 +01003486 ip_vs_kill_estimator(net, &ip_vs_stats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003487 unregister_net_sysctl_table(sysctl_header);
3488 proc_net_remove(net, "ip_vs_stats");
3489 proc_net_remove(net, "ip_vs");
3490}
3491
3492static struct pernet_operations ipvs_control_ops = {
3493 .init = __ip_vs_control_init,
3494 .exit = __ip_vs_control_cleanup,
3495};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003496
Sven Wegener048cf482008-08-10 18:24:35 +00003497int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003498{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003499 int idx;
Hans Schillstromfc723252011-01-03 14:44:43 +01003500 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003501
3502 EnterFunction(2);
3503
Hans Schillstromfc723252011-01-03 14:44:43 +01003504 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003505 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3506 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3507 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3508 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003509
3510 ret = register_pernet_subsys(&ipvs_control_ops);
3511 if (ret) {
3512 pr_err("cannot register namespace.\n");
3513 goto err;
Eduardo Blancod86bef72010-10-19 10:26:47 +01003514 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003515
3516 smp_wmb(); /* Do we really need it now ? */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003517
Linus Torvalds1da177e2005-04-16 15:20:36 -07003518 ret = nf_register_sockopt(&ip_vs_sockopts);
3519 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003520 pr_err("cannot register sockopt.\n");
Hans Schillstromfc723252011-01-03 14:44:43 +01003521 goto err_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003522 }
3523
Julius Volz9a812192008-08-14 14:08:44 +02003524 ret = ip_vs_genl_register();
3525 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003526 pr_err("cannot register Generic Netlink interface.\n");
Julius Volz9a812192008-08-14 14:08:44 +02003527 nf_unregister_sockopt(&ip_vs_sockopts);
Hans Schillstromfc723252011-01-03 14:44:43 +01003528 goto err_net;
Julius Volz9a812192008-08-14 14:08:44 +02003529 }
3530
Linus Torvalds1da177e2005-04-16 15:20:36 -07003531 /* Hook the defense timer */
3532 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3533
3534 LeaveFunction(2);
3535 return 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003536
3537err_net:
3538 unregister_pernet_subsys(&ipvs_control_ops);
3539err:
3540 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003541}
3542
3543
3544void ip_vs_control_cleanup(void)
3545{
3546 EnterFunction(2);
3547 ip_vs_trash_cleanup();
Tejun Heoafe2c512010-12-14 16:21:17 +01003548 cancel_delayed_work_sync(&defense_work);
Oleg Nesterov28e53bd2007-05-09 02:34:22 -07003549 cancel_work_sync(&defense_work.work);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003550 unregister_pernet_subsys(&ipvs_control_ops);
Julius Volz9a812192008-08-14 14:08:44 +02003551 ip_vs_genl_unregister();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003552 nf_unregister_sockopt(&ip_vs_sockopts);
3553 LeaveFunction(2);
3554}