blob: edf2b6dee9720a463b13acbcc3376eaa34bb0bad [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
Hannes Eder9aada7a2009-07-30 14:29:44 -070021#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/seq_file.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090034#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080038#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020040#include <net/net_namespace.h>
Hans Schillstrom93304192011-01-03 14:44:51 +010041#include <linux/nsproxy.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <net/ip.h>
Vince Busam09571c72008-09-02 15:55:52 +020043#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020047#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020049#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
/* Mutex serializing the IPVS sockopts; [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* Read/write lock for the service table */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
Linus Torvalds1da177e2005-04-16 15:20:36 -070061/* sysctl variables */
Linus Torvalds1da177e2005-04-16 15:20:36 -070062
63#ifdef CONFIG_IP_VS_DEBUG
/* Current debug level; statics start out zeroed, so no initializer. */
static int sysctl_ip_vs_debug_level;

/* Return the current IPVS debug level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
70#endif
71
Vince Busam09571c72008-09-02 15:55:52 +020072#ifdef CONFIG_IP_VS_IPV6
73/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
Hans Schillstrom4a984802011-01-03 14:45:02 +010074static int __ip_vs_addr_is_local_v6(struct net *net,
75 const struct in6_addr *addr)
Vince Busam09571c72008-09-02 15:55:52 +020076{
77 struct rt6_info *rt;
78 struct flowi fl = {
79 .oif = 0,
Changli Gao58116622010-11-12 18:43:55 +000080 .fl6_dst = *addr,
81 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
Vince Busam09571c72008-09-02 15:55:52 +020082 };
83
Hans Schillstrom4a984802011-01-03 14:45:02 +010084 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
Vince Busam09571c72008-09-02 15:55:52 +020085 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
86 return 1;
87
88 return 0;
89}
90#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -070091/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -070092 * update_defense_level is called from keventd and from sysctl,
93 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -070094 */
Hans Schillstrom93304192011-01-03 14:44:51 +010095static void update_defense_level(struct netns_ipvs *ipvs)
Linus Torvalds1da177e2005-04-16 15:20:36 -070096{
97 struct sysinfo i;
98 static int old_secure_tcp = 0;
99 int availmem;
100 int nomem;
101 int to_change = -1;
102
103 /* we only count free and buffered memory (in pages) */
104 si_meminfo(&i);
105 availmem = i.freeram + i.bufferram;
106 /* however in linux 2.5 the i.bufferram is total page cache size,
107 we need adjust it */
108 /* si_swapinfo(&i); */
109 /* availmem = availmem - (i.totalswap - i.freeswap); */
110
Hans Schillstroma0840e22011-01-03 14:44:58 +0100111 nomem = (availmem < ipvs->sysctl_amemthresh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700113 local_bh_disable();
114
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115 /* drop_entry */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100116 spin_lock(&ipvs->dropentry_lock);
117 switch (ipvs->sysctl_drop_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100119 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120 break;
121 case 1:
122 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100123 atomic_set(&ipvs->dropentry, 1);
124 ipvs->sysctl_drop_entry = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100126 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127 }
128 break;
129 case 2:
130 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100131 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100133 atomic_set(&ipvs->dropentry, 0);
134 ipvs->sysctl_drop_entry = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 };
136 break;
137 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100138 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 break;
140 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100141 spin_unlock(&ipvs->dropentry_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142
143 /* drop_packet */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100144 spin_lock(&ipvs->droppacket_lock);
145 switch (ipvs->sysctl_drop_packet) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100147 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148 break;
149 case 1:
150 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100151 ipvs->drop_rate = ipvs->drop_counter
152 = ipvs->sysctl_amemthresh /
153 (ipvs->sysctl_amemthresh-availmem);
154 ipvs->sysctl_drop_packet = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100156 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157 }
158 break;
159 case 2:
160 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100161 ipvs->drop_rate = ipvs->drop_counter
162 = ipvs->sysctl_amemthresh /
163 (ipvs->sysctl_amemthresh-availmem);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100165 ipvs->drop_rate = 0;
166 ipvs->sysctl_drop_packet = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 }
168 break;
169 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100170 ipvs->drop_rate = ipvs->sysctl_am_droprate;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171 break;
172 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100173 spin_unlock(&ipvs->droppacket_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174
175 /* secure_tcp */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100176 spin_lock(&ipvs->securetcp_lock);
177 switch (ipvs->sysctl_secure_tcp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 case 0:
179 if (old_secure_tcp >= 2)
180 to_change = 0;
181 break;
182 case 1:
183 if (nomem) {
184 if (old_secure_tcp < 2)
185 to_change = 1;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100186 ipvs->sysctl_secure_tcp = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 } else {
188 if (old_secure_tcp >= 2)
189 to_change = 0;
190 }
191 break;
192 case 2:
193 if (nomem) {
194 if (old_secure_tcp < 2)
195 to_change = 1;
196 } else {
197 if (old_secure_tcp >= 2)
198 to_change = 0;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100199 ipvs->sysctl_secure_tcp = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 }
201 break;
202 case 3:
203 if (old_secure_tcp < 2)
204 to_change = 1;
205 break;
206 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100207 old_secure_tcp = ipvs->sysctl_secure_tcp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 if (to_change >= 0)
Hans Schillstrom93304192011-01-03 14:44:51 +0100209 ip_vs_protocol_timeout_change(ipvs,
Hans Schillstroma0840e22011-01-03 14:44:58 +0100210 ipvs->sysctl_secure_tcp > 1);
211 spin_unlock(&ipvs->securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700212
213 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214}
215
216
217/*
218 * Timer for checking the defense
219 */
220#define DEFENSE_TIMER_PERIOD 1*HZ
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221
David Howellsc4028952006-11-22 14:57:56 +0000222static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223{
Hans Schillstromf6340ee2011-01-03 14:44:59 +0100224 struct netns_ipvs *ipvs =
225 container_of(work, struct netns_ipvs, defense_work.work);
Hans Schillstrom93304192011-01-03 14:44:51 +0100226
227 update_defense_level(ipvs);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100228 if (atomic_read(&ipvs->dropentry))
Hans Schillstromf6340ee2011-01-03 14:44:59 +0100229 ip_vs_random_dropentry(ipvs->net);
230 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231}
232
233int
234ip_vs_use_count_inc(void)
235{
236 return try_module_get(THIS_MODULE);
237}
238
239void
240ip_vs_use_count_dec(void)
241{
242 module_put(THIS_MODULE);
243}
244
245
246/*
247 * Hash table: for virtual service lookups
248 */
249#define IP_VS_SVC_TAB_BITS 8
250#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
251#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
252
253/* the service table hashed by <protocol, addr, port> */
254static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
255/* the service table hashed by fwmark */
256static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
257
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258
259/*
260 * Returns hash value for virtual service
261 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100262static inline unsigned
263ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
264 const union nf_inet_addr *addr, __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265{
266 register unsigned porth = ntohs(port);
Julius Volzb18610d2008-09-02 15:55:37 +0200267 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268
Julius Volzb18610d2008-09-02 15:55:37 +0200269#ifdef CONFIG_IP_VS_IPV6
270 if (af == AF_INET6)
271 addr_fold = addr->ip6[0]^addr->ip6[1]^
272 addr->ip6[2]^addr->ip6[3];
273#endif
Hans Schillstromfc723252011-01-03 14:44:43 +0100274 addr_fold ^= ((size_t)net>>8);
Julius Volzb18610d2008-09-02 15:55:37 +0200275
276 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277 & IP_VS_SVC_TAB_MASK;
278}
279
280/*
281 * Returns hash value of fwmark for virtual service lookup
282 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100283static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284{
Hans Schillstromfc723252011-01-03 14:44:43 +0100285 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286}
287
288/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100289 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290 * or in the ip_vs_svc_fwm_table by fwmark.
291 * Should be called with locked tables.
292 */
293static int ip_vs_svc_hash(struct ip_vs_service *svc)
294{
295 unsigned hash;
296
297 if (svc->flags & IP_VS_SVC_F_HASHED) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000298 pr_err("%s(): request for already hashed, called from %pF\n",
299 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300 return 0;
301 }
302
303 if (svc->fwmark == 0) {
304 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100305 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100307 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
308 &svc->addr, svc->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
310 } else {
311 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100312 * Hash it by fwmark in svc_fwm_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100314 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
316 }
317
318 svc->flags |= IP_VS_SVC_F_HASHED;
319 /* increase its refcnt because it is referenced by the svc table */
320 atomic_inc(&svc->refcnt);
321 return 1;
322}
323
324
325/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100326 * Unhashes a service from svc_table / svc_fwm_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327 * Should be called with locked tables.
328 */
329static int ip_vs_svc_unhash(struct ip_vs_service *svc)
330{
331 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000332 pr_err("%s(): request for unhash flagged, called from %pF\n",
333 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334 return 0;
335 }
336
337 if (svc->fwmark == 0) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100338 /* Remove it from the svc_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 list_del(&svc->s_list);
340 } else {
Hans Schillstromfc723252011-01-03 14:44:43 +0100341 /* Remove it from the svc_fwm_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342 list_del(&svc->f_list);
343 }
344
345 svc->flags &= ~IP_VS_SVC_F_HASHED;
346 atomic_dec(&svc->refcnt);
347 return 1;
348}
349
350
351/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100352 * Get service by {netns, proto,addr,port} in the service table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353 */
Julius Volzb18610d2008-09-02 15:55:37 +0200354static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100355__ip_vs_service_find(struct net *net, int af, __u16 protocol,
356 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357{
358 unsigned hash;
359 struct ip_vs_service *svc;
360
361 /* Check for "full" addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100362 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363
364 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
Julius Volzb18610d2008-09-02 15:55:37 +0200365 if ((svc->af == af)
366 && ip_vs_addr_equal(af, &svc->addr, vaddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367 && (svc->port == vport)
Hans Schillstromfc723252011-01-03 14:44:43 +0100368 && (svc->protocol == protocol)
369 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371 return svc;
372 }
373 }
374
375 return NULL;
376}
377
378
379/*
380 * Get service by {fwmark} in the service table.
381 */
Julius Volzb18610d2008-09-02 15:55:37 +0200382static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100383__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384{
385 unsigned hash;
386 struct ip_vs_service *svc;
387
388 /* Check for fwmark addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100389 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390
391 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100392 if (svc->fwmark == fwmark && svc->af == af
393 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 return svc;
396 }
397 }
398
399 return NULL;
400}
401
402struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100403ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
Julius Volz3c2e0502008-09-02 15:55:38 +0200404 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405{
406 struct ip_vs_service *svc;
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100407 struct netns_ipvs *ipvs = net_ipvs(net);
Julius Volz3c2e0502008-09-02 15:55:38 +0200408
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409 read_lock(&__ip_vs_svc_lock);
410
411 /*
412 * Check the table hashed by fwmark first
413 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100414 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
415 if (fwmark && svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 goto out;
417
418 /*
419 * Check the table hashed by <protocol,addr,port>
420 * for "full" addressed entries
421 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100422 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423
424 if (svc == NULL
425 && protocol == IPPROTO_TCP
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100426 && atomic_read(&ipvs->ftpsvc_counter)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
428 /*
429 * Check if ftp service entry exists, the packet
430 * might belong to FTP data connections.
431 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100432 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 }
434
435 if (svc == NULL
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100436 && atomic_read(&ipvs->nullsvc_counter)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437 /*
438 * Check if the catch-all port (port zero) exists
439 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100440 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 }
442
443 out:
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200444 if (svc)
445 atomic_inc(&svc->usecnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 read_unlock(&__ip_vs_svc_lock);
447
Julius Volz3c2e0502008-09-02 15:55:38 +0200448 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
449 fwmark, ip_vs_proto_name(protocol),
450 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
451 svc ? "hit" : "not hit");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452
453 return svc;
454}
455
456
457static inline void
458__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
459{
460 atomic_inc(&svc->refcnt);
461 dest->svc = svc;
462}
463
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200464static void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465__ip_vs_unbind_svc(struct ip_vs_dest *dest)
466{
467 struct ip_vs_service *svc = dest->svc;
468
469 dest->svc = NULL;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200470 if (atomic_dec_and_test(&svc->refcnt)) {
471 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
472 svc->fwmark,
473 IP_VS_DBG_ADDR(svc->af, &svc->addr),
474 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +0100475 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200477 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478}
479
480
481/*
482 * Returns hash value for real service
483 */
Julius Volz7937df12008-09-02 15:55:48 +0200484static inline unsigned ip_vs_rs_hashkey(int af,
485 const union nf_inet_addr *addr,
486 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487{
488 register unsigned porth = ntohs(port);
Julius Volz7937df12008-09-02 15:55:48 +0200489 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490
Julius Volz7937df12008-09-02 15:55:48 +0200491#ifdef CONFIG_IP_VS_IPV6
492 if (af == AF_INET6)
493 addr_fold = addr->ip6[0]^addr->ip6[1]^
494 addr->ip6[2]^addr->ip6[3];
495#endif
496
497 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 & IP_VS_RTAB_MASK;
499}
500
501/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100502 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503 * should be called with locked tables.
504 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100505static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506{
507 unsigned hash;
508
509 if (!list_empty(&dest->d_list)) {
510 return 0;
511 }
512
513 /*
514 * Hash by proto,addr,port,
515 * which are the parameters of the real service.
516 */
Julius Volz7937df12008-09-02 15:55:48 +0200517 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
518
Hans Schillstromfc723252011-01-03 14:44:43 +0100519 list_add(&dest->d_list, &ipvs->rs_table[hash]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520
521 return 1;
522}
523
524/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100525 * UNhashes ip_vs_dest from rs_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526 * should be called with locked tables.
527 */
528static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
529{
530 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100531 * Remove it from the rs_table table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 */
533 if (!list_empty(&dest->d_list)) {
534 list_del(&dest->d_list);
535 INIT_LIST_HEAD(&dest->d_list);
536 }
537
538 return 1;
539}
540
541/*
542 * Lookup real service by <proto,addr,port> in the real service table.
543 */
544struct ip_vs_dest *
Hans Schillstromfc723252011-01-03 14:44:43 +0100545ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
Julius Volz7937df12008-09-02 15:55:48 +0200546 const union nf_inet_addr *daddr,
547 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548{
Hans Schillstromfc723252011-01-03 14:44:43 +0100549 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550 unsigned hash;
551 struct ip_vs_dest *dest;
552
553 /*
554 * Check for "full" addressed entries
555 * Return the first found entry
556 */
Julius Volz7937df12008-09-02 15:55:48 +0200557 hash = ip_vs_rs_hashkey(af, daddr, dport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558
Hans Schillstroma0840e22011-01-03 14:44:58 +0100559 read_lock(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100560 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200561 if ((dest->af == af)
562 && ip_vs_addr_equal(af, &dest->addr, daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563 && (dest->port == dport)
564 && ((dest->protocol == protocol) ||
565 dest->vfwmark)) {
566 /* HIT */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100567 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568 return dest;
569 }
570 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100571 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572
573 return NULL;
574}
575
576/*
577 * Lookup destination by {addr,port} in the given service
578 */
579static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200580ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
581 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582{
583 struct ip_vs_dest *dest;
584
585 /*
586 * Find the destination for the given service
587 */
588 list_for_each_entry(dest, &svc->destinations, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200589 if ((dest->af == svc->af)
590 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
591 && (dest->port == dport)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592 /* HIT */
593 return dest;
594 }
595 }
596
597 return NULL;
598}
599
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800600/*
601 * Find destination by {daddr,dport,vaddr,protocol}
602 * Cretaed to be used in ip_vs_process_message() in
603 * the backup synchronization daemon. It finds the
604 * destination to be bound to the received connection
605 * on the backup.
606 *
607 * ip_vs_lookup_real_service() looked promissing, but
608 * seems not working as expected.
609 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100610struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
611 const union nf_inet_addr *daddr,
Julius Volz7937df12008-09-02 15:55:48 +0200612 __be16 dport,
613 const union nf_inet_addr *vaddr,
Hans Schillstrom0e051e62010-11-19 14:25:07 +0100614 __be16 vport, __u16 protocol, __u32 fwmark)
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800615{
616 struct ip_vs_dest *dest;
617 struct ip_vs_service *svc;
618
Hans Schillstromfc723252011-01-03 14:44:43 +0100619 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800620 if (!svc)
621 return NULL;
622 dest = ip_vs_lookup_dest(svc, daddr, dport);
623 if (dest)
624 atomic_inc(&dest->refcnt);
625 ip_vs_service_put(svc);
626 return dest;
627}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628
629/*
630 * Lookup dest by {svc,addr,port} in the destination trash.
631 * The destination trash is used to hold the destinations that are removed
632 * from the service table but are still referenced by some conn entries.
633 * The reason to add the destination trash is when the dest is temporary
634 * down (either by administrator or by monitor program), the dest can be
635 * picked back from the trash, the remaining connections to the dest can
636 * continue, and the counting information of the dest is also useful for
637 * scheduling.
638 */
639static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200640ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
641 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642{
643 struct ip_vs_dest *dest, *nxt;
Hans Schillstromf2431e62011-01-03 14:45:00 +0100644 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645
646 /*
647 * Find the destination in trash
648 */
Hans Schillstromf2431e62011-01-03 14:45:00 +0100649 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200650 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
651 "dest->refcnt=%d\n",
652 dest->vfwmark,
653 IP_VS_DBG_ADDR(svc->af, &dest->addr),
654 ntohs(dest->port),
655 atomic_read(&dest->refcnt));
656 if (dest->af == svc->af &&
657 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658 dest->port == dport &&
659 dest->vfwmark == svc->fwmark &&
660 dest->protocol == svc->protocol &&
661 (svc->fwmark ||
Julius Volz7937df12008-09-02 15:55:48 +0200662 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663 dest->vport == svc->port))) {
664 /* HIT */
665 return dest;
666 }
667
668 /*
669 * Try to purge the destination from trash if not referenced
670 */
671 if (atomic_read(&dest->refcnt) == 1) {
Julius Volz7937df12008-09-02 15:55:48 +0200672 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
673 "from trash\n",
674 dest->vfwmark,
675 IP_VS_DBG_ADDR(svc->af, &dest->addr),
676 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677 list_del(&dest->n_list);
678 ip_vs_dst_reset(dest);
679 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100680 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 kfree(dest);
682 }
683 }
684
685 return NULL;
686}
687
688
689/*
690 * Clean up all the destinations in the trash
691 * Called by the ip_vs_control_cleanup()
692 *
693 * When the ip_vs_control_clearup is activated by ipvs module exit,
694 * the service tables must have been flushed and all the connections
695 * are expired, and the refcnt of each destination in the trash must
696 * be 1, so we simply release them here.
697 */
Hans Schillstromf2431e62011-01-03 14:45:00 +0100698static void ip_vs_trash_cleanup(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699{
700 struct ip_vs_dest *dest, *nxt;
Hans Schillstromf2431e62011-01-03 14:45:00 +0100701 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702
Hans Schillstromf2431e62011-01-03 14:45:00 +0100703 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 list_del(&dest->n_list);
705 ip_vs_dst_reset(dest);
706 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100707 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708 kfree(dest);
709 }
710}
711
712
713static void
714ip_vs_zero_stats(struct ip_vs_stats *stats)
715{
716 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000717
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200718 memset(&stats->ustats, 0, sizeof(stats->ustats));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000720
Sven Wegener3a14a3132008-08-10 18:24:41 +0000721 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722}
723
724/*
725 * Update a destination in the given service
726 */
727static void
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200728__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
729 struct ip_vs_dest_user_kern *udest, int add)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730{
Hans Schillstromfc723252011-01-03 14:44:43 +0100731 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 int conn_flags;
733
734 /* set the weight and the flags */
735 atomic_set(&dest->weight, udest->weight);
Julian Anastasov35757922010-09-17 14:18:16 +0200736 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
737 conn_flags |= IP_VS_CONN_F_INACTIVE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
Julian Anastasov35757922010-09-17 14:18:16 +0200740 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700741 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
742 } else {
743 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100744 * Put the real service in rs_table if not present.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745 * For now only for NAT!
746 */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100747 write_lock_bh(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100748 ip_vs_rs_hash(ipvs, dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100749 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 }
751 atomic_set(&dest->conn_flags, conn_flags);
752
753 /* bind the service */
754 if (!dest->svc) {
755 __ip_vs_bind_svc(dest, svc);
756 } else {
757 if (dest->svc != svc) {
758 __ip_vs_unbind_svc(dest);
759 ip_vs_zero_stats(&dest->stats);
760 __ip_vs_bind_svc(dest, svc);
761 }
762 }
763
764 /* set the dest status flags */
765 dest->flags |= IP_VS_DEST_F_AVAILABLE;
766
767 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
768 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
769 dest->u_threshold = udest->u_threshold;
770 dest->l_threshold = udest->l_threshold;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200771
Julian Anastasovfc604762010-10-17 16:38:15 +0300772 spin_lock(&dest->dst_lock);
773 ip_vs_dst_reset(dest);
774 spin_unlock(&dest->dst_lock);
775
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200776 if (add)
Hans Schillstrom29c20262011-01-03 14:44:54 +0100777 ip_vs_new_estimator(svc->net, &dest->stats);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200778
779 write_lock_bh(&__ip_vs_svc_lock);
780
781 /* Wait until all other svc users go away */
782 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
783
784 if (add) {
785 list_add(&dest->n_list, &svc->destinations);
786 svc->num_dests++;
787 }
788
789 /* call the update_service, because server weight may be changed */
790 if (svc->scheduler->update_service)
791 svc->scheduler->update_service(svc);
792
793 write_unlock_bh(&__ip_vs_svc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794}
795
796
797/*
798 * Create a destination for the given service
799 */
800static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200801ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802 struct ip_vs_dest **dest_p)
803{
804 struct ip_vs_dest *dest;
805 unsigned atype;
806
807 EnterFunction(2);
808
Vince Busam09571c72008-09-02 15:55:52 +0200809#ifdef CONFIG_IP_VS_IPV6
810 if (svc->af == AF_INET6) {
811 atype = ipv6_addr_type(&udest->addr.in6);
Sven Wegener3bfb92f2008-09-05 16:53:49 +0200812 if ((!(atype & IPV6_ADDR_UNICAST) ||
813 atype & IPV6_ADDR_LINKLOCAL) &&
Hans Schillstrom4a984802011-01-03 14:45:02 +0100814 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
Vince Busam09571c72008-09-02 15:55:52 +0200815 return -EINVAL;
816 } else
817#endif
818 {
Hans Schillstrom4a984802011-01-03 14:45:02 +0100819 atype = inet_addr_type(svc->net, udest->addr.ip);
Vince Busam09571c72008-09-02 15:55:52 +0200820 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
821 return -EINVAL;
822 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823
Simon Hormandee06e42010-08-26 02:54:31 +0000824 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000826 pr_err("%s(): no memory.\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 return -ENOMEM;
828 }
Hans Schillstromb17fc992011-01-03 14:44:56 +0100829 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
830 if (!dest->stats.cpustats) {
831 pr_err("%s() alloc_percpu failed\n", __func__);
832 goto err_alloc;
833 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834
Julius Volzc860c6b2008-09-02 15:55:36 +0200835 dest->af = svc->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836 dest->protocol = svc->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +0200837 dest->vaddr = svc->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700838 dest->vport = svc->port;
839 dest->vfwmark = svc->fwmark;
Julius Volzc860c6b2008-09-02 15:55:36 +0200840 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841 dest->port = udest->port;
842
843 atomic_set(&dest->activeconns, 0);
844 atomic_set(&dest->inactconns, 0);
845 atomic_set(&dest->persistconns, 0);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200846 atomic_set(&dest->refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847
848 INIT_LIST_HEAD(&dest->d_list);
849 spin_lock_init(&dest->dst_lock);
850 spin_lock_init(&dest->stats.lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200851 __ip_vs_update_dest(svc, dest, udest, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700852
853 *dest_p = dest;
854
855 LeaveFunction(2);
856 return 0;
Hans Schillstromb17fc992011-01-03 14:44:56 +0100857
858err_alloc:
859 kfree(dest);
860 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861}
862
863
864/*
865 * Add a destination into an existing service
866 */
867static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200868ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869{
870 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200871 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700872 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873 int ret;
874
875 EnterFunction(2);
876
877 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000878 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879 return -ERANGE;
880 }
881
882 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000883 pr_err("%s(): lower threshold is higher than upper threshold\n",
884 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885 return -ERANGE;
886 }
887
Julius Volzc860c6b2008-09-02 15:55:36 +0200888 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
889
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890 /*
891 * Check if the dest already exists in the list
892 */
Julius Volz7937df12008-09-02 15:55:48 +0200893 dest = ip_vs_lookup_dest(svc, &daddr, dport);
894
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 if (dest != NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000896 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897 return -EEXIST;
898 }
899
900 /*
901 * Check if the dest already exists in the trash and
902 * is from the same service
903 */
Julius Volz7937df12008-09-02 15:55:48 +0200904 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
905
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906 if (dest != NULL) {
Julius Volzcfc78c52008-09-02 15:55:53 +0200907 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
908 "dest->refcnt=%d, service %u/%s:%u\n",
909 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
910 atomic_read(&dest->refcnt),
911 dest->vfwmark,
912 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
913 ntohs(dest->vport));
914
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 /*
916 * Get the destination from the trash
917 */
918 list_del(&dest->n_list);
919
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200920 __ip_vs_update_dest(svc, dest, udest, 1);
921 ret = 0;
922 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 /*
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200924 * Allocate and initialize the dest structure
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200926 ret = ip_vs_new_dest(svc, udest, &dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700928 LeaveFunction(2);
929
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200930 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931}
932
933
934/*
935 * Edit a destination in the given service
936 */
937static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200938ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939{
940 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200941 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700942 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943
944 EnterFunction(2);
945
946 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000947 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948 return -ERANGE;
949 }
950
951 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000952 pr_err("%s(): lower threshold is higher than upper threshold\n",
953 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954 return -ERANGE;
955 }
956
Julius Volzc860c6b2008-09-02 15:55:36 +0200957 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
958
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959 /*
960 * Lookup the destination list
961 */
Julius Volz7937df12008-09-02 15:55:48 +0200962 dest = ip_vs_lookup_dest(svc, &daddr, dport);
963
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000965 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 return -ENOENT;
967 }
968
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200969 __ip_vs_update_dest(svc, dest, udest, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 LeaveFunction(2);
971
972 return 0;
973}
974
975
976/*
977 * Delete a destination (must be already unlinked from the service)
978 */
Hans Schillstrom29c20262011-01-03 14:44:54 +0100979static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980{
Hans Schillstroma0840e22011-01-03 14:44:58 +0100981 struct netns_ipvs *ipvs = net_ipvs(net);
982
Hans Schillstrom29c20262011-01-03 14:44:54 +0100983 ip_vs_kill_estimator(net, &dest->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984
985 /*
986 * Remove it from the d-linked list with the real services.
987 */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100988 write_lock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 ip_vs_rs_unhash(dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100990 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991
992 /*
993 * Decrease the refcnt of the dest, and free the dest
994 * if nobody refers to it (refcnt=0). Otherwise, throw
995 * the destination into the trash.
996 */
997 if (atomic_dec_and_test(&dest->refcnt)) {
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200998 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
999 dest->vfwmark,
1000 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1001 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002 ip_vs_dst_reset(dest);
1003 /* simply decrease svc->refcnt here, let the caller check
1004 and release the service if nobody refers to it.
1005 Only user context can release destination and service,
1006 and only one user context can update virtual service at a
1007 time, so the operation here is OK */
1008 atomic_dec(&dest->svc->refcnt);
Hans Schillstromb17fc992011-01-03 14:44:56 +01001009 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010 kfree(dest);
1011 } else {
Julius Volzcfc78c52008-09-02 15:55:53 +02001012 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1013 "dest->refcnt=%d\n",
1014 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1015 ntohs(dest->port),
1016 atomic_read(&dest->refcnt));
Hans Schillstromf2431e62011-01-03 14:45:00 +01001017 list_add(&dest->n_list, &ipvs->dest_trash);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018 atomic_inc(&dest->refcnt);
1019 }
1020}
1021
1022
1023/*
1024 * Unlink a destination from the given service
1025 */
1026static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1027 struct ip_vs_dest *dest,
1028 int svcupd)
1029{
1030 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1031
1032 /*
1033 * Remove it from the d-linked destination list.
1034 */
1035 list_del(&dest->n_list);
1036 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001037
1038 /*
1039 * Call the update_service function of its scheduler
1040 */
1041 if (svcupd && svc->scheduler->update_service)
1042 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043}
1044
1045
1046/*
1047 * Delete a destination server in the given service
1048 */
1049static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001050ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051{
1052 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -07001053 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054
1055 EnterFunction(2);
1056
Julius Volz7937df12008-09-02 15:55:48 +02001057 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
Julius Volzc860c6b2008-09-02 15:55:36 +02001058
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001060 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 return -ENOENT;
1062 }
1063
1064 write_lock_bh(&__ip_vs_svc_lock);
1065
1066 /*
1067 * Wait until all other svc users go away.
1068 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001069 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070
1071 /*
1072 * Unlink dest from the service
1073 */
1074 __ip_vs_unlink_dest(svc, dest, 1);
1075
1076 write_unlock_bh(&__ip_vs_svc_lock);
1077
1078 /*
1079 * Delete the destination
1080 */
Hans Schillstroma0840e22011-01-03 14:44:58 +01001081 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082
1083 LeaveFunction(2);
1084
1085 return 0;
1086}
1087
1088
1089/*
1090 * Add a service into the service hash table
1091 */
1092static int
Hans Schillstromfc723252011-01-03 14:44:43 +01001093ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
Julius Volzc860c6b2008-09-02 15:55:36 +02001094 struct ip_vs_service **svc_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095{
1096 int ret = 0;
1097 struct ip_vs_scheduler *sched = NULL;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001098 struct ip_vs_pe *pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099 struct ip_vs_service *svc = NULL;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001100 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101
1102 /* increase the module use count */
1103 ip_vs_use_count_inc();
1104
1105 /* Lookup the scheduler by 'u->sched_name' */
1106 sched = ip_vs_scheduler_get(u->sched_name);
1107 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001108 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109 ret = -ENOENT;
Simon Horman6e08bfb2010-08-22 21:37:52 +09001110 goto out_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001111 }
1112
Simon Horman0d1e71b2010-08-22 21:37:54 +09001113 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001114 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001115 if (pe == NULL) {
1116 pr_info("persistence engine module ip_vs_pe_%s "
1117 "not found\n", u->pe_name);
1118 ret = -ENOENT;
1119 goto out_err;
1120 }
1121 }
1122
Julius Volzf94fd042008-09-02 15:55:55 +02001123#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001124 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1125 ret = -EINVAL;
1126 goto out_err;
Julius Volzf94fd042008-09-02 15:55:55 +02001127 }
1128#endif
1129
Simon Hormandee06e42010-08-26 02:54:31 +00001130 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 if (svc == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001132 IP_VS_DBG(1, "%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 ret = -ENOMEM;
1134 goto out_err;
1135 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001136 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1137 if (!svc->stats.cpustats) {
1138 pr_err("%s() alloc_percpu failed\n", __func__);
1139 goto out_err;
1140 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141
1142 /* I'm the first user of the service */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001143 atomic_set(&svc->usecnt, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 atomic_set(&svc->refcnt, 0);
1145
Julius Volzc860c6b2008-09-02 15:55:36 +02001146 svc->af = u->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 svc->protocol = u->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +02001148 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 svc->port = u->port;
1150 svc->fwmark = u->fwmark;
1151 svc->flags = u->flags;
1152 svc->timeout = u->timeout * HZ;
1153 svc->netmask = u->netmask;
Hans Schillstromfc723252011-01-03 14:44:43 +01001154 svc->net = net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155
1156 INIT_LIST_HEAD(&svc->destinations);
1157 rwlock_init(&svc->sched_lock);
1158 spin_lock_init(&svc->stats.lock);
1159
1160 /* Bind the scheduler */
1161 ret = ip_vs_bind_scheduler(svc, sched);
1162 if (ret)
1163 goto out_err;
1164 sched = NULL;
1165
Simon Horman0d1e71b2010-08-22 21:37:54 +09001166 /* Bind the ct retriever */
1167 ip_vs_bind_pe(svc, pe);
1168 pe = NULL;
1169
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170 /* Update the virtual service counters */
1171 if (svc->port == FTPPORT)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001172 atomic_inc(&ipvs->ftpsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173 else if (svc->port == 0)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001174 atomic_inc(&ipvs->nullsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175
Hans Schillstrom29c20262011-01-03 14:44:54 +01001176 ip_vs_new_estimator(net, &svc->stats);
Julius Volzf94fd042008-09-02 15:55:55 +02001177
1178 /* Count only IPv4 services for old get/setsockopt interface */
1179 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001180 ipvs->num_services++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181
1182 /* Hash the service into the service table */
1183 write_lock_bh(&__ip_vs_svc_lock);
1184 ip_vs_svc_hash(svc);
1185 write_unlock_bh(&__ip_vs_svc_lock);
1186
1187 *svc_p = svc;
1188 return 0;
1189
Hans Schillstromb17fc992011-01-03 14:44:56 +01001190
Simon Horman6e08bfb2010-08-22 21:37:52 +09001191 out_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192 if (svc != NULL) {
Simon Horman2fabf352010-08-22 21:37:52 +09001193 ip_vs_unbind_scheduler(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 if (svc->inc) {
1195 local_bh_disable();
1196 ip_vs_app_inc_put(svc->inc);
1197 local_bh_enable();
1198 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001199 if (svc->stats.cpustats)
1200 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 kfree(svc);
1202 }
1203 ip_vs_scheduler_put(sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001204 ip_vs_pe_put(pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206 /* decrease the module use count */
1207 ip_vs_use_count_dec();
1208
1209 return ret;
1210}
1211
1212
1213/*
1214 * Edit a service and bind it with a new scheduler
1215 */
1216static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001217ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218{
1219 struct ip_vs_scheduler *sched, *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001220 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221 int ret = 0;
1222
1223 /*
1224 * Lookup the scheduler, by 'u->sched_name'
1225 */
1226 sched = ip_vs_scheduler_get(u->sched_name);
1227 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001228 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229 return -ENOENT;
1230 }
1231 old_sched = sched;
1232
Simon Horman0d1e71b2010-08-22 21:37:54 +09001233 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001234 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001235 if (pe == NULL) {
1236 pr_info("persistence engine module ip_vs_pe_%s "
1237 "not found\n", u->pe_name);
1238 ret = -ENOENT;
1239 goto out;
1240 }
1241 old_pe = pe;
1242 }
1243
Julius Volzf94fd042008-09-02 15:55:55 +02001244#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001245 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1246 ret = -EINVAL;
1247 goto out;
Julius Volzf94fd042008-09-02 15:55:55 +02001248 }
1249#endif
1250
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251 write_lock_bh(&__ip_vs_svc_lock);
1252
1253 /*
1254 * Wait until all other svc users go away.
1255 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001256 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257
1258 /*
1259 * Set the flags and timeout value
1260 */
1261 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1262 svc->timeout = u->timeout * HZ;
1263 svc->netmask = u->netmask;
1264
1265 old_sched = svc->scheduler;
1266 if (sched != old_sched) {
1267 /*
1268 * Unbind the old scheduler
1269 */
1270 if ((ret = ip_vs_unbind_scheduler(svc))) {
1271 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001272 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273 }
1274
1275 /*
1276 * Bind the new scheduler
1277 */
1278 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1279 /*
1280 * If ip_vs_bind_scheduler fails, restore the old
1281 * scheduler.
1282 * The main reason of failure is out of memory.
1283 *
1284 * The question is if the old scheduler can be
1285 * restored all the time. TODO: if it cannot be
1286 * restored some time, we must delete the service,
1287 * otherwise the system may crash.
1288 */
1289 ip_vs_bind_scheduler(svc, old_sched);
1290 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001291 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292 }
1293 }
1294
Simon Horman0d1e71b2010-08-22 21:37:54 +09001295 old_pe = svc->pe;
1296 if (pe != old_pe) {
1297 ip_vs_unbind_pe(svc);
1298 ip_vs_bind_pe(svc, pe);
1299 }
1300
Simon Horman9e691ed2008-09-17 10:10:41 +10001301 out_unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302 write_unlock_bh(&__ip_vs_svc_lock);
Simon Horman9e691ed2008-09-17 10:10:41 +10001303 out:
Simon Horman6e08bfb2010-08-22 21:37:52 +09001304 ip_vs_scheduler_put(old_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001305 ip_vs_pe_put(old_pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 return ret;
1307}
1308
1309
1310/*
1311 * Delete a service from the service list
1312 * - The service must be unlinked, unlocked and not referenced!
1313 * - We are called under _bh lock
1314 */
1315static void __ip_vs_del_service(struct ip_vs_service *svc)
1316{
1317 struct ip_vs_dest *dest, *nxt;
1318 struct ip_vs_scheduler *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001319 struct ip_vs_pe *old_pe;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001320 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001321
1322 pr_info("%s: enter\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001323
Julius Volzf94fd042008-09-02 15:55:55 +02001324 /* Count only IPv4 services for old get/setsockopt interface */
1325 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001326 ipvs->num_services--;
Julius Volzf94fd042008-09-02 15:55:55 +02001327
Hans Schillstrom29c20262011-01-03 14:44:54 +01001328 ip_vs_kill_estimator(svc->net, &svc->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329
1330 /* Unbind scheduler */
1331 old_sched = svc->scheduler;
1332 ip_vs_unbind_scheduler(svc);
Simon Horman6e08bfb2010-08-22 21:37:52 +09001333 ip_vs_scheduler_put(old_sched);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334
Simon Horman0d1e71b2010-08-22 21:37:54 +09001335 /* Unbind persistence engine */
1336 old_pe = svc->pe;
1337 ip_vs_unbind_pe(svc);
1338 ip_vs_pe_put(old_pe);
1339
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340 /* Unbind app inc */
1341 if (svc->inc) {
1342 ip_vs_app_inc_put(svc->inc);
1343 svc->inc = NULL;
1344 }
1345
1346 /*
1347 * Unlink the whole destination list
1348 */
1349 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1350 __ip_vs_unlink_dest(svc, dest, 0);
Hans Schillstrom29c20262011-01-03 14:44:54 +01001351 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352 }
1353
1354 /*
1355 * Update the virtual service counters
1356 */
1357 if (svc->port == FTPPORT)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001358 atomic_dec(&ipvs->ftpsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 else if (svc->port == 0)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001360 atomic_dec(&ipvs->nullsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001361
1362 /*
1363 * Free the service if nobody refers to it
1364 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001365 if (atomic_read(&svc->refcnt) == 0) {
1366 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1367 svc->fwmark,
1368 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1369 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +01001370 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001372 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373
1374 /* decrease the module use count */
1375 ip_vs_use_count_dec();
1376}
1377
1378/*
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001379 * Unlink a service from list and try to delete it if its refcnt reached 0
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001381static void ip_vs_unlink_service(struct ip_vs_service *svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383 /*
1384 * Unhash it from the service table
1385 */
1386 write_lock_bh(&__ip_vs_svc_lock);
1387
1388 ip_vs_svc_unhash(svc);
1389
1390 /*
1391 * Wait until all the svc users go away.
1392 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001393 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394
1395 __ip_vs_del_service(svc);
1396
1397 write_unlock_bh(&__ip_vs_svc_lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001398}
1399
1400/*
1401 * Delete a service from the service list
1402 */
1403static int ip_vs_del_service(struct ip_vs_service *svc)
1404{
1405 if (svc == NULL)
1406 return -EEXIST;
1407 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408
1409 return 0;
1410}
1411
1412
1413/*
1414 * Flush all the virtual services
1415 */
Hans Schillstromfc723252011-01-03 14:44:43 +01001416static int ip_vs_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417{
1418 int idx;
1419 struct ip_vs_service *svc, *nxt;
1420
1421 /*
Hans Schillstromfc723252011-01-03 14:44:43 +01001422 * Flush the service table hashed by <netns,protocol,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423 */
1424 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001425 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1426 s_list) {
1427 if (net_eq(svc->net, net))
1428 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001429 }
1430 }
1431
1432 /*
1433 * Flush the service table hashed by fwmark
1434 */
1435 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1436 list_for_each_entry_safe(svc, nxt,
1437 &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001438 if (net_eq(svc->net, net))
1439 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001440 }
1441 }
1442
1443 return 0;
1444}
1445
1446
1447/*
1448 * Zero counters in a service or all services
1449 */
1450static int ip_vs_zero_service(struct ip_vs_service *svc)
1451{
1452 struct ip_vs_dest *dest;
1453
1454 write_lock_bh(&__ip_vs_svc_lock);
1455 list_for_each_entry(dest, &svc->destinations, n_list) {
1456 ip_vs_zero_stats(&dest->stats);
1457 }
1458 ip_vs_zero_stats(&svc->stats);
1459 write_unlock_bh(&__ip_vs_svc_lock);
1460 return 0;
1461}
1462
Hans Schillstromfc723252011-01-03 14:44:43 +01001463static int ip_vs_zero_all(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001464{
1465 int idx;
1466 struct ip_vs_service *svc;
1467
1468 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1469 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001470 if (net_eq(svc->net, net))
1471 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472 }
1473 }
1474
1475 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1476 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001477 if (net_eq(svc->net, net))
1478 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 }
1480 }
1481
Hans Schillstromb17fc992011-01-03 14:44:56 +01001482 ip_vs_zero_stats(net_ipvs(net)->tot_stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483 return 0;
1484}
1485
1486
1487static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001488proc_do_defense_mode(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489 void __user *buffer, size_t *lenp, loff_t *ppos)
1490{
Hans Schillstrom93304192011-01-03 14:44:51 +01001491 struct net *net = current->nsproxy->net_ns;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492 int *valp = table->data;
1493 int val = *valp;
1494 int rc;
1495
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001496 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001497 if (write && (*valp != val)) {
1498 if ((*valp < 0) || (*valp > 3)) {
1499 /* Restore the correct value */
1500 *valp = val;
1501 } else {
Hans Schillstrom93304192011-01-03 14:44:51 +01001502 update_defense_level(net_ipvs(net));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001503 }
1504 }
1505 return rc;
1506}
1507
1508
1509static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001510proc_do_sync_threshold(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001511 void __user *buffer, size_t *lenp, loff_t *ppos)
1512{
1513 int *valp = table->data;
1514 int val[2];
1515 int rc;
1516
1517 /* backup the value first */
1518 memcpy(val, valp, sizeof(val));
1519
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001520 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1522 /* Restore the correct value */
1523 memcpy(valp, val, sizeof(val));
1524 }
1525 return rc;
1526}
1527
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001528static int
1529proc_do_sync_mode(ctl_table *table, int write,
1530 void __user *buffer, size_t *lenp, loff_t *ppos)
1531{
1532 int *valp = table->data;
1533 int val = *valp;
1534 int rc;
1535
1536 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1537 if (write && (*valp != val)) {
1538 if ((*valp < 0) || (*valp > 1)) {
1539 /* Restore the correct value */
1540 *valp = val;
1541 } else {
Hans Schillstromf1313152011-01-03 14:44:55 +01001542 struct net *net = current->nsproxy->net_ns;
1543 ip_vs_sync_switch_mode(net, val);
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001544 }
1545 }
1546 return rc;
1547}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001548
1549/*
1550 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001551 * Do not change order or insert new entries without
1552 * align with netns init in __ip_vs_control_init()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553 */
1554
1555static struct ctl_table vs_vars[] = {
1556 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001557 .procname = "amemthresh",
Hans Schillstroma0840e22011-01-03 14:44:58 +01001558 .maxlen = sizeof(int),
1559 .mode = 0644,
1560 .proc_handler = proc_dointvec,
1561 },
1562 {
1563 .procname = "am_droprate",
1564 .maxlen = sizeof(int),
1565 .mode = 0644,
1566 .proc_handler = proc_dointvec,
1567 },
1568 {
1569 .procname = "drop_entry",
1570 .maxlen = sizeof(int),
1571 .mode = 0644,
1572 .proc_handler = proc_do_defense_mode,
1573 },
1574 {
1575 .procname = "drop_packet",
1576 .maxlen = sizeof(int),
1577 .mode = 0644,
1578 .proc_handler = proc_do_defense_mode,
1579 },
1580#ifdef CONFIG_IP_VS_NFCT
1581 {
1582 .procname = "conntrack",
1583 .maxlen = sizeof(int),
1584 .mode = 0644,
1585 .proc_handler = &proc_dointvec,
1586 },
1587#endif
1588 {
1589 .procname = "secure_tcp",
1590 .maxlen = sizeof(int),
1591 .mode = 0644,
1592 .proc_handler = proc_do_defense_mode,
1593 },
1594 {
1595 .procname = "snat_reroute",
1596 .maxlen = sizeof(int),
1597 .mode = 0644,
1598 .proc_handler = &proc_dointvec,
1599 },
1600 {
1601 .procname = "sync_version",
1602 .maxlen = sizeof(int),
1603 .mode = 0644,
1604 .proc_handler = &proc_do_sync_mode,
1605 },
1606 {
1607 .procname = "cache_bypass",
1608 .maxlen = sizeof(int),
1609 .mode = 0644,
1610 .proc_handler = proc_dointvec,
1611 },
1612 {
1613 .procname = "expire_nodest_conn",
1614 .maxlen = sizeof(int),
1615 .mode = 0644,
1616 .proc_handler = proc_dointvec,
1617 },
1618 {
1619 .procname = "expire_quiescent_template",
1620 .maxlen = sizeof(int),
1621 .mode = 0644,
1622 .proc_handler = proc_dointvec,
1623 },
1624 {
1625 .procname = "sync_threshold",
1626 .maxlen =
1627 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1628 .mode = 0644,
1629 .proc_handler = proc_do_sync_threshold,
1630 },
1631 {
1632 .procname = "nat_icmp_send",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633 .maxlen = sizeof(int),
1634 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001635 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 },
1637#ifdef CONFIG_IP_VS_DEBUG
1638 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001639 .procname = "debug_level",
1640 .data = &sysctl_ip_vs_debug_level,
1641 .maxlen = sizeof(int),
1642 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001643 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644 },
1645#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001646#if 0
1647 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648 .procname = "timeout_established",
1649 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1650 .maxlen = sizeof(int),
1651 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001652 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653 },
1654 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655 .procname = "timeout_synsent",
1656 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1657 .maxlen = sizeof(int),
1658 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001659 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660 },
1661 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662 .procname = "timeout_synrecv",
1663 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1664 .maxlen = sizeof(int),
1665 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001666 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667 },
1668 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 .procname = "timeout_finwait",
1670 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1671 .maxlen = sizeof(int),
1672 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001673 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674 },
1675 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 .procname = "timeout_timewait",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1678 .maxlen = sizeof(int),
1679 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001680 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 },
1682 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 .procname = "timeout_close",
1684 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1685 .maxlen = sizeof(int),
1686 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001687 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 },
1689 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690 .procname = "timeout_closewait",
1691 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1692 .maxlen = sizeof(int),
1693 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001694 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 },
1696 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 .procname = "timeout_lastack",
1698 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1699 .maxlen = sizeof(int),
1700 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001701 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 },
1703 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704 .procname = "timeout_listen",
1705 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1706 .maxlen = sizeof(int),
1707 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001708 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 },
1710 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 .procname = "timeout_synack",
1712 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1713 .maxlen = sizeof(int),
1714 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001715 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716 },
1717 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718 .procname = "timeout_udp",
1719 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1720 .maxlen = sizeof(int),
1721 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001722 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 },
1724 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 .procname = "timeout_icmp",
1726 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1727 .maxlen = sizeof(int),
1728 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001729 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730 },
1731#endif
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001732 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733};
1734
Sven Wegener5587da52008-08-10 18:24:40 +00001735const struct ctl_path net_vs_ctl_path[] = {
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001736 { .procname = "net", },
1737 { .procname = "ipv4", },
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001738 { .procname = "vs", },
1739 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001741EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743#ifdef CONFIG_PROC_FS
1744
1745struct ip_vs_iter {
Hans Schillstromfc723252011-01-03 14:44:43 +01001746 struct seq_net_private p; /* Do not move this, netns depends upon it*/
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747 struct list_head *table;
1748 int bucket;
1749};
1750
1751/*
1752 * Write the contents of the VS rule table to a PROCfs file.
1753 * (It is kept just for backward compatibility)
1754 */
1755static inline const char *ip_vs_fwd_name(unsigned flags)
1756{
1757 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1758 case IP_VS_CONN_F_LOCALNODE:
1759 return "Local";
1760 case IP_VS_CONN_F_TUNNEL:
1761 return "Tunnel";
1762 case IP_VS_CONN_F_DROUTE:
1763 return "Route";
1764 default:
1765 return "Masq";
1766 }
1767}
1768
1769
1770/* Get the Nth entry in the two lists */
1771static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1772{
Hans Schillstromfc723252011-01-03 14:44:43 +01001773 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774 struct ip_vs_iter *iter = seq->private;
1775 int idx;
1776 struct ip_vs_service *svc;
1777
1778 /* look in hash by protocol */
1779 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1780 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001781 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782 iter->table = ip_vs_svc_table;
1783 iter->bucket = idx;
1784 return svc;
1785 }
1786 }
1787 }
1788
1789 /* keep looking in fwmark */
1790 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1791 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001792 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793 iter->table = ip_vs_svc_fwm_table;
1794 iter->bucket = idx;
1795 return svc;
1796 }
1797 }
1798 }
1799
1800 return NULL;
1801}
1802
1803static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
Simon Horman563e94f2008-09-17 10:10:42 +10001804__acquires(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805{
1806
1807 read_lock_bh(&__ip_vs_svc_lock);
1808 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1809}
1810
1811
1812static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1813{
1814 struct list_head *e;
1815 struct ip_vs_iter *iter;
1816 struct ip_vs_service *svc;
1817
1818 ++*pos;
1819 if (v == SEQ_START_TOKEN)
1820 return ip_vs_info_array(seq,0);
1821
1822 svc = v;
1823 iter = seq->private;
1824
1825 if (iter->table == ip_vs_svc_table) {
1826 /* next service in table hashed by protocol */
1827 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1828 return list_entry(e, struct ip_vs_service, s_list);
1829
1830
1831 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1832 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1833 s_list) {
1834 return svc;
1835 }
1836 }
1837
1838 iter->table = ip_vs_svc_fwm_table;
1839 iter->bucket = -1;
1840 goto scan_fwmark;
1841 }
1842
1843 /* next service in hashed by fwmark */
1844 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1845 return list_entry(e, struct ip_vs_service, f_list);
1846
1847 scan_fwmark:
1848 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1849 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1850 f_list)
1851 return svc;
1852 }
1853
1854 return NULL;
1855}
1856
1857static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
Simon Horman563e94f2008-09-17 10:10:42 +10001858__releases(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859{
1860 read_unlock_bh(&__ip_vs_svc_lock);
1861}
1862
1863
1864static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1865{
1866 if (v == SEQ_START_TOKEN) {
1867 seq_printf(seq,
1868 "IP Virtual Server version %d.%d.%d (size=%d)\n",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01001869 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870 seq_puts(seq,
1871 "Prot LocalAddress:Port Scheduler Flags\n");
1872 seq_puts(seq,
1873 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1874 } else {
1875 const struct ip_vs_service *svc = v;
1876 const struct ip_vs_iter *iter = seq->private;
1877 const struct ip_vs_dest *dest;
1878
Vince Busam667a5f12008-09-02 15:55:49 +02001879 if (iter->table == ip_vs_svc_table) {
1880#ifdef CONFIG_IP_VS_IPV6
1881 if (svc->af == AF_INET6)
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001882 seq_printf(seq, "%s [%pI6]:%04X %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001883 ip_vs_proto_name(svc->protocol),
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001884 &svc->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001885 ntohs(svc->port),
1886 svc->scheduler->name);
1887 else
1888#endif
Nick Chalk26ec0372010-06-22 08:07:01 +02001889 seq_printf(seq, "%s %08X:%04X %s %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001890 ip_vs_proto_name(svc->protocol),
1891 ntohl(svc->addr.ip),
1892 ntohs(svc->port),
Nick Chalk26ec0372010-06-22 08:07:01 +02001893 svc->scheduler->name,
1894 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001895 } else {
Nick Chalk26ec0372010-06-22 08:07:01 +02001896 seq_printf(seq, "FWM %08X %s %s",
1897 svc->fwmark, svc->scheduler->name,
1898 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001899 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900
1901 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1902 seq_printf(seq, "persistent %d %08X\n",
1903 svc->timeout,
1904 ntohl(svc->netmask));
1905 else
1906 seq_putc(seq, '\n');
1907
1908 list_for_each_entry(dest, &svc->destinations, n_list) {
Vince Busam667a5f12008-09-02 15:55:49 +02001909#ifdef CONFIG_IP_VS_IPV6
1910 if (dest->af == AF_INET6)
1911 seq_printf(seq,
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001912 " -> [%pI6]:%04X"
Vince Busam667a5f12008-09-02 15:55:49 +02001913 " %-7s %-6d %-10d %-10d\n",
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001914 &dest->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001915 ntohs(dest->port),
1916 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1917 atomic_read(&dest->weight),
1918 atomic_read(&dest->activeconns),
1919 atomic_read(&dest->inactconns));
1920 else
1921#endif
1922 seq_printf(seq,
1923 " -> %08X:%04X "
1924 "%-7s %-6d %-10d %-10d\n",
1925 ntohl(dest->addr.ip),
1926 ntohs(dest->port),
1927 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1928 atomic_read(&dest->weight),
1929 atomic_read(&dest->activeconns),
1930 atomic_read(&dest->inactconns));
1931
Linus Torvalds1da177e2005-04-16 15:20:36 -07001932 }
1933 }
1934 return 0;
1935}
1936
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001937static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001938 .start = ip_vs_info_seq_start,
1939 .next = ip_vs_info_seq_next,
1940 .stop = ip_vs_info_seq_stop,
1941 .show = ip_vs_info_seq_show,
1942};
1943
1944static int ip_vs_info_open(struct inode *inode, struct file *file)
1945{
Hans Schillstromfc723252011-01-03 14:44:43 +01001946 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001947 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948}
1949
Arjan van de Ven9a321442007-02-12 00:55:35 -08001950static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001951 .owner = THIS_MODULE,
1952 .open = ip_vs_info_open,
1953 .read = seq_read,
1954 .llseek = seq_lseek,
1955 .release = seq_release_private,
1956};
1957
1958#endif
1959
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960#ifdef CONFIG_PROC_FS
1961static int ip_vs_stats_show(struct seq_file *seq, void *v)
1962{
Hans Schillstromb17fc992011-01-03 14:44:56 +01001963 struct net *net = seq_file_single_net(seq);
1964 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965
1966/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1967 seq_puts(seq,
1968 " Total Incoming Outgoing Incoming Outgoing\n");
1969 seq_printf(seq,
1970 " Conns Packets Packets Bytes Bytes\n");
1971
Hans Schillstromb17fc992011-01-03 14:44:56 +01001972 spin_lock_bh(&tot_stats->lock);
1973 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
1974 tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
1975 (unsigned long long) tot_stats->ustats.inbytes,
1976 (unsigned long long) tot_stats->ustats.outbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001977
1978/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1979 seq_puts(seq,
1980 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1981 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
Hans Schillstromb17fc992011-01-03 14:44:56 +01001982 tot_stats->ustats.cps,
1983 tot_stats->ustats.inpps,
1984 tot_stats->ustats.outpps,
1985 tot_stats->ustats.inbps,
1986 tot_stats->ustats.outbps);
1987 spin_unlock_bh(&tot_stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988
1989 return 0;
1990}
1991
1992static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1993{
Hans Schillstromfc723252011-01-03 14:44:43 +01001994 return single_open_net(inode, file, ip_vs_stats_show);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001995}
1996
Arjan van de Ven9a321442007-02-12 00:55:35 -08001997static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998 .owner = THIS_MODULE,
1999 .open = ip_vs_stats_seq_open,
2000 .read = seq_read,
2001 .llseek = seq_lseek,
2002 .release = single_release,
2003};
2004
Hans Schillstromb17fc992011-01-03 14:44:56 +01002005static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2006{
2007 struct net *net = seq_file_single_net(seq);
2008 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
2009 int i;
2010
2011/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2012 seq_puts(seq,
2013 " Total Incoming Outgoing Incoming Outgoing\n");
2014 seq_printf(seq,
2015 "CPU Conns Packets Packets Bytes Bytes\n");
2016
2017 for_each_possible_cpu(i) {
2018 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
2019 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2020 i, u->ustats.conns, u->ustats.inpkts,
2021 u->ustats.outpkts, (__u64)u->ustats.inbytes,
2022 (__u64)u->ustats.outbytes);
2023 }
2024
2025 spin_lock_bh(&tot_stats->lock);
2026 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2027 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2028 tot_stats->ustats.outpkts,
2029 (unsigned long long) tot_stats->ustats.inbytes,
2030 (unsigned long long) tot_stats->ustats.outbytes);
2031
2032/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2033 seq_puts(seq,
2034 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2035 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2036 tot_stats->ustats.cps,
2037 tot_stats->ustats.inpps,
2038 tot_stats->ustats.outpps,
2039 tot_stats->ustats.inbps,
2040 tot_stats->ustats.outbps);
2041 spin_unlock_bh(&tot_stats->lock);
2042
2043 return 0;
2044}
2045
2046static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2047{
2048 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2049}
2050
2051static const struct file_operations ip_vs_stats_percpu_fops = {
2052 .owner = THIS_MODULE,
2053 .open = ip_vs_stats_percpu_seq_open,
2054 .read = seq_read,
2055 .llseek = seq_lseek,
2056 .release = single_release,
2057};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058#endif
2059
2060/*
2061 * Set timeout values for tcp tcpfin udp in the timeout_table.
2062 */
Hans Schillstrom93304192011-01-03 14:44:51 +01002063static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064{
Hans Schillstrom93304192011-01-03 14:44:51 +01002065 struct ip_vs_proto_data *pd;
2066
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2068 u->tcp_timeout,
2069 u->tcp_fin_timeout,
2070 u->udp_timeout);
2071
2072#ifdef CONFIG_IP_VS_PROTO_TCP
2073 if (u->tcp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002074 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2075 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076 = u->tcp_timeout * HZ;
2077 }
2078
2079 if (u->tcp_fin_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002080 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2081 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082 = u->tcp_fin_timeout * HZ;
2083 }
2084#endif
2085
2086#ifdef CONFIG_IP_VS_PROTO_UDP
2087 if (u->udp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002088 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2089 pd->timeout_table[IP_VS_UDP_S_NORMAL]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090 = u->udp_timeout * HZ;
2091 }
2092#endif
2093 return 0;
2094}
2095
2096
2097#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2098#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2099#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2100 sizeof(struct ip_vs_dest_user))
2101#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2102#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2103#define MAX_ARG_LEN SVCDEST_ARG_LEN
2104
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002105static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2107 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2108 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2109 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2110 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2111 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2112 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2113 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2114 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2115 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2116 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2117};
2118
Julius Volzc860c6b2008-09-02 15:55:36 +02002119static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2120 struct ip_vs_service_user *usvc_compat)
2121{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002122 memset(usvc, 0, sizeof(*usvc));
2123
Julius Volzc860c6b2008-09-02 15:55:36 +02002124 usvc->af = AF_INET;
2125 usvc->protocol = usvc_compat->protocol;
2126 usvc->addr.ip = usvc_compat->addr;
2127 usvc->port = usvc_compat->port;
2128 usvc->fwmark = usvc_compat->fwmark;
2129
2130 /* Deep copy of sched_name is not needed here */
2131 usvc->sched_name = usvc_compat->sched_name;
2132
2133 usvc->flags = usvc_compat->flags;
2134 usvc->timeout = usvc_compat->timeout;
2135 usvc->netmask = usvc_compat->netmask;
2136}
2137
2138static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2139 struct ip_vs_dest_user *udest_compat)
2140{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002141 memset(udest, 0, sizeof(*udest));
2142
Julius Volzc860c6b2008-09-02 15:55:36 +02002143 udest->addr.ip = udest_compat->addr;
2144 udest->port = udest_compat->port;
2145 udest->conn_flags = udest_compat->conn_flags;
2146 udest->weight = udest_compat->weight;
2147 udest->u_threshold = udest_compat->u_threshold;
2148 udest->l_threshold = udest_compat->l_threshold;
2149}
2150
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151static int
2152do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2153{
Hans Schillstromfc723252011-01-03 14:44:43 +01002154 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 int ret;
2156 unsigned char arg[MAX_ARG_LEN];
Julius Volzc860c6b2008-09-02 15:55:36 +02002157 struct ip_vs_service_user *usvc_compat;
2158 struct ip_vs_service_user_kern usvc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002159 struct ip_vs_service *svc;
Julius Volzc860c6b2008-09-02 15:55:36 +02002160 struct ip_vs_dest_user *udest_compat;
2161 struct ip_vs_dest_user_kern udest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162
2163 if (!capable(CAP_NET_ADMIN))
2164 return -EPERM;
2165
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002166 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2167 return -EINVAL;
2168 if (len < 0 || len > MAX_ARG_LEN)
2169 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002170 if (len != set_arglen[SET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002171 pr_err("set_ctl: len %u != %u\n",
2172 len, set_arglen[SET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173 return -EINVAL;
2174 }
2175
2176 if (copy_from_user(arg, user, len) != 0)
2177 return -EFAULT;
2178
2179 /* increase the module use count */
2180 ip_vs_use_count_inc();
2181
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002182 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002183 ret = -ERESTARTSYS;
2184 goto out_dec;
2185 }
2186
2187 if (cmd == IP_VS_SO_SET_FLUSH) {
2188 /* Flush the virtual service */
Hans Schillstromfc723252011-01-03 14:44:43 +01002189 ret = ip_vs_flush(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190 goto out_unlock;
2191 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2192 /* Set timeout values for (tcp tcpfin udp) */
Hans Schillstrom93304192011-01-03 14:44:51 +01002193 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194 goto out_unlock;
2195 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2196 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002197 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2198 dm->syncid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199 goto out_unlock;
2200 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2201 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002202 ret = stop_sync_thread(net, dm->state);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203 goto out_unlock;
2204 }
2205
Julius Volzc860c6b2008-09-02 15:55:36 +02002206 usvc_compat = (struct ip_vs_service_user *)arg;
2207 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2208
2209 /* We only use the new structs internally, so copy userspace compat
2210 * structs to extended internal versions */
2211 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2212 ip_vs_copy_udest_compat(&udest, udest_compat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002213
2214 if (cmd == IP_VS_SO_SET_ZERO) {
2215 /* if no service address is set, zero counters in all */
Julius Volzc860c6b2008-09-02 15:55:36 +02002216 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002217 ret = ip_vs_zero_all(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218 goto out_unlock;
2219 }
2220 }
2221
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +01002222 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2223 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2224 usvc.protocol != IPPROTO_SCTP) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002225 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2226 usvc.protocol, &usvc.addr.ip,
2227 ntohs(usvc.port), usvc.sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228 ret = -EFAULT;
2229 goto out_unlock;
2230 }
2231
2232 /* Lookup the exact service by <protocol, addr, port> or fwmark */
Julius Volzc860c6b2008-09-02 15:55:36 +02002233 if (usvc.fwmark == 0)
Hans Schillstromfc723252011-01-03 14:44:43 +01002234 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002235 &usvc.addr, usvc.port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002236 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002237 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238
2239 if (cmd != IP_VS_SO_SET_ADD
Julius Volzc860c6b2008-09-02 15:55:36 +02002240 && (svc == NULL || svc->protocol != usvc.protocol)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002241 ret = -ESRCH;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002242 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002243 }
2244
2245 switch (cmd) {
2246 case IP_VS_SO_SET_ADD:
2247 if (svc != NULL)
2248 ret = -EEXIST;
2249 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002250 ret = ip_vs_add_service(net, &usvc, &svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002251 break;
2252 case IP_VS_SO_SET_EDIT:
Julius Volzc860c6b2008-09-02 15:55:36 +02002253 ret = ip_vs_edit_service(svc, &usvc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254 break;
2255 case IP_VS_SO_SET_DEL:
2256 ret = ip_vs_del_service(svc);
2257 if (!ret)
2258 goto out_unlock;
2259 break;
2260 case IP_VS_SO_SET_ZERO:
2261 ret = ip_vs_zero_service(svc);
2262 break;
2263 case IP_VS_SO_SET_ADDDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002264 ret = ip_vs_add_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265 break;
2266 case IP_VS_SO_SET_EDITDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002267 ret = ip_vs_edit_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 break;
2269 case IP_VS_SO_SET_DELDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002270 ret = ip_vs_del_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002271 break;
2272 default:
2273 ret = -EINVAL;
2274 }
2275
Linus Torvalds1da177e2005-04-16 15:20:36 -07002276 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002277 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278 out_dec:
2279 /* decrease the module use count */
2280 ip_vs_use_count_dec();
2281
2282 return ret;
2283}
2284
2285
2286static void
2287ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2288{
2289 spin_lock_bh(&src->lock);
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002290 memcpy(dst, &src->ustats, sizeof(*dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291 spin_unlock_bh(&src->lock);
2292}
2293
2294static void
2295ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2296{
2297 dst->protocol = src->protocol;
Julius Volze7ade462008-09-02 15:55:33 +02002298 dst->addr = src->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002299 dst->port = src->port;
2300 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002301 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302 dst->flags = src->flags;
2303 dst->timeout = src->timeout / HZ;
2304 dst->netmask = src->netmask;
2305 dst->num_dests = src->num_dests;
2306 ip_vs_copy_stats(&dst->stats, &src->stats);
2307}
2308
2309static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002310__ip_vs_get_service_entries(struct net *net,
2311 const struct ip_vs_get_services *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312 struct ip_vs_get_services __user *uptr)
2313{
2314 int idx, count=0;
2315 struct ip_vs_service *svc;
2316 struct ip_vs_service_entry entry;
2317 int ret = 0;
2318
2319 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2320 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002321 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002322 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002323 continue;
2324
Linus Torvalds1da177e2005-04-16 15:20:36 -07002325 if (count >= get->num_services)
2326 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002327 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328 ip_vs_copy_service(&entry, svc);
2329 if (copy_to_user(&uptr->entrytable[count],
2330 &entry, sizeof(entry))) {
2331 ret = -EFAULT;
2332 goto out;
2333 }
2334 count++;
2335 }
2336 }
2337
2338 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2339 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002340 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002341 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002342 continue;
2343
Linus Torvalds1da177e2005-04-16 15:20:36 -07002344 if (count >= get->num_services)
2345 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002346 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347 ip_vs_copy_service(&entry, svc);
2348 if (copy_to_user(&uptr->entrytable[count],
2349 &entry, sizeof(entry))) {
2350 ret = -EFAULT;
2351 goto out;
2352 }
2353 count++;
2354 }
2355 }
2356 out:
2357 return ret;
2358}
2359
2360static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002361__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002362 struct ip_vs_get_dests __user *uptr)
2363{
2364 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002365 union nf_inet_addr addr = { .ip = get->addr };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366 int ret = 0;
2367
2368 if (get->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002369 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002371 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002372 get->port);
Julius Volzb18610d2008-09-02 15:55:37 +02002373
Linus Torvalds1da177e2005-04-16 15:20:36 -07002374 if (svc) {
2375 int count = 0;
2376 struct ip_vs_dest *dest;
2377 struct ip_vs_dest_entry entry;
2378
2379 list_for_each_entry(dest, &svc->destinations, n_list) {
2380 if (count >= get->num_dests)
2381 break;
2382
Julius Volze7ade462008-09-02 15:55:33 +02002383 entry.addr = dest->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002384 entry.port = dest->port;
2385 entry.conn_flags = atomic_read(&dest->conn_flags);
2386 entry.weight = atomic_read(&dest->weight);
2387 entry.u_threshold = dest->u_threshold;
2388 entry.l_threshold = dest->l_threshold;
2389 entry.activeconns = atomic_read(&dest->activeconns);
2390 entry.inactconns = atomic_read(&dest->inactconns);
2391 entry.persistconns = atomic_read(&dest->persistconns);
2392 ip_vs_copy_stats(&entry.stats, &dest->stats);
2393 if (copy_to_user(&uptr->entrytable[count],
2394 &entry, sizeof(entry))) {
2395 ret = -EFAULT;
2396 break;
2397 }
2398 count++;
2399 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400 } else
2401 ret = -ESRCH;
2402 return ret;
2403}
2404
2405static inline void
Hans Schillstrom93304192011-01-03 14:44:51 +01002406__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002407{
Hans Schillstrom93304192011-01-03 14:44:51 +01002408 struct ip_vs_proto_data *pd;
2409
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410#ifdef CONFIG_IP_VS_PROTO_TCP
Hans Schillstrom93304192011-01-03 14:44:51 +01002411 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2412 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2413 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002414#endif
2415#ifdef CONFIG_IP_VS_PROTO_UDP
Hans Schillstrom93304192011-01-03 14:44:51 +01002416 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002417 u->udp_timeout =
Hans Schillstrom93304192011-01-03 14:44:51 +01002418 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002419#endif
2420}
2421
2422
2423#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2424#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2425#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2426#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2427#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2428#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2429#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2430
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002431static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002432 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2433 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2434 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2435 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2436 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2437 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2438 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2439};
2440
2441static int
2442do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2443{
2444 unsigned char arg[128];
2445 int ret = 0;
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002446 unsigned int copylen;
Hans Schillstromfc723252011-01-03 14:44:43 +01002447 struct net *net = sock_net(sk);
Hans Schillstromf1313152011-01-03 14:44:55 +01002448 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002449
Hans Schillstromfc723252011-01-03 14:44:43 +01002450 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002451 if (!capable(CAP_NET_ADMIN))
2452 return -EPERM;
2453
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002454 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2455 return -EINVAL;
2456
Linus Torvalds1da177e2005-04-16 15:20:36 -07002457 if (*len < get_arglen[GET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002458 pr_err("get_ctl: len %u < %u\n",
2459 *len, get_arglen[GET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002460 return -EINVAL;
2461 }
2462
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002463 copylen = get_arglen[GET_CMDID(cmd)];
2464 if (copylen > 128)
2465 return -EINVAL;
2466
2467 if (copy_from_user(arg, user, copylen) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468 return -EFAULT;
2469
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002470 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002471 return -ERESTARTSYS;
2472
2473 switch (cmd) {
2474 case IP_VS_SO_GET_VERSION:
2475 {
2476 char buf[64];
2477
2478 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002479 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2481 ret = -EFAULT;
2482 goto out;
2483 }
2484 *len = strlen(buf)+1;
2485 }
2486 break;
2487
2488 case IP_VS_SO_GET_INFO:
2489 {
2490 struct ip_vs_getinfo info;
2491 info.version = IP_VS_VERSION_CODE;
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002492 info.size = ip_vs_conn_tab_size;
Hans Schillstroma0840e22011-01-03 14:44:58 +01002493 info.num_services = ipvs->num_services;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002494 if (copy_to_user(user, &info, sizeof(info)) != 0)
2495 ret = -EFAULT;
2496 }
2497 break;
2498
2499 case IP_VS_SO_GET_SERVICES:
2500 {
2501 struct ip_vs_get_services *get;
2502 int size;
2503
2504 get = (struct ip_vs_get_services *)arg;
2505 size = sizeof(*get) +
2506 sizeof(struct ip_vs_service_entry) * get->num_services;
2507 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002508 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002509 ret = -EINVAL;
2510 goto out;
2511 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002512 ret = __ip_vs_get_service_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002513 }
2514 break;
2515
2516 case IP_VS_SO_GET_SERVICE:
2517 {
2518 struct ip_vs_service_entry *entry;
2519 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002520 union nf_inet_addr addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002521
2522 entry = (struct ip_vs_service_entry *)arg;
Julius Volzb18610d2008-09-02 15:55:37 +02002523 addr.ip = entry->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002524 if (entry->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002525 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002526 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002527 svc = __ip_vs_service_find(net, AF_INET,
2528 entry->protocol, &addr,
2529 entry->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530 if (svc) {
2531 ip_vs_copy_service(entry, svc);
2532 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2533 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002534 } else
2535 ret = -ESRCH;
2536 }
2537 break;
2538
2539 case IP_VS_SO_GET_DESTS:
2540 {
2541 struct ip_vs_get_dests *get;
2542 int size;
2543
2544 get = (struct ip_vs_get_dests *)arg;
2545 size = sizeof(*get) +
2546 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2547 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002548 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549 ret = -EINVAL;
2550 goto out;
2551 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002552 ret = __ip_vs_get_dest_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002553 }
2554 break;
2555
2556 case IP_VS_SO_GET_TIMEOUT:
2557 {
2558 struct ip_vs_timeout_user t;
2559
Hans Schillstrom93304192011-01-03 14:44:51 +01002560 __ip_vs_get_timeouts(net, &t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002561 if (copy_to_user(user, &t, sizeof(t)) != 0)
2562 ret = -EFAULT;
2563 }
2564 break;
2565
2566 case IP_VS_SO_GET_DAEMON:
2567 {
2568 struct ip_vs_daemon_user d[2];
2569
2570 memset(&d, 0, sizeof(d));
Hans Schillstromf1313152011-01-03 14:44:55 +01002571 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002572 d[0].state = IP_VS_STATE_MASTER;
Hans Schillstromf1313152011-01-03 14:44:55 +01002573 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2574 sizeof(d[0].mcast_ifn));
2575 d[0].syncid = ipvs->master_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002576 }
Hans Schillstromf1313152011-01-03 14:44:55 +01002577 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002578 d[1].state = IP_VS_STATE_BACKUP;
Hans Schillstromf1313152011-01-03 14:44:55 +01002579 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2580 sizeof(d[1].mcast_ifn));
2581 d[1].syncid = ipvs->backup_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002582 }
2583 if (copy_to_user(user, &d, sizeof(d)) != 0)
2584 ret = -EFAULT;
2585 }
2586 break;
2587
2588 default:
2589 ret = -EINVAL;
2590 }
2591
2592 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002593 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002594 return ret;
2595}
2596
2597
2598static struct nf_sockopt_ops ip_vs_sockopts = {
2599 .pf = PF_INET,
2600 .set_optmin = IP_VS_BASE_CTL,
2601 .set_optmax = IP_VS_SO_SET_MAX+1,
2602 .set = do_ip_vs_set_ctl,
2603 .get_optmin = IP_VS_BASE_CTL,
2604 .get_optmax = IP_VS_SO_GET_MAX+1,
2605 .get = do_ip_vs_get_ctl,
Neil Horman16fcec32007-09-11 11:28:26 +02002606 .owner = THIS_MODULE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002607};
2608
Julius Volz9a812192008-08-14 14:08:44 +02002609/*
2610 * Generic Netlink interface
2611 */
2612
2613/* IPVS genetlink family */
2614static struct genl_family ip_vs_genl_family = {
2615 .id = GENL_ID_GENERATE,
2616 .hdrsize = 0,
2617 .name = IPVS_GENL_NAME,
2618 .version = IPVS_GENL_VERSION,
2619 .maxattr = IPVS_CMD_MAX,
2620};
2621
2622/* Policy used for first-level command attributes */
2623static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2624 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2625 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2626 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2627 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2628 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2629 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2630};
2631
2632/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2633static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2634 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2635 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2636 .len = IP_VS_IFNAME_MAXLEN },
2637 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2638};
2639
2640/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2641static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2642 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2643 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2644 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2645 .len = sizeof(union nf_inet_addr) },
2646 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2647 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2648 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2649 .len = IP_VS_SCHEDNAME_MAXLEN },
Simon Horman0d1e71b2010-08-22 21:37:54 +09002650 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2651 .len = IP_VS_PENAME_MAXLEN },
Julius Volz9a812192008-08-14 14:08:44 +02002652 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2653 .len = sizeof(struct ip_vs_flags) },
2654 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2655 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2656 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2657};
2658
2659/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2660static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2661 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2662 .len = sizeof(union nf_inet_addr) },
2663 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2664 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2665 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2666 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2667 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2668 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2669 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2670 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2671 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2672};
2673
2674static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2675 struct ip_vs_stats *stats)
2676{
2677 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2678 if (!nl_stats)
2679 return -EMSGSIZE;
2680
2681 spin_lock_bh(&stats->lock);
2682
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002683 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2684 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2685 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2686 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2687 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2688 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2689 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2690 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2691 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2692 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
Julius Volz9a812192008-08-14 14:08:44 +02002693
2694 spin_unlock_bh(&stats->lock);
2695
2696 nla_nest_end(skb, nl_stats);
2697
2698 return 0;
2699
2700nla_put_failure:
2701 spin_unlock_bh(&stats->lock);
2702 nla_nest_cancel(skb, nl_stats);
2703 return -EMSGSIZE;
2704}
2705
2706static int ip_vs_genl_fill_service(struct sk_buff *skb,
2707 struct ip_vs_service *svc)
2708{
2709 struct nlattr *nl_service;
2710 struct ip_vs_flags flags = { .flags = svc->flags,
2711 .mask = ~0 };
2712
2713 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2714 if (!nl_service)
2715 return -EMSGSIZE;
2716
Julius Volzf94fd042008-09-02 15:55:55 +02002717 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
Julius Volz9a812192008-08-14 14:08:44 +02002718
2719 if (svc->fwmark) {
2720 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2721 } else {
2722 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2723 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2724 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2725 }
2726
2727 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002728 if (svc->pe)
2729 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
Julius Volz9a812192008-08-14 14:08:44 +02002730 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2731 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2732 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2733
2734 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2735 goto nla_put_failure;
2736
2737 nla_nest_end(skb, nl_service);
2738
2739 return 0;
2740
2741nla_put_failure:
2742 nla_nest_cancel(skb, nl_service);
2743 return -EMSGSIZE;
2744}
2745
2746static int ip_vs_genl_dump_service(struct sk_buff *skb,
2747 struct ip_vs_service *svc,
2748 struct netlink_callback *cb)
2749{
2750 void *hdr;
2751
2752 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2753 &ip_vs_genl_family, NLM_F_MULTI,
2754 IPVS_CMD_NEW_SERVICE);
2755 if (!hdr)
2756 return -EMSGSIZE;
2757
2758 if (ip_vs_genl_fill_service(skb, svc) < 0)
2759 goto nla_put_failure;
2760
2761 return genlmsg_end(skb, hdr);
2762
2763nla_put_failure:
2764 genlmsg_cancel(skb, hdr);
2765 return -EMSGSIZE;
2766}
2767
2768static int ip_vs_genl_dump_services(struct sk_buff *skb,
2769 struct netlink_callback *cb)
2770{
2771 int idx = 0, i;
2772 int start = cb->args[0];
2773 struct ip_vs_service *svc;
Hans Schillstromfc723252011-01-03 14:44:43 +01002774 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002775
2776 mutex_lock(&__ip_vs_mutex);
2777 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2778 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002779 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002780 continue;
2781 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2782 idx--;
2783 goto nla_put_failure;
2784 }
2785 }
2786 }
2787
2788 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2789 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002790 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002791 continue;
2792 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2793 idx--;
2794 goto nla_put_failure;
2795 }
2796 }
2797 }
2798
2799nla_put_failure:
2800 mutex_unlock(&__ip_vs_mutex);
2801 cb->args[0] = idx;
2802
2803 return skb->len;
2804}
2805
Hans Schillstromfc723252011-01-03 14:44:43 +01002806static int ip_vs_genl_parse_service(struct net *net,
2807 struct ip_vs_service_user_kern *usvc,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002808 struct nlattr *nla, int full_entry,
2809 struct ip_vs_service **ret_svc)
Julius Volz9a812192008-08-14 14:08:44 +02002810{
2811 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2812 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002813 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002814
2815 /* Parse mandatory identifying service fields first */
2816 if (nla == NULL ||
2817 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2818 return -EINVAL;
2819
2820 nla_af = attrs[IPVS_SVC_ATTR_AF];
2821 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2822 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2823 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2824 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2825
2826 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2827 return -EINVAL;
2828
Simon Horman258c8892009-12-15 17:01:25 +01002829 memset(usvc, 0, sizeof(*usvc));
2830
Julius Volzc860c6b2008-09-02 15:55:36 +02002831 usvc->af = nla_get_u16(nla_af);
Julius Volzf94fd042008-09-02 15:55:55 +02002832#ifdef CONFIG_IP_VS_IPV6
2833 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2834#else
2835 if (usvc->af != AF_INET)
2836#endif
Julius Volz9a812192008-08-14 14:08:44 +02002837 return -EAFNOSUPPORT;
2838
2839 if (nla_fwmark) {
2840 usvc->protocol = IPPROTO_TCP;
2841 usvc->fwmark = nla_get_u32(nla_fwmark);
2842 } else {
2843 usvc->protocol = nla_get_u16(nla_protocol);
2844 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2845 usvc->port = nla_get_u16(nla_port);
2846 usvc->fwmark = 0;
2847 }
2848
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002849 if (usvc->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002850 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002851 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002852 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002853 &usvc->addr, usvc->port);
2854 *ret_svc = svc;
2855
Julius Volz9a812192008-08-14 14:08:44 +02002856 /* If a full entry was requested, check for the additional fields */
2857 if (full_entry) {
Simon Horman0d1e71b2010-08-22 21:37:54 +09002858 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
Julius Volz9a812192008-08-14 14:08:44 +02002859 *nla_netmask;
2860 struct ip_vs_flags flags;
Julius Volz9a812192008-08-14 14:08:44 +02002861
2862 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
Simon Horman0d1e71b2010-08-22 21:37:54 +09002863 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
Julius Volz9a812192008-08-14 14:08:44 +02002864 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2865 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2866 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2867
2868 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2869 return -EINVAL;
2870
2871 nla_memcpy(&flags, nla_flags, sizeof(flags));
2872
2873 /* prefill flags from service if it already exists */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002874 if (svc)
Julius Volz9a812192008-08-14 14:08:44 +02002875 usvc->flags = svc->flags;
Julius Volz9a812192008-08-14 14:08:44 +02002876
2877 /* set new flags from userland */
2878 usvc->flags = (usvc->flags & ~flags.mask) |
2879 (flags.flags & flags.mask);
Julius Volzc860c6b2008-09-02 15:55:36 +02002880 usvc->sched_name = nla_data(nla_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002881 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
Julius Volz9a812192008-08-14 14:08:44 +02002882 usvc->timeout = nla_get_u32(nla_timeout);
2883 usvc->netmask = nla_get_u32(nla_netmask);
2884 }
2885
2886 return 0;
2887}
2888
Hans Schillstromfc723252011-01-03 14:44:43 +01002889static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2890 struct nlattr *nla)
Julius Volz9a812192008-08-14 14:08:44 +02002891{
Julius Volzc860c6b2008-09-02 15:55:36 +02002892 struct ip_vs_service_user_kern usvc;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002893 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002894 int ret;
2895
Hans Schillstromfc723252011-01-03 14:44:43 +01002896 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002897 return ret ? ERR_PTR(ret) : svc;
Julius Volz9a812192008-08-14 14:08:44 +02002898}
2899
2900static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2901{
2902 struct nlattr *nl_dest;
2903
2904 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2905 if (!nl_dest)
2906 return -EMSGSIZE;
2907
2908 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2909 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2910
2911 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2912 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2913 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2914 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2915 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2916 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2917 atomic_read(&dest->activeconns));
2918 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2919 atomic_read(&dest->inactconns));
2920 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2921 atomic_read(&dest->persistconns));
2922
2923 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2924 goto nla_put_failure;
2925
2926 nla_nest_end(skb, nl_dest);
2927
2928 return 0;
2929
2930nla_put_failure:
2931 nla_nest_cancel(skb, nl_dest);
2932 return -EMSGSIZE;
2933}
2934
2935static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2936 struct netlink_callback *cb)
2937{
2938 void *hdr;
2939
2940 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2941 &ip_vs_genl_family, NLM_F_MULTI,
2942 IPVS_CMD_NEW_DEST);
2943 if (!hdr)
2944 return -EMSGSIZE;
2945
2946 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2947 goto nla_put_failure;
2948
2949 return genlmsg_end(skb, hdr);
2950
2951nla_put_failure:
2952 genlmsg_cancel(skb, hdr);
2953 return -EMSGSIZE;
2954}
2955
2956static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2957 struct netlink_callback *cb)
2958{
2959 int idx = 0;
2960 int start = cb->args[0];
2961 struct ip_vs_service *svc;
2962 struct ip_vs_dest *dest;
2963 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
Hans Schillstroma0840e22011-01-03 14:44:58 +01002964 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002965
2966 mutex_lock(&__ip_vs_mutex);
2967
2968 /* Try to find the service for which to dump destinations */
2969 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2970 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2971 goto out_err;
2972
Hans Schillstroma0840e22011-01-03 14:44:58 +01002973
Hans Schillstromfc723252011-01-03 14:44:43 +01002974 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02002975 if (IS_ERR(svc) || svc == NULL)
2976 goto out_err;
2977
2978 /* Dump the destinations */
2979 list_for_each_entry(dest, &svc->destinations, n_list) {
2980 if (++idx <= start)
2981 continue;
2982 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2983 idx--;
2984 goto nla_put_failure;
2985 }
2986 }
2987
2988nla_put_failure:
2989 cb->args[0] = idx;
Julius Volz9a812192008-08-14 14:08:44 +02002990
2991out_err:
2992 mutex_unlock(&__ip_vs_mutex);
2993
2994 return skb->len;
2995}
2996
Julius Volzc860c6b2008-09-02 15:55:36 +02002997static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
Julius Volz9a812192008-08-14 14:08:44 +02002998 struct nlattr *nla, int full_entry)
2999{
3000 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3001 struct nlattr *nla_addr, *nla_port;
3002
3003 /* Parse mandatory identifying destination fields first */
3004 if (nla == NULL ||
3005 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3006 return -EINVAL;
3007
3008 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3009 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3010
3011 if (!(nla_addr && nla_port))
3012 return -EINVAL;
3013
Simon Horman258c8892009-12-15 17:01:25 +01003014 memset(udest, 0, sizeof(*udest));
3015
Julius Volz9a812192008-08-14 14:08:44 +02003016 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3017 udest->port = nla_get_u16(nla_port);
3018
3019 /* If a full entry was requested, check for the additional fields */
3020 if (full_entry) {
3021 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3022 *nla_l_thresh;
3023
3024 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3025 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3026 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3027 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3028
3029 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3030 return -EINVAL;
3031
3032 udest->conn_flags = nla_get_u32(nla_fwd)
3033 & IP_VS_CONN_F_FWD_MASK;
3034 udest->weight = nla_get_u32(nla_weight);
3035 udest->u_threshold = nla_get_u32(nla_u_thresh);
3036 udest->l_threshold = nla_get_u32(nla_l_thresh);
3037 }
3038
3039 return 0;
3040}
3041
3042static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3043 const char *mcast_ifn, __be32 syncid)
3044{
3045 struct nlattr *nl_daemon;
3046
3047 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3048 if (!nl_daemon)
3049 return -EMSGSIZE;
3050
3051 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3052 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3053 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3054
3055 nla_nest_end(skb, nl_daemon);
3056
3057 return 0;
3058
3059nla_put_failure:
3060 nla_nest_cancel(skb, nl_daemon);
3061 return -EMSGSIZE;
3062}
3063
3064static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3065 const char *mcast_ifn, __be32 syncid,
3066 struct netlink_callback *cb)
3067{
3068 void *hdr;
3069 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3070 &ip_vs_genl_family, NLM_F_MULTI,
3071 IPVS_CMD_NEW_DAEMON);
3072 if (!hdr)
3073 return -EMSGSIZE;
3074
3075 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3076 goto nla_put_failure;
3077
3078 return genlmsg_end(skb, hdr);
3079
3080nla_put_failure:
3081 genlmsg_cancel(skb, hdr);
3082 return -EMSGSIZE;
3083}
3084
3085static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3086 struct netlink_callback *cb)
3087{
Hans Schillstromf1313152011-01-03 14:44:55 +01003088 struct net *net = skb_net(skb);
3089 struct netns_ipvs *ipvs = net_ipvs(net);
3090
Julius Volz9a812192008-08-14 14:08:44 +02003091 mutex_lock(&__ip_vs_mutex);
Hans Schillstromf1313152011-01-03 14:44:55 +01003092 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
Julius Volz9a812192008-08-14 14:08:44 +02003093 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
Hans Schillstromf1313152011-01-03 14:44:55 +01003094 ipvs->master_mcast_ifn,
3095 ipvs->master_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003096 goto nla_put_failure;
3097
3098 cb->args[0] = 1;
3099 }
3100
Hans Schillstromf1313152011-01-03 14:44:55 +01003101 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
Julius Volz9a812192008-08-14 14:08:44 +02003102 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
Hans Schillstromf1313152011-01-03 14:44:55 +01003103 ipvs->backup_mcast_ifn,
3104 ipvs->backup_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003105 goto nla_put_failure;
3106
3107 cb->args[1] = 1;
3108 }
3109
3110nla_put_failure:
3111 mutex_unlock(&__ip_vs_mutex);
3112
3113 return skb->len;
3114}
3115
Hans Schillstromf1313152011-01-03 14:44:55 +01003116static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003117{
3118 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3119 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3120 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3121 return -EINVAL;
3122
Hans Schillstromf1313152011-01-03 14:44:55 +01003123 return start_sync_thread(net,
3124 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
Julius Volz9a812192008-08-14 14:08:44 +02003125 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3126 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3127}
3128
Hans Schillstromf1313152011-01-03 14:44:55 +01003129static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003130{
3131 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3132 return -EINVAL;
3133
Hans Schillstromf1313152011-01-03 14:44:55 +01003134 return stop_sync_thread(net,
3135 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
Julius Volz9a812192008-08-14 14:08:44 +02003136}
3137
Hans Schillstrom93304192011-01-03 14:44:51 +01003138static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003139{
3140 struct ip_vs_timeout_user t;
3141
Hans Schillstrom93304192011-01-03 14:44:51 +01003142 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003143
3144 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3145 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3146
3147 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3148 t.tcp_fin_timeout =
3149 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3150
3151 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3152 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3153
Hans Schillstrom93304192011-01-03 14:44:51 +01003154 return ip_vs_set_timeout(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003155}
3156
3157static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3158{
3159 struct ip_vs_service *svc = NULL;
Julius Volzc860c6b2008-09-02 15:55:36 +02003160 struct ip_vs_service_user_kern usvc;
3161 struct ip_vs_dest_user_kern udest;
Julius Volz9a812192008-08-14 14:08:44 +02003162 int ret = 0, cmd;
3163 int need_full_svc = 0, need_full_dest = 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003164 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003165 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003166
Hans Schillstromfc723252011-01-03 14:44:43 +01003167 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003168 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003169 cmd = info->genlhdr->cmd;
3170
3171 mutex_lock(&__ip_vs_mutex);
3172
3173 if (cmd == IPVS_CMD_FLUSH) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003174 ret = ip_vs_flush(net);
Julius Volz9a812192008-08-14 14:08:44 +02003175 goto out;
3176 } else if (cmd == IPVS_CMD_SET_CONFIG) {
Hans Schillstrom93304192011-01-03 14:44:51 +01003177 ret = ip_vs_genl_set_config(net, info->attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003178 goto out;
3179 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3180 cmd == IPVS_CMD_DEL_DAEMON) {
3181
3182 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3183
3184 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3185 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3186 info->attrs[IPVS_CMD_ATTR_DAEMON],
3187 ip_vs_daemon_policy)) {
3188 ret = -EINVAL;
3189 goto out;
3190 }
3191
3192 if (cmd == IPVS_CMD_NEW_DAEMON)
Hans Schillstromf1313152011-01-03 14:44:55 +01003193 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003194 else
Hans Schillstromf1313152011-01-03 14:44:55 +01003195 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003196 goto out;
3197 } else if (cmd == IPVS_CMD_ZERO &&
3198 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003199 ret = ip_vs_zero_all(net);
Julius Volz9a812192008-08-14 14:08:44 +02003200 goto out;
3201 }
3202
3203 /* All following commands require a service argument, so check if we
3204 * received a valid one. We need a full service specification when
3205 * adding / editing a service. Only identifying members otherwise. */
3206 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3207 need_full_svc = 1;
3208
Hans Schillstromfc723252011-01-03 14:44:43 +01003209 ret = ip_vs_genl_parse_service(net, &usvc,
Julius Volz9a812192008-08-14 14:08:44 +02003210 info->attrs[IPVS_CMD_ATTR_SERVICE],
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003211 need_full_svc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003212 if (ret)
3213 goto out;
3214
Julius Volz9a812192008-08-14 14:08:44 +02003215 /* Unless we're adding a new service, the service must already exist */
3216 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3217 ret = -ESRCH;
3218 goto out;
3219 }
3220
3221 /* Destination commands require a valid destination argument. For
3222 * adding / editing a destination, we need a full destination
3223 * specification. */
3224 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3225 cmd == IPVS_CMD_DEL_DEST) {
3226 if (cmd != IPVS_CMD_DEL_DEST)
3227 need_full_dest = 1;
3228
3229 ret = ip_vs_genl_parse_dest(&udest,
3230 info->attrs[IPVS_CMD_ATTR_DEST],
3231 need_full_dest);
3232 if (ret)
3233 goto out;
3234 }
3235
3236 switch (cmd) {
3237 case IPVS_CMD_NEW_SERVICE:
3238 if (svc == NULL)
Hans Schillstromfc723252011-01-03 14:44:43 +01003239 ret = ip_vs_add_service(net, &usvc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003240 else
3241 ret = -EEXIST;
3242 break;
3243 case IPVS_CMD_SET_SERVICE:
3244 ret = ip_vs_edit_service(svc, &usvc);
3245 break;
3246 case IPVS_CMD_DEL_SERVICE:
3247 ret = ip_vs_del_service(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003248 /* do not use svc, it can be freed */
Julius Volz9a812192008-08-14 14:08:44 +02003249 break;
3250 case IPVS_CMD_NEW_DEST:
3251 ret = ip_vs_add_dest(svc, &udest);
3252 break;
3253 case IPVS_CMD_SET_DEST:
3254 ret = ip_vs_edit_dest(svc, &udest);
3255 break;
3256 case IPVS_CMD_DEL_DEST:
3257 ret = ip_vs_del_dest(svc, &udest);
3258 break;
3259 case IPVS_CMD_ZERO:
3260 ret = ip_vs_zero_service(svc);
3261 break;
3262 default:
3263 ret = -EINVAL;
3264 }
3265
3266out:
Julius Volz9a812192008-08-14 14:08:44 +02003267 mutex_unlock(&__ip_vs_mutex);
3268
3269 return ret;
3270}
3271
3272static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3273{
3274 struct sk_buff *msg;
3275 void *reply;
3276 int ret, cmd, reply_cmd;
Hans Schillstromfc723252011-01-03 14:44:43 +01003277 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003278 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003279
Hans Schillstromfc723252011-01-03 14:44:43 +01003280 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003281 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003282 cmd = info->genlhdr->cmd;
3283
3284 if (cmd == IPVS_CMD_GET_SERVICE)
3285 reply_cmd = IPVS_CMD_NEW_SERVICE;
3286 else if (cmd == IPVS_CMD_GET_INFO)
3287 reply_cmd = IPVS_CMD_SET_INFO;
3288 else if (cmd == IPVS_CMD_GET_CONFIG)
3289 reply_cmd = IPVS_CMD_SET_CONFIG;
3290 else {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003291 pr_err("unknown Generic Netlink command\n");
Julius Volz9a812192008-08-14 14:08:44 +02003292 return -EINVAL;
3293 }
3294
3295 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3296 if (!msg)
3297 return -ENOMEM;
3298
3299 mutex_lock(&__ip_vs_mutex);
3300
3301 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3302 if (reply == NULL)
3303 goto nla_put_failure;
3304
3305 switch (cmd) {
3306 case IPVS_CMD_GET_SERVICE:
3307 {
3308 struct ip_vs_service *svc;
3309
Hans Schillstromfc723252011-01-03 14:44:43 +01003310 svc = ip_vs_genl_find_service(net,
3311 info->attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003312 if (IS_ERR(svc)) {
3313 ret = PTR_ERR(svc);
3314 goto out_err;
3315 } else if (svc) {
3316 ret = ip_vs_genl_fill_service(msg, svc);
Julius Volz9a812192008-08-14 14:08:44 +02003317 if (ret)
3318 goto nla_put_failure;
3319 } else {
3320 ret = -ESRCH;
3321 goto out_err;
3322 }
3323
3324 break;
3325 }
3326
3327 case IPVS_CMD_GET_CONFIG:
3328 {
3329 struct ip_vs_timeout_user t;
3330
Hans Schillstrom93304192011-01-03 14:44:51 +01003331 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003332#ifdef CONFIG_IP_VS_PROTO_TCP
3333 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3334 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3335 t.tcp_fin_timeout);
3336#endif
3337#ifdef CONFIG_IP_VS_PROTO_UDP
3338 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3339#endif
3340
3341 break;
3342 }
3343
3344 case IPVS_CMD_GET_INFO:
3345 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3346 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01003347 ip_vs_conn_tab_size);
Julius Volz9a812192008-08-14 14:08:44 +02003348 break;
3349 }
3350
3351 genlmsg_end(msg, reply);
Johannes Berg134e6372009-07-10 09:51:34 +00003352 ret = genlmsg_reply(msg, info);
Julius Volz9a812192008-08-14 14:08:44 +02003353 goto out;
3354
3355nla_put_failure:
Hannes Eder1e3e2382009-08-02 11:05:41 +00003356 pr_err("not enough space in Netlink message\n");
Julius Volz9a812192008-08-14 14:08:44 +02003357 ret = -EMSGSIZE;
3358
3359out_err:
3360 nlmsg_free(msg);
3361out:
3362 mutex_unlock(&__ip_vs_mutex);
3363
3364 return ret;
3365}
3366
3367
3368static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3369 {
3370 .cmd = IPVS_CMD_NEW_SERVICE,
3371 .flags = GENL_ADMIN_PERM,
3372 .policy = ip_vs_cmd_policy,
3373 .doit = ip_vs_genl_set_cmd,
3374 },
3375 {
3376 .cmd = IPVS_CMD_SET_SERVICE,
3377 .flags = GENL_ADMIN_PERM,
3378 .policy = ip_vs_cmd_policy,
3379 .doit = ip_vs_genl_set_cmd,
3380 },
3381 {
3382 .cmd = IPVS_CMD_DEL_SERVICE,
3383 .flags = GENL_ADMIN_PERM,
3384 .policy = ip_vs_cmd_policy,
3385 .doit = ip_vs_genl_set_cmd,
3386 },
3387 {
3388 .cmd = IPVS_CMD_GET_SERVICE,
3389 .flags = GENL_ADMIN_PERM,
3390 .doit = ip_vs_genl_get_cmd,
3391 .dumpit = ip_vs_genl_dump_services,
3392 .policy = ip_vs_cmd_policy,
3393 },
3394 {
3395 .cmd = IPVS_CMD_NEW_DEST,
3396 .flags = GENL_ADMIN_PERM,
3397 .policy = ip_vs_cmd_policy,
3398 .doit = ip_vs_genl_set_cmd,
3399 },
3400 {
3401 .cmd = IPVS_CMD_SET_DEST,
3402 .flags = GENL_ADMIN_PERM,
3403 .policy = ip_vs_cmd_policy,
3404 .doit = ip_vs_genl_set_cmd,
3405 },
3406 {
3407 .cmd = IPVS_CMD_DEL_DEST,
3408 .flags = GENL_ADMIN_PERM,
3409 .policy = ip_vs_cmd_policy,
3410 .doit = ip_vs_genl_set_cmd,
3411 },
3412 {
3413 .cmd = IPVS_CMD_GET_DEST,
3414 .flags = GENL_ADMIN_PERM,
3415 .policy = ip_vs_cmd_policy,
3416 .dumpit = ip_vs_genl_dump_dests,
3417 },
3418 {
3419 .cmd = IPVS_CMD_NEW_DAEMON,
3420 .flags = GENL_ADMIN_PERM,
3421 .policy = ip_vs_cmd_policy,
3422 .doit = ip_vs_genl_set_cmd,
3423 },
3424 {
3425 .cmd = IPVS_CMD_DEL_DAEMON,
3426 .flags = GENL_ADMIN_PERM,
3427 .policy = ip_vs_cmd_policy,
3428 .doit = ip_vs_genl_set_cmd,
3429 },
3430 {
3431 .cmd = IPVS_CMD_GET_DAEMON,
3432 .flags = GENL_ADMIN_PERM,
3433 .dumpit = ip_vs_genl_dump_daemons,
3434 },
3435 {
3436 .cmd = IPVS_CMD_SET_CONFIG,
3437 .flags = GENL_ADMIN_PERM,
3438 .policy = ip_vs_cmd_policy,
3439 .doit = ip_vs_genl_set_cmd,
3440 },
3441 {
3442 .cmd = IPVS_CMD_GET_CONFIG,
3443 .flags = GENL_ADMIN_PERM,
3444 .doit = ip_vs_genl_get_cmd,
3445 },
3446 {
3447 .cmd = IPVS_CMD_GET_INFO,
3448 .flags = GENL_ADMIN_PERM,
3449 .doit = ip_vs_genl_get_cmd,
3450 },
3451 {
3452 .cmd = IPVS_CMD_ZERO,
3453 .flags = GENL_ADMIN_PERM,
3454 .policy = ip_vs_cmd_policy,
3455 .doit = ip_vs_genl_set_cmd,
3456 },
3457 {
3458 .cmd = IPVS_CMD_FLUSH,
3459 .flags = GENL_ADMIN_PERM,
3460 .doit = ip_vs_genl_set_cmd,
3461 },
3462};
3463
3464static int __init ip_vs_genl_register(void)
3465{
Michał Mirosław8f698d52009-05-21 10:34:05 +00003466 return genl_register_family_with_ops(&ip_vs_genl_family,
3467 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
Julius Volz9a812192008-08-14 14:08:44 +02003468}
3469
3470static void ip_vs_genl_unregister(void)
3471{
3472 genl_unregister_family(&ip_vs_genl_family);
3473}
3474
3475/* End of Generic Netlink interface definitions */
3476
/*
 * per netns init/exit func.
 */
3480int __net_init __ip_vs_control_init(struct net *net)
3481{
Hans Schillstromfc723252011-01-03 14:44:43 +01003482 int idx;
3483 struct netns_ipvs *ipvs = net_ipvs(net);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003484 struct ctl_table *tbl;
Hans Schillstromfc723252011-01-03 14:44:43 +01003485
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003486 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3487 return -EPERM;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003488
3489 atomic_set(&ipvs->dropentry, 0);
3490 spin_lock_init(&ipvs->dropentry_lock);
3491 spin_lock_init(&ipvs->droppacket_lock);
3492 spin_lock_init(&ipvs->securetcp_lock);
3493 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3494
3495 /* Initialize rs_table */
3496 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3497 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3498
Hans Schillstromf2431e62011-01-03 14:45:00 +01003499 INIT_LIST_HEAD(&ipvs->dest_trash);
Hans Schillstrom763f8d02011-01-03 14:45:01 +01003500 atomic_set(&ipvs->ftpsvc_counter, 0);
3501 atomic_set(&ipvs->nullsvc_counter, 0);
Hans Schillstromf2431e62011-01-03 14:45:00 +01003502
Hans Schillstromb17fc992011-01-03 14:44:56 +01003503 /* procfs stats */
3504 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3505 if (ipvs->tot_stats == NULL) {
3506 pr_err("%s(): no memory.\n", __func__);
3507 return -ENOMEM;
3508 }
3509 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3510 if (!ipvs->cpustats) {
3511 pr_err("%s() alloc_percpu failed\n", __func__);
3512 goto err_alloc;
3513 }
3514 spin_lock_init(&ipvs->tot_stats->lock);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003515
Hans Schillstromfc723252011-01-03 14:44:43 +01003516 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3517 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3518
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003519 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3520 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003521 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3522 &ip_vs_stats_percpu_fops);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003523
3524 if (!net_eq(net, &init_net)) {
3525 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3526 if (tbl == NULL)
3527 goto err_dup;
3528 } else
3529 tbl = vs_vars;
3530 /* Initialize sysctl defaults */
3531 idx = 0;
3532 ipvs->sysctl_amemthresh = 1024;
3533 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3534 ipvs->sysctl_am_droprate = 10;
3535 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3536 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3537 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3538#ifdef CONFIG_IP_VS_NFCT
3539 tbl[idx++].data = &ipvs->sysctl_conntrack;
3540#endif
3541 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3542 ipvs->sysctl_snat_reroute = 1;
3543 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3544 ipvs->sysctl_sync_ver = 1;
3545 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3546 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3547 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3548 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3549 ipvs->sysctl_sync_threshold[0] = 3;
3550 ipvs->sysctl_sync_threshold[1] = 50;
3551 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3552 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3553 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3554
3555
3556 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003557 vs_vars);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003558 if (ipvs->sysctl_hdr == NULL)
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003559 goto err_reg;
Hans Schillstromb17fc992011-01-03 14:44:56 +01003560 ip_vs_new_estimator(net, ipvs->tot_stats);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003561 ipvs->sysctl_tbl = tbl;
Hans Schillstromf6340ee2011-01-03 14:44:59 +01003562 /* Schedule defense work */
3563 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3564 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003565 return 0;
3566
3567err_reg:
Hans Schillstroma0840e22011-01-03 14:44:58 +01003568 if (!net_eq(net, &init_net))
3569 kfree(tbl);
3570err_dup:
Hans Schillstromb17fc992011-01-03 14:44:56 +01003571 free_percpu(ipvs->cpustats);
3572err_alloc:
3573 kfree(ipvs->tot_stats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003574 return -ENOMEM;
3575}
3576
3577static void __net_exit __ip_vs_control_cleanup(struct net *net)
3578{
Hans Schillstromb17fc992011-01-03 14:44:56 +01003579 struct netns_ipvs *ipvs = net_ipvs(net);
3580
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003581 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3582 return;
3583
Hans Schillstromf2431e62011-01-03 14:45:00 +01003584 ip_vs_trash_cleanup(net);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003585 ip_vs_kill_estimator(net, ipvs->tot_stats);
Hans Schillstromf2431e62011-01-03 14:45:00 +01003586 cancel_delayed_work_sync(&ipvs->defense_work);
3587 cancel_work_sync(&ipvs->defense_work.work);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003588 unregister_net_sysctl_table(ipvs->sysctl_hdr);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003589 proc_net_remove(net, "ip_vs_stats_percpu");
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003590 proc_net_remove(net, "ip_vs_stats");
3591 proc_net_remove(net, "ip_vs");
Hans Schillstromb17fc992011-01-03 14:44:56 +01003592 free_percpu(ipvs->cpustats);
3593 kfree(ipvs->tot_stats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003594}
3595
3596static struct pernet_operations ipvs_control_ops = {
3597 .init = __ip_vs_control_init,
3598 .exit = __ip_vs_control_cleanup,
3599};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003600
Sven Wegener048cf482008-08-10 18:24:35 +00003601int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003602{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003603 int idx;
Hans Schillstromfc723252011-01-03 14:44:43 +01003604 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003605
3606 EnterFunction(2);
3607
Hans Schillstromfc723252011-01-03 14:44:43 +01003608 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003609 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3610 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3611 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3612 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003613
3614 ret = register_pernet_subsys(&ipvs_control_ops);
3615 if (ret) {
3616 pr_err("cannot register namespace.\n");
3617 goto err;
Eduardo Blancod86bef72010-10-19 10:26:47 +01003618 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003619
3620 smp_wmb(); /* Do we really need it now ? */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003621
Linus Torvalds1da177e2005-04-16 15:20:36 -07003622 ret = nf_register_sockopt(&ip_vs_sockopts);
3623 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003624 pr_err("cannot register sockopt.\n");
Hans Schillstromfc723252011-01-03 14:44:43 +01003625 goto err_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003626 }
3627
Julius Volz9a812192008-08-14 14:08:44 +02003628 ret = ip_vs_genl_register();
3629 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003630 pr_err("cannot register Generic Netlink interface.\n");
Julius Volz9a812192008-08-14 14:08:44 +02003631 nf_unregister_sockopt(&ip_vs_sockopts);
Hans Schillstromfc723252011-01-03 14:44:43 +01003632 goto err_net;
Julius Volz9a812192008-08-14 14:08:44 +02003633 }
3634
Linus Torvalds1da177e2005-04-16 15:20:36 -07003635 LeaveFunction(2);
3636 return 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003637
3638err_net:
3639 unregister_pernet_subsys(&ipvs_control_ops);
3640err:
3641 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003642}
3643
3644
3645void ip_vs_control_cleanup(void)
3646{
3647 EnterFunction(2);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003648 unregister_pernet_subsys(&ipvs_control_ops);
Julius Volz9a812192008-08-14 14:08:44 +02003649 ip_vs_genl_unregister();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003650 nf_unregister_sockopt(&ip_vs_sockopts);
3651 LeaveFunction(2);
3652}