blob: 364520f66b7ac9a7eab9970a05b96a66f4a3fa39 [file] [log] [blame]
/*
 * IPVS         An implementation of the IP virtual server support for the
 *              LINUX operating system.  IPVS is now implemented as a module
 *              over the NetFilter framework. IPVS can be used to build a
 *              high-performance and highly available server based on a
 *              cluster of servers.
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Peter Kese <peter.kese@ijs.si>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */
20
Hannes Eder9aada7a2009-07-30 14:29:44 -070021#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/seq_file.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090034#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080038#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020040#include <net/net_namespace.h>
Hans Schillstrom93304192011-01-03 14:44:51 +010041#include <linux/nsproxy.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <net/ip.h>
Vince Busam09571c72008-09-02 15:55:52 +020043#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020047#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020049#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
/*
 * Mutex for the IPVS sockopt paths.  A mutex (rather than a spinlock) is
 * used because [gs]etsockopt may sleep.
 */
static DEFINE_MUTEX(__ip_vs_mutex);

/*
 * Reader/writer lock guarding the virtual service tables
 * (ip_vs_svc_table and ip_vs_svc_fwm_table).
 */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
/* sysctl variables */

#ifdef CONFIG_IP_VS_DEBUG
/* Current debug verbosity; statics start out zeroed, i.e. level 0. */
static int sysctl_ip_vs_debug_level;

/*
 *	Accessor for the debug level, only built with CONFIG_IP_VS_DEBUG.
 *	Returns the current value of sysctl_ip_vs_debug_level.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
71
#ifdef CONFIG_IP_VS_IPV6
/*
 *	Check whether the route towards an IPv6 address goes through a
 *	loopback device.
 *
 *	Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 *
 *	@net:  network namespace in which the route lookup is performed
 *	@addr: IPv6 address to test
 *
 *	Returns 1 if the output route for @addr uses a device flagged
 *	IFF_LOOPBACK, 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(struct net *net,
				    const struct in6_addr *addr)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct flowi fl = {
		.oif = 0,
		.fl6_dst = *addr,
		.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
	};
	int is_local;

	dst = ip6_route_output(net, NULL, &fl);
	if (!dst)
		return 0;

	rt = (struct rt6_info *)dst;
	is_local = rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK);

	/*
	 * The lookup hands us a referenced route; drop that reference
	 * once the device flags have been inspected, otherwise every call
	 * leaks a dst reference.
	 */
	dst_release(dst);

	return is_local;
}
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -070091/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -070092 * update_defense_level is called from keventd and from sysctl,
93 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -070094 */
Hans Schillstrom93304192011-01-03 14:44:51 +010095static void update_defense_level(struct netns_ipvs *ipvs)
Linus Torvalds1da177e2005-04-16 15:20:36 -070096{
97 struct sysinfo i;
98 static int old_secure_tcp = 0;
99 int availmem;
100 int nomem;
101 int to_change = -1;
102
103 /* we only count free and buffered memory (in pages) */
104 si_meminfo(&i);
105 availmem = i.freeram + i.bufferram;
106 /* however in linux 2.5 the i.bufferram is total page cache size,
107 we need adjust it */
108 /* si_swapinfo(&i); */
109 /* availmem = availmem - (i.totalswap - i.freeswap); */
110
Hans Schillstroma0840e22011-01-03 14:44:58 +0100111 nomem = (availmem < ipvs->sysctl_amemthresh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700113 local_bh_disable();
114
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115 /* drop_entry */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100116 spin_lock(&ipvs->dropentry_lock);
117 switch (ipvs->sysctl_drop_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100119 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120 break;
121 case 1:
122 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100123 atomic_set(&ipvs->dropentry, 1);
124 ipvs->sysctl_drop_entry = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100126 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127 }
128 break;
129 case 2:
130 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100131 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100133 atomic_set(&ipvs->dropentry, 0);
134 ipvs->sysctl_drop_entry = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 };
136 break;
137 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100138 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 break;
140 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100141 spin_unlock(&ipvs->dropentry_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142
143 /* drop_packet */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100144 spin_lock(&ipvs->droppacket_lock);
145 switch (ipvs->sysctl_drop_packet) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100147 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148 break;
149 case 1:
150 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100151 ipvs->drop_rate = ipvs->drop_counter
152 = ipvs->sysctl_amemthresh /
153 (ipvs->sysctl_amemthresh-availmem);
154 ipvs->sysctl_drop_packet = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100156 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157 }
158 break;
159 case 2:
160 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100161 ipvs->drop_rate = ipvs->drop_counter
162 = ipvs->sysctl_amemthresh /
163 (ipvs->sysctl_amemthresh-availmem);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100165 ipvs->drop_rate = 0;
166 ipvs->sysctl_drop_packet = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 }
168 break;
169 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100170 ipvs->drop_rate = ipvs->sysctl_am_droprate;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171 break;
172 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100173 spin_unlock(&ipvs->droppacket_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174
175 /* secure_tcp */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100176 spin_lock(&ipvs->securetcp_lock);
177 switch (ipvs->sysctl_secure_tcp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 case 0:
179 if (old_secure_tcp >= 2)
180 to_change = 0;
181 break;
182 case 1:
183 if (nomem) {
184 if (old_secure_tcp < 2)
185 to_change = 1;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100186 ipvs->sysctl_secure_tcp = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 } else {
188 if (old_secure_tcp >= 2)
189 to_change = 0;
190 }
191 break;
192 case 2:
193 if (nomem) {
194 if (old_secure_tcp < 2)
195 to_change = 1;
196 } else {
197 if (old_secure_tcp >= 2)
198 to_change = 0;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100199 ipvs->sysctl_secure_tcp = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 }
201 break;
202 case 3:
203 if (old_secure_tcp < 2)
204 to_change = 1;
205 break;
206 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100207 old_secure_tcp = ipvs->sysctl_secure_tcp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 if (to_change >= 0)
Hans Schillstrom93304192011-01-03 14:44:51 +0100209 ip_vs_protocol_timeout_change(ipvs,
Hans Schillstroma0840e22011-01-03 14:44:58 +0100210 ipvs->sysctl_secure_tcp > 1);
211 spin_unlock(&ipvs->securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700212
213 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214}
215
216
217/*
218 * Timer for checking the defense
219 */
220#define DEFENSE_TIMER_PERIOD 1*HZ
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221
David Howellsc4028952006-11-22 14:57:56 +0000222static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223{
Hans Schillstromf6340ee2011-01-03 14:44:59 +0100224 struct netns_ipvs *ipvs =
225 container_of(work, struct netns_ipvs, defense_work.work);
Hans Schillstrom93304192011-01-03 14:44:51 +0100226
227 update_defense_level(ipvs);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100228 if (atomic_read(&ipvs->dropentry))
Hans Schillstromf6340ee2011-01-03 14:44:59 +0100229 ip_vs_random_dropentry(ipvs->net);
230 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231}
232
233int
234ip_vs_use_count_inc(void)
235{
236 return try_module_get(THIS_MODULE);
237}
238
239void
240ip_vs_use_count_dec(void)
241{
242 module_put(THIS_MODULE);
243}
244
245
246/*
247 * Hash table: for virtual service lookups
248 */
249#define IP_VS_SVC_TAB_BITS 8
250#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
251#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
252
253/* the service table hashed by <protocol, addr, port> */
254static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
255/* the service table hashed by fwmark */
256static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
257
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258
259/*
260 * Returns hash value for virtual service
261 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100262static inline unsigned
263ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
264 const union nf_inet_addr *addr, __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265{
266 register unsigned porth = ntohs(port);
Julius Volzb18610d2008-09-02 15:55:37 +0200267 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268
Julius Volzb18610d2008-09-02 15:55:37 +0200269#ifdef CONFIG_IP_VS_IPV6
270 if (af == AF_INET6)
271 addr_fold = addr->ip6[0]^addr->ip6[1]^
272 addr->ip6[2]^addr->ip6[3];
273#endif
Hans Schillstromfc723252011-01-03 14:44:43 +0100274 addr_fold ^= ((size_t)net>>8);
Julius Volzb18610d2008-09-02 15:55:37 +0200275
276 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277 & IP_VS_SVC_TAB_MASK;
278}
279
280/*
281 * Returns hash value of fwmark for virtual service lookup
282 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100283static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284{
Hans Schillstromfc723252011-01-03 14:44:43 +0100285 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286}
287
288/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100289 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290 * or in the ip_vs_svc_fwm_table by fwmark.
291 * Should be called with locked tables.
292 */
293static int ip_vs_svc_hash(struct ip_vs_service *svc)
294{
295 unsigned hash;
296
297 if (svc->flags & IP_VS_SVC_F_HASHED) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000298 pr_err("%s(): request for already hashed, called from %pF\n",
299 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300 return 0;
301 }
302
303 if (svc->fwmark == 0) {
304 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100305 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100307 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
308 &svc->addr, svc->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
310 } else {
311 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100312 * Hash it by fwmark in svc_fwm_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100314 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
316 }
317
318 svc->flags |= IP_VS_SVC_F_HASHED;
319 /* increase its refcnt because it is referenced by the svc table */
320 atomic_inc(&svc->refcnt);
321 return 1;
322}
323
324
325/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100326 * Unhashes a service from svc_table / svc_fwm_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327 * Should be called with locked tables.
328 */
329static int ip_vs_svc_unhash(struct ip_vs_service *svc)
330{
331 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000332 pr_err("%s(): request for unhash flagged, called from %pF\n",
333 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334 return 0;
335 }
336
337 if (svc->fwmark == 0) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100338 /* Remove it from the svc_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 list_del(&svc->s_list);
340 } else {
Hans Schillstromfc723252011-01-03 14:44:43 +0100341 /* Remove it from the svc_fwm_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342 list_del(&svc->f_list);
343 }
344
345 svc->flags &= ~IP_VS_SVC_F_HASHED;
346 atomic_dec(&svc->refcnt);
347 return 1;
348}
349
350
351/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100352 * Get service by {netns, proto,addr,port} in the service table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353 */
Julius Volzb18610d2008-09-02 15:55:37 +0200354static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100355__ip_vs_service_find(struct net *net, int af, __u16 protocol,
356 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357{
358 unsigned hash;
359 struct ip_vs_service *svc;
360
361 /* Check for "full" addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100362 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363
364 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
Julius Volzb18610d2008-09-02 15:55:37 +0200365 if ((svc->af == af)
366 && ip_vs_addr_equal(af, &svc->addr, vaddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367 && (svc->port == vport)
Hans Schillstromfc723252011-01-03 14:44:43 +0100368 && (svc->protocol == protocol)
369 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371 return svc;
372 }
373 }
374
375 return NULL;
376}
377
378
379/*
380 * Get service by {fwmark} in the service table.
381 */
Julius Volzb18610d2008-09-02 15:55:37 +0200382static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100383__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384{
385 unsigned hash;
386 struct ip_vs_service *svc;
387
388 /* Check for fwmark addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100389 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390
391 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100392 if (svc->fwmark == fwmark && svc->af == af
393 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 return svc;
396 }
397 }
398
399 return NULL;
400}
401
402struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100403ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
Julius Volz3c2e0502008-09-02 15:55:38 +0200404 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405{
406 struct ip_vs_service *svc;
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100407 struct netns_ipvs *ipvs = net_ipvs(net);
Julius Volz3c2e0502008-09-02 15:55:38 +0200408
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409 read_lock(&__ip_vs_svc_lock);
410
411 /*
412 * Check the table hashed by fwmark first
413 */
Julian Anastasov097fc762011-03-04 12:26:17 +0200414 if (fwmark) {
415 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
416 if (svc)
417 goto out;
418 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419
420 /*
421 * Check the table hashed by <protocol,addr,port>
422 * for "full" addressed entries
423 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100424 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425
426 if (svc == NULL
427 && protocol == IPPROTO_TCP
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100428 && atomic_read(&ipvs->ftpsvc_counter)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
430 /*
431 * Check if ftp service entry exists, the packet
432 * might belong to FTP data connections.
433 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100434 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 }
436
437 if (svc == NULL
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100438 && atomic_read(&ipvs->nullsvc_counter)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 /*
440 * Check if the catch-all port (port zero) exists
441 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100442 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 }
444
445 out:
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200446 if (svc)
447 atomic_inc(&svc->usecnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 read_unlock(&__ip_vs_svc_lock);
449
Julius Volz3c2e0502008-09-02 15:55:38 +0200450 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
451 fwmark, ip_vs_proto_name(protocol),
452 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
453 svc ? "hit" : "not hit");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454
455 return svc;
456}
457
458
459static inline void
460__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
461{
462 atomic_inc(&svc->refcnt);
463 dest->svc = svc;
464}
465
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200466static void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467__ip_vs_unbind_svc(struct ip_vs_dest *dest)
468{
469 struct ip_vs_service *svc = dest->svc;
470
471 dest->svc = NULL;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200472 if (atomic_dec_and_test(&svc->refcnt)) {
473 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
474 svc->fwmark,
475 IP_VS_DBG_ADDR(svc->af, &svc->addr),
476 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +0100477 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200479 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700480}
481
482
483/*
484 * Returns hash value for real service
485 */
Julius Volz7937df12008-09-02 15:55:48 +0200486static inline unsigned ip_vs_rs_hashkey(int af,
487 const union nf_inet_addr *addr,
488 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700489{
490 register unsigned porth = ntohs(port);
Julius Volz7937df12008-09-02 15:55:48 +0200491 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492
Julius Volz7937df12008-09-02 15:55:48 +0200493#ifdef CONFIG_IP_VS_IPV6
494 if (af == AF_INET6)
495 addr_fold = addr->ip6[0]^addr->ip6[1]^
496 addr->ip6[2]^addr->ip6[3];
497#endif
498
499 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500 & IP_VS_RTAB_MASK;
501}
502
503/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100504 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505 * should be called with locked tables.
506 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100507static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508{
509 unsigned hash;
510
511 if (!list_empty(&dest->d_list)) {
512 return 0;
513 }
514
515 /*
516 * Hash by proto,addr,port,
517 * which are the parameters of the real service.
518 */
Julius Volz7937df12008-09-02 15:55:48 +0200519 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
520
Hans Schillstromfc723252011-01-03 14:44:43 +0100521 list_add(&dest->d_list, &ipvs->rs_table[hash]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522
523 return 1;
524}
525
526/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100527 * UNhashes ip_vs_dest from rs_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528 * should be called with locked tables.
529 */
530static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
531{
532 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100533 * Remove it from the rs_table table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 */
535 if (!list_empty(&dest->d_list)) {
536 list_del(&dest->d_list);
537 INIT_LIST_HEAD(&dest->d_list);
538 }
539
540 return 1;
541}
542
543/*
544 * Lookup real service by <proto,addr,port> in the real service table.
545 */
546struct ip_vs_dest *
Hans Schillstromfc723252011-01-03 14:44:43 +0100547ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
Julius Volz7937df12008-09-02 15:55:48 +0200548 const union nf_inet_addr *daddr,
549 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550{
Hans Schillstromfc723252011-01-03 14:44:43 +0100551 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 unsigned hash;
553 struct ip_vs_dest *dest;
554
555 /*
556 * Check for "full" addressed entries
557 * Return the first found entry
558 */
Julius Volz7937df12008-09-02 15:55:48 +0200559 hash = ip_vs_rs_hashkey(af, daddr, dport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560
Hans Schillstroma0840e22011-01-03 14:44:58 +0100561 read_lock(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100562 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200563 if ((dest->af == af)
564 && ip_vs_addr_equal(af, &dest->addr, daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565 && (dest->port == dport)
566 && ((dest->protocol == protocol) ||
567 dest->vfwmark)) {
568 /* HIT */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100569 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570 return dest;
571 }
572 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100573 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574
575 return NULL;
576}
577
578/*
579 * Lookup destination by {addr,port} in the given service
580 */
581static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200582ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
583 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584{
585 struct ip_vs_dest *dest;
586
587 /*
588 * Find the destination for the given service
589 */
590 list_for_each_entry(dest, &svc->destinations, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200591 if ((dest->af == svc->af)
592 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
593 && (dest->port == dport)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 /* HIT */
595 return dest;
596 }
597 }
598
599 return NULL;
600}
601
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800602/*
603 * Find destination by {daddr,dport,vaddr,protocol}
604 * Cretaed to be used in ip_vs_process_message() in
605 * the backup synchronization daemon. It finds the
606 * destination to be bound to the received connection
607 * on the backup.
608 *
609 * ip_vs_lookup_real_service() looked promissing, but
610 * seems not working as expected.
611 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100612struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
613 const union nf_inet_addr *daddr,
Julius Volz7937df12008-09-02 15:55:48 +0200614 __be16 dport,
615 const union nf_inet_addr *vaddr,
Hans Schillstrom0e051e62010-11-19 14:25:07 +0100616 __be16 vport, __u16 protocol, __u32 fwmark)
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800617{
618 struct ip_vs_dest *dest;
619 struct ip_vs_service *svc;
620
Hans Schillstromfc723252011-01-03 14:44:43 +0100621 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800622 if (!svc)
623 return NULL;
624 dest = ip_vs_lookup_dest(svc, daddr, dport);
625 if (dest)
626 atomic_inc(&dest->refcnt);
627 ip_vs_service_put(svc);
628 return dest;
629}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630
631/*
632 * Lookup dest by {svc,addr,port} in the destination trash.
633 * The destination trash is used to hold the destinations that are removed
634 * from the service table but are still referenced by some conn entries.
635 * The reason to add the destination trash is when the dest is temporary
636 * down (either by administrator or by monitor program), the dest can be
637 * picked back from the trash, the remaining connections to the dest can
638 * continue, and the counting information of the dest is also useful for
639 * scheduling.
640 */
641static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200642ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
643 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644{
645 struct ip_vs_dest *dest, *nxt;
Hans Schillstromf2431e62011-01-03 14:45:00 +0100646 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647
648 /*
649 * Find the destination in trash
650 */
Hans Schillstromf2431e62011-01-03 14:45:00 +0100651 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200652 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
653 "dest->refcnt=%d\n",
654 dest->vfwmark,
655 IP_VS_DBG_ADDR(svc->af, &dest->addr),
656 ntohs(dest->port),
657 atomic_read(&dest->refcnt));
658 if (dest->af == svc->af &&
659 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 dest->port == dport &&
661 dest->vfwmark == svc->fwmark &&
662 dest->protocol == svc->protocol &&
663 (svc->fwmark ||
Julius Volz7937df12008-09-02 15:55:48 +0200664 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 dest->vport == svc->port))) {
666 /* HIT */
667 return dest;
668 }
669
670 /*
671 * Try to purge the destination from trash if not referenced
672 */
673 if (atomic_read(&dest->refcnt) == 1) {
Julius Volz7937df12008-09-02 15:55:48 +0200674 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
675 "from trash\n",
676 dest->vfwmark,
677 IP_VS_DBG_ADDR(svc->af, &dest->addr),
678 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 list_del(&dest->n_list);
680 ip_vs_dst_reset(dest);
681 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100682 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683 kfree(dest);
684 }
685 }
686
687 return NULL;
688}
689
690
691/*
692 * Clean up all the destinations in the trash
693 * Called by the ip_vs_control_cleanup()
694 *
695 * When the ip_vs_control_clearup is activated by ipvs module exit,
696 * the service tables must have been flushed and all the connections
697 * are expired, and the refcnt of each destination in the trash must
698 * be 1, so we simply release them here.
699 */
Hans Schillstromf2431e62011-01-03 14:45:00 +0100700static void ip_vs_trash_cleanup(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701{
702 struct ip_vs_dest *dest, *nxt;
Hans Schillstromf2431e62011-01-03 14:45:00 +0100703 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704
Hans Schillstromf2431e62011-01-03 14:45:00 +0100705 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706 list_del(&dest->n_list);
707 ip_vs_dst_reset(dest);
708 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100709 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710 kfree(dest);
711 }
712}
713
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200714static void
715ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
716{
717#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200718
719 spin_lock_bh(&src->lock);
720
721 IP_VS_SHOW_STATS_COUNTER(conns);
722 IP_VS_SHOW_STATS_COUNTER(inpkts);
723 IP_VS_SHOW_STATS_COUNTER(outpkts);
724 IP_VS_SHOW_STATS_COUNTER(inbytes);
725 IP_VS_SHOW_STATS_COUNTER(outbytes);
726
Julian Anastasovea9f22c2011-03-14 01:41:54 +0200727 ip_vs_read_estimator(dst, src);
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200728
729 spin_unlock_bh(&src->lock);
730}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731
732static void
733ip_vs_zero_stats(struct ip_vs_stats *stats)
734{
735 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000736
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200737 /* get current counters as zero point, rates are zeroed */
738
739#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200740
741 IP_VS_ZERO_STATS_COUNTER(conns);
742 IP_VS_ZERO_STATS_COUNTER(inpkts);
743 IP_VS_ZERO_STATS_COUNTER(outpkts);
744 IP_VS_ZERO_STATS_COUNTER(inbytes);
745 IP_VS_ZERO_STATS_COUNTER(outbytes);
746
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000748
Sven Wegener3a14a3132008-08-10 18:24:41 +0000749 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750}
751
752/*
753 * Update a destination in the given service
754 */
755static void
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200756__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
757 struct ip_vs_dest_user_kern *udest, int add)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758{
Hans Schillstromfc723252011-01-03 14:44:43 +0100759 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 int conn_flags;
761
762 /* set the weight and the flags */
763 atomic_set(&dest->weight, udest->weight);
Julian Anastasov35757922010-09-17 14:18:16 +0200764 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
765 conn_flags |= IP_VS_CONN_F_INACTIVE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766
Linus Torvalds1da177e2005-04-16 15:20:36 -0700767 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
Julian Anastasov35757922010-09-17 14:18:16 +0200768 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
770 } else {
771 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100772 * Put the real service in rs_table if not present.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773 * For now only for NAT!
774 */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100775 write_lock_bh(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100776 ip_vs_rs_hash(ipvs, dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100777 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778 }
779 atomic_set(&dest->conn_flags, conn_flags);
780
781 /* bind the service */
782 if (!dest->svc) {
783 __ip_vs_bind_svc(dest, svc);
784 } else {
785 if (dest->svc != svc) {
786 __ip_vs_unbind_svc(dest);
787 ip_vs_zero_stats(&dest->stats);
788 __ip_vs_bind_svc(dest, svc);
789 }
790 }
791
792 /* set the dest status flags */
793 dest->flags |= IP_VS_DEST_F_AVAILABLE;
794
795 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
796 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
797 dest->u_threshold = udest->u_threshold;
798 dest->l_threshold = udest->l_threshold;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200799
Julian Anastasovfc604762010-10-17 16:38:15 +0300800 spin_lock(&dest->dst_lock);
801 ip_vs_dst_reset(dest);
802 spin_unlock(&dest->dst_lock);
803
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200804 if (add)
Julian Anastasov6ef757f2011-03-14 01:44:28 +0200805 ip_vs_start_estimator(svc->net, &dest->stats);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200806
807 write_lock_bh(&__ip_vs_svc_lock);
808
809 /* Wait until all other svc users go away */
810 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
811
812 if (add) {
813 list_add(&dest->n_list, &svc->destinations);
814 svc->num_dests++;
815 }
816
817 /* call the update_service, because server weight may be changed */
818 if (svc->scheduler->update_service)
819 svc->scheduler->update_service(svc);
820
821 write_unlock_bh(&__ip_vs_svc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822}
823
824
825/*
826 * Create a destination for the given service
827 */
828static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200829ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830 struct ip_vs_dest **dest_p)
831{
832 struct ip_vs_dest *dest;
833 unsigned atype;
834
835 EnterFunction(2);
836
Vince Busam09571c72008-09-02 15:55:52 +0200837#ifdef CONFIG_IP_VS_IPV6
838 if (svc->af == AF_INET6) {
839 atype = ipv6_addr_type(&udest->addr.in6);
Sven Wegener3bfb92f2008-09-05 16:53:49 +0200840 if ((!(atype & IPV6_ADDR_UNICAST) ||
841 atype & IPV6_ADDR_LINKLOCAL) &&
Hans Schillstrom4a984802011-01-03 14:45:02 +0100842 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
Vince Busam09571c72008-09-02 15:55:52 +0200843 return -EINVAL;
844 } else
845#endif
846 {
Hans Schillstrom4a984802011-01-03 14:45:02 +0100847 atype = inet_addr_type(svc->net, udest->addr.ip);
Vince Busam09571c72008-09-02 15:55:52 +0200848 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
849 return -EINVAL;
850 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851
Simon Hormandee06e42010-08-26 02:54:31 +0000852 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000854 pr_err("%s(): no memory.\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700855 return -ENOMEM;
856 }
Hans Schillstromb17fc992011-01-03 14:44:56 +0100857 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
858 if (!dest->stats.cpustats) {
859 pr_err("%s() alloc_percpu failed\n", __func__);
860 goto err_alloc;
861 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862
Julius Volzc860c6b2008-09-02 15:55:36 +0200863 dest->af = svc->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864 dest->protocol = svc->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +0200865 dest->vaddr = svc->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700866 dest->vport = svc->port;
867 dest->vfwmark = svc->fwmark;
Julius Volzc860c6b2008-09-02 15:55:36 +0200868 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 dest->port = udest->port;
870
871 atomic_set(&dest->activeconns, 0);
872 atomic_set(&dest->inactconns, 0);
873 atomic_set(&dest->persistconns, 0);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200874 atomic_set(&dest->refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700875
876 INIT_LIST_HEAD(&dest->d_list);
877 spin_lock_init(&dest->dst_lock);
878 spin_lock_init(&dest->stats.lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200879 __ip_vs_update_dest(svc, dest, udest, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880
881 *dest_p = dest;
882
883 LeaveFunction(2);
884 return 0;
Hans Schillstromb17fc992011-01-03 14:44:56 +0100885
886err_alloc:
887 kfree(dest);
888 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889}
890
891
892/*
893 * Add a destination into an existing service
894 */
895static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200896ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897{
898 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200899 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700900 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901 int ret;
902
903 EnterFunction(2);
904
905 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000906 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907 return -ERANGE;
908 }
909
910 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000911 pr_err("%s(): lower threshold is higher than upper threshold\n",
912 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 return -ERANGE;
914 }
915
Julius Volzc860c6b2008-09-02 15:55:36 +0200916 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
917
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 /*
919 * Check if the dest already exists in the list
920 */
Julius Volz7937df12008-09-02 15:55:48 +0200921 dest = ip_vs_lookup_dest(svc, &daddr, dport);
922
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 if (dest != NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000924 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 return -EEXIST;
926 }
927
928 /*
929 * Check if the dest already exists in the trash and
930 * is from the same service
931 */
Julius Volz7937df12008-09-02 15:55:48 +0200932 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
933
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 if (dest != NULL) {
Julius Volzcfc78c52008-09-02 15:55:53 +0200935 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
936 "dest->refcnt=%d, service %u/%s:%u\n",
937 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
938 atomic_read(&dest->refcnt),
939 dest->vfwmark,
940 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
941 ntohs(dest->vport));
942
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943 /*
944 * Get the destination from the trash
945 */
946 list_del(&dest->n_list);
947
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200948 __ip_vs_update_dest(svc, dest, udest, 1);
949 ret = 0;
950 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951 /*
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200952 * Allocate and initialize the dest structure
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200954 ret = ip_vs_new_dest(svc, udest, &dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 LeaveFunction(2);
957
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200958 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959}
960
961
962/*
963 * Edit a destination in the given service
964 */
965static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200966ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967{
968 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200969 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700970 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971
972 EnterFunction(2);
973
974 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000975 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976 return -ERANGE;
977 }
978
979 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000980 pr_err("%s(): lower threshold is higher than upper threshold\n",
981 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982 return -ERANGE;
983 }
984
Julius Volzc860c6b2008-09-02 15:55:36 +0200985 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
986
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 /*
988 * Lookup the destination list
989 */
Julius Volz7937df12008-09-02 15:55:48 +0200990 dest = ip_vs_lookup_dest(svc, &daddr, dport);
991
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000993 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994 return -ENOENT;
995 }
996
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200997 __ip_vs_update_dest(svc, dest, udest, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 LeaveFunction(2);
999
1000 return 0;
1001}
1002
1003
1004/*
1005 * Delete a destination (must be already unlinked from the service)
1006 */
Hans Schillstrom29c20262011-01-03 14:44:54 +01001007static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008{
Hans Schillstroma0840e22011-01-03 14:44:58 +01001009 struct netns_ipvs *ipvs = net_ipvs(net);
1010
Julian Anastasov6ef757f2011-03-14 01:44:28 +02001011 ip_vs_stop_estimator(net, &dest->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012
1013 /*
1014 * Remove it from the d-linked list with the real services.
1015 */
Hans Schillstroma0840e22011-01-03 14:44:58 +01001016 write_lock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 ip_vs_rs_unhash(dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +01001018 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019
1020 /*
1021 * Decrease the refcnt of the dest, and free the dest
1022 * if nobody refers to it (refcnt=0). Otherwise, throw
1023 * the destination into the trash.
1024 */
1025 if (atomic_dec_and_test(&dest->refcnt)) {
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001026 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1027 dest->vfwmark,
1028 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1029 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030 ip_vs_dst_reset(dest);
1031 /* simply decrease svc->refcnt here, let the caller check
1032 and release the service if nobody refers to it.
1033 Only user context can release destination and service,
1034 and only one user context can update virtual service at a
1035 time, so the operation here is OK */
1036 atomic_dec(&dest->svc->refcnt);
Hans Schillstromb17fc992011-01-03 14:44:56 +01001037 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001038 kfree(dest);
1039 } else {
Julius Volzcfc78c52008-09-02 15:55:53 +02001040 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1041 "dest->refcnt=%d\n",
1042 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1043 ntohs(dest->port),
1044 atomic_read(&dest->refcnt));
Hans Schillstromf2431e62011-01-03 14:45:00 +01001045 list_add(&dest->n_list, &ipvs->dest_trash);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046 atomic_inc(&dest->refcnt);
1047 }
1048}
1049
1050
1051/*
1052 * Unlink a destination from the given service
1053 */
1054static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1055 struct ip_vs_dest *dest,
1056 int svcupd)
1057{
1058 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1059
1060 /*
1061 * Remove it from the d-linked destination list.
1062 */
1063 list_del(&dest->n_list);
1064 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001065
1066 /*
1067 * Call the update_service function of its scheduler
1068 */
1069 if (svcupd && svc->scheduler->update_service)
1070 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071}
1072
1073
1074/*
1075 * Delete a destination server in the given service
1076 */
1077static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001078ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079{
1080 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -07001081 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082
1083 EnterFunction(2);
1084
Julius Volz7937df12008-09-02 15:55:48 +02001085 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
Julius Volzc860c6b2008-09-02 15:55:36 +02001086
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001088 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 return -ENOENT;
1090 }
1091
1092 write_lock_bh(&__ip_vs_svc_lock);
1093
1094 /*
1095 * Wait until all other svc users go away.
1096 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001097 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098
1099 /*
1100 * Unlink dest from the service
1101 */
1102 __ip_vs_unlink_dest(svc, dest, 1);
1103
1104 write_unlock_bh(&__ip_vs_svc_lock);
1105
1106 /*
1107 * Delete the destination
1108 */
Hans Schillstroma0840e22011-01-03 14:44:58 +01001109 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110
1111 LeaveFunction(2);
1112
1113 return 0;
1114}
1115
1116
1117/*
1118 * Add a service into the service hash table
1119 */
1120static int
Hans Schillstromfc723252011-01-03 14:44:43 +01001121ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
Julius Volzc860c6b2008-09-02 15:55:36 +02001122 struct ip_vs_service **svc_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123{
1124 int ret = 0;
1125 struct ip_vs_scheduler *sched = NULL;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001126 struct ip_vs_pe *pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127 struct ip_vs_service *svc = NULL;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001128 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129
1130 /* increase the module use count */
1131 ip_vs_use_count_inc();
1132
1133 /* Lookup the scheduler by 'u->sched_name' */
1134 sched = ip_vs_scheduler_get(u->sched_name);
1135 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001136 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137 ret = -ENOENT;
Simon Horman6e08bfb2010-08-22 21:37:52 +09001138 goto out_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 }
1140
Simon Horman0d1e71b2010-08-22 21:37:54 +09001141 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001142 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001143 if (pe == NULL) {
1144 pr_info("persistence engine module ip_vs_pe_%s "
1145 "not found\n", u->pe_name);
1146 ret = -ENOENT;
1147 goto out_err;
1148 }
1149 }
1150
Julius Volzf94fd042008-09-02 15:55:55 +02001151#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001152 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1153 ret = -EINVAL;
1154 goto out_err;
Julius Volzf94fd042008-09-02 15:55:55 +02001155 }
1156#endif
1157
Simon Hormandee06e42010-08-26 02:54:31 +00001158 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159 if (svc == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001160 IP_VS_DBG(1, "%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161 ret = -ENOMEM;
1162 goto out_err;
1163 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001164 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1165 if (!svc->stats.cpustats) {
1166 pr_err("%s() alloc_percpu failed\n", __func__);
1167 goto out_err;
1168 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169
1170 /* I'm the first user of the service */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001171 atomic_set(&svc->usecnt, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172 atomic_set(&svc->refcnt, 0);
1173
Julius Volzc860c6b2008-09-02 15:55:36 +02001174 svc->af = u->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175 svc->protocol = u->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +02001176 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 svc->port = u->port;
1178 svc->fwmark = u->fwmark;
1179 svc->flags = u->flags;
1180 svc->timeout = u->timeout * HZ;
1181 svc->netmask = u->netmask;
Hans Schillstromfc723252011-01-03 14:44:43 +01001182 svc->net = net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183
1184 INIT_LIST_HEAD(&svc->destinations);
1185 rwlock_init(&svc->sched_lock);
1186 spin_lock_init(&svc->stats.lock);
1187
1188 /* Bind the scheduler */
1189 ret = ip_vs_bind_scheduler(svc, sched);
1190 if (ret)
1191 goto out_err;
1192 sched = NULL;
1193
Simon Horman0d1e71b2010-08-22 21:37:54 +09001194 /* Bind the ct retriever */
1195 ip_vs_bind_pe(svc, pe);
1196 pe = NULL;
1197
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 /* Update the virtual service counters */
1199 if (svc->port == FTPPORT)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001200 atomic_inc(&ipvs->ftpsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 else if (svc->port == 0)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001202 atomic_inc(&ipvs->nullsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203
Julian Anastasov6ef757f2011-03-14 01:44:28 +02001204 ip_vs_start_estimator(net, &svc->stats);
Julius Volzf94fd042008-09-02 15:55:55 +02001205
1206 /* Count only IPv4 services for old get/setsockopt interface */
1207 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001208 ipvs->num_services++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209
1210 /* Hash the service into the service table */
1211 write_lock_bh(&__ip_vs_svc_lock);
1212 ip_vs_svc_hash(svc);
1213 write_unlock_bh(&__ip_vs_svc_lock);
1214
1215 *svc_p = svc;
1216 return 0;
1217
Hans Schillstromb17fc992011-01-03 14:44:56 +01001218
Simon Horman6e08bfb2010-08-22 21:37:52 +09001219 out_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 if (svc != NULL) {
Simon Horman2fabf352010-08-22 21:37:52 +09001221 ip_vs_unbind_scheduler(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001222 if (svc->inc) {
1223 local_bh_disable();
1224 ip_vs_app_inc_put(svc->inc);
1225 local_bh_enable();
1226 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001227 if (svc->stats.cpustats)
1228 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229 kfree(svc);
1230 }
1231 ip_vs_scheduler_put(sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001232 ip_vs_pe_put(pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234 /* decrease the module use count */
1235 ip_vs_use_count_dec();
1236
1237 return ret;
1238}
1239
1240
1241/*
1242 * Edit a service and bind it with a new scheduler
1243 */
1244static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001245ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246{
1247 struct ip_vs_scheduler *sched, *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001248 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249 int ret = 0;
1250
1251 /*
1252 * Lookup the scheduler, by 'u->sched_name'
1253 */
1254 sched = ip_vs_scheduler_get(u->sched_name);
1255 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001256 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257 return -ENOENT;
1258 }
1259 old_sched = sched;
1260
Simon Horman0d1e71b2010-08-22 21:37:54 +09001261 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001262 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001263 if (pe == NULL) {
1264 pr_info("persistence engine module ip_vs_pe_%s "
1265 "not found\n", u->pe_name);
1266 ret = -ENOENT;
1267 goto out;
1268 }
1269 old_pe = pe;
1270 }
1271
Julius Volzf94fd042008-09-02 15:55:55 +02001272#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001273 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1274 ret = -EINVAL;
1275 goto out;
Julius Volzf94fd042008-09-02 15:55:55 +02001276 }
1277#endif
1278
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279 write_lock_bh(&__ip_vs_svc_lock);
1280
1281 /*
1282 * Wait until all other svc users go away.
1283 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001284 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285
1286 /*
1287 * Set the flags and timeout value
1288 */
1289 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1290 svc->timeout = u->timeout * HZ;
1291 svc->netmask = u->netmask;
1292
1293 old_sched = svc->scheduler;
1294 if (sched != old_sched) {
1295 /*
1296 * Unbind the old scheduler
1297 */
1298 if ((ret = ip_vs_unbind_scheduler(svc))) {
1299 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001300 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 }
1302
1303 /*
1304 * Bind the new scheduler
1305 */
1306 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1307 /*
1308 * If ip_vs_bind_scheduler fails, restore the old
1309 * scheduler.
1310 * The main reason of failure is out of memory.
1311 *
1312 * The question is if the old scheduler can be
1313 * restored all the time. TODO: if it cannot be
1314 * restored some time, we must delete the service,
1315 * otherwise the system may crash.
1316 */
1317 ip_vs_bind_scheduler(svc, old_sched);
1318 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001319 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320 }
1321 }
1322
Simon Horman0d1e71b2010-08-22 21:37:54 +09001323 old_pe = svc->pe;
1324 if (pe != old_pe) {
1325 ip_vs_unbind_pe(svc);
1326 ip_vs_bind_pe(svc, pe);
1327 }
1328
Simon Horman9e691ed2008-09-17 10:10:41 +10001329 out_unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330 write_unlock_bh(&__ip_vs_svc_lock);
Simon Horman9e691ed2008-09-17 10:10:41 +10001331 out:
Simon Horman6e08bfb2010-08-22 21:37:52 +09001332 ip_vs_scheduler_put(old_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001333 ip_vs_pe_put(old_pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334 return ret;
1335}
1336
1337
1338/*
1339 * Delete a service from the service list
1340 * - The service must be unlinked, unlocked and not referenced!
1341 * - We are called under _bh lock
1342 */
1343static void __ip_vs_del_service(struct ip_vs_service *svc)
1344{
1345 struct ip_vs_dest *dest, *nxt;
1346 struct ip_vs_scheduler *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001347 struct ip_vs_pe *old_pe;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001348 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001349
1350 pr_info("%s: enter\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001351
Julius Volzf94fd042008-09-02 15:55:55 +02001352 /* Count only IPv4 services for old get/setsockopt interface */
1353 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001354 ipvs->num_services--;
Julius Volzf94fd042008-09-02 15:55:55 +02001355
Julian Anastasov6ef757f2011-03-14 01:44:28 +02001356 ip_vs_stop_estimator(svc->net, &svc->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357
1358 /* Unbind scheduler */
1359 old_sched = svc->scheduler;
1360 ip_vs_unbind_scheduler(svc);
Simon Horman6e08bfb2010-08-22 21:37:52 +09001361 ip_vs_scheduler_put(old_sched);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362
Simon Horman0d1e71b2010-08-22 21:37:54 +09001363 /* Unbind persistence engine */
1364 old_pe = svc->pe;
1365 ip_vs_unbind_pe(svc);
1366 ip_vs_pe_put(old_pe);
1367
Linus Torvalds1da177e2005-04-16 15:20:36 -07001368 /* Unbind app inc */
1369 if (svc->inc) {
1370 ip_vs_app_inc_put(svc->inc);
1371 svc->inc = NULL;
1372 }
1373
1374 /*
1375 * Unlink the whole destination list
1376 */
1377 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1378 __ip_vs_unlink_dest(svc, dest, 0);
Hans Schillstrom29c20262011-01-03 14:44:54 +01001379 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 }
1381
1382 /*
1383 * Update the virtual service counters
1384 */
1385 if (svc->port == FTPPORT)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001386 atomic_dec(&ipvs->ftpsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387 else if (svc->port == 0)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001388 atomic_dec(&ipvs->nullsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001389
1390 /*
1391 * Free the service if nobody refers to it
1392 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001393 if (atomic_read(&svc->refcnt) == 0) {
1394 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1395 svc->fwmark,
1396 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1397 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +01001398 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001400 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001401
1402 /* decrease the module use count */
1403 ip_vs_use_count_dec();
1404}
1405
1406/*
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001407 * Unlink a service from list and try to delete it if its refcnt reached 0
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001409static void ip_vs_unlink_service(struct ip_vs_service *svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411 /*
1412 * Unhash it from the service table
1413 */
1414 write_lock_bh(&__ip_vs_svc_lock);
1415
1416 ip_vs_svc_unhash(svc);
1417
1418 /*
1419 * Wait until all the svc users go away.
1420 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001421 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422
1423 __ip_vs_del_service(svc);
1424
1425 write_unlock_bh(&__ip_vs_svc_lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001426}
1427
1428/*
1429 * Delete a service from the service list
1430 */
1431static int ip_vs_del_service(struct ip_vs_service *svc)
1432{
1433 if (svc == NULL)
1434 return -EEXIST;
1435 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001436
1437 return 0;
1438}
1439
1440
1441/*
1442 * Flush all the virtual services
1443 */
Hans Schillstromfc723252011-01-03 14:44:43 +01001444static int ip_vs_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445{
1446 int idx;
1447 struct ip_vs_service *svc, *nxt;
1448
1449 /*
Hans Schillstromfc723252011-01-03 14:44:43 +01001450 * Flush the service table hashed by <netns,protocol,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -07001451 */
1452 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001453 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1454 s_list) {
1455 if (net_eq(svc->net, net))
1456 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001457 }
1458 }
1459
1460 /*
1461 * Flush the service table hashed by fwmark
1462 */
1463 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1464 list_for_each_entry_safe(svc, nxt,
1465 &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001466 if (net_eq(svc->net, net))
1467 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001468 }
1469 }
1470
1471 return 0;
1472}
1473
1474
1475/*
1476 * Zero counters in a service or all services
1477 */
1478static int ip_vs_zero_service(struct ip_vs_service *svc)
1479{
1480 struct ip_vs_dest *dest;
1481
1482 write_lock_bh(&__ip_vs_svc_lock);
1483 list_for_each_entry(dest, &svc->destinations, n_list) {
1484 ip_vs_zero_stats(&dest->stats);
1485 }
1486 ip_vs_zero_stats(&svc->stats);
1487 write_unlock_bh(&__ip_vs_svc_lock);
1488 return 0;
1489}
1490
Hans Schillstromfc723252011-01-03 14:44:43 +01001491static int ip_vs_zero_all(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492{
1493 int idx;
1494 struct ip_vs_service *svc;
1495
1496 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1497 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001498 if (net_eq(svc->net, net))
1499 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 }
1501 }
1502
1503 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1504 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001505 if (net_eq(svc->net, net))
1506 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001507 }
1508 }
1509
Julian Anastasov2a0751a2011-03-04 12:20:35 +02001510 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001511 return 0;
1512}
1513
1514
1515static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001516proc_do_defense_mode(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001517 void __user *buffer, size_t *lenp, loff_t *ppos)
1518{
Hans Schillstrom93304192011-01-03 14:44:51 +01001519 struct net *net = current->nsproxy->net_ns;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520 int *valp = table->data;
1521 int val = *valp;
1522 int rc;
1523
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001524 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001525 if (write && (*valp != val)) {
1526 if ((*valp < 0) || (*valp > 3)) {
1527 /* Restore the correct value */
1528 *valp = val;
1529 } else {
Hans Schillstrom93304192011-01-03 14:44:51 +01001530 update_defense_level(net_ipvs(net));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001531 }
1532 }
1533 return rc;
1534}
1535
1536
1537static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001538proc_do_sync_threshold(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001539 void __user *buffer, size_t *lenp, loff_t *ppos)
1540{
1541 int *valp = table->data;
1542 int val[2];
1543 int rc;
1544
1545 /* backup the value first */
1546 memcpy(val, valp, sizeof(val));
1547
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001548 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1550 /* Restore the correct value */
1551 memcpy(valp, val, sizeof(val));
1552 }
1553 return rc;
1554}
1555
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001556static int
1557proc_do_sync_mode(ctl_table *table, int write,
1558 void __user *buffer, size_t *lenp, loff_t *ppos)
1559{
1560 int *valp = table->data;
1561 int val = *valp;
1562 int rc;
1563
1564 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1565 if (write && (*valp != val)) {
1566 if ((*valp < 0) || (*valp > 1)) {
1567 /* Restore the correct value */
1568 *valp = val;
1569 } else {
Hans Schillstromf1313152011-01-03 14:44:55 +01001570 struct net *net = current->nsproxy->net_ns;
1571 ip_vs_sync_switch_mode(net, val);
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001572 }
1573 }
1574 return rc;
1575}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576
/*
 *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *	Do not reorder or insert entries without updating the
 *	matching netns initialization in __ip_vs_control_init().
 */
1582
1583static struct ctl_table vs_vars[] = {
1584 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585 .procname = "amemthresh",
Hans Schillstroma0840e22011-01-03 14:44:58 +01001586 .maxlen = sizeof(int),
1587 .mode = 0644,
1588 .proc_handler = proc_dointvec,
1589 },
1590 {
1591 .procname = "am_droprate",
1592 .maxlen = sizeof(int),
1593 .mode = 0644,
1594 .proc_handler = proc_dointvec,
1595 },
1596 {
1597 .procname = "drop_entry",
1598 .maxlen = sizeof(int),
1599 .mode = 0644,
1600 .proc_handler = proc_do_defense_mode,
1601 },
1602 {
1603 .procname = "drop_packet",
1604 .maxlen = sizeof(int),
1605 .mode = 0644,
1606 .proc_handler = proc_do_defense_mode,
1607 },
1608#ifdef CONFIG_IP_VS_NFCT
1609 {
1610 .procname = "conntrack",
1611 .maxlen = sizeof(int),
1612 .mode = 0644,
1613 .proc_handler = &proc_dointvec,
1614 },
1615#endif
1616 {
1617 .procname = "secure_tcp",
1618 .maxlen = sizeof(int),
1619 .mode = 0644,
1620 .proc_handler = proc_do_defense_mode,
1621 },
1622 {
1623 .procname = "snat_reroute",
1624 .maxlen = sizeof(int),
1625 .mode = 0644,
1626 .proc_handler = &proc_dointvec,
1627 },
1628 {
1629 .procname = "sync_version",
1630 .maxlen = sizeof(int),
1631 .mode = 0644,
1632 .proc_handler = &proc_do_sync_mode,
1633 },
1634 {
1635 .procname = "cache_bypass",
1636 .maxlen = sizeof(int),
1637 .mode = 0644,
1638 .proc_handler = proc_dointvec,
1639 },
1640 {
1641 .procname = "expire_nodest_conn",
1642 .maxlen = sizeof(int),
1643 .mode = 0644,
1644 .proc_handler = proc_dointvec,
1645 },
1646 {
1647 .procname = "expire_quiescent_template",
1648 .maxlen = sizeof(int),
1649 .mode = 0644,
1650 .proc_handler = proc_dointvec,
1651 },
1652 {
1653 .procname = "sync_threshold",
1654 .maxlen =
1655 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1656 .mode = 0644,
1657 .proc_handler = proc_do_sync_threshold,
1658 },
1659 {
1660 .procname = "nat_icmp_send",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661 .maxlen = sizeof(int),
1662 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001663 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664 },
1665#ifdef CONFIG_IP_VS_DEBUG
1666 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667 .procname = "debug_level",
1668 .data = &sysctl_ip_vs_debug_level,
1669 .maxlen = sizeof(int),
1670 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001671 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672 },
1673#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674#if 0
1675 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 .procname = "timeout_established",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1678 .maxlen = sizeof(int),
1679 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001680 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 },
1682 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 .procname = "timeout_synsent",
1684 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1685 .maxlen = sizeof(int),
1686 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001687 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 },
1689 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690 .procname = "timeout_synrecv",
1691 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1692 .maxlen = sizeof(int),
1693 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001694 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 },
1696 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 .procname = "timeout_finwait",
1698 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1699 .maxlen = sizeof(int),
1700 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001701 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 },
1703 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704 .procname = "timeout_timewait",
1705 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1706 .maxlen = sizeof(int),
1707 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001708 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 },
1710 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 .procname = "timeout_close",
1712 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1713 .maxlen = sizeof(int),
1714 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001715 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716 },
1717 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718 .procname = "timeout_closewait",
1719 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1720 .maxlen = sizeof(int),
1721 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001722 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 },
1724 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 .procname = "timeout_lastack",
1726 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1727 .maxlen = sizeof(int),
1728 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001729 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730 },
1731 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732 .procname = "timeout_listen",
1733 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1734 .maxlen = sizeof(int),
1735 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001736 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737 },
1738 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001739 .procname = "timeout_synack",
1740 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1741 .maxlen = sizeof(int),
1742 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001743 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744 },
1745 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746 .procname = "timeout_udp",
1747 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1748 .maxlen = sizeof(int),
1749 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001750 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001751 },
1752 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 .procname = "timeout_icmp",
1754 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1755 .maxlen = sizeof(int),
1756 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001757 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758 },
1759#endif
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001760 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761};
1762
Sven Wegener5587da52008-08-10 18:24:40 +00001763const struct ctl_path net_vs_ctl_path[] = {
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001764 { .procname = "net", },
1765 { .procname = "ipv4", },
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001766 { .procname = "vs", },
1767 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001769EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771#ifdef CONFIG_PROC_FS
1772
1773struct ip_vs_iter {
Hans Schillstromfc723252011-01-03 14:44:43 +01001774 struct seq_net_private p; /* Do not move this, netns depends upon it*/
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775 struct list_head *table;
1776 int bucket;
1777};
1778
1779/*
1780 * Write the contents of the VS rule table to a PROCfs file.
1781 * (It is kept just for backward compatibility)
1782 */
1783static inline const char *ip_vs_fwd_name(unsigned flags)
1784{
1785 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1786 case IP_VS_CONN_F_LOCALNODE:
1787 return "Local";
1788 case IP_VS_CONN_F_TUNNEL:
1789 return "Tunnel";
1790 case IP_VS_CONN_F_DROUTE:
1791 return "Route";
1792 default:
1793 return "Masq";
1794 }
1795}
1796
1797
1798/* Get the Nth entry in the two lists */
1799static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1800{
Hans Schillstromfc723252011-01-03 14:44:43 +01001801 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802 struct ip_vs_iter *iter = seq->private;
1803 int idx;
1804 struct ip_vs_service *svc;
1805
1806 /* look in hash by protocol */
1807 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1808 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001809 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810 iter->table = ip_vs_svc_table;
1811 iter->bucket = idx;
1812 return svc;
1813 }
1814 }
1815 }
1816
1817 /* keep looking in fwmark */
1818 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1819 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001820 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821 iter->table = ip_vs_svc_fwm_table;
1822 iter->bucket = idx;
1823 return svc;
1824 }
1825 }
1826 }
1827
1828 return NULL;
1829}
1830
1831static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
Simon Horman563e94f2008-09-17 10:10:42 +10001832__acquires(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833{
1834
1835 read_lock_bh(&__ip_vs_svc_lock);
1836 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1837}
1838
1839
1840static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1841{
1842 struct list_head *e;
1843 struct ip_vs_iter *iter;
1844 struct ip_vs_service *svc;
1845
1846 ++*pos;
1847 if (v == SEQ_START_TOKEN)
1848 return ip_vs_info_array(seq,0);
1849
1850 svc = v;
1851 iter = seq->private;
1852
1853 if (iter->table == ip_vs_svc_table) {
1854 /* next service in table hashed by protocol */
1855 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1856 return list_entry(e, struct ip_vs_service, s_list);
1857
1858
1859 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1860 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1861 s_list) {
1862 return svc;
1863 }
1864 }
1865
1866 iter->table = ip_vs_svc_fwm_table;
1867 iter->bucket = -1;
1868 goto scan_fwmark;
1869 }
1870
1871 /* next service in hashed by fwmark */
1872 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1873 return list_entry(e, struct ip_vs_service, f_list);
1874
1875 scan_fwmark:
1876 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1877 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1878 f_list)
1879 return svc;
1880 }
1881
1882 return NULL;
1883}
1884
1885static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
Simon Horman563e94f2008-09-17 10:10:42 +10001886__releases(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887{
1888 read_unlock_bh(&__ip_vs_svc_lock);
1889}
1890
1891
1892static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1893{
1894 if (v == SEQ_START_TOKEN) {
1895 seq_printf(seq,
1896 "IP Virtual Server version %d.%d.%d (size=%d)\n",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01001897 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898 seq_puts(seq,
1899 "Prot LocalAddress:Port Scheduler Flags\n");
1900 seq_puts(seq,
1901 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1902 } else {
1903 const struct ip_vs_service *svc = v;
1904 const struct ip_vs_iter *iter = seq->private;
1905 const struct ip_vs_dest *dest;
1906
Vince Busam667a5f12008-09-02 15:55:49 +02001907 if (iter->table == ip_vs_svc_table) {
1908#ifdef CONFIG_IP_VS_IPV6
1909 if (svc->af == AF_INET6)
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001910 seq_printf(seq, "%s [%pI6]:%04X %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001911 ip_vs_proto_name(svc->protocol),
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001912 &svc->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001913 ntohs(svc->port),
1914 svc->scheduler->name);
1915 else
1916#endif
Nick Chalk26ec0372010-06-22 08:07:01 +02001917 seq_printf(seq, "%s %08X:%04X %s %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001918 ip_vs_proto_name(svc->protocol),
1919 ntohl(svc->addr.ip),
1920 ntohs(svc->port),
Nick Chalk26ec0372010-06-22 08:07:01 +02001921 svc->scheduler->name,
1922 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001923 } else {
Nick Chalk26ec0372010-06-22 08:07:01 +02001924 seq_printf(seq, "FWM %08X %s %s",
1925 svc->fwmark, svc->scheduler->name,
1926 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001927 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001928
1929 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1930 seq_printf(seq, "persistent %d %08X\n",
1931 svc->timeout,
1932 ntohl(svc->netmask));
1933 else
1934 seq_putc(seq, '\n');
1935
1936 list_for_each_entry(dest, &svc->destinations, n_list) {
Vince Busam667a5f12008-09-02 15:55:49 +02001937#ifdef CONFIG_IP_VS_IPV6
1938 if (dest->af == AF_INET6)
1939 seq_printf(seq,
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001940 " -> [%pI6]:%04X"
Vince Busam667a5f12008-09-02 15:55:49 +02001941 " %-7s %-6d %-10d %-10d\n",
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001942 &dest->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001943 ntohs(dest->port),
1944 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1945 atomic_read(&dest->weight),
1946 atomic_read(&dest->activeconns),
1947 atomic_read(&dest->inactconns));
1948 else
1949#endif
1950 seq_printf(seq,
1951 " -> %08X:%04X "
1952 "%-7s %-6d %-10d %-10d\n",
1953 ntohl(dest->addr.ip),
1954 ntohs(dest->port),
1955 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1956 atomic_read(&dest->weight),
1957 atomic_read(&dest->activeconns),
1958 atomic_read(&dest->inactconns));
1959
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 }
1961 }
1962 return 0;
1963}
1964
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001965static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966 .start = ip_vs_info_seq_start,
1967 .next = ip_vs_info_seq_next,
1968 .stop = ip_vs_info_seq_stop,
1969 .show = ip_vs_info_seq_show,
1970};
1971
1972static int ip_vs_info_open(struct inode *inode, struct file *file)
1973{
Hans Schillstromfc723252011-01-03 14:44:43 +01001974 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001975 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001976}
1977
Arjan van de Ven9a321442007-02-12 00:55:35 -08001978static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979 .owner = THIS_MODULE,
1980 .open = ip_vs_info_open,
1981 .read = seq_read,
1982 .llseek = seq_lseek,
1983 .release = seq_release_private,
1984};
1985
1986#endif
1987
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988#ifdef CONFIG_PROC_FS
1989static int ip_vs_stats_show(struct seq_file *seq, void *v)
1990{
Hans Schillstromb17fc992011-01-03 14:44:56 +01001991 struct net *net = seq_file_single_net(seq);
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02001992 struct ip_vs_stats_user show;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993
1994/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1995 seq_puts(seq,
1996 " Total Incoming Outgoing Incoming Outgoing\n");
1997 seq_printf(seq,
1998 " Conns Packets Packets Bytes Bytes\n");
1999
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002000 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2001 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2002 show.inpkts, show.outpkts,
2003 (unsigned long long) show.inbytes,
2004 (unsigned long long) show.outbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002005
2006/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2007 seq_puts(seq,
2008 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002009 seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2010 show.cps, show.inpps, show.outpps,
2011 show.inbps, show.outbps);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002012
2013 return 0;
2014}
2015
2016static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2017{
Hans Schillstromfc723252011-01-03 14:44:43 +01002018 return single_open_net(inode, file, ip_vs_stats_show);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019}
2020
Arjan van de Ven9a321442007-02-12 00:55:35 -08002021static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002022 .owner = THIS_MODULE,
2023 .open = ip_vs_stats_seq_open,
2024 .read = seq_read,
2025 .llseek = seq_lseek,
2026 .release = single_release,
2027};
2028
Hans Schillstromb17fc992011-01-03 14:44:56 +01002029static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2030{
2031 struct net *net = seq_file_single_net(seq);
Julian Anastasov2a0751a2011-03-04 12:20:35 +02002032 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2033 struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
Julian Anastasovea9f22c2011-03-14 01:41:54 +02002034 struct ip_vs_stats_user rates;
Hans Schillstromb17fc992011-01-03 14:44:56 +01002035 int i;
2036
2037/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2038 seq_puts(seq,
2039 " Total Incoming Outgoing Incoming Outgoing\n");
2040 seq_printf(seq,
2041 "CPU Conns Packets Packets Bytes Bytes\n");
2042
2043 for_each_possible_cpu(i) {
Julian Anastasov2a0751a2011-03-04 12:20:35 +02002044 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2045 unsigned int start;
2046 __u64 inbytes, outbytes;
2047
2048 do {
2049 start = u64_stats_fetch_begin_bh(&u->syncp);
2050 inbytes = u->ustats.inbytes;
2051 outbytes = u->ustats.outbytes;
2052 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2053
Hans Schillstromb17fc992011-01-03 14:44:56 +01002054 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
Julian Anastasov2a0751a2011-03-04 12:20:35 +02002055 i, u->ustats.conns, u->ustats.inpkts,
2056 u->ustats.outpkts, (__u64)inbytes,
2057 (__u64)outbytes);
Hans Schillstromb17fc992011-01-03 14:44:56 +01002058 }
2059
2060 spin_lock_bh(&tot_stats->lock);
Julian Anastasovea9f22c2011-03-14 01:41:54 +02002061
Hans Schillstromb17fc992011-01-03 14:44:56 +01002062 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2063 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2064 tot_stats->ustats.outpkts,
2065 (unsigned long long) tot_stats->ustats.inbytes,
2066 (unsigned long long) tot_stats->ustats.outbytes);
2067
Julian Anastasovea9f22c2011-03-14 01:41:54 +02002068 ip_vs_read_estimator(&rates, tot_stats);
2069
2070 spin_unlock_bh(&tot_stats->lock);
2071
Hans Schillstromb17fc992011-01-03 14:44:56 +01002072/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2073 seq_puts(seq,
2074 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2075 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
Julian Anastasovea9f22c2011-03-14 01:41:54 +02002076 rates.cps,
2077 rates.inpps,
2078 rates.outpps,
2079 rates.inbps,
2080 rates.outbps);
Hans Schillstromb17fc992011-01-03 14:44:56 +01002081
2082 return 0;
2083}
2084
2085static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2086{
2087 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2088}
2089
2090static const struct file_operations ip_vs_stats_percpu_fops = {
2091 .owner = THIS_MODULE,
2092 .open = ip_vs_stats_percpu_seq_open,
2093 .read = seq_read,
2094 .llseek = seq_lseek,
2095 .release = single_release,
2096};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097#endif
2098
2099/*
2100 * Set timeout values for tcp tcpfin udp in the timeout_table.
2101 */
Hans Schillstrom93304192011-01-03 14:44:51 +01002102static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103{
Changli Gao091bb342011-01-21 18:02:13 +08002104#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
Hans Schillstrom93304192011-01-03 14:44:51 +01002105 struct ip_vs_proto_data *pd;
Changli Gao091bb342011-01-21 18:02:13 +08002106#endif
Hans Schillstrom93304192011-01-03 14:44:51 +01002107
Linus Torvalds1da177e2005-04-16 15:20:36 -07002108 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2109 u->tcp_timeout,
2110 u->tcp_fin_timeout,
2111 u->udp_timeout);
2112
2113#ifdef CONFIG_IP_VS_PROTO_TCP
2114 if (u->tcp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002115 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2116 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117 = u->tcp_timeout * HZ;
2118 }
2119
2120 if (u->tcp_fin_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002121 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2122 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123 = u->tcp_fin_timeout * HZ;
2124 }
2125#endif
2126
2127#ifdef CONFIG_IP_VS_PROTO_UDP
2128 if (u->udp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002129 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2130 pd->timeout_table[IP_VS_UDP_S_NORMAL]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131 = u->udp_timeout * HZ;
2132 }
2133#endif
2134 return 0;
2135}
2136
2137
/* Index of a set-sockopt command relative to IP_VS_BASE_CTL */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
/* Exact argument sizes expected for the set-sockopt commands */
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
/* Largest of the above; sizes the on-stack argument buffer */
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2145
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002146static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2148 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2149 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2150 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2151 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2152 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2153 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2154 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2155 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2156 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2157 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2158};
2159
Julius Volzc860c6b2008-09-02 15:55:36 +02002160static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2161 struct ip_vs_service_user *usvc_compat)
2162{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002163 memset(usvc, 0, sizeof(*usvc));
2164
Julius Volzc860c6b2008-09-02 15:55:36 +02002165 usvc->af = AF_INET;
2166 usvc->protocol = usvc_compat->protocol;
2167 usvc->addr.ip = usvc_compat->addr;
2168 usvc->port = usvc_compat->port;
2169 usvc->fwmark = usvc_compat->fwmark;
2170
2171 /* Deep copy of sched_name is not needed here */
2172 usvc->sched_name = usvc_compat->sched_name;
2173
2174 usvc->flags = usvc_compat->flags;
2175 usvc->timeout = usvc_compat->timeout;
2176 usvc->netmask = usvc_compat->netmask;
2177}
2178
2179static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2180 struct ip_vs_dest_user *udest_compat)
2181{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002182 memset(udest, 0, sizeof(*udest));
2183
Julius Volzc860c6b2008-09-02 15:55:36 +02002184 udest->addr.ip = udest_compat->addr;
2185 udest->port = udest_compat->port;
2186 udest->conn_flags = udest_compat->conn_flags;
2187 udest->weight = udest_compat->weight;
2188 udest->u_threshold = udest_compat->u_threshold;
2189 udest->l_threshold = udest_compat->l_threshold;
2190}
2191
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192static int
2193do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2194{
Hans Schillstromfc723252011-01-03 14:44:43 +01002195 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002196 int ret;
2197 unsigned char arg[MAX_ARG_LEN];
Julius Volzc860c6b2008-09-02 15:55:36 +02002198 struct ip_vs_service_user *usvc_compat;
2199 struct ip_vs_service_user_kern usvc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002200 struct ip_vs_service *svc;
Julius Volzc860c6b2008-09-02 15:55:36 +02002201 struct ip_vs_dest_user *udest_compat;
2202 struct ip_vs_dest_user_kern udest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203
2204 if (!capable(CAP_NET_ADMIN))
2205 return -EPERM;
2206
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002207 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2208 return -EINVAL;
2209 if (len < 0 || len > MAX_ARG_LEN)
2210 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002211 if (len != set_arglen[SET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002212 pr_err("set_ctl: len %u != %u\n",
2213 len, set_arglen[SET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214 return -EINVAL;
2215 }
2216
2217 if (copy_from_user(arg, user, len) != 0)
2218 return -EFAULT;
2219
2220 /* increase the module use count */
2221 ip_vs_use_count_inc();
2222
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002223 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002224 ret = -ERESTARTSYS;
2225 goto out_dec;
2226 }
2227
2228 if (cmd == IP_VS_SO_SET_FLUSH) {
2229 /* Flush the virtual service */
Hans Schillstromfc723252011-01-03 14:44:43 +01002230 ret = ip_vs_flush(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231 goto out_unlock;
2232 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2233 /* Set timeout values for (tcp tcpfin udp) */
Hans Schillstrom93304192011-01-03 14:44:51 +01002234 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002235 goto out_unlock;
2236 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2237 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002238 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2239 dm->syncid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002240 goto out_unlock;
2241 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2242 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002243 ret = stop_sync_thread(net, dm->state);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002244 goto out_unlock;
2245 }
2246
Julius Volzc860c6b2008-09-02 15:55:36 +02002247 usvc_compat = (struct ip_vs_service_user *)arg;
2248 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2249
2250 /* We only use the new structs internally, so copy userspace compat
2251 * structs to extended internal versions */
2252 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2253 ip_vs_copy_udest_compat(&udest, udest_compat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254
2255 if (cmd == IP_VS_SO_SET_ZERO) {
2256 /* if no service address is set, zero counters in all */
Julius Volzc860c6b2008-09-02 15:55:36 +02002257 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002258 ret = ip_vs_zero_all(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259 goto out_unlock;
2260 }
2261 }
2262
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +01002263 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2264 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2265 usvc.protocol != IPPROTO_SCTP) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002266 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2267 usvc.protocol, &usvc.addr.ip,
2268 ntohs(usvc.port), usvc.sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269 ret = -EFAULT;
2270 goto out_unlock;
2271 }
2272
2273 /* Lookup the exact service by <protocol, addr, port> or fwmark */
Julius Volzc860c6b2008-09-02 15:55:36 +02002274 if (usvc.fwmark == 0)
Hans Schillstromfc723252011-01-03 14:44:43 +01002275 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002276 &usvc.addr, usvc.port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002277 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002278 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279
2280 if (cmd != IP_VS_SO_SET_ADD
Julius Volzc860c6b2008-09-02 15:55:36 +02002281 && (svc == NULL || svc->protocol != usvc.protocol)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002282 ret = -ESRCH;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002283 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002284 }
2285
2286 switch (cmd) {
2287 case IP_VS_SO_SET_ADD:
2288 if (svc != NULL)
2289 ret = -EEXIST;
2290 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002291 ret = ip_vs_add_service(net, &usvc, &svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292 break;
2293 case IP_VS_SO_SET_EDIT:
Julius Volzc860c6b2008-09-02 15:55:36 +02002294 ret = ip_vs_edit_service(svc, &usvc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002295 break;
2296 case IP_VS_SO_SET_DEL:
2297 ret = ip_vs_del_service(svc);
2298 if (!ret)
2299 goto out_unlock;
2300 break;
2301 case IP_VS_SO_SET_ZERO:
2302 ret = ip_vs_zero_service(svc);
2303 break;
2304 case IP_VS_SO_SET_ADDDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002305 ret = ip_vs_add_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306 break;
2307 case IP_VS_SO_SET_EDITDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002308 ret = ip_vs_edit_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002309 break;
2310 case IP_VS_SO_SET_DELDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002311 ret = ip_vs_del_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312 break;
2313 default:
2314 ret = -EINVAL;
2315 }
2316
Linus Torvalds1da177e2005-04-16 15:20:36 -07002317 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002318 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002319 out_dec:
2320 /* decrease the module use count */
2321 ip_vs_use_count_dec();
2322
2323 return ret;
2324}
2325
2326
2327static void
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2329{
2330 dst->protocol = src->protocol;
Julius Volze7ade462008-09-02 15:55:33 +02002331 dst->addr = src->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332 dst->port = src->port;
2333 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002334 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002335 dst->flags = src->flags;
2336 dst->timeout = src->timeout / HZ;
2337 dst->netmask = src->netmask;
2338 dst->num_dests = src->num_dests;
2339 ip_vs_copy_stats(&dst->stats, &src->stats);
2340}
2341
2342static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002343__ip_vs_get_service_entries(struct net *net,
2344 const struct ip_vs_get_services *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002345 struct ip_vs_get_services __user *uptr)
2346{
2347 int idx, count=0;
2348 struct ip_vs_service *svc;
2349 struct ip_vs_service_entry entry;
2350 int ret = 0;
2351
2352 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2353 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002354 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002355 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002356 continue;
2357
Linus Torvalds1da177e2005-04-16 15:20:36 -07002358 if (count >= get->num_services)
2359 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002360 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002361 ip_vs_copy_service(&entry, svc);
2362 if (copy_to_user(&uptr->entrytable[count],
2363 &entry, sizeof(entry))) {
2364 ret = -EFAULT;
2365 goto out;
2366 }
2367 count++;
2368 }
2369 }
2370
2371 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2372 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002373 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002374 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002375 continue;
2376
Linus Torvalds1da177e2005-04-16 15:20:36 -07002377 if (count >= get->num_services)
2378 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002379 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002380 ip_vs_copy_service(&entry, svc);
2381 if (copy_to_user(&uptr->entrytable[count],
2382 &entry, sizeof(entry))) {
2383 ret = -EFAULT;
2384 goto out;
2385 }
2386 count++;
2387 }
2388 }
2389 out:
2390 return ret;
2391}
2392
2393static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002394__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002395 struct ip_vs_get_dests __user *uptr)
2396{
2397 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002398 union nf_inet_addr addr = { .ip = get->addr };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002399 int ret = 0;
2400
2401 if (get->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002402 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002403 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002404 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002405 get->port);
Julius Volzb18610d2008-09-02 15:55:37 +02002406
Linus Torvalds1da177e2005-04-16 15:20:36 -07002407 if (svc) {
2408 int count = 0;
2409 struct ip_vs_dest *dest;
2410 struct ip_vs_dest_entry entry;
2411
2412 list_for_each_entry(dest, &svc->destinations, n_list) {
2413 if (count >= get->num_dests)
2414 break;
2415
Julius Volze7ade462008-09-02 15:55:33 +02002416 entry.addr = dest->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002417 entry.port = dest->port;
2418 entry.conn_flags = atomic_read(&dest->conn_flags);
2419 entry.weight = atomic_read(&dest->weight);
2420 entry.u_threshold = dest->u_threshold;
2421 entry.l_threshold = dest->l_threshold;
2422 entry.activeconns = atomic_read(&dest->activeconns);
2423 entry.inactconns = atomic_read(&dest->inactconns);
2424 entry.persistconns = atomic_read(&dest->persistconns);
2425 ip_vs_copy_stats(&entry.stats, &dest->stats);
2426 if (copy_to_user(&uptr->entrytable[count],
2427 &entry, sizeof(entry))) {
2428 ret = -EFAULT;
2429 break;
2430 }
2431 count++;
2432 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002433 } else
2434 ret = -ESRCH;
2435 return ret;
2436}
2437
2438static inline void
Hans Schillstrom93304192011-01-03 14:44:51 +01002439__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002440{
Changli Gao091bb342011-01-21 18:02:13 +08002441#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
Hans Schillstrom93304192011-01-03 14:44:51 +01002442 struct ip_vs_proto_data *pd;
Changli Gao091bb342011-01-21 18:02:13 +08002443#endif
Hans Schillstrom93304192011-01-03 14:44:51 +01002444
Linus Torvalds1da177e2005-04-16 15:20:36 -07002445#ifdef CONFIG_IP_VS_PROTO_TCP
Hans Schillstrom93304192011-01-03 14:44:51 +01002446 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2447 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2448 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002449#endif
2450#ifdef CONFIG_IP_VS_PROTO_UDP
Hans Schillstrom93304192011-01-03 14:44:51 +01002451 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002452 u->udp_timeout =
Hans Schillstrom93304192011-01-03 14:44:51 +01002453 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002454#endif
2455}
2456
2457
2458#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2459#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2460#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2461#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2462#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2463#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2464#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2465
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002466static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002467 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2468 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2469 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2470 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2471 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2472 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2473 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2474};
2475
2476static int
2477do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2478{
2479 unsigned char arg[128];
2480 int ret = 0;
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002481 unsigned int copylen;
Hans Schillstromfc723252011-01-03 14:44:43 +01002482 struct net *net = sock_net(sk);
Hans Schillstromf1313152011-01-03 14:44:55 +01002483 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002484
Hans Schillstromfc723252011-01-03 14:44:43 +01002485 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002486 if (!capable(CAP_NET_ADMIN))
2487 return -EPERM;
2488
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002489 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2490 return -EINVAL;
2491
Linus Torvalds1da177e2005-04-16 15:20:36 -07002492 if (*len < get_arglen[GET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002493 pr_err("get_ctl: len %u < %u\n",
2494 *len, get_arglen[GET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002495 return -EINVAL;
2496 }
2497
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002498 copylen = get_arglen[GET_CMDID(cmd)];
2499 if (copylen > 128)
2500 return -EINVAL;
2501
2502 if (copy_from_user(arg, user, copylen) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002503 return -EFAULT;
2504
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002505 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002506 return -ERESTARTSYS;
2507
2508 switch (cmd) {
2509 case IP_VS_SO_GET_VERSION:
2510 {
2511 char buf[64];
2512
2513 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002514 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002515 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2516 ret = -EFAULT;
2517 goto out;
2518 }
2519 *len = strlen(buf)+1;
2520 }
2521 break;
2522
2523 case IP_VS_SO_GET_INFO:
2524 {
2525 struct ip_vs_getinfo info;
2526 info.version = IP_VS_VERSION_CODE;
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002527 info.size = ip_vs_conn_tab_size;
Hans Schillstroma0840e22011-01-03 14:44:58 +01002528 info.num_services = ipvs->num_services;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002529 if (copy_to_user(user, &info, sizeof(info)) != 0)
2530 ret = -EFAULT;
2531 }
2532 break;
2533
2534 case IP_VS_SO_GET_SERVICES:
2535 {
2536 struct ip_vs_get_services *get;
2537 int size;
2538
2539 get = (struct ip_vs_get_services *)arg;
2540 size = sizeof(*get) +
2541 sizeof(struct ip_vs_service_entry) * get->num_services;
2542 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002543 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002544 ret = -EINVAL;
2545 goto out;
2546 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002547 ret = __ip_vs_get_service_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548 }
2549 break;
2550
2551 case IP_VS_SO_GET_SERVICE:
2552 {
2553 struct ip_vs_service_entry *entry;
2554 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002555 union nf_inet_addr addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556
2557 entry = (struct ip_vs_service_entry *)arg;
Julius Volzb18610d2008-09-02 15:55:37 +02002558 addr.ip = entry->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559 if (entry->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002560 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002561 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002562 svc = __ip_vs_service_find(net, AF_INET,
2563 entry->protocol, &addr,
2564 entry->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002565 if (svc) {
2566 ip_vs_copy_service(entry, svc);
2567 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2568 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569 } else
2570 ret = -ESRCH;
2571 }
2572 break;
2573
2574 case IP_VS_SO_GET_DESTS:
2575 {
2576 struct ip_vs_get_dests *get;
2577 int size;
2578
2579 get = (struct ip_vs_get_dests *)arg;
2580 size = sizeof(*get) +
2581 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2582 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002583 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002584 ret = -EINVAL;
2585 goto out;
2586 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002587 ret = __ip_vs_get_dest_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588 }
2589 break;
2590
2591 case IP_VS_SO_GET_TIMEOUT:
2592 {
2593 struct ip_vs_timeout_user t;
2594
Hans Schillstrom93304192011-01-03 14:44:51 +01002595 __ip_vs_get_timeouts(net, &t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002596 if (copy_to_user(user, &t, sizeof(t)) != 0)
2597 ret = -EFAULT;
2598 }
2599 break;
2600
2601 case IP_VS_SO_GET_DAEMON:
2602 {
2603 struct ip_vs_daemon_user d[2];
2604
2605 memset(&d, 0, sizeof(d));
Hans Schillstromf1313152011-01-03 14:44:55 +01002606 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002607 d[0].state = IP_VS_STATE_MASTER;
Hans Schillstromf1313152011-01-03 14:44:55 +01002608 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2609 sizeof(d[0].mcast_ifn));
2610 d[0].syncid = ipvs->master_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002611 }
Hans Schillstromf1313152011-01-03 14:44:55 +01002612 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002613 d[1].state = IP_VS_STATE_BACKUP;
Hans Schillstromf1313152011-01-03 14:44:55 +01002614 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2615 sizeof(d[1].mcast_ifn));
2616 d[1].syncid = ipvs->backup_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002617 }
2618 if (copy_to_user(user, &d, sizeof(d)) != 0)
2619 ret = -EFAULT;
2620 }
2621 break;
2622
2623 default:
2624 ret = -EINVAL;
2625 }
2626
2627 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002628 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002629 return ret;
2630}
2631
2632
2633static struct nf_sockopt_ops ip_vs_sockopts = {
2634 .pf = PF_INET,
2635 .set_optmin = IP_VS_BASE_CTL,
2636 .set_optmax = IP_VS_SO_SET_MAX+1,
2637 .set = do_ip_vs_set_ctl,
2638 .get_optmin = IP_VS_BASE_CTL,
2639 .get_optmax = IP_VS_SO_GET_MAX+1,
2640 .get = do_ip_vs_get_ctl,
Neil Horman16fcec32007-09-11 11:28:26 +02002641 .owner = THIS_MODULE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002642};
2643
Julius Volz9a812192008-08-14 14:08:44 +02002644/*
2645 * Generic Netlink interface
2646 */
2647
2648/* IPVS genetlink family */
2649static struct genl_family ip_vs_genl_family = {
2650 .id = GENL_ID_GENERATE,
2651 .hdrsize = 0,
2652 .name = IPVS_GENL_NAME,
2653 .version = IPVS_GENL_VERSION,
2654 .maxattr = IPVS_CMD_MAX,
Hans Schillstromc6d2d442011-01-03 14:45:03 +01002655 .netnsok = true, /* Make ipvsadm to work on netns */
Julius Volz9a812192008-08-14 14:08:44 +02002656};
2657
2658/* Policy used for first-level command attributes */
2659static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2660 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2661 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2662 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2663 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2664 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2665 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2666};
2667
2668/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2669static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2670 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2671 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2672 .len = IP_VS_IFNAME_MAXLEN },
2673 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2674};
2675
2676/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2677static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2678 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2679 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2680 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2681 .len = sizeof(union nf_inet_addr) },
2682 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2683 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2684 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2685 .len = IP_VS_SCHEDNAME_MAXLEN },
Simon Horman0d1e71b2010-08-22 21:37:54 +09002686 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2687 .len = IP_VS_PENAME_MAXLEN },
Julius Volz9a812192008-08-14 14:08:44 +02002688 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2689 .len = sizeof(struct ip_vs_flags) },
2690 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2691 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2692 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2693};
2694
2695/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2696static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2697 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2698 .len = sizeof(union nf_inet_addr) },
2699 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2700 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2701 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2702 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2703 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2704 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2705 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2706 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2707 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2708};
2709
2710static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2711 struct ip_vs_stats *stats)
2712{
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002713 struct ip_vs_stats_user ustats;
Julius Volz9a812192008-08-14 14:08:44 +02002714 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2715 if (!nl_stats)
2716 return -EMSGSIZE;
2717
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002718 ip_vs_copy_stats(&ustats, stats);
Julius Volz9a812192008-08-14 14:08:44 +02002719
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002720 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2721 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2722 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2723 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2724 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2725 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2726 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2727 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2728 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2729 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
Julius Volz9a812192008-08-14 14:08:44 +02002730
2731 nla_nest_end(skb, nl_stats);
2732
2733 return 0;
2734
2735nla_put_failure:
Julius Volz9a812192008-08-14 14:08:44 +02002736 nla_nest_cancel(skb, nl_stats);
2737 return -EMSGSIZE;
2738}
2739
2740static int ip_vs_genl_fill_service(struct sk_buff *skb,
2741 struct ip_vs_service *svc)
2742{
2743 struct nlattr *nl_service;
2744 struct ip_vs_flags flags = { .flags = svc->flags,
2745 .mask = ~0 };
2746
2747 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2748 if (!nl_service)
2749 return -EMSGSIZE;
2750
Julius Volzf94fd042008-09-02 15:55:55 +02002751 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
Julius Volz9a812192008-08-14 14:08:44 +02002752
2753 if (svc->fwmark) {
2754 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2755 } else {
2756 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2757 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2758 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2759 }
2760
2761 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002762 if (svc->pe)
2763 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
Julius Volz9a812192008-08-14 14:08:44 +02002764 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2765 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2766 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2767
2768 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2769 goto nla_put_failure;
2770
2771 nla_nest_end(skb, nl_service);
2772
2773 return 0;
2774
2775nla_put_failure:
2776 nla_nest_cancel(skb, nl_service);
2777 return -EMSGSIZE;
2778}
2779
2780static int ip_vs_genl_dump_service(struct sk_buff *skb,
2781 struct ip_vs_service *svc,
2782 struct netlink_callback *cb)
2783{
2784 void *hdr;
2785
2786 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2787 &ip_vs_genl_family, NLM_F_MULTI,
2788 IPVS_CMD_NEW_SERVICE);
2789 if (!hdr)
2790 return -EMSGSIZE;
2791
2792 if (ip_vs_genl_fill_service(skb, svc) < 0)
2793 goto nla_put_failure;
2794
2795 return genlmsg_end(skb, hdr);
2796
2797nla_put_failure:
2798 genlmsg_cancel(skb, hdr);
2799 return -EMSGSIZE;
2800}
2801
2802static int ip_vs_genl_dump_services(struct sk_buff *skb,
2803 struct netlink_callback *cb)
2804{
2805 int idx = 0, i;
2806 int start = cb->args[0];
2807 struct ip_vs_service *svc;
Hans Schillstromfc723252011-01-03 14:44:43 +01002808 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002809
2810 mutex_lock(&__ip_vs_mutex);
2811 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2812 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002813 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002814 continue;
2815 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2816 idx--;
2817 goto nla_put_failure;
2818 }
2819 }
2820 }
2821
2822 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2823 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002824 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002825 continue;
2826 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2827 idx--;
2828 goto nla_put_failure;
2829 }
2830 }
2831 }
2832
2833nla_put_failure:
2834 mutex_unlock(&__ip_vs_mutex);
2835 cb->args[0] = idx;
2836
2837 return skb->len;
2838}
2839
Hans Schillstromfc723252011-01-03 14:44:43 +01002840static int ip_vs_genl_parse_service(struct net *net,
2841 struct ip_vs_service_user_kern *usvc,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002842 struct nlattr *nla, int full_entry,
2843 struct ip_vs_service **ret_svc)
Julius Volz9a812192008-08-14 14:08:44 +02002844{
2845 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2846 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002847 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002848
2849 /* Parse mandatory identifying service fields first */
2850 if (nla == NULL ||
2851 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2852 return -EINVAL;
2853
2854 nla_af = attrs[IPVS_SVC_ATTR_AF];
2855 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2856 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2857 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2858 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2859
2860 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2861 return -EINVAL;
2862
Simon Horman258c8892009-12-15 17:01:25 +01002863 memset(usvc, 0, sizeof(*usvc));
2864
Julius Volzc860c6b2008-09-02 15:55:36 +02002865 usvc->af = nla_get_u16(nla_af);
Julius Volzf94fd042008-09-02 15:55:55 +02002866#ifdef CONFIG_IP_VS_IPV6
2867 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2868#else
2869 if (usvc->af != AF_INET)
2870#endif
Julius Volz9a812192008-08-14 14:08:44 +02002871 return -EAFNOSUPPORT;
2872
2873 if (nla_fwmark) {
2874 usvc->protocol = IPPROTO_TCP;
2875 usvc->fwmark = nla_get_u32(nla_fwmark);
2876 } else {
2877 usvc->protocol = nla_get_u16(nla_protocol);
2878 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2879 usvc->port = nla_get_u16(nla_port);
2880 usvc->fwmark = 0;
2881 }
2882
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002883 if (usvc->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002884 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002885 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002886 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002887 &usvc->addr, usvc->port);
2888 *ret_svc = svc;
2889
Julius Volz9a812192008-08-14 14:08:44 +02002890 /* If a full entry was requested, check for the additional fields */
2891 if (full_entry) {
Simon Horman0d1e71b2010-08-22 21:37:54 +09002892 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
Julius Volz9a812192008-08-14 14:08:44 +02002893 *nla_netmask;
2894 struct ip_vs_flags flags;
Julius Volz9a812192008-08-14 14:08:44 +02002895
2896 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
Simon Horman0d1e71b2010-08-22 21:37:54 +09002897 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
Julius Volz9a812192008-08-14 14:08:44 +02002898 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2899 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2900 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2901
2902 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2903 return -EINVAL;
2904
2905 nla_memcpy(&flags, nla_flags, sizeof(flags));
2906
2907 /* prefill flags from service if it already exists */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002908 if (svc)
Julius Volz9a812192008-08-14 14:08:44 +02002909 usvc->flags = svc->flags;
Julius Volz9a812192008-08-14 14:08:44 +02002910
2911 /* set new flags from userland */
2912 usvc->flags = (usvc->flags & ~flags.mask) |
2913 (flags.flags & flags.mask);
Julius Volzc860c6b2008-09-02 15:55:36 +02002914 usvc->sched_name = nla_data(nla_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002915 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
Julius Volz9a812192008-08-14 14:08:44 +02002916 usvc->timeout = nla_get_u32(nla_timeout);
2917 usvc->netmask = nla_get_u32(nla_netmask);
2918 }
2919
2920 return 0;
2921}
2922
Hans Schillstromfc723252011-01-03 14:44:43 +01002923static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2924 struct nlattr *nla)
Julius Volz9a812192008-08-14 14:08:44 +02002925{
Julius Volzc860c6b2008-09-02 15:55:36 +02002926 struct ip_vs_service_user_kern usvc;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002927 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002928 int ret;
2929
Hans Schillstromfc723252011-01-03 14:44:43 +01002930 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002931 return ret ? ERR_PTR(ret) : svc;
Julius Volz9a812192008-08-14 14:08:44 +02002932}
2933
2934static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2935{
2936 struct nlattr *nl_dest;
2937
2938 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2939 if (!nl_dest)
2940 return -EMSGSIZE;
2941
2942 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2943 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2944
2945 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2946 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2947 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2948 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2949 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2950 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2951 atomic_read(&dest->activeconns));
2952 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2953 atomic_read(&dest->inactconns));
2954 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2955 atomic_read(&dest->persistconns));
2956
2957 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2958 goto nla_put_failure;
2959
2960 nla_nest_end(skb, nl_dest);
2961
2962 return 0;
2963
2964nla_put_failure:
2965 nla_nest_cancel(skb, nl_dest);
2966 return -EMSGSIZE;
2967}
2968
2969static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2970 struct netlink_callback *cb)
2971{
2972 void *hdr;
2973
2974 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2975 &ip_vs_genl_family, NLM_F_MULTI,
2976 IPVS_CMD_NEW_DEST);
2977 if (!hdr)
2978 return -EMSGSIZE;
2979
2980 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2981 goto nla_put_failure;
2982
2983 return genlmsg_end(skb, hdr);
2984
2985nla_put_failure:
2986 genlmsg_cancel(skb, hdr);
2987 return -EMSGSIZE;
2988}
2989
2990static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2991 struct netlink_callback *cb)
2992{
2993 int idx = 0;
2994 int start = cb->args[0];
2995 struct ip_vs_service *svc;
2996 struct ip_vs_dest *dest;
2997 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
Hans Schillstroma0840e22011-01-03 14:44:58 +01002998 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002999
3000 mutex_lock(&__ip_vs_mutex);
3001
3002 /* Try to find the service for which to dump destinations */
3003 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3004 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3005 goto out_err;
3006
Hans Schillstroma0840e22011-01-03 14:44:58 +01003007
Hans Schillstromfc723252011-01-03 14:44:43 +01003008 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003009 if (IS_ERR(svc) || svc == NULL)
3010 goto out_err;
3011
3012 /* Dump the destinations */
3013 list_for_each_entry(dest, &svc->destinations, n_list) {
3014 if (++idx <= start)
3015 continue;
3016 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3017 idx--;
3018 goto nla_put_failure;
3019 }
3020 }
3021
3022nla_put_failure:
3023 cb->args[0] = idx;
Julius Volz9a812192008-08-14 14:08:44 +02003024
3025out_err:
3026 mutex_unlock(&__ip_vs_mutex);
3027
3028 return skb->len;
3029}
3030
Julius Volzc860c6b2008-09-02 15:55:36 +02003031static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
Julius Volz9a812192008-08-14 14:08:44 +02003032 struct nlattr *nla, int full_entry)
3033{
3034 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3035 struct nlattr *nla_addr, *nla_port;
3036
3037 /* Parse mandatory identifying destination fields first */
3038 if (nla == NULL ||
3039 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3040 return -EINVAL;
3041
3042 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3043 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3044
3045 if (!(nla_addr && nla_port))
3046 return -EINVAL;
3047
Simon Horman258c8892009-12-15 17:01:25 +01003048 memset(udest, 0, sizeof(*udest));
3049
Julius Volz9a812192008-08-14 14:08:44 +02003050 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3051 udest->port = nla_get_u16(nla_port);
3052
3053 /* If a full entry was requested, check for the additional fields */
3054 if (full_entry) {
3055 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3056 *nla_l_thresh;
3057
3058 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3059 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3060 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3061 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3062
3063 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3064 return -EINVAL;
3065
3066 udest->conn_flags = nla_get_u32(nla_fwd)
3067 & IP_VS_CONN_F_FWD_MASK;
3068 udest->weight = nla_get_u32(nla_weight);
3069 udest->u_threshold = nla_get_u32(nla_u_thresh);
3070 udest->l_threshold = nla_get_u32(nla_l_thresh);
3071 }
3072
3073 return 0;
3074}
3075
3076static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3077 const char *mcast_ifn, __be32 syncid)
3078{
3079 struct nlattr *nl_daemon;
3080
3081 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3082 if (!nl_daemon)
3083 return -EMSGSIZE;
3084
3085 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3086 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3087 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3088
3089 nla_nest_end(skb, nl_daemon);
3090
3091 return 0;
3092
3093nla_put_failure:
3094 nla_nest_cancel(skb, nl_daemon);
3095 return -EMSGSIZE;
3096}
3097
3098static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3099 const char *mcast_ifn, __be32 syncid,
3100 struct netlink_callback *cb)
3101{
3102 void *hdr;
3103 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3104 &ip_vs_genl_family, NLM_F_MULTI,
3105 IPVS_CMD_NEW_DAEMON);
3106 if (!hdr)
3107 return -EMSGSIZE;
3108
3109 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3110 goto nla_put_failure;
3111
3112 return genlmsg_end(skb, hdr);
3113
3114nla_put_failure:
3115 genlmsg_cancel(skb, hdr);
3116 return -EMSGSIZE;
3117}
3118
3119static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3120 struct netlink_callback *cb)
3121{
Hans Schillstromf1313152011-01-03 14:44:55 +01003122 struct net *net = skb_net(skb);
3123 struct netns_ipvs *ipvs = net_ipvs(net);
3124
Julius Volz9a812192008-08-14 14:08:44 +02003125 mutex_lock(&__ip_vs_mutex);
Hans Schillstromf1313152011-01-03 14:44:55 +01003126 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
Julius Volz9a812192008-08-14 14:08:44 +02003127 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
Hans Schillstromf1313152011-01-03 14:44:55 +01003128 ipvs->master_mcast_ifn,
3129 ipvs->master_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003130 goto nla_put_failure;
3131
3132 cb->args[0] = 1;
3133 }
3134
Hans Schillstromf1313152011-01-03 14:44:55 +01003135 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
Julius Volz9a812192008-08-14 14:08:44 +02003136 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
Hans Schillstromf1313152011-01-03 14:44:55 +01003137 ipvs->backup_mcast_ifn,
3138 ipvs->backup_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003139 goto nla_put_failure;
3140
3141 cb->args[1] = 1;
3142 }
3143
3144nla_put_failure:
3145 mutex_unlock(&__ip_vs_mutex);
3146
3147 return skb->len;
3148}
3149
Hans Schillstromf1313152011-01-03 14:44:55 +01003150static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003151{
3152 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3153 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3154 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3155 return -EINVAL;
3156
Hans Schillstromf1313152011-01-03 14:44:55 +01003157 return start_sync_thread(net,
3158 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
Julius Volz9a812192008-08-14 14:08:44 +02003159 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3160 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3161}
3162
Hans Schillstromf1313152011-01-03 14:44:55 +01003163static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003164{
3165 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3166 return -EINVAL;
3167
Hans Schillstromf1313152011-01-03 14:44:55 +01003168 return stop_sync_thread(net,
3169 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
Julius Volz9a812192008-08-14 14:08:44 +02003170}
3171
Hans Schillstrom93304192011-01-03 14:44:51 +01003172static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003173{
3174 struct ip_vs_timeout_user t;
3175
Hans Schillstrom93304192011-01-03 14:44:51 +01003176 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003177
3178 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3179 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3180
3181 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3182 t.tcp_fin_timeout =
3183 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3184
3185 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3186 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3187
Hans Schillstrom93304192011-01-03 14:44:51 +01003188 return ip_vs_set_timeout(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003189}
3190
3191static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3192{
3193 struct ip_vs_service *svc = NULL;
Julius Volzc860c6b2008-09-02 15:55:36 +02003194 struct ip_vs_service_user_kern usvc;
3195 struct ip_vs_dest_user_kern udest;
Julius Volz9a812192008-08-14 14:08:44 +02003196 int ret = 0, cmd;
3197 int need_full_svc = 0, need_full_dest = 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003198 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003199 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003200
Hans Schillstromfc723252011-01-03 14:44:43 +01003201 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003202 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003203 cmd = info->genlhdr->cmd;
3204
3205 mutex_lock(&__ip_vs_mutex);
3206
3207 if (cmd == IPVS_CMD_FLUSH) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003208 ret = ip_vs_flush(net);
Julius Volz9a812192008-08-14 14:08:44 +02003209 goto out;
3210 } else if (cmd == IPVS_CMD_SET_CONFIG) {
Hans Schillstrom93304192011-01-03 14:44:51 +01003211 ret = ip_vs_genl_set_config(net, info->attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003212 goto out;
3213 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3214 cmd == IPVS_CMD_DEL_DAEMON) {
3215
3216 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3217
3218 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3219 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3220 info->attrs[IPVS_CMD_ATTR_DAEMON],
3221 ip_vs_daemon_policy)) {
3222 ret = -EINVAL;
3223 goto out;
3224 }
3225
3226 if (cmd == IPVS_CMD_NEW_DAEMON)
Hans Schillstromf1313152011-01-03 14:44:55 +01003227 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003228 else
Hans Schillstromf1313152011-01-03 14:44:55 +01003229 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003230 goto out;
3231 } else if (cmd == IPVS_CMD_ZERO &&
3232 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003233 ret = ip_vs_zero_all(net);
Julius Volz9a812192008-08-14 14:08:44 +02003234 goto out;
3235 }
3236
3237 /* All following commands require a service argument, so check if we
3238 * received a valid one. We need a full service specification when
3239 * adding / editing a service. Only identifying members otherwise. */
3240 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3241 need_full_svc = 1;
3242
Hans Schillstromfc723252011-01-03 14:44:43 +01003243 ret = ip_vs_genl_parse_service(net, &usvc,
Julius Volz9a812192008-08-14 14:08:44 +02003244 info->attrs[IPVS_CMD_ATTR_SERVICE],
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003245 need_full_svc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003246 if (ret)
3247 goto out;
3248
Julius Volz9a812192008-08-14 14:08:44 +02003249 /* Unless we're adding a new service, the service must already exist */
3250 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3251 ret = -ESRCH;
3252 goto out;
3253 }
3254
3255 /* Destination commands require a valid destination argument. For
3256 * adding / editing a destination, we need a full destination
3257 * specification. */
3258 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3259 cmd == IPVS_CMD_DEL_DEST) {
3260 if (cmd != IPVS_CMD_DEL_DEST)
3261 need_full_dest = 1;
3262
3263 ret = ip_vs_genl_parse_dest(&udest,
3264 info->attrs[IPVS_CMD_ATTR_DEST],
3265 need_full_dest);
3266 if (ret)
3267 goto out;
3268 }
3269
3270 switch (cmd) {
3271 case IPVS_CMD_NEW_SERVICE:
3272 if (svc == NULL)
Hans Schillstromfc723252011-01-03 14:44:43 +01003273 ret = ip_vs_add_service(net, &usvc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003274 else
3275 ret = -EEXIST;
3276 break;
3277 case IPVS_CMD_SET_SERVICE:
3278 ret = ip_vs_edit_service(svc, &usvc);
3279 break;
3280 case IPVS_CMD_DEL_SERVICE:
3281 ret = ip_vs_del_service(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003282 /* do not use svc, it can be freed */
Julius Volz9a812192008-08-14 14:08:44 +02003283 break;
3284 case IPVS_CMD_NEW_DEST:
3285 ret = ip_vs_add_dest(svc, &udest);
3286 break;
3287 case IPVS_CMD_SET_DEST:
3288 ret = ip_vs_edit_dest(svc, &udest);
3289 break;
3290 case IPVS_CMD_DEL_DEST:
3291 ret = ip_vs_del_dest(svc, &udest);
3292 break;
3293 case IPVS_CMD_ZERO:
3294 ret = ip_vs_zero_service(svc);
3295 break;
3296 default:
3297 ret = -EINVAL;
3298 }
3299
3300out:
Julius Volz9a812192008-08-14 14:08:44 +02003301 mutex_unlock(&__ip_vs_mutex);
3302
3303 return ret;
3304}
3305
3306static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3307{
3308 struct sk_buff *msg;
3309 void *reply;
3310 int ret, cmd, reply_cmd;
Hans Schillstromfc723252011-01-03 14:44:43 +01003311 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003312 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003313
Hans Schillstromfc723252011-01-03 14:44:43 +01003314 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003315 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003316 cmd = info->genlhdr->cmd;
3317
3318 if (cmd == IPVS_CMD_GET_SERVICE)
3319 reply_cmd = IPVS_CMD_NEW_SERVICE;
3320 else if (cmd == IPVS_CMD_GET_INFO)
3321 reply_cmd = IPVS_CMD_SET_INFO;
3322 else if (cmd == IPVS_CMD_GET_CONFIG)
3323 reply_cmd = IPVS_CMD_SET_CONFIG;
3324 else {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003325 pr_err("unknown Generic Netlink command\n");
Julius Volz9a812192008-08-14 14:08:44 +02003326 return -EINVAL;
3327 }
3328
3329 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3330 if (!msg)
3331 return -ENOMEM;
3332
3333 mutex_lock(&__ip_vs_mutex);
3334
3335 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3336 if (reply == NULL)
3337 goto nla_put_failure;
3338
3339 switch (cmd) {
3340 case IPVS_CMD_GET_SERVICE:
3341 {
3342 struct ip_vs_service *svc;
3343
Hans Schillstromfc723252011-01-03 14:44:43 +01003344 svc = ip_vs_genl_find_service(net,
3345 info->attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003346 if (IS_ERR(svc)) {
3347 ret = PTR_ERR(svc);
3348 goto out_err;
3349 } else if (svc) {
3350 ret = ip_vs_genl_fill_service(msg, svc);
Julius Volz9a812192008-08-14 14:08:44 +02003351 if (ret)
3352 goto nla_put_failure;
3353 } else {
3354 ret = -ESRCH;
3355 goto out_err;
3356 }
3357
3358 break;
3359 }
3360
3361 case IPVS_CMD_GET_CONFIG:
3362 {
3363 struct ip_vs_timeout_user t;
3364
Hans Schillstrom93304192011-01-03 14:44:51 +01003365 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003366#ifdef CONFIG_IP_VS_PROTO_TCP
3367 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3368 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3369 t.tcp_fin_timeout);
3370#endif
3371#ifdef CONFIG_IP_VS_PROTO_UDP
3372 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3373#endif
3374
3375 break;
3376 }
3377
3378 case IPVS_CMD_GET_INFO:
3379 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3380 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01003381 ip_vs_conn_tab_size);
Julius Volz9a812192008-08-14 14:08:44 +02003382 break;
3383 }
3384
3385 genlmsg_end(msg, reply);
Johannes Berg134e6372009-07-10 09:51:34 +00003386 ret = genlmsg_reply(msg, info);
Julius Volz9a812192008-08-14 14:08:44 +02003387 goto out;
3388
3389nla_put_failure:
Hannes Eder1e3e2382009-08-02 11:05:41 +00003390 pr_err("not enough space in Netlink message\n");
Julius Volz9a812192008-08-14 14:08:44 +02003391 ret = -EMSGSIZE;
3392
3393out_err:
3394 nlmsg_free(msg);
3395out:
3396 mutex_unlock(&__ip_vs_mutex);
3397
3398 return ret;
3399}
3400
3401
3402static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3403 {
3404 .cmd = IPVS_CMD_NEW_SERVICE,
3405 .flags = GENL_ADMIN_PERM,
3406 .policy = ip_vs_cmd_policy,
3407 .doit = ip_vs_genl_set_cmd,
3408 },
3409 {
3410 .cmd = IPVS_CMD_SET_SERVICE,
3411 .flags = GENL_ADMIN_PERM,
3412 .policy = ip_vs_cmd_policy,
3413 .doit = ip_vs_genl_set_cmd,
3414 },
3415 {
3416 .cmd = IPVS_CMD_DEL_SERVICE,
3417 .flags = GENL_ADMIN_PERM,
3418 .policy = ip_vs_cmd_policy,
3419 .doit = ip_vs_genl_set_cmd,
3420 },
3421 {
3422 .cmd = IPVS_CMD_GET_SERVICE,
3423 .flags = GENL_ADMIN_PERM,
3424 .doit = ip_vs_genl_get_cmd,
3425 .dumpit = ip_vs_genl_dump_services,
3426 .policy = ip_vs_cmd_policy,
3427 },
3428 {
3429 .cmd = IPVS_CMD_NEW_DEST,
3430 .flags = GENL_ADMIN_PERM,
3431 .policy = ip_vs_cmd_policy,
3432 .doit = ip_vs_genl_set_cmd,
3433 },
3434 {
3435 .cmd = IPVS_CMD_SET_DEST,
3436 .flags = GENL_ADMIN_PERM,
3437 .policy = ip_vs_cmd_policy,
3438 .doit = ip_vs_genl_set_cmd,
3439 },
3440 {
3441 .cmd = IPVS_CMD_DEL_DEST,
3442 .flags = GENL_ADMIN_PERM,
3443 .policy = ip_vs_cmd_policy,
3444 .doit = ip_vs_genl_set_cmd,
3445 },
3446 {
3447 .cmd = IPVS_CMD_GET_DEST,
3448 .flags = GENL_ADMIN_PERM,
3449 .policy = ip_vs_cmd_policy,
3450 .dumpit = ip_vs_genl_dump_dests,
3451 },
3452 {
3453 .cmd = IPVS_CMD_NEW_DAEMON,
3454 .flags = GENL_ADMIN_PERM,
3455 .policy = ip_vs_cmd_policy,
3456 .doit = ip_vs_genl_set_cmd,
3457 },
3458 {
3459 .cmd = IPVS_CMD_DEL_DAEMON,
3460 .flags = GENL_ADMIN_PERM,
3461 .policy = ip_vs_cmd_policy,
3462 .doit = ip_vs_genl_set_cmd,
3463 },
3464 {
3465 .cmd = IPVS_CMD_GET_DAEMON,
3466 .flags = GENL_ADMIN_PERM,
3467 .dumpit = ip_vs_genl_dump_daemons,
3468 },
3469 {
3470 .cmd = IPVS_CMD_SET_CONFIG,
3471 .flags = GENL_ADMIN_PERM,
3472 .policy = ip_vs_cmd_policy,
3473 .doit = ip_vs_genl_set_cmd,
3474 },
3475 {
3476 .cmd = IPVS_CMD_GET_CONFIG,
3477 .flags = GENL_ADMIN_PERM,
3478 .doit = ip_vs_genl_get_cmd,
3479 },
3480 {
3481 .cmd = IPVS_CMD_GET_INFO,
3482 .flags = GENL_ADMIN_PERM,
3483 .doit = ip_vs_genl_get_cmd,
3484 },
3485 {
3486 .cmd = IPVS_CMD_ZERO,
3487 .flags = GENL_ADMIN_PERM,
3488 .policy = ip_vs_cmd_policy,
3489 .doit = ip_vs_genl_set_cmd,
3490 },
3491 {
3492 .cmd = IPVS_CMD_FLUSH,
3493 .flags = GENL_ADMIN_PERM,
3494 .doit = ip_vs_genl_set_cmd,
3495 },
3496};
3497
3498static int __init ip_vs_genl_register(void)
3499{
Michał Mirosław8f698d52009-05-21 10:34:05 +00003500 return genl_register_family_with_ops(&ip_vs_genl_family,
3501 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
Julius Volz9a812192008-08-14 14:08:44 +02003502}
3503
3504static void ip_vs_genl_unregister(void)
3505{
3506 genl_unregister_family(&ip_vs_genl_family);
3507}
3508
3509/* End of Generic Netlink interface definitions */
3510
/*
 * per netns init/exit func.
 */
3514int __net_init __ip_vs_control_init(struct net *net)
3515{
Hans Schillstromfc723252011-01-03 14:44:43 +01003516 int idx;
3517 struct netns_ipvs *ipvs = net_ipvs(net);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003518 struct ctl_table *tbl;
Hans Schillstromfc723252011-01-03 14:44:43 +01003519
Hans Schillstroma0840e22011-01-03 14:44:58 +01003520 atomic_set(&ipvs->dropentry, 0);
3521 spin_lock_init(&ipvs->dropentry_lock);
3522 spin_lock_init(&ipvs->droppacket_lock);
3523 spin_lock_init(&ipvs->securetcp_lock);
3524 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3525
3526 /* Initialize rs_table */
3527 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3528 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3529
Hans Schillstromf2431e62011-01-03 14:45:00 +01003530 INIT_LIST_HEAD(&ipvs->dest_trash);
Hans Schillstrom763f8d02011-01-03 14:45:01 +01003531 atomic_set(&ipvs->ftpsvc_counter, 0);
3532 atomic_set(&ipvs->nullsvc_counter, 0);
Hans Schillstromf2431e62011-01-03 14:45:00 +01003533
Hans Schillstromb17fc992011-01-03 14:44:56 +01003534 /* procfs stats */
Julian Anastasov2a0751a2011-03-04 12:20:35 +02003535 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3536 if (!ipvs->tot_stats.cpustats) {
Hans Schillstromb17fc992011-01-03 14:44:56 +01003537 pr_err("%s() alloc_percpu failed\n", __func__);
3538 goto err_alloc;
3539 }
Julian Anastasov2a0751a2011-03-04 12:20:35 +02003540 spin_lock_init(&ipvs->tot_stats.lock);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003541
3542 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3543 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003544 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3545 &ip_vs_stats_percpu_fops);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003546
3547 if (!net_eq(net, &init_net)) {
3548 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3549 if (tbl == NULL)
3550 goto err_dup;
3551 } else
3552 tbl = vs_vars;
3553 /* Initialize sysctl defaults */
3554 idx = 0;
3555 ipvs->sysctl_amemthresh = 1024;
3556 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3557 ipvs->sysctl_am_droprate = 10;
3558 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3559 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3560 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3561#ifdef CONFIG_IP_VS_NFCT
3562 tbl[idx++].data = &ipvs->sysctl_conntrack;
3563#endif
3564 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3565 ipvs->sysctl_snat_reroute = 1;
3566 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3567 ipvs->sysctl_sync_ver = 1;
3568 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3569 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3570 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3571 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
Simon Horman59e03502011-02-04 18:33:01 +09003572 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3573 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003574 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3575 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3576 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3577
3578
Simon Horman04439292011-02-01 18:29:04 +01003579#ifdef CONFIG_SYSCTL
Hans Schillstroma0840e22011-01-03 14:44:58 +01003580 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
Hans Schillstrom07924702011-01-24 15:14:41 +01003581 tbl);
Simon Horman04439292011-02-01 18:29:04 +01003582 if (ipvs->sysctl_hdr == NULL) {
3583 if (!net_eq(net, &init_net))
3584 kfree(tbl);
3585 goto err_dup;
3586 }
3587#endif
Julian Anastasov6ef757f2011-03-14 01:44:28 +02003588 ip_vs_start_estimator(net, &ipvs->tot_stats);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003589 ipvs->sysctl_tbl = tbl;
Hans Schillstromf6340ee2011-01-03 14:44:59 +01003590 /* Schedule defense work */
3591 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3592 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003593 return 0;
3594
Hans Schillstroma0840e22011-01-03 14:44:58 +01003595err_dup:
Julian Anastasov2a0751a2011-03-04 12:20:35 +02003596 free_percpu(ipvs->tot_stats.cpustats);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003597err_alloc:
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003598 return -ENOMEM;
3599}
3600
3601static void __net_exit __ip_vs_control_cleanup(struct net *net)
3602{
Hans Schillstromb17fc992011-01-03 14:44:56 +01003603 struct netns_ipvs *ipvs = net_ipvs(net);
3604
Hans Schillstromf2431e62011-01-03 14:45:00 +01003605 ip_vs_trash_cleanup(net);
Julian Anastasov6ef757f2011-03-14 01:44:28 +02003606 ip_vs_stop_estimator(net, &ipvs->tot_stats);
Hans Schillstromf2431e62011-01-03 14:45:00 +01003607 cancel_delayed_work_sync(&ipvs->defense_work);
3608 cancel_work_sync(&ipvs->defense_work.work);
Simon Horman04439292011-02-01 18:29:04 +01003609#ifdef CONFIG_SYSCTL
Hans Schillstroma0840e22011-01-03 14:44:58 +01003610 unregister_net_sysctl_table(ipvs->sysctl_hdr);
Simon Horman04439292011-02-01 18:29:04 +01003611#endif
Hans Schillstromb17fc992011-01-03 14:44:56 +01003612 proc_net_remove(net, "ip_vs_stats_percpu");
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003613 proc_net_remove(net, "ip_vs_stats");
3614 proc_net_remove(net, "ip_vs");
Julian Anastasov2a0751a2011-03-04 12:20:35 +02003615 free_percpu(ipvs->tot_stats.cpustats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003616}
3617
3618static struct pernet_operations ipvs_control_ops = {
3619 .init = __ip_vs_control_init,
3620 .exit = __ip_vs_control_cleanup,
3621};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003622
Sven Wegener048cf482008-08-10 18:24:35 +00003623int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003624{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003625 int idx;
Hans Schillstromfc723252011-01-03 14:44:43 +01003626 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003627
3628 EnterFunction(2);
3629
Hans Schillstromfc723252011-01-03 14:44:43 +01003630 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003631 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3632 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3633 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3634 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003635
3636 ret = register_pernet_subsys(&ipvs_control_ops);
3637 if (ret) {
3638 pr_err("cannot register namespace.\n");
3639 goto err;
Eduardo Blancod86bef72010-10-19 10:26:47 +01003640 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003641
3642 smp_wmb(); /* Do we really need it now ? */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003643
Linus Torvalds1da177e2005-04-16 15:20:36 -07003644 ret = nf_register_sockopt(&ip_vs_sockopts);
3645 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003646 pr_err("cannot register sockopt.\n");
Hans Schillstromfc723252011-01-03 14:44:43 +01003647 goto err_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003648 }
3649
Julius Volz9a812192008-08-14 14:08:44 +02003650 ret = ip_vs_genl_register();
3651 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003652 pr_err("cannot register Generic Netlink interface.\n");
Julius Volz9a812192008-08-14 14:08:44 +02003653 nf_unregister_sockopt(&ip_vs_sockopts);
Hans Schillstromfc723252011-01-03 14:44:43 +01003654 goto err_net;
Julius Volz9a812192008-08-14 14:08:44 +02003655 }
3656
Linus Torvalds1da177e2005-04-16 15:20:36 -07003657 LeaveFunction(2);
3658 return 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003659
3660err_net:
3661 unregister_pernet_subsys(&ipvs_control_ops);
3662err:
3663 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003664}
3665
3666
3667void ip_vs_control_cleanup(void)
3668{
3669 EnterFunction(2);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003670 unregister_pernet_subsys(&ipvs_control_ops);
Julius Volz9a812192008-08-14 14:08:44 +02003671 ip_vs_genl_unregister();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003672 nf_unregister_sockopt(&ip_vs_sockopts);
3673 LeaveFunction(2);
3674}