blob: 804fee7be694b2618b69b3f8956cba89f51d222b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
Hannes Eder9aada7a2009-07-30 14:29:44 -070021#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/seq_file.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090034#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080038#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020040#include <net/net_namespace.h>
Hans Schillstrom93304192011-01-03 14:44:51 +010041#include <linux/nsproxy.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <net/ip.h>
Vince Busam09571c72008-09-02 15:55:52 +020043#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020047#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020049#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
/* Mutex serializing the IPVS sockopt handlers; [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* Reader/writer lock protecting the virtual service tables. */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
Linus Torvalds1da177e2005-04-16 15:20:36 -070061/* sysctl variables */
Linus Torvalds1da177e2005-04-16 15:20:36 -070062
63#ifdef CONFIG_IP_VS_DEBUG
/* IPVS debug level; statics are zero-initialized, so it starts at 0. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of the IPVS debug level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
70#endif
71
Vince Busam09571c72008-09-02 15:55:52 +020072#ifdef CONFIG_IP_VS_IPV6
73/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
Hans Schillstrom4a984802011-01-03 14:45:02 +010074static int __ip_vs_addr_is_local_v6(struct net *net,
75 const struct in6_addr *addr)
Vince Busam09571c72008-09-02 15:55:52 +020076{
77 struct rt6_info *rt;
78 struct flowi fl = {
79 .oif = 0,
Changli Gao58116622010-11-12 18:43:55 +000080 .fl6_dst = *addr,
81 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
Vince Busam09571c72008-09-02 15:55:52 +020082 };
83
Hans Schillstrom4a984802011-01-03 14:45:02 +010084 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
Vince Busam09571c72008-09-02 15:55:52 +020085 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
86 return 1;
87
88 return 0;
89}
90#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -070091/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -070092 * update_defense_level is called from keventd and from sysctl,
93 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -070094 */
Hans Schillstrom93304192011-01-03 14:44:51 +010095static void update_defense_level(struct netns_ipvs *ipvs)
Linus Torvalds1da177e2005-04-16 15:20:36 -070096{
97 struct sysinfo i;
98 static int old_secure_tcp = 0;
99 int availmem;
100 int nomem;
101 int to_change = -1;
102
103 /* we only count free and buffered memory (in pages) */
104 si_meminfo(&i);
105 availmem = i.freeram + i.bufferram;
106 /* however in linux 2.5 the i.bufferram is total page cache size,
107 we need adjust it */
108 /* si_swapinfo(&i); */
109 /* availmem = availmem - (i.totalswap - i.freeswap); */
110
Hans Schillstroma0840e22011-01-03 14:44:58 +0100111 nomem = (availmem < ipvs->sysctl_amemthresh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700113 local_bh_disable();
114
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115 /* drop_entry */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100116 spin_lock(&ipvs->dropentry_lock);
117 switch (ipvs->sysctl_drop_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100119 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120 break;
121 case 1:
122 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100123 atomic_set(&ipvs->dropentry, 1);
124 ipvs->sysctl_drop_entry = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100126 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127 }
128 break;
129 case 2:
130 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100131 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100133 atomic_set(&ipvs->dropentry, 0);
134 ipvs->sysctl_drop_entry = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 };
136 break;
137 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100138 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 break;
140 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100141 spin_unlock(&ipvs->dropentry_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142
143 /* drop_packet */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100144 spin_lock(&ipvs->droppacket_lock);
145 switch (ipvs->sysctl_drop_packet) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100147 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148 break;
149 case 1:
150 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100151 ipvs->drop_rate = ipvs->drop_counter
152 = ipvs->sysctl_amemthresh /
153 (ipvs->sysctl_amemthresh-availmem);
154 ipvs->sysctl_drop_packet = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100156 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157 }
158 break;
159 case 2:
160 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100161 ipvs->drop_rate = ipvs->drop_counter
162 = ipvs->sysctl_amemthresh /
163 (ipvs->sysctl_amemthresh-availmem);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100165 ipvs->drop_rate = 0;
166 ipvs->sysctl_drop_packet = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 }
168 break;
169 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100170 ipvs->drop_rate = ipvs->sysctl_am_droprate;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171 break;
172 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100173 spin_unlock(&ipvs->droppacket_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174
175 /* secure_tcp */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100176 spin_lock(&ipvs->securetcp_lock);
177 switch (ipvs->sysctl_secure_tcp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 case 0:
179 if (old_secure_tcp >= 2)
180 to_change = 0;
181 break;
182 case 1:
183 if (nomem) {
184 if (old_secure_tcp < 2)
185 to_change = 1;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100186 ipvs->sysctl_secure_tcp = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 } else {
188 if (old_secure_tcp >= 2)
189 to_change = 0;
190 }
191 break;
192 case 2:
193 if (nomem) {
194 if (old_secure_tcp < 2)
195 to_change = 1;
196 } else {
197 if (old_secure_tcp >= 2)
198 to_change = 0;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100199 ipvs->sysctl_secure_tcp = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 }
201 break;
202 case 3:
203 if (old_secure_tcp < 2)
204 to_change = 1;
205 break;
206 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100207 old_secure_tcp = ipvs->sysctl_secure_tcp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 if (to_change >= 0)
Hans Schillstrom93304192011-01-03 14:44:51 +0100209 ip_vs_protocol_timeout_change(ipvs,
Hans Schillstroma0840e22011-01-03 14:44:58 +0100210 ipvs->sysctl_secure_tcp > 1);
211 spin_unlock(&ipvs->securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700212
213 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214}
215
216
217/*
218 * Timer for checking the defense
219 */
220#define DEFENSE_TIMER_PERIOD 1*HZ
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221
David Howellsc4028952006-11-22 14:57:56 +0000222static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223{
Hans Schillstromf6340ee2011-01-03 14:44:59 +0100224 struct netns_ipvs *ipvs =
225 container_of(work, struct netns_ipvs, defense_work.work);
Hans Schillstrom93304192011-01-03 14:44:51 +0100226
227 update_defense_level(ipvs);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100228 if (atomic_read(&ipvs->dropentry))
Hans Schillstromf6340ee2011-01-03 14:44:59 +0100229 ip_vs_random_dropentry(ipvs->net);
230 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231}
232
233int
234ip_vs_use_count_inc(void)
235{
236 return try_module_get(THIS_MODULE);
237}
238
239void
240ip_vs_use_count_dec(void)
241{
242 module_put(THIS_MODULE);
243}
244
245
246/*
247 * Hash table: for virtual service lookups
248 */
249#define IP_VS_SVC_TAB_BITS 8
250#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
251#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
252
253/* the service table hashed by <protocol, addr, port> */
254static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
255/* the service table hashed by fwmark */
256static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
257
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258
259/*
260 * Returns hash value for virtual service
261 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100262static inline unsigned
263ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
264 const union nf_inet_addr *addr, __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265{
266 register unsigned porth = ntohs(port);
Julius Volzb18610d2008-09-02 15:55:37 +0200267 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268
Julius Volzb18610d2008-09-02 15:55:37 +0200269#ifdef CONFIG_IP_VS_IPV6
270 if (af == AF_INET6)
271 addr_fold = addr->ip6[0]^addr->ip6[1]^
272 addr->ip6[2]^addr->ip6[3];
273#endif
Hans Schillstromfc723252011-01-03 14:44:43 +0100274 addr_fold ^= ((size_t)net>>8);
Julius Volzb18610d2008-09-02 15:55:37 +0200275
276 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277 & IP_VS_SVC_TAB_MASK;
278}
279
280/*
281 * Returns hash value of fwmark for virtual service lookup
282 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100283static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284{
Hans Schillstromfc723252011-01-03 14:44:43 +0100285 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286}
287
288/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100289 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290 * or in the ip_vs_svc_fwm_table by fwmark.
291 * Should be called with locked tables.
292 */
293static int ip_vs_svc_hash(struct ip_vs_service *svc)
294{
295 unsigned hash;
296
297 if (svc->flags & IP_VS_SVC_F_HASHED) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000298 pr_err("%s(): request for already hashed, called from %pF\n",
299 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300 return 0;
301 }
302
303 if (svc->fwmark == 0) {
304 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100305 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100307 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
308 &svc->addr, svc->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
310 } else {
311 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100312 * Hash it by fwmark in svc_fwm_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100314 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
316 }
317
318 svc->flags |= IP_VS_SVC_F_HASHED;
319 /* increase its refcnt because it is referenced by the svc table */
320 atomic_inc(&svc->refcnt);
321 return 1;
322}
323
324
325/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100326 * Unhashes a service from svc_table / svc_fwm_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327 * Should be called with locked tables.
328 */
329static int ip_vs_svc_unhash(struct ip_vs_service *svc)
330{
331 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000332 pr_err("%s(): request for unhash flagged, called from %pF\n",
333 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334 return 0;
335 }
336
337 if (svc->fwmark == 0) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100338 /* Remove it from the svc_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 list_del(&svc->s_list);
340 } else {
Hans Schillstromfc723252011-01-03 14:44:43 +0100341 /* Remove it from the svc_fwm_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342 list_del(&svc->f_list);
343 }
344
345 svc->flags &= ~IP_VS_SVC_F_HASHED;
346 atomic_dec(&svc->refcnt);
347 return 1;
348}
349
350
351/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100352 * Get service by {netns, proto,addr,port} in the service table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353 */
Julius Volzb18610d2008-09-02 15:55:37 +0200354static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100355__ip_vs_service_find(struct net *net, int af, __u16 protocol,
356 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357{
358 unsigned hash;
359 struct ip_vs_service *svc;
360
361 /* Check for "full" addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100362 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363
364 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
Julius Volzb18610d2008-09-02 15:55:37 +0200365 if ((svc->af == af)
366 && ip_vs_addr_equal(af, &svc->addr, vaddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367 && (svc->port == vport)
Hans Schillstromfc723252011-01-03 14:44:43 +0100368 && (svc->protocol == protocol)
369 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371 return svc;
372 }
373 }
374
375 return NULL;
376}
377
378
379/*
380 * Get service by {fwmark} in the service table.
381 */
Julius Volzb18610d2008-09-02 15:55:37 +0200382static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100383__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384{
385 unsigned hash;
386 struct ip_vs_service *svc;
387
388 /* Check for fwmark addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100389 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390
391 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100392 if (svc->fwmark == fwmark && svc->af == af
393 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 return svc;
396 }
397 }
398
399 return NULL;
400}
401
402struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100403ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
Julius Volz3c2e0502008-09-02 15:55:38 +0200404 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405{
406 struct ip_vs_service *svc;
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100407 struct netns_ipvs *ipvs = net_ipvs(net);
Julius Volz3c2e0502008-09-02 15:55:38 +0200408
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409 read_lock(&__ip_vs_svc_lock);
410
411 /*
412 * Check the table hashed by fwmark first
413 */
Julian Anastasov097fc762011-03-04 12:26:17 +0200414 if (fwmark) {
415 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
416 if (svc)
417 goto out;
418 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419
420 /*
421 * Check the table hashed by <protocol,addr,port>
422 * for "full" addressed entries
423 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100424 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425
426 if (svc == NULL
427 && protocol == IPPROTO_TCP
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100428 && atomic_read(&ipvs->ftpsvc_counter)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
430 /*
431 * Check if ftp service entry exists, the packet
432 * might belong to FTP data connections.
433 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100434 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 }
436
437 if (svc == NULL
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100438 && atomic_read(&ipvs->nullsvc_counter)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 /*
440 * Check if the catch-all port (port zero) exists
441 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100442 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 }
444
445 out:
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200446 if (svc)
447 atomic_inc(&svc->usecnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 read_unlock(&__ip_vs_svc_lock);
449
Julius Volz3c2e0502008-09-02 15:55:38 +0200450 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
451 fwmark, ip_vs_proto_name(protocol),
452 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
453 svc ? "hit" : "not hit");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454
455 return svc;
456}
457
458
459static inline void
460__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
461{
462 atomic_inc(&svc->refcnt);
463 dest->svc = svc;
464}
465
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200466static void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467__ip_vs_unbind_svc(struct ip_vs_dest *dest)
468{
469 struct ip_vs_service *svc = dest->svc;
470
471 dest->svc = NULL;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200472 if (atomic_dec_and_test(&svc->refcnt)) {
473 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
474 svc->fwmark,
475 IP_VS_DBG_ADDR(svc->af, &svc->addr),
476 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +0100477 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200479 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700480}
481
482
483/*
484 * Returns hash value for real service
485 */
Julius Volz7937df12008-09-02 15:55:48 +0200486static inline unsigned ip_vs_rs_hashkey(int af,
487 const union nf_inet_addr *addr,
488 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700489{
490 register unsigned porth = ntohs(port);
Julius Volz7937df12008-09-02 15:55:48 +0200491 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492
Julius Volz7937df12008-09-02 15:55:48 +0200493#ifdef CONFIG_IP_VS_IPV6
494 if (af == AF_INET6)
495 addr_fold = addr->ip6[0]^addr->ip6[1]^
496 addr->ip6[2]^addr->ip6[3];
497#endif
498
499 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500 & IP_VS_RTAB_MASK;
501}
502
503/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100504 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505 * should be called with locked tables.
506 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100507static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508{
509 unsigned hash;
510
511 if (!list_empty(&dest->d_list)) {
512 return 0;
513 }
514
515 /*
516 * Hash by proto,addr,port,
517 * which are the parameters of the real service.
518 */
Julius Volz7937df12008-09-02 15:55:48 +0200519 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
520
Hans Schillstromfc723252011-01-03 14:44:43 +0100521 list_add(&dest->d_list, &ipvs->rs_table[hash]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522
523 return 1;
524}
525
526/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100527 * UNhashes ip_vs_dest from rs_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528 * should be called with locked tables.
529 */
530static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
531{
532 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100533 * Remove it from the rs_table table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 */
535 if (!list_empty(&dest->d_list)) {
536 list_del(&dest->d_list);
537 INIT_LIST_HEAD(&dest->d_list);
538 }
539
540 return 1;
541}
542
543/*
544 * Lookup real service by <proto,addr,port> in the real service table.
545 */
546struct ip_vs_dest *
Hans Schillstromfc723252011-01-03 14:44:43 +0100547ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
Julius Volz7937df12008-09-02 15:55:48 +0200548 const union nf_inet_addr *daddr,
549 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550{
Hans Schillstromfc723252011-01-03 14:44:43 +0100551 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 unsigned hash;
553 struct ip_vs_dest *dest;
554
555 /*
556 * Check for "full" addressed entries
557 * Return the first found entry
558 */
Julius Volz7937df12008-09-02 15:55:48 +0200559 hash = ip_vs_rs_hashkey(af, daddr, dport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560
Hans Schillstroma0840e22011-01-03 14:44:58 +0100561 read_lock(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100562 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200563 if ((dest->af == af)
564 && ip_vs_addr_equal(af, &dest->addr, daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565 && (dest->port == dport)
566 && ((dest->protocol == protocol) ||
567 dest->vfwmark)) {
568 /* HIT */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100569 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570 return dest;
571 }
572 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100573 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574
575 return NULL;
576}
577
578/*
579 * Lookup destination by {addr,port} in the given service
580 */
581static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200582ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
583 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584{
585 struct ip_vs_dest *dest;
586
587 /*
588 * Find the destination for the given service
589 */
590 list_for_each_entry(dest, &svc->destinations, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200591 if ((dest->af == svc->af)
592 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
593 && (dest->port == dport)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 /* HIT */
595 return dest;
596 }
597 }
598
599 return NULL;
600}
601
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800602/*
603 * Find destination by {daddr,dport,vaddr,protocol}
604 * Cretaed to be used in ip_vs_process_message() in
605 * the backup synchronization daemon. It finds the
606 * destination to be bound to the received connection
607 * on the backup.
608 *
609 * ip_vs_lookup_real_service() looked promissing, but
610 * seems not working as expected.
611 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100612struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
613 const union nf_inet_addr *daddr,
Julius Volz7937df12008-09-02 15:55:48 +0200614 __be16 dport,
615 const union nf_inet_addr *vaddr,
Hans Schillstrom0e051e62010-11-19 14:25:07 +0100616 __be16 vport, __u16 protocol, __u32 fwmark)
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800617{
618 struct ip_vs_dest *dest;
619 struct ip_vs_service *svc;
620
Hans Schillstromfc723252011-01-03 14:44:43 +0100621 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800622 if (!svc)
623 return NULL;
624 dest = ip_vs_lookup_dest(svc, daddr, dport);
625 if (dest)
626 atomic_inc(&dest->refcnt);
627 ip_vs_service_put(svc);
628 return dest;
629}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630
631/*
632 * Lookup dest by {svc,addr,port} in the destination trash.
633 * The destination trash is used to hold the destinations that are removed
634 * from the service table but are still referenced by some conn entries.
635 * The reason to add the destination trash is when the dest is temporary
636 * down (either by administrator or by monitor program), the dest can be
637 * picked back from the trash, the remaining connections to the dest can
638 * continue, and the counting information of the dest is also useful for
639 * scheduling.
640 */
641static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200642ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
643 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644{
645 struct ip_vs_dest *dest, *nxt;
Hans Schillstromf2431e62011-01-03 14:45:00 +0100646 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647
648 /*
649 * Find the destination in trash
650 */
Hans Schillstromf2431e62011-01-03 14:45:00 +0100651 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200652 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
653 "dest->refcnt=%d\n",
654 dest->vfwmark,
655 IP_VS_DBG_ADDR(svc->af, &dest->addr),
656 ntohs(dest->port),
657 atomic_read(&dest->refcnt));
658 if (dest->af == svc->af &&
659 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 dest->port == dport &&
661 dest->vfwmark == svc->fwmark &&
662 dest->protocol == svc->protocol &&
663 (svc->fwmark ||
Julius Volz7937df12008-09-02 15:55:48 +0200664 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 dest->vport == svc->port))) {
666 /* HIT */
667 return dest;
668 }
669
670 /*
671 * Try to purge the destination from trash if not referenced
672 */
673 if (atomic_read(&dest->refcnt) == 1) {
Julius Volz7937df12008-09-02 15:55:48 +0200674 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
675 "from trash\n",
676 dest->vfwmark,
677 IP_VS_DBG_ADDR(svc->af, &dest->addr),
678 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 list_del(&dest->n_list);
680 ip_vs_dst_reset(dest);
681 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100682 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683 kfree(dest);
684 }
685 }
686
687 return NULL;
688}
689
690
691/*
692 * Clean up all the destinations in the trash
693 * Called by the ip_vs_control_cleanup()
694 *
695 * When the ip_vs_control_clearup is activated by ipvs module exit,
696 * the service tables must have been flushed and all the connections
697 * are expired, and the refcnt of each destination in the trash must
698 * be 1, so we simply release them here.
699 */
Hans Schillstromf2431e62011-01-03 14:45:00 +0100700static void ip_vs_trash_cleanup(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701{
702 struct ip_vs_dest *dest, *nxt;
Hans Schillstromf2431e62011-01-03 14:45:00 +0100703 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704
Hans Schillstromf2431e62011-01-03 14:45:00 +0100705 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706 list_del(&dest->n_list);
707 ip_vs_dst_reset(dest);
708 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100709 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710 kfree(dest);
711 }
712}
713
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200714static void
715ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
716{
717#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
718#define IP_VS_SHOW_STATS_RATE(r) dst->r = src->ustats.r
719
720 spin_lock_bh(&src->lock);
721
722 IP_VS_SHOW_STATS_COUNTER(conns);
723 IP_VS_SHOW_STATS_COUNTER(inpkts);
724 IP_VS_SHOW_STATS_COUNTER(outpkts);
725 IP_VS_SHOW_STATS_COUNTER(inbytes);
726 IP_VS_SHOW_STATS_COUNTER(outbytes);
727
728 IP_VS_SHOW_STATS_RATE(cps);
729 IP_VS_SHOW_STATS_RATE(inpps);
730 IP_VS_SHOW_STATS_RATE(outpps);
731 IP_VS_SHOW_STATS_RATE(inbps);
732 IP_VS_SHOW_STATS_RATE(outbps);
733
734 spin_unlock_bh(&src->lock);
735}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736
737static void
738ip_vs_zero_stats(struct ip_vs_stats *stats)
739{
740 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000741
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200742 /* get current counters as zero point, rates are zeroed */
743
744#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
745#define IP_VS_ZERO_STATS_RATE(r) stats->ustats.r = 0
746
747 IP_VS_ZERO_STATS_COUNTER(conns);
748 IP_VS_ZERO_STATS_COUNTER(inpkts);
749 IP_VS_ZERO_STATS_COUNTER(outpkts);
750 IP_VS_ZERO_STATS_COUNTER(inbytes);
751 IP_VS_ZERO_STATS_COUNTER(outbytes);
752
753 IP_VS_ZERO_STATS_RATE(cps);
754 IP_VS_ZERO_STATS_RATE(inpps);
755 IP_VS_ZERO_STATS_RATE(outpps);
756 IP_VS_ZERO_STATS_RATE(inbps);
757 IP_VS_ZERO_STATS_RATE(outbps);
758
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000760
Sven Wegener3a14a3132008-08-10 18:24:41 +0000761 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762}
763
764/*
765 * Update a destination in the given service
766 */
767static void
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200768__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
769 struct ip_vs_dest_user_kern *udest, int add)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770{
Hans Schillstromfc723252011-01-03 14:44:43 +0100771 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 int conn_flags;
773
774 /* set the weight and the flags */
775 atomic_set(&dest->weight, udest->weight);
Julian Anastasov35757922010-09-17 14:18:16 +0200776 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
777 conn_flags |= IP_VS_CONN_F_INACTIVE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
Julian Anastasov35757922010-09-17 14:18:16 +0200780 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
782 } else {
783 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100784 * Put the real service in rs_table if not present.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785 * For now only for NAT!
786 */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100787 write_lock_bh(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100788 ip_vs_rs_hash(ipvs, dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100789 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790 }
791 atomic_set(&dest->conn_flags, conn_flags);
792
793 /* bind the service */
794 if (!dest->svc) {
795 __ip_vs_bind_svc(dest, svc);
796 } else {
797 if (dest->svc != svc) {
798 __ip_vs_unbind_svc(dest);
799 ip_vs_zero_stats(&dest->stats);
800 __ip_vs_bind_svc(dest, svc);
801 }
802 }
803
804 /* set the dest status flags */
805 dest->flags |= IP_VS_DEST_F_AVAILABLE;
806
807 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
808 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
809 dest->u_threshold = udest->u_threshold;
810 dest->l_threshold = udest->l_threshold;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200811
Julian Anastasovfc604762010-10-17 16:38:15 +0300812 spin_lock(&dest->dst_lock);
813 ip_vs_dst_reset(dest);
814 spin_unlock(&dest->dst_lock);
815
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200816 if (add)
Hans Schillstrom29c20262011-01-03 14:44:54 +0100817 ip_vs_new_estimator(svc->net, &dest->stats);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200818
819 write_lock_bh(&__ip_vs_svc_lock);
820
821 /* Wait until all other svc users go away */
822 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
823
824 if (add) {
825 list_add(&dest->n_list, &svc->destinations);
826 svc->num_dests++;
827 }
828
829 /* call the update_service, because server weight may be changed */
830 if (svc->scheduler->update_service)
831 svc->scheduler->update_service(svc);
832
833 write_unlock_bh(&__ip_vs_svc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834}
835
836
837/*
838 * Create a destination for the given service
839 */
840static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200841ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842 struct ip_vs_dest **dest_p)
843{
844 struct ip_vs_dest *dest;
845 unsigned atype;
846
847 EnterFunction(2);
848
Vince Busam09571c72008-09-02 15:55:52 +0200849#ifdef CONFIG_IP_VS_IPV6
850 if (svc->af == AF_INET6) {
851 atype = ipv6_addr_type(&udest->addr.in6);
Sven Wegener3bfb92f2008-09-05 16:53:49 +0200852 if ((!(atype & IPV6_ADDR_UNICAST) ||
853 atype & IPV6_ADDR_LINKLOCAL) &&
Hans Schillstrom4a984802011-01-03 14:45:02 +0100854 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
Vince Busam09571c72008-09-02 15:55:52 +0200855 return -EINVAL;
856 } else
857#endif
858 {
Hans Schillstrom4a984802011-01-03 14:45:02 +0100859 atype = inet_addr_type(svc->net, udest->addr.ip);
Vince Busam09571c72008-09-02 15:55:52 +0200860 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
861 return -EINVAL;
862 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863
Simon Hormandee06e42010-08-26 02:54:31 +0000864 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000866 pr_err("%s(): no memory.\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 return -ENOMEM;
868 }
Hans Schillstromb17fc992011-01-03 14:44:56 +0100869 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
870 if (!dest->stats.cpustats) {
871 pr_err("%s() alloc_percpu failed\n", __func__);
872 goto err_alloc;
873 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874
Julius Volzc860c6b2008-09-02 15:55:36 +0200875 dest->af = svc->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 dest->protocol = svc->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +0200877 dest->vaddr = svc->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878 dest->vport = svc->port;
879 dest->vfwmark = svc->fwmark;
Julius Volzc860c6b2008-09-02 15:55:36 +0200880 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881 dest->port = udest->port;
882
883 atomic_set(&dest->activeconns, 0);
884 atomic_set(&dest->inactconns, 0);
885 atomic_set(&dest->persistconns, 0);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200886 atomic_set(&dest->refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700887
888 INIT_LIST_HEAD(&dest->d_list);
889 spin_lock_init(&dest->dst_lock);
890 spin_lock_init(&dest->stats.lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200891 __ip_vs_update_dest(svc, dest, udest, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700892
893 *dest_p = dest;
894
895 LeaveFunction(2);
896 return 0;
Hans Schillstromb17fc992011-01-03 14:44:56 +0100897
898err_alloc:
899 kfree(dest);
900 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901}
902
903
904/*
905 * Add a destination into an existing service
906 */
907static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200908ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909{
910 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200911 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700912 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 int ret;
914
915 EnterFunction(2);
916
917 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000918 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919 return -ERANGE;
920 }
921
922 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000923 pr_err("%s(): lower threshold is higher than upper threshold\n",
924 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 return -ERANGE;
926 }
927
Julius Volzc860c6b2008-09-02 15:55:36 +0200928 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
929
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 /*
931 * Check if the dest already exists in the list
932 */
Julius Volz7937df12008-09-02 15:55:48 +0200933 dest = ip_vs_lookup_dest(svc, &daddr, dport);
934
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935 if (dest != NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000936 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937 return -EEXIST;
938 }
939
940 /*
941 * Check if the dest already exists in the trash and
942 * is from the same service
943 */
Julius Volz7937df12008-09-02 15:55:48 +0200944 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
945
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 if (dest != NULL) {
Julius Volzcfc78c52008-09-02 15:55:53 +0200947 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
948 "dest->refcnt=%d, service %u/%s:%u\n",
949 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
950 atomic_read(&dest->refcnt),
951 dest->vfwmark,
952 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
953 ntohs(dest->vport));
954
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 /*
956 * Get the destination from the trash
957 */
958 list_del(&dest->n_list);
959
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200960 __ip_vs_update_dest(svc, dest, udest, 1);
961 ret = 0;
962 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 /*
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200964 * Allocate and initialize the dest structure
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200966 ret = ip_vs_new_dest(svc, udest, &dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 LeaveFunction(2);
969
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200970 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971}
972
973
974/*
975 * Edit a destination in the given service
976 */
977static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200978ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979{
980 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200981 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700982 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983
984 EnterFunction(2);
985
986 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000987 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988 return -ERANGE;
989 }
990
991 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000992 pr_err("%s(): lower threshold is higher than upper threshold\n",
993 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994 return -ERANGE;
995 }
996
Julius Volzc860c6b2008-09-02 15:55:36 +0200997 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
998
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999 /*
1000 * Lookup the destination list
1001 */
Julius Volz7937df12008-09-02 15:55:48 +02001002 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1003
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001005 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006 return -ENOENT;
1007 }
1008
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001009 __ip_vs_update_dest(svc, dest, udest, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010 LeaveFunction(2);
1011
1012 return 0;
1013}
1014
1015
1016/*
1017 * Delete a destination (must be already unlinked from the service)
1018 */
Hans Schillstrom29c20262011-01-03 14:44:54 +01001019static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020{
Hans Schillstroma0840e22011-01-03 14:44:58 +01001021 struct netns_ipvs *ipvs = net_ipvs(net);
1022
Hans Schillstrom29c20262011-01-03 14:44:54 +01001023 ip_vs_kill_estimator(net, &dest->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024
1025 /*
1026 * Remove it from the d-linked list with the real services.
1027 */
Hans Schillstroma0840e22011-01-03 14:44:58 +01001028 write_lock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029 ip_vs_rs_unhash(dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +01001030 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031
1032 /*
1033 * Decrease the refcnt of the dest, and free the dest
1034 * if nobody refers to it (refcnt=0). Otherwise, throw
1035 * the destination into the trash.
1036 */
1037 if (atomic_dec_and_test(&dest->refcnt)) {
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001038 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1039 dest->vfwmark,
1040 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1041 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042 ip_vs_dst_reset(dest);
1043 /* simply decrease svc->refcnt here, let the caller check
1044 and release the service if nobody refers to it.
1045 Only user context can release destination and service,
1046 and only one user context can update virtual service at a
1047 time, so the operation here is OK */
1048 atomic_dec(&dest->svc->refcnt);
Hans Schillstromb17fc992011-01-03 14:44:56 +01001049 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050 kfree(dest);
1051 } else {
Julius Volzcfc78c52008-09-02 15:55:53 +02001052 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1053 "dest->refcnt=%d\n",
1054 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1055 ntohs(dest->port),
1056 atomic_read(&dest->refcnt));
Hans Schillstromf2431e62011-01-03 14:45:00 +01001057 list_add(&dest->n_list, &ipvs->dest_trash);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058 atomic_inc(&dest->refcnt);
1059 }
1060}
1061
1062
1063/*
1064 * Unlink a destination from the given service
1065 */
1066static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1067 struct ip_vs_dest *dest,
1068 int svcupd)
1069{
1070 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1071
1072 /*
1073 * Remove it from the d-linked destination list.
1074 */
1075 list_del(&dest->n_list);
1076 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001077
1078 /*
1079 * Call the update_service function of its scheduler
1080 */
1081 if (svcupd && svc->scheduler->update_service)
1082 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083}
1084
1085
1086/*
1087 * Delete a destination server in the given service
1088 */
1089static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001090ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091{
1092 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -07001093 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094
1095 EnterFunction(2);
1096
Julius Volz7937df12008-09-02 15:55:48 +02001097 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
Julius Volzc860c6b2008-09-02 15:55:36 +02001098
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001100 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101 return -ENOENT;
1102 }
1103
1104 write_lock_bh(&__ip_vs_svc_lock);
1105
1106 /*
1107 * Wait until all other svc users go away.
1108 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001109 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110
1111 /*
1112 * Unlink dest from the service
1113 */
1114 __ip_vs_unlink_dest(svc, dest, 1);
1115
1116 write_unlock_bh(&__ip_vs_svc_lock);
1117
1118 /*
1119 * Delete the destination
1120 */
Hans Schillstroma0840e22011-01-03 14:44:58 +01001121 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122
1123 LeaveFunction(2);
1124
1125 return 0;
1126}
1127
1128
1129/*
1130 * Add a service into the service hash table
1131 */
1132static int
Hans Schillstromfc723252011-01-03 14:44:43 +01001133ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
Julius Volzc860c6b2008-09-02 15:55:36 +02001134 struct ip_vs_service **svc_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135{
1136 int ret = 0;
1137 struct ip_vs_scheduler *sched = NULL;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001138 struct ip_vs_pe *pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 struct ip_vs_service *svc = NULL;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001140 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141
1142 /* increase the module use count */
1143 ip_vs_use_count_inc();
1144
1145 /* Lookup the scheduler by 'u->sched_name' */
1146 sched = ip_vs_scheduler_get(u->sched_name);
1147 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001148 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 ret = -ENOENT;
Simon Horman6e08bfb2010-08-22 21:37:52 +09001150 goto out_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151 }
1152
Simon Horman0d1e71b2010-08-22 21:37:54 +09001153 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001154 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001155 if (pe == NULL) {
1156 pr_info("persistence engine module ip_vs_pe_%s "
1157 "not found\n", u->pe_name);
1158 ret = -ENOENT;
1159 goto out_err;
1160 }
1161 }
1162
Julius Volzf94fd042008-09-02 15:55:55 +02001163#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001164 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1165 ret = -EINVAL;
1166 goto out_err;
Julius Volzf94fd042008-09-02 15:55:55 +02001167 }
1168#endif
1169
Simon Hormandee06e42010-08-26 02:54:31 +00001170 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171 if (svc == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001172 IP_VS_DBG(1, "%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173 ret = -ENOMEM;
1174 goto out_err;
1175 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001176 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1177 if (!svc->stats.cpustats) {
1178 pr_err("%s() alloc_percpu failed\n", __func__);
1179 goto out_err;
1180 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181
1182 /* I'm the first user of the service */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001183 atomic_set(&svc->usecnt, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184 atomic_set(&svc->refcnt, 0);
1185
Julius Volzc860c6b2008-09-02 15:55:36 +02001186 svc->af = u->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187 svc->protocol = u->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +02001188 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189 svc->port = u->port;
1190 svc->fwmark = u->fwmark;
1191 svc->flags = u->flags;
1192 svc->timeout = u->timeout * HZ;
1193 svc->netmask = u->netmask;
Hans Schillstromfc723252011-01-03 14:44:43 +01001194 svc->net = net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195
1196 INIT_LIST_HEAD(&svc->destinations);
1197 rwlock_init(&svc->sched_lock);
1198 spin_lock_init(&svc->stats.lock);
1199
1200 /* Bind the scheduler */
1201 ret = ip_vs_bind_scheduler(svc, sched);
1202 if (ret)
1203 goto out_err;
1204 sched = NULL;
1205
Simon Horman0d1e71b2010-08-22 21:37:54 +09001206 /* Bind the ct retriever */
1207 ip_vs_bind_pe(svc, pe);
1208 pe = NULL;
1209
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210 /* Update the virtual service counters */
1211 if (svc->port == FTPPORT)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001212 atomic_inc(&ipvs->ftpsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213 else if (svc->port == 0)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001214 atomic_inc(&ipvs->nullsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215
Hans Schillstrom29c20262011-01-03 14:44:54 +01001216 ip_vs_new_estimator(net, &svc->stats);
Julius Volzf94fd042008-09-02 15:55:55 +02001217
1218 /* Count only IPv4 services for old get/setsockopt interface */
1219 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001220 ipvs->num_services++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221
1222 /* Hash the service into the service table */
1223 write_lock_bh(&__ip_vs_svc_lock);
1224 ip_vs_svc_hash(svc);
1225 write_unlock_bh(&__ip_vs_svc_lock);
1226
1227 *svc_p = svc;
1228 return 0;
1229
Hans Schillstromb17fc992011-01-03 14:44:56 +01001230
Simon Horman6e08bfb2010-08-22 21:37:52 +09001231 out_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232 if (svc != NULL) {
Simon Horman2fabf352010-08-22 21:37:52 +09001233 ip_vs_unbind_scheduler(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234 if (svc->inc) {
1235 local_bh_disable();
1236 ip_vs_app_inc_put(svc->inc);
1237 local_bh_enable();
1238 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001239 if (svc->stats.cpustats)
1240 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241 kfree(svc);
1242 }
1243 ip_vs_scheduler_put(sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001244 ip_vs_pe_put(pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246 /* decrease the module use count */
1247 ip_vs_use_count_dec();
1248
1249 return ret;
1250}
1251
1252
1253/*
1254 * Edit a service and bind it with a new scheduler
1255 */
1256static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001257ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258{
1259 struct ip_vs_scheduler *sched, *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001260 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261 int ret = 0;
1262
1263 /*
1264 * Lookup the scheduler, by 'u->sched_name'
1265 */
1266 sched = ip_vs_scheduler_get(u->sched_name);
1267 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001268 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269 return -ENOENT;
1270 }
1271 old_sched = sched;
1272
Simon Horman0d1e71b2010-08-22 21:37:54 +09001273 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001274 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001275 if (pe == NULL) {
1276 pr_info("persistence engine module ip_vs_pe_%s "
1277 "not found\n", u->pe_name);
1278 ret = -ENOENT;
1279 goto out;
1280 }
1281 old_pe = pe;
1282 }
1283
Julius Volzf94fd042008-09-02 15:55:55 +02001284#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001285 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1286 ret = -EINVAL;
1287 goto out;
Julius Volzf94fd042008-09-02 15:55:55 +02001288 }
1289#endif
1290
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291 write_lock_bh(&__ip_vs_svc_lock);
1292
1293 /*
1294 * Wait until all other svc users go away.
1295 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001296 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297
1298 /*
1299 * Set the flags and timeout value
1300 */
1301 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1302 svc->timeout = u->timeout * HZ;
1303 svc->netmask = u->netmask;
1304
1305 old_sched = svc->scheduler;
1306 if (sched != old_sched) {
1307 /*
1308 * Unbind the old scheduler
1309 */
1310 if ((ret = ip_vs_unbind_scheduler(svc))) {
1311 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001312 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 }
1314
1315 /*
1316 * Bind the new scheduler
1317 */
1318 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1319 /*
1320 * If ip_vs_bind_scheduler fails, restore the old
1321 * scheduler.
1322 * The main reason of failure is out of memory.
1323 *
1324 * The question is if the old scheduler can be
1325 * restored all the time. TODO: if it cannot be
1326 * restored some time, we must delete the service,
1327 * otherwise the system may crash.
1328 */
1329 ip_vs_bind_scheduler(svc, old_sched);
1330 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001331 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332 }
1333 }
1334
Simon Horman0d1e71b2010-08-22 21:37:54 +09001335 old_pe = svc->pe;
1336 if (pe != old_pe) {
1337 ip_vs_unbind_pe(svc);
1338 ip_vs_bind_pe(svc, pe);
1339 }
1340
Simon Horman9e691ed2008-09-17 10:10:41 +10001341 out_unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342 write_unlock_bh(&__ip_vs_svc_lock);
Simon Horman9e691ed2008-09-17 10:10:41 +10001343 out:
Simon Horman6e08bfb2010-08-22 21:37:52 +09001344 ip_vs_scheduler_put(old_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001345 ip_vs_pe_put(old_pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001346 return ret;
1347}
1348
1349
1350/*
1351 * Delete a service from the service list
1352 * - The service must be unlinked, unlocked and not referenced!
1353 * - We are called under _bh lock
1354 */
1355static void __ip_vs_del_service(struct ip_vs_service *svc)
1356{
1357 struct ip_vs_dest *dest, *nxt;
1358 struct ip_vs_scheduler *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001359 struct ip_vs_pe *old_pe;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001360 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001361
1362 pr_info("%s: enter\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363
Julius Volzf94fd042008-09-02 15:55:55 +02001364 /* Count only IPv4 services for old get/setsockopt interface */
1365 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001366 ipvs->num_services--;
Julius Volzf94fd042008-09-02 15:55:55 +02001367
Hans Schillstrom29c20262011-01-03 14:44:54 +01001368 ip_vs_kill_estimator(svc->net, &svc->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369
1370 /* Unbind scheduler */
1371 old_sched = svc->scheduler;
1372 ip_vs_unbind_scheduler(svc);
Simon Horman6e08bfb2010-08-22 21:37:52 +09001373 ip_vs_scheduler_put(old_sched);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374
Simon Horman0d1e71b2010-08-22 21:37:54 +09001375 /* Unbind persistence engine */
1376 old_pe = svc->pe;
1377 ip_vs_unbind_pe(svc);
1378 ip_vs_pe_put(old_pe);
1379
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 /* Unbind app inc */
1381 if (svc->inc) {
1382 ip_vs_app_inc_put(svc->inc);
1383 svc->inc = NULL;
1384 }
1385
1386 /*
1387 * Unlink the whole destination list
1388 */
1389 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1390 __ip_vs_unlink_dest(svc, dest, 0);
Hans Schillstrom29c20262011-01-03 14:44:54 +01001391 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392 }
1393
1394 /*
1395 * Update the virtual service counters
1396 */
1397 if (svc->port == FTPPORT)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001398 atomic_dec(&ipvs->ftpsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399 else if (svc->port == 0)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001400 atomic_dec(&ipvs->nullsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001401
1402 /*
1403 * Free the service if nobody refers to it
1404 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001405 if (atomic_read(&svc->refcnt) == 0) {
1406 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1407 svc->fwmark,
1408 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1409 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +01001410 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001412 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413
1414 /* decrease the module use count */
1415 ip_vs_use_count_dec();
1416}
1417
1418/*
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001419 * Unlink a service from list and try to delete it if its refcnt reached 0
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001421static void ip_vs_unlink_service(struct ip_vs_service *svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423 /*
1424 * Unhash it from the service table
1425 */
1426 write_lock_bh(&__ip_vs_svc_lock);
1427
1428 ip_vs_svc_unhash(svc);
1429
1430 /*
1431 * Wait until all the svc users go away.
1432 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001433 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001434
1435 __ip_vs_del_service(svc);
1436
1437 write_unlock_bh(&__ip_vs_svc_lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001438}
1439
1440/*
1441 * Delete a service from the service list
1442 */
1443static int ip_vs_del_service(struct ip_vs_service *svc)
1444{
1445 if (svc == NULL)
1446 return -EEXIST;
1447 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001448
1449 return 0;
1450}
1451
1452
1453/*
1454 * Flush all the virtual services
1455 */
Hans Schillstromfc723252011-01-03 14:44:43 +01001456static int ip_vs_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001457{
1458 int idx;
1459 struct ip_vs_service *svc, *nxt;
1460
1461 /*
Hans Schillstromfc723252011-01-03 14:44:43 +01001462 * Flush the service table hashed by <netns,protocol,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463 */
1464 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001465 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1466 s_list) {
1467 if (net_eq(svc->net, net))
1468 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469 }
1470 }
1471
1472 /*
1473 * Flush the service table hashed by fwmark
1474 */
1475 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1476 list_for_each_entry_safe(svc, nxt,
1477 &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001478 if (net_eq(svc->net, net))
1479 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480 }
1481 }
1482
1483 return 0;
1484}
1485
1486
1487/*
1488 * Zero counters in a service or all services
1489 */
1490static int ip_vs_zero_service(struct ip_vs_service *svc)
1491{
1492 struct ip_vs_dest *dest;
1493
1494 write_lock_bh(&__ip_vs_svc_lock);
1495 list_for_each_entry(dest, &svc->destinations, n_list) {
1496 ip_vs_zero_stats(&dest->stats);
1497 }
1498 ip_vs_zero_stats(&svc->stats);
1499 write_unlock_bh(&__ip_vs_svc_lock);
1500 return 0;
1501}
1502
Hans Schillstromfc723252011-01-03 14:44:43 +01001503static int ip_vs_zero_all(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504{
1505 int idx;
1506 struct ip_vs_service *svc;
1507
1508 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1509 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001510 if (net_eq(svc->net, net))
1511 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512 }
1513 }
1514
1515 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1516 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001517 if (net_eq(svc->net, net))
1518 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519 }
1520 }
1521
Julian Anastasov2a0751a2011-03-04 12:20:35 +02001522 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001523 return 0;
1524}
1525
1526
1527static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001528proc_do_defense_mode(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529 void __user *buffer, size_t *lenp, loff_t *ppos)
1530{
Hans Schillstrom93304192011-01-03 14:44:51 +01001531 struct net *net = current->nsproxy->net_ns;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 int *valp = table->data;
1533 int val = *valp;
1534 int rc;
1535
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001536 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001537 if (write && (*valp != val)) {
1538 if ((*valp < 0) || (*valp > 3)) {
1539 /* Restore the correct value */
1540 *valp = val;
1541 } else {
Hans Schillstrom93304192011-01-03 14:44:51 +01001542 update_defense_level(net_ipvs(net));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543 }
1544 }
1545 return rc;
1546}
1547
1548
1549static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001550proc_do_sync_threshold(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551 void __user *buffer, size_t *lenp, loff_t *ppos)
1552{
1553 int *valp = table->data;
1554 int val[2];
1555 int rc;
1556
1557 /* backup the value first */
1558 memcpy(val, valp, sizeof(val));
1559
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001560 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001561 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1562 /* Restore the correct value */
1563 memcpy(valp, val, sizeof(val));
1564 }
1565 return rc;
1566}
1567
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001568static int
1569proc_do_sync_mode(ctl_table *table, int write,
1570 void __user *buffer, size_t *lenp, loff_t *ppos)
1571{
1572 int *valp = table->data;
1573 int val = *valp;
1574 int rc;
1575
1576 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1577 if (write && (*valp != val)) {
1578 if ((*valp < 0) || (*valp > 1)) {
1579 /* Restore the correct value */
1580 *valp = val;
1581 } else {
Hans Schillstromf1313152011-01-03 14:44:55 +01001582 struct net *net = current->nsproxy->net_ns;
1583 ip_vs_sync_switch_mode(net, val);
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001584 }
1585 }
1586 return rc;
1587}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588
/*
 *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *	Do not change the order of the entries or insert new ones without
 *	aligning them with the netns init in __ip_vs_control_init().
 */
1594
1595static struct ctl_table vs_vars[] = {
1596 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597 .procname = "amemthresh",
Hans Schillstroma0840e22011-01-03 14:44:58 +01001598 .maxlen = sizeof(int),
1599 .mode = 0644,
1600 .proc_handler = proc_dointvec,
1601 },
1602 {
1603 .procname = "am_droprate",
1604 .maxlen = sizeof(int),
1605 .mode = 0644,
1606 .proc_handler = proc_dointvec,
1607 },
1608 {
1609 .procname = "drop_entry",
1610 .maxlen = sizeof(int),
1611 .mode = 0644,
1612 .proc_handler = proc_do_defense_mode,
1613 },
1614 {
1615 .procname = "drop_packet",
1616 .maxlen = sizeof(int),
1617 .mode = 0644,
1618 .proc_handler = proc_do_defense_mode,
1619 },
1620#ifdef CONFIG_IP_VS_NFCT
1621 {
1622 .procname = "conntrack",
1623 .maxlen = sizeof(int),
1624 .mode = 0644,
1625 .proc_handler = &proc_dointvec,
1626 },
1627#endif
1628 {
1629 .procname = "secure_tcp",
1630 .maxlen = sizeof(int),
1631 .mode = 0644,
1632 .proc_handler = proc_do_defense_mode,
1633 },
1634 {
1635 .procname = "snat_reroute",
1636 .maxlen = sizeof(int),
1637 .mode = 0644,
1638 .proc_handler = &proc_dointvec,
1639 },
1640 {
1641 .procname = "sync_version",
1642 .maxlen = sizeof(int),
1643 .mode = 0644,
1644 .proc_handler = &proc_do_sync_mode,
1645 },
1646 {
1647 .procname = "cache_bypass",
1648 .maxlen = sizeof(int),
1649 .mode = 0644,
1650 .proc_handler = proc_dointvec,
1651 },
1652 {
1653 .procname = "expire_nodest_conn",
1654 .maxlen = sizeof(int),
1655 .mode = 0644,
1656 .proc_handler = proc_dointvec,
1657 },
1658 {
1659 .procname = "expire_quiescent_template",
1660 .maxlen = sizeof(int),
1661 .mode = 0644,
1662 .proc_handler = proc_dointvec,
1663 },
1664 {
1665 .procname = "sync_threshold",
1666 .maxlen =
1667 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1668 .mode = 0644,
1669 .proc_handler = proc_do_sync_threshold,
1670 },
1671 {
1672 .procname = "nat_icmp_send",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673 .maxlen = sizeof(int),
1674 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001675 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 },
1677#ifdef CONFIG_IP_VS_DEBUG
1678 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 .procname = "debug_level",
1680 .data = &sysctl_ip_vs_debug_level,
1681 .maxlen = sizeof(int),
1682 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001683 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 },
1685#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686#if 0
1687 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 .procname = "timeout_established",
1689 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1690 .maxlen = sizeof(int),
1691 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001692 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693 },
1694 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 .procname = "timeout_synsent",
1696 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1697 .maxlen = sizeof(int),
1698 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001699 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 },
1701 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 .procname = "timeout_synrecv",
1703 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1704 .maxlen = sizeof(int),
1705 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001706 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707 },
1708 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 .procname = "timeout_finwait",
1710 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1711 .maxlen = sizeof(int),
1712 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001713 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714 },
1715 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716 .procname = "timeout_timewait",
1717 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1718 .maxlen = sizeof(int),
1719 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001720 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721 },
1722 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 .procname = "timeout_close",
1724 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1725 .maxlen = sizeof(int),
1726 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001727 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728 },
1729 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730 .procname = "timeout_closewait",
1731 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1732 .maxlen = sizeof(int),
1733 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001734 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001735 },
1736 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737 .procname = "timeout_lastack",
1738 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1739 .maxlen = sizeof(int),
1740 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001741 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742 },
1743 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744 .procname = "timeout_listen",
1745 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1746 .maxlen = sizeof(int),
1747 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001748 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749 },
1750 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001751 .procname = "timeout_synack",
1752 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1753 .maxlen = sizeof(int),
1754 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001755 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756 },
1757 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758 .procname = "timeout_udp",
1759 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1760 .maxlen = sizeof(int),
1761 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001762 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001763 },
1764 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765 .procname = "timeout_icmp",
1766 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1767 .maxlen = sizeof(int),
1768 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001769 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770 },
1771#endif
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001772 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773};
1774
Sven Wegener5587da52008-08-10 18:24:40 +00001775const struct ctl_path net_vs_ctl_path[] = {
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001776 { .procname = "net", },
1777 { .procname = "ipv4", },
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001778 { .procname = "vs", },
1779 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001781EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783#ifdef CONFIG_PROC_FS
1784
1785struct ip_vs_iter {
Hans Schillstromfc723252011-01-03 14:44:43 +01001786 struct seq_net_private p; /* Do not move this, netns depends upon it*/
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787 struct list_head *table;
1788 int bucket;
1789};
1790
1791/*
1792 * Write the contents of the VS rule table to a PROCfs file.
1793 * (It is kept just for backward compatibility)
1794 */
1795static inline const char *ip_vs_fwd_name(unsigned flags)
1796{
1797 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1798 case IP_VS_CONN_F_LOCALNODE:
1799 return "Local";
1800 case IP_VS_CONN_F_TUNNEL:
1801 return "Tunnel";
1802 case IP_VS_CONN_F_DROUTE:
1803 return "Route";
1804 default:
1805 return "Masq";
1806 }
1807}
1808
1809
1810/* Get the Nth entry in the two lists */
1811static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1812{
Hans Schillstromfc723252011-01-03 14:44:43 +01001813 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814 struct ip_vs_iter *iter = seq->private;
1815 int idx;
1816 struct ip_vs_service *svc;
1817
1818 /* look in hash by protocol */
1819 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1820 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001821 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822 iter->table = ip_vs_svc_table;
1823 iter->bucket = idx;
1824 return svc;
1825 }
1826 }
1827 }
1828
1829 /* keep looking in fwmark */
1830 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1831 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001832 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833 iter->table = ip_vs_svc_fwm_table;
1834 iter->bucket = idx;
1835 return svc;
1836 }
1837 }
1838 }
1839
1840 return NULL;
1841}
1842
1843static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
Simon Horman563e94f2008-09-17 10:10:42 +10001844__acquires(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001845{
1846
1847 read_lock_bh(&__ip_vs_svc_lock);
1848 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1849}
1850
1851
1852static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1853{
1854 struct list_head *e;
1855 struct ip_vs_iter *iter;
1856 struct ip_vs_service *svc;
1857
1858 ++*pos;
1859 if (v == SEQ_START_TOKEN)
1860 return ip_vs_info_array(seq,0);
1861
1862 svc = v;
1863 iter = seq->private;
1864
1865 if (iter->table == ip_vs_svc_table) {
1866 /* next service in table hashed by protocol */
1867 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1868 return list_entry(e, struct ip_vs_service, s_list);
1869
1870
1871 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1872 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1873 s_list) {
1874 return svc;
1875 }
1876 }
1877
1878 iter->table = ip_vs_svc_fwm_table;
1879 iter->bucket = -1;
1880 goto scan_fwmark;
1881 }
1882
1883 /* next service in hashed by fwmark */
1884 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1885 return list_entry(e, struct ip_vs_service, f_list);
1886
1887 scan_fwmark:
1888 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1889 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1890 f_list)
1891 return svc;
1892 }
1893
1894 return NULL;
1895}
1896
1897static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
Simon Horman563e94f2008-09-17 10:10:42 +10001898__releases(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001899{
1900 read_unlock_bh(&__ip_vs_svc_lock);
1901}
1902
1903
1904static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1905{
1906 if (v == SEQ_START_TOKEN) {
1907 seq_printf(seq,
1908 "IP Virtual Server version %d.%d.%d (size=%d)\n",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01001909 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910 seq_puts(seq,
1911 "Prot LocalAddress:Port Scheduler Flags\n");
1912 seq_puts(seq,
1913 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1914 } else {
1915 const struct ip_vs_service *svc = v;
1916 const struct ip_vs_iter *iter = seq->private;
1917 const struct ip_vs_dest *dest;
1918
Vince Busam667a5f12008-09-02 15:55:49 +02001919 if (iter->table == ip_vs_svc_table) {
1920#ifdef CONFIG_IP_VS_IPV6
1921 if (svc->af == AF_INET6)
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001922 seq_printf(seq, "%s [%pI6]:%04X %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001923 ip_vs_proto_name(svc->protocol),
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001924 &svc->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001925 ntohs(svc->port),
1926 svc->scheduler->name);
1927 else
1928#endif
Nick Chalk26ec0372010-06-22 08:07:01 +02001929 seq_printf(seq, "%s %08X:%04X %s %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001930 ip_vs_proto_name(svc->protocol),
1931 ntohl(svc->addr.ip),
1932 ntohs(svc->port),
Nick Chalk26ec0372010-06-22 08:07:01 +02001933 svc->scheduler->name,
1934 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001935 } else {
Nick Chalk26ec0372010-06-22 08:07:01 +02001936 seq_printf(seq, "FWM %08X %s %s",
1937 svc->fwmark, svc->scheduler->name,
1938 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001939 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940
1941 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1942 seq_printf(seq, "persistent %d %08X\n",
1943 svc->timeout,
1944 ntohl(svc->netmask));
1945 else
1946 seq_putc(seq, '\n');
1947
1948 list_for_each_entry(dest, &svc->destinations, n_list) {
Vince Busam667a5f12008-09-02 15:55:49 +02001949#ifdef CONFIG_IP_VS_IPV6
1950 if (dest->af == AF_INET6)
1951 seq_printf(seq,
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001952 " -> [%pI6]:%04X"
Vince Busam667a5f12008-09-02 15:55:49 +02001953 " %-7s %-6d %-10d %-10d\n",
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001954 &dest->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001955 ntohs(dest->port),
1956 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1957 atomic_read(&dest->weight),
1958 atomic_read(&dest->activeconns),
1959 atomic_read(&dest->inactconns));
1960 else
1961#endif
1962 seq_printf(seq,
1963 " -> %08X:%04X "
1964 "%-7s %-6d %-10d %-10d\n",
1965 ntohl(dest->addr.ip),
1966 ntohs(dest->port),
1967 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1968 atomic_read(&dest->weight),
1969 atomic_read(&dest->activeconns),
1970 atomic_read(&dest->inactconns));
1971
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972 }
1973 }
1974 return 0;
1975}
1976
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001977static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978 .start = ip_vs_info_seq_start,
1979 .next = ip_vs_info_seq_next,
1980 .stop = ip_vs_info_seq_stop,
1981 .show = ip_vs_info_seq_show,
1982};
1983
1984static int ip_vs_info_open(struct inode *inode, struct file *file)
1985{
Hans Schillstromfc723252011-01-03 14:44:43 +01001986 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001987 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988}
1989
Arjan van de Ven9a321442007-02-12 00:55:35 -08001990static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991 .owner = THIS_MODULE,
1992 .open = ip_vs_info_open,
1993 .read = seq_read,
1994 .llseek = seq_lseek,
1995 .release = seq_release_private,
1996};
1997
1998#endif
1999
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000#ifdef CONFIG_PROC_FS
2001static int ip_vs_stats_show(struct seq_file *seq, void *v)
2002{
Hans Schillstromb17fc992011-01-03 14:44:56 +01002003 struct net *net = seq_file_single_net(seq);
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002004 struct ip_vs_stats_user show;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002005
2006/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2007 seq_puts(seq,
2008 " Total Incoming Outgoing Incoming Outgoing\n");
2009 seq_printf(seq,
2010 " Conns Packets Packets Bytes Bytes\n");
2011
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002012 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2013 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2014 show.inpkts, show.outpkts,
2015 (unsigned long long) show.inbytes,
2016 (unsigned long long) show.outbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002017
2018/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2019 seq_puts(seq,
2020 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002021 seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2022 show.cps, show.inpps, show.outpps,
2023 show.inbps, show.outbps);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024
2025 return 0;
2026}
2027
2028static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2029{
Hans Schillstromfc723252011-01-03 14:44:43 +01002030 return single_open_net(inode, file, ip_vs_stats_show);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002031}
2032
Arjan van de Ven9a321442007-02-12 00:55:35 -08002033static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002034 .owner = THIS_MODULE,
2035 .open = ip_vs_stats_seq_open,
2036 .read = seq_read,
2037 .llseek = seq_lseek,
2038 .release = single_release,
2039};
2040
Hans Schillstromb17fc992011-01-03 14:44:56 +01002041static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2042{
2043 struct net *net = seq_file_single_net(seq);
Julian Anastasov2a0751a2011-03-04 12:20:35 +02002044 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2045 struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
Hans Schillstromb17fc992011-01-03 14:44:56 +01002046 int i;
2047
2048/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2049 seq_puts(seq,
2050 " Total Incoming Outgoing Incoming Outgoing\n");
2051 seq_printf(seq,
2052 "CPU Conns Packets Packets Bytes Bytes\n");
2053
2054 for_each_possible_cpu(i) {
Julian Anastasov2a0751a2011-03-04 12:20:35 +02002055 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2056 unsigned int start;
2057 __u64 inbytes, outbytes;
2058
2059 do {
2060 start = u64_stats_fetch_begin_bh(&u->syncp);
2061 inbytes = u->ustats.inbytes;
2062 outbytes = u->ustats.outbytes;
2063 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2064
Hans Schillstromb17fc992011-01-03 14:44:56 +01002065 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
Julian Anastasov2a0751a2011-03-04 12:20:35 +02002066 i, u->ustats.conns, u->ustats.inpkts,
2067 u->ustats.outpkts, (__u64)inbytes,
2068 (__u64)outbytes);
Hans Schillstromb17fc992011-01-03 14:44:56 +01002069 }
2070
2071 spin_lock_bh(&tot_stats->lock);
2072 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2073 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2074 tot_stats->ustats.outpkts,
2075 (unsigned long long) tot_stats->ustats.inbytes,
2076 (unsigned long long) tot_stats->ustats.outbytes);
2077
2078/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2079 seq_puts(seq,
2080 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2081 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2082 tot_stats->ustats.cps,
2083 tot_stats->ustats.inpps,
2084 tot_stats->ustats.outpps,
2085 tot_stats->ustats.inbps,
2086 tot_stats->ustats.outbps);
2087 spin_unlock_bh(&tot_stats->lock);
2088
2089 return 0;
2090}
2091
2092static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2093{
2094 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2095}
2096
2097static const struct file_operations ip_vs_stats_percpu_fops = {
2098 .owner = THIS_MODULE,
2099 .open = ip_vs_stats_percpu_seq_open,
2100 .read = seq_read,
2101 .llseek = seq_lseek,
2102 .release = single_release,
2103};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104#endif
2105
2106/*
2107 * Set timeout values for tcp tcpfin udp in the timeout_table.
2108 */
Hans Schillstrom93304192011-01-03 14:44:51 +01002109static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002110{
Changli Gao091bb342011-01-21 18:02:13 +08002111#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
Hans Schillstrom93304192011-01-03 14:44:51 +01002112 struct ip_vs_proto_data *pd;
Changli Gao091bb342011-01-21 18:02:13 +08002113#endif
Hans Schillstrom93304192011-01-03 14:44:51 +01002114
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2116 u->tcp_timeout,
2117 u->tcp_fin_timeout,
2118 u->udp_timeout);
2119
2120#ifdef CONFIG_IP_VS_PROTO_TCP
2121 if (u->tcp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002122 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2123 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002124 = u->tcp_timeout * HZ;
2125 }
2126
2127 if (u->tcp_fin_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002128 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2129 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002130 = u->tcp_fin_timeout * HZ;
2131 }
2132#endif
2133
2134#ifdef CONFIG_IP_VS_PROTO_UDP
2135 if (u->udp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002136 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2137 pd->timeout_table[IP_VS_UDP_S_NORMAL]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138 = u->udp_timeout * HZ;
2139 }
2140#endif
2141 return 0;
2142}
2143
2144
/* Index of a set-sockopt command relative to IP_VS_BASE_CTL */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
/* Expected argument sizes of the set-sockopt commands */
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
/* Size of the on-stack argument buffer: the largest of the above */
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2152
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002153static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2155 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2156 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2157 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2158 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2159 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2160 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2161 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2162 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2163 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2164 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2165};
2166
Julius Volzc860c6b2008-09-02 15:55:36 +02002167static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2168 struct ip_vs_service_user *usvc_compat)
2169{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002170 memset(usvc, 0, sizeof(*usvc));
2171
Julius Volzc860c6b2008-09-02 15:55:36 +02002172 usvc->af = AF_INET;
2173 usvc->protocol = usvc_compat->protocol;
2174 usvc->addr.ip = usvc_compat->addr;
2175 usvc->port = usvc_compat->port;
2176 usvc->fwmark = usvc_compat->fwmark;
2177
2178 /* Deep copy of sched_name is not needed here */
2179 usvc->sched_name = usvc_compat->sched_name;
2180
2181 usvc->flags = usvc_compat->flags;
2182 usvc->timeout = usvc_compat->timeout;
2183 usvc->netmask = usvc_compat->netmask;
2184}
2185
2186static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2187 struct ip_vs_dest_user *udest_compat)
2188{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002189 memset(udest, 0, sizeof(*udest));
2190
Julius Volzc860c6b2008-09-02 15:55:36 +02002191 udest->addr.ip = udest_compat->addr;
2192 udest->port = udest_compat->port;
2193 udest->conn_flags = udest_compat->conn_flags;
2194 udest->weight = udest_compat->weight;
2195 udest->u_threshold = udest_compat->u_threshold;
2196 udest->l_threshold = udest_compat->l_threshold;
2197}
2198
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199static int
2200do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2201{
Hans Schillstromfc723252011-01-03 14:44:43 +01002202 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203 int ret;
2204 unsigned char arg[MAX_ARG_LEN];
Julius Volzc860c6b2008-09-02 15:55:36 +02002205 struct ip_vs_service_user *usvc_compat;
2206 struct ip_vs_service_user_kern usvc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207 struct ip_vs_service *svc;
Julius Volzc860c6b2008-09-02 15:55:36 +02002208 struct ip_vs_dest_user *udest_compat;
2209 struct ip_vs_dest_user_kern udest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210
2211 if (!capable(CAP_NET_ADMIN))
2212 return -EPERM;
2213
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002214 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2215 return -EINVAL;
2216 if (len < 0 || len > MAX_ARG_LEN)
2217 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218 if (len != set_arglen[SET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002219 pr_err("set_ctl: len %u != %u\n",
2220 len, set_arglen[SET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002221 return -EINVAL;
2222 }
2223
2224 if (copy_from_user(arg, user, len) != 0)
2225 return -EFAULT;
2226
2227 /* increase the module use count */
2228 ip_vs_use_count_inc();
2229
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002230 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231 ret = -ERESTARTSYS;
2232 goto out_dec;
2233 }
2234
2235 if (cmd == IP_VS_SO_SET_FLUSH) {
2236 /* Flush the virtual service */
Hans Schillstromfc723252011-01-03 14:44:43 +01002237 ret = ip_vs_flush(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238 goto out_unlock;
2239 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2240 /* Set timeout values for (tcp tcpfin udp) */
Hans Schillstrom93304192011-01-03 14:44:51 +01002241 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002242 goto out_unlock;
2243 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2244 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002245 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2246 dm->syncid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002247 goto out_unlock;
2248 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2249 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002250 ret = stop_sync_thread(net, dm->state);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002251 goto out_unlock;
2252 }
2253
Julius Volzc860c6b2008-09-02 15:55:36 +02002254 usvc_compat = (struct ip_vs_service_user *)arg;
2255 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2256
2257 /* We only use the new structs internally, so copy userspace compat
2258 * structs to extended internal versions */
2259 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2260 ip_vs_copy_udest_compat(&udest, udest_compat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261
2262 if (cmd == IP_VS_SO_SET_ZERO) {
2263 /* if no service address is set, zero counters in all */
Julius Volzc860c6b2008-09-02 15:55:36 +02002264 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002265 ret = ip_vs_zero_all(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002266 goto out_unlock;
2267 }
2268 }
2269
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +01002270 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2271 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2272 usvc.protocol != IPPROTO_SCTP) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002273 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2274 usvc.protocol, &usvc.addr.ip,
2275 ntohs(usvc.port), usvc.sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002276 ret = -EFAULT;
2277 goto out_unlock;
2278 }
2279
2280 /* Lookup the exact service by <protocol, addr, port> or fwmark */
Julius Volzc860c6b2008-09-02 15:55:36 +02002281 if (usvc.fwmark == 0)
Hans Schillstromfc723252011-01-03 14:44:43 +01002282 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002283 &usvc.addr, usvc.port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002284 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002285 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002286
2287 if (cmd != IP_VS_SO_SET_ADD
Julius Volzc860c6b2008-09-02 15:55:36 +02002288 && (svc == NULL || svc->protocol != usvc.protocol)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002289 ret = -ESRCH;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002290 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291 }
2292
2293 switch (cmd) {
2294 case IP_VS_SO_SET_ADD:
2295 if (svc != NULL)
2296 ret = -EEXIST;
2297 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002298 ret = ip_vs_add_service(net, &usvc, &svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002299 break;
2300 case IP_VS_SO_SET_EDIT:
Julius Volzc860c6b2008-09-02 15:55:36 +02002301 ret = ip_vs_edit_service(svc, &usvc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302 break;
2303 case IP_VS_SO_SET_DEL:
2304 ret = ip_vs_del_service(svc);
2305 if (!ret)
2306 goto out_unlock;
2307 break;
2308 case IP_VS_SO_SET_ZERO:
2309 ret = ip_vs_zero_service(svc);
2310 break;
2311 case IP_VS_SO_SET_ADDDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002312 ret = ip_vs_add_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002313 break;
2314 case IP_VS_SO_SET_EDITDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002315 ret = ip_vs_edit_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002316 break;
2317 case IP_VS_SO_SET_DELDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002318 ret = ip_vs_del_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002319 break;
2320 default:
2321 ret = -EINVAL;
2322 }
2323
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002325 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326 out_dec:
2327 /* decrease the module use count */
2328 ip_vs_use_count_dec();
2329
2330 return ret;
2331}
2332
2333
2334static void
Linus Torvalds1da177e2005-04-16 15:20:36 -07002335ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2336{
2337 dst->protocol = src->protocol;
Julius Volze7ade462008-09-02 15:55:33 +02002338 dst->addr = src->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002339 dst->port = src->port;
2340 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002341 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002342 dst->flags = src->flags;
2343 dst->timeout = src->timeout / HZ;
2344 dst->netmask = src->netmask;
2345 dst->num_dests = src->num_dests;
2346 ip_vs_copy_stats(&dst->stats, &src->stats);
2347}
2348
2349static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002350__ip_vs_get_service_entries(struct net *net,
2351 const struct ip_vs_get_services *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352 struct ip_vs_get_services __user *uptr)
2353{
2354 int idx, count=0;
2355 struct ip_vs_service *svc;
2356 struct ip_vs_service_entry entry;
2357 int ret = 0;
2358
2359 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2360 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002361 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002362 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002363 continue;
2364
Linus Torvalds1da177e2005-04-16 15:20:36 -07002365 if (count >= get->num_services)
2366 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002367 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002368 ip_vs_copy_service(&entry, svc);
2369 if (copy_to_user(&uptr->entrytable[count],
2370 &entry, sizeof(entry))) {
2371 ret = -EFAULT;
2372 goto out;
2373 }
2374 count++;
2375 }
2376 }
2377
2378 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2379 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002380 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002381 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002382 continue;
2383
Linus Torvalds1da177e2005-04-16 15:20:36 -07002384 if (count >= get->num_services)
2385 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002386 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002387 ip_vs_copy_service(&entry, svc);
2388 if (copy_to_user(&uptr->entrytable[count],
2389 &entry, sizeof(entry))) {
2390 ret = -EFAULT;
2391 goto out;
2392 }
2393 count++;
2394 }
2395 }
2396 out:
2397 return ret;
2398}
2399
2400static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002401__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002402 struct ip_vs_get_dests __user *uptr)
2403{
2404 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002405 union nf_inet_addr addr = { .ip = get->addr };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406 int ret = 0;
2407
2408 if (get->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002409 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002411 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002412 get->port);
Julius Volzb18610d2008-09-02 15:55:37 +02002413
Linus Torvalds1da177e2005-04-16 15:20:36 -07002414 if (svc) {
2415 int count = 0;
2416 struct ip_vs_dest *dest;
2417 struct ip_vs_dest_entry entry;
2418
2419 list_for_each_entry(dest, &svc->destinations, n_list) {
2420 if (count >= get->num_dests)
2421 break;
2422
Julius Volze7ade462008-09-02 15:55:33 +02002423 entry.addr = dest->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002424 entry.port = dest->port;
2425 entry.conn_flags = atomic_read(&dest->conn_flags);
2426 entry.weight = atomic_read(&dest->weight);
2427 entry.u_threshold = dest->u_threshold;
2428 entry.l_threshold = dest->l_threshold;
2429 entry.activeconns = atomic_read(&dest->activeconns);
2430 entry.inactconns = atomic_read(&dest->inactconns);
2431 entry.persistconns = atomic_read(&dest->persistconns);
2432 ip_vs_copy_stats(&entry.stats, &dest->stats);
2433 if (copy_to_user(&uptr->entrytable[count],
2434 &entry, sizeof(entry))) {
2435 ret = -EFAULT;
2436 break;
2437 }
2438 count++;
2439 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002440 } else
2441 ret = -ESRCH;
2442 return ret;
2443}
2444
2445static inline void
Hans Schillstrom93304192011-01-03 14:44:51 +01002446__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002447{
Changli Gao091bb342011-01-21 18:02:13 +08002448#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
Hans Schillstrom93304192011-01-03 14:44:51 +01002449 struct ip_vs_proto_data *pd;
Changli Gao091bb342011-01-21 18:02:13 +08002450#endif
Hans Schillstrom93304192011-01-03 14:44:51 +01002451
Linus Torvalds1da177e2005-04-16 15:20:36 -07002452#ifdef CONFIG_IP_VS_PROTO_TCP
Hans Schillstrom93304192011-01-03 14:44:51 +01002453 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2454 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2455 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002456#endif
2457#ifdef CONFIG_IP_VS_PROTO_UDP
Hans Schillstrom93304192011-01-03 14:44:51 +01002458 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002459 u->udp_timeout =
Hans Schillstrom93304192011-01-03 14:44:51 +01002460 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002461#endif
2462}
2463
2464
2465#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2466#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2467#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2468#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2469#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2470#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2471#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2472
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002473static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002474 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2475 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2476 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2477 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2478 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2479 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2480 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2481};
2482
2483static int
2484do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2485{
2486 unsigned char arg[128];
2487 int ret = 0;
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002488 unsigned int copylen;
Hans Schillstromfc723252011-01-03 14:44:43 +01002489 struct net *net = sock_net(sk);
Hans Schillstromf1313152011-01-03 14:44:55 +01002490 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491
Hans Schillstromfc723252011-01-03 14:44:43 +01002492 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002493 if (!capable(CAP_NET_ADMIN))
2494 return -EPERM;
2495
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002496 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2497 return -EINVAL;
2498
Linus Torvalds1da177e2005-04-16 15:20:36 -07002499 if (*len < get_arglen[GET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002500 pr_err("get_ctl: len %u < %u\n",
2501 *len, get_arglen[GET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002502 return -EINVAL;
2503 }
2504
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002505 copylen = get_arglen[GET_CMDID(cmd)];
2506 if (copylen > 128)
2507 return -EINVAL;
2508
2509 if (copy_from_user(arg, user, copylen) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002510 return -EFAULT;
2511
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002512 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002513 return -ERESTARTSYS;
2514
2515 switch (cmd) {
2516 case IP_VS_SO_GET_VERSION:
2517 {
2518 char buf[64];
2519
2520 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002521 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2523 ret = -EFAULT;
2524 goto out;
2525 }
2526 *len = strlen(buf)+1;
2527 }
2528 break;
2529
2530 case IP_VS_SO_GET_INFO:
2531 {
2532 struct ip_vs_getinfo info;
2533 info.version = IP_VS_VERSION_CODE;
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002534 info.size = ip_vs_conn_tab_size;
Hans Schillstroma0840e22011-01-03 14:44:58 +01002535 info.num_services = ipvs->num_services;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536 if (copy_to_user(user, &info, sizeof(info)) != 0)
2537 ret = -EFAULT;
2538 }
2539 break;
2540
2541 case IP_VS_SO_GET_SERVICES:
2542 {
2543 struct ip_vs_get_services *get;
2544 int size;
2545
2546 get = (struct ip_vs_get_services *)arg;
2547 size = sizeof(*get) +
2548 sizeof(struct ip_vs_service_entry) * get->num_services;
2549 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002550 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551 ret = -EINVAL;
2552 goto out;
2553 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002554 ret = __ip_vs_get_service_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002555 }
2556 break;
2557
2558 case IP_VS_SO_GET_SERVICE:
2559 {
2560 struct ip_vs_service_entry *entry;
2561 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002562 union nf_inet_addr addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002563
2564 entry = (struct ip_vs_service_entry *)arg;
Julius Volzb18610d2008-09-02 15:55:37 +02002565 addr.ip = entry->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002566 if (entry->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002567 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002568 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002569 svc = __ip_vs_service_find(net, AF_INET,
2570 entry->protocol, &addr,
2571 entry->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002572 if (svc) {
2573 ip_vs_copy_service(entry, svc);
2574 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2575 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002576 } else
2577 ret = -ESRCH;
2578 }
2579 break;
2580
2581 case IP_VS_SO_GET_DESTS:
2582 {
2583 struct ip_vs_get_dests *get;
2584 int size;
2585
2586 get = (struct ip_vs_get_dests *)arg;
2587 size = sizeof(*get) +
2588 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2589 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002590 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002591 ret = -EINVAL;
2592 goto out;
2593 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002594 ret = __ip_vs_get_dest_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002595 }
2596 break;
2597
2598 case IP_VS_SO_GET_TIMEOUT:
2599 {
2600 struct ip_vs_timeout_user t;
2601
Hans Schillstrom93304192011-01-03 14:44:51 +01002602 __ip_vs_get_timeouts(net, &t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603 if (copy_to_user(user, &t, sizeof(t)) != 0)
2604 ret = -EFAULT;
2605 }
2606 break;
2607
2608 case IP_VS_SO_GET_DAEMON:
2609 {
2610 struct ip_vs_daemon_user d[2];
2611
2612 memset(&d, 0, sizeof(d));
Hans Schillstromf1313152011-01-03 14:44:55 +01002613 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002614 d[0].state = IP_VS_STATE_MASTER;
Hans Schillstromf1313152011-01-03 14:44:55 +01002615 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2616 sizeof(d[0].mcast_ifn));
2617 d[0].syncid = ipvs->master_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002618 }
Hans Schillstromf1313152011-01-03 14:44:55 +01002619 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002620 d[1].state = IP_VS_STATE_BACKUP;
Hans Schillstromf1313152011-01-03 14:44:55 +01002621 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2622 sizeof(d[1].mcast_ifn));
2623 d[1].syncid = ipvs->backup_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002624 }
2625 if (copy_to_user(user, &d, sizeof(d)) != 0)
2626 ret = -EFAULT;
2627 }
2628 break;
2629
2630 default:
2631 ret = -EINVAL;
2632 }
2633
2634 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002635 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002636 return ret;
2637}
2638
2639
2640static struct nf_sockopt_ops ip_vs_sockopts = {
2641 .pf = PF_INET,
2642 .set_optmin = IP_VS_BASE_CTL,
2643 .set_optmax = IP_VS_SO_SET_MAX+1,
2644 .set = do_ip_vs_set_ctl,
2645 .get_optmin = IP_VS_BASE_CTL,
2646 .get_optmax = IP_VS_SO_GET_MAX+1,
2647 .get = do_ip_vs_get_ctl,
Neil Horman16fcec32007-09-11 11:28:26 +02002648 .owner = THIS_MODULE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002649};
2650
Julius Volz9a812192008-08-14 14:08:44 +02002651/*
2652 * Generic Netlink interface
2653 */
2654
2655/* IPVS genetlink family */
2656static struct genl_family ip_vs_genl_family = {
2657 .id = GENL_ID_GENERATE,
2658 .hdrsize = 0,
2659 .name = IPVS_GENL_NAME,
2660 .version = IPVS_GENL_VERSION,
2661 .maxattr = IPVS_CMD_MAX,
Hans Schillstromc6d2d442011-01-03 14:45:03 +01002662 .netnsok = true, /* Make ipvsadm to work on netns */
Julius Volz9a812192008-08-14 14:08:44 +02002663};
2664
2665/* Policy used for first-level command attributes */
2666static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2667 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2668 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2669 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2670 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2671 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2672 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2673};
2674
2675/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2676static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2677 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2678 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2679 .len = IP_VS_IFNAME_MAXLEN },
2680 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2681};
2682
2683/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2684static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2685 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2686 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2687 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2688 .len = sizeof(union nf_inet_addr) },
2689 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2690 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2691 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2692 .len = IP_VS_SCHEDNAME_MAXLEN },
Simon Horman0d1e71b2010-08-22 21:37:54 +09002693 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2694 .len = IP_VS_PENAME_MAXLEN },
Julius Volz9a812192008-08-14 14:08:44 +02002695 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2696 .len = sizeof(struct ip_vs_flags) },
2697 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2698 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2699 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2700};
2701
2702/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2703static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2704 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2705 .len = sizeof(union nf_inet_addr) },
2706 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2707 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2708 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2709 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2710 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2711 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2712 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2713 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2714 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2715};
2716
2717static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2718 struct ip_vs_stats *stats)
2719{
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002720 struct ip_vs_stats_user ustats;
Julius Volz9a812192008-08-14 14:08:44 +02002721 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2722 if (!nl_stats)
2723 return -EMSGSIZE;
2724
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002725 ip_vs_copy_stats(&ustats, stats);
Julius Volz9a812192008-08-14 14:08:44 +02002726
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002727 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2728 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2729 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2730 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2731 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2732 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2733 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2734 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2735 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2736 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
Julius Volz9a812192008-08-14 14:08:44 +02002737
2738 nla_nest_end(skb, nl_stats);
2739
2740 return 0;
2741
2742nla_put_failure:
Julius Volz9a812192008-08-14 14:08:44 +02002743 nla_nest_cancel(skb, nl_stats);
2744 return -EMSGSIZE;
2745}
2746
2747static int ip_vs_genl_fill_service(struct sk_buff *skb,
2748 struct ip_vs_service *svc)
2749{
2750 struct nlattr *nl_service;
2751 struct ip_vs_flags flags = { .flags = svc->flags,
2752 .mask = ~0 };
2753
2754 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2755 if (!nl_service)
2756 return -EMSGSIZE;
2757
Julius Volzf94fd042008-09-02 15:55:55 +02002758 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
Julius Volz9a812192008-08-14 14:08:44 +02002759
2760 if (svc->fwmark) {
2761 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2762 } else {
2763 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2764 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2765 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2766 }
2767
2768 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002769 if (svc->pe)
2770 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
Julius Volz9a812192008-08-14 14:08:44 +02002771 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2772 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2773 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2774
2775 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2776 goto nla_put_failure;
2777
2778 nla_nest_end(skb, nl_service);
2779
2780 return 0;
2781
2782nla_put_failure:
2783 nla_nest_cancel(skb, nl_service);
2784 return -EMSGSIZE;
2785}
2786
2787static int ip_vs_genl_dump_service(struct sk_buff *skb,
2788 struct ip_vs_service *svc,
2789 struct netlink_callback *cb)
2790{
2791 void *hdr;
2792
2793 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2794 &ip_vs_genl_family, NLM_F_MULTI,
2795 IPVS_CMD_NEW_SERVICE);
2796 if (!hdr)
2797 return -EMSGSIZE;
2798
2799 if (ip_vs_genl_fill_service(skb, svc) < 0)
2800 goto nla_put_failure;
2801
2802 return genlmsg_end(skb, hdr);
2803
2804nla_put_failure:
2805 genlmsg_cancel(skb, hdr);
2806 return -EMSGSIZE;
2807}
2808
2809static int ip_vs_genl_dump_services(struct sk_buff *skb,
2810 struct netlink_callback *cb)
2811{
2812 int idx = 0, i;
2813 int start = cb->args[0];
2814 struct ip_vs_service *svc;
Hans Schillstromfc723252011-01-03 14:44:43 +01002815 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002816
2817 mutex_lock(&__ip_vs_mutex);
2818 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2819 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002820 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002821 continue;
2822 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2823 idx--;
2824 goto nla_put_failure;
2825 }
2826 }
2827 }
2828
2829 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2830 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002831 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002832 continue;
2833 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2834 idx--;
2835 goto nla_put_failure;
2836 }
2837 }
2838 }
2839
2840nla_put_failure:
2841 mutex_unlock(&__ip_vs_mutex);
2842 cb->args[0] = idx;
2843
2844 return skb->len;
2845}
2846
Hans Schillstromfc723252011-01-03 14:44:43 +01002847static int ip_vs_genl_parse_service(struct net *net,
2848 struct ip_vs_service_user_kern *usvc,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002849 struct nlattr *nla, int full_entry,
2850 struct ip_vs_service **ret_svc)
Julius Volz9a812192008-08-14 14:08:44 +02002851{
2852 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2853 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002854 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002855
2856 /* Parse mandatory identifying service fields first */
2857 if (nla == NULL ||
2858 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2859 return -EINVAL;
2860
2861 nla_af = attrs[IPVS_SVC_ATTR_AF];
2862 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2863 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2864 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2865 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2866
2867 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2868 return -EINVAL;
2869
Simon Horman258c8892009-12-15 17:01:25 +01002870 memset(usvc, 0, sizeof(*usvc));
2871
Julius Volzc860c6b2008-09-02 15:55:36 +02002872 usvc->af = nla_get_u16(nla_af);
Julius Volzf94fd042008-09-02 15:55:55 +02002873#ifdef CONFIG_IP_VS_IPV6
2874 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2875#else
2876 if (usvc->af != AF_INET)
2877#endif
Julius Volz9a812192008-08-14 14:08:44 +02002878 return -EAFNOSUPPORT;
2879
2880 if (nla_fwmark) {
2881 usvc->protocol = IPPROTO_TCP;
2882 usvc->fwmark = nla_get_u32(nla_fwmark);
2883 } else {
2884 usvc->protocol = nla_get_u16(nla_protocol);
2885 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2886 usvc->port = nla_get_u16(nla_port);
2887 usvc->fwmark = 0;
2888 }
2889
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002890 if (usvc->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002891 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002892 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002893 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002894 &usvc->addr, usvc->port);
2895 *ret_svc = svc;
2896
Julius Volz9a812192008-08-14 14:08:44 +02002897 /* If a full entry was requested, check for the additional fields */
2898 if (full_entry) {
Simon Horman0d1e71b2010-08-22 21:37:54 +09002899 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
Julius Volz9a812192008-08-14 14:08:44 +02002900 *nla_netmask;
2901 struct ip_vs_flags flags;
Julius Volz9a812192008-08-14 14:08:44 +02002902
2903 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
Simon Horman0d1e71b2010-08-22 21:37:54 +09002904 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
Julius Volz9a812192008-08-14 14:08:44 +02002905 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2906 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2907 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2908
2909 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2910 return -EINVAL;
2911
2912 nla_memcpy(&flags, nla_flags, sizeof(flags));
2913
2914 /* prefill flags from service if it already exists */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002915 if (svc)
Julius Volz9a812192008-08-14 14:08:44 +02002916 usvc->flags = svc->flags;
Julius Volz9a812192008-08-14 14:08:44 +02002917
2918 /* set new flags from userland */
2919 usvc->flags = (usvc->flags & ~flags.mask) |
2920 (flags.flags & flags.mask);
Julius Volzc860c6b2008-09-02 15:55:36 +02002921 usvc->sched_name = nla_data(nla_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002922 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
Julius Volz9a812192008-08-14 14:08:44 +02002923 usvc->timeout = nla_get_u32(nla_timeout);
2924 usvc->netmask = nla_get_u32(nla_netmask);
2925 }
2926
2927 return 0;
2928}
2929
Hans Schillstromfc723252011-01-03 14:44:43 +01002930static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2931 struct nlattr *nla)
Julius Volz9a812192008-08-14 14:08:44 +02002932{
Julius Volzc860c6b2008-09-02 15:55:36 +02002933 struct ip_vs_service_user_kern usvc;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002934 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002935 int ret;
2936
Hans Schillstromfc723252011-01-03 14:44:43 +01002937 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002938 return ret ? ERR_PTR(ret) : svc;
Julius Volz9a812192008-08-14 14:08:44 +02002939}
2940
2941static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2942{
2943 struct nlattr *nl_dest;
2944
2945 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2946 if (!nl_dest)
2947 return -EMSGSIZE;
2948
2949 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2950 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2951
2952 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2953 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2954 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2955 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2956 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2957 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2958 atomic_read(&dest->activeconns));
2959 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2960 atomic_read(&dest->inactconns));
2961 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2962 atomic_read(&dest->persistconns));
2963
2964 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2965 goto nla_put_failure;
2966
2967 nla_nest_end(skb, nl_dest);
2968
2969 return 0;
2970
2971nla_put_failure:
2972 nla_nest_cancel(skb, nl_dest);
2973 return -EMSGSIZE;
2974}
2975
2976static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2977 struct netlink_callback *cb)
2978{
2979 void *hdr;
2980
2981 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2982 &ip_vs_genl_family, NLM_F_MULTI,
2983 IPVS_CMD_NEW_DEST);
2984 if (!hdr)
2985 return -EMSGSIZE;
2986
2987 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2988 goto nla_put_failure;
2989
2990 return genlmsg_end(skb, hdr);
2991
2992nla_put_failure:
2993 genlmsg_cancel(skb, hdr);
2994 return -EMSGSIZE;
2995}
2996
2997static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2998 struct netlink_callback *cb)
2999{
3000 int idx = 0;
3001 int start = cb->args[0];
3002 struct ip_vs_service *svc;
3003 struct ip_vs_dest *dest;
3004 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
Hans Schillstroma0840e22011-01-03 14:44:58 +01003005 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02003006
3007 mutex_lock(&__ip_vs_mutex);
3008
3009 /* Try to find the service for which to dump destinations */
3010 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3011 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3012 goto out_err;
3013
Hans Schillstroma0840e22011-01-03 14:44:58 +01003014
Hans Schillstromfc723252011-01-03 14:44:43 +01003015 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003016 if (IS_ERR(svc) || svc == NULL)
3017 goto out_err;
3018
3019 /* Dump the destinations */
3020 list_for_each_entry(dest, &svc->destinations, n_list) {
3021 if (++idx <= start)
3022 continue;
3023 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3024 idx--;
3025 goto nla_put_failure;
3026 }
3027 }
3028
3029nla_put_failure:
3030 cb->args[0] = idx;
Julius Volz9a812192008-08-14 14:08:44 +02003031
3032out_err:
3033 mutex_unlock(&__ip_vs_mutex);
3034
3035 return skb->len;
3036}
3037
Julius Volzc860c6b2008-09-02 15:55:36 +02003038static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
Julius Volz9a812192008-08-14 14:08:44 +02003039 struct nlattr *nla, int full_entry)
3040{
3041 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3042 struct nlattr *nla_addr, *nla_port;
3043
3044 /* Parse mandatory identifying destination fields first */
3045 if (nla == NULL ||
3046 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3047 return -EINVAL;
3048
3049 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3050 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3051
3052 if (!(nla_addr && nla_port))
3053 return -EINVAL;
3054
Simon Horman258c8892009-12-15 17:01:25 +01003055 memset(udest, 0, sizeof(*udest));
3056
Julius Volz9a812192008-08-14 14:08:44 +02003057 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3058 udest->port = nla_get_u16(nla_port);
3059
3060 /* If a full entry was requested, check for the additional fields */
3061 if (full_entry) {
3062 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3063 *nla_l_thresh;
3064
3065 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3066 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3067 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3068 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3069
3070 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3071 return -EINVAL;
3072
3073 udest->conn_flags = nla_get_u32(nla_fwd)
3074 & IP_VS_CONN_F_FWD_MASK;
3075 udest->weight = nla_get_u32(nla_weight);
3076 udest->u_threshold = nla_get_u32(nla_u_thresh);
3077 udest->l_threshold = nla_get_u32(nla_l_thresh);
3078 }
3079
3080 return 0;
3081}
3082
3083static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3084 const char *mcast_ifn, __be32 syncid)
3085{
3086 struct nlattr *nl_daemon;
3087
3088 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3089 if (!nl_daemon)
3090 return -EMSGSIZE;
3091
3092 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3093 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3094 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3095
3096 nla_nest_end(skb, nl_daemon);
3097
3098 return 0;
3099
3100nla_put_failure:
3101 nla_nest_cancel(skb, nl_daemon);
3102 return -EMSGSIZE;
3103}
3104
3105static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3106 const char *mcast_ifn, __be32 syncid,
3107 struct netlink_callback *cb)
3108{
3109 void *hdr;
3110 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3111 &ip_vs_genl_family, NLM_F_MULTI,
3112 IPVS_CMD_NEW_DAEMON);
3113 if (!hdr)
3114 return -EMSGSIZE;
3115
3116 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3117 goto nla_put_failure;
3118
3119 return genlmsg_end(skb, hdr);
3120
3121nla_put_failure:
3122 genlmsg_cancel(skb, hdr);
3123 return -EMSGSIZE;
3124}
3125
3126static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3127 struct netlink_callback *cb)
3128{
Hans Schillstromf1313152011-01-03 14:44:55 +01003129 struct net *net = skb_net(skb);
3130 struct netns_ipvs *ipvs = net_ipvs(net);
3131
Julius Volz9a812192008-08-14 14:08:44 +02003132 mutex_lock(&__ip_vs_mutex);
Hans Schillstromf1313152011-01-03 14:44:55 +01003133 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
Julius Volz9a812192008-08-14 14:08:44 +02003134 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
Hans Schillstromf1313152011-01-03 14:44:55 +01003135 ipvs->master_mcast_ifn,
3136 ipvs->master_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003137 goto nla_put_failure;
3138
3139 cb->args[0] = 1;
3140 }
3141
Hans Schillstromf1313152011-01-03 14:44:55 +01003142 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
Julius Volz9a812192008-08-14 14:08:44 +02003143 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
Hans Schillstromf1313152011-01-03 14:44:55 +01003144 ipvs->backup_mcast_ifn,
3145 ipvs->backup_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003146 goto nla_put_failure;
3147
3148 cb->args[1] = 1;
3149 }
3150
3151nla_put_failure:
3152 mutex_unlock(&__ip_vs_mutex);
3153
3154 return skb->len;
3155}
3156
Hans Schillstromf1313152011-01-03 14:44:55 +01003157static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003158{
3159 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3160 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3161 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3162 return -EINVAL;
3163
Hans Schillstromf1313152011-01-03 14:44:55 +01003164 return start_sync_thread(net,
3165 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
Julius Volz9a812192008-08-14 14:08:44 +02003166 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3167 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3168}
3169
Hans Schillstromf1313152011-01-03 14:44:55 +01003170static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003171{
3172 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3173 return -EINVAL;
3174
Hans Schillstromf1313152011-01-03 14:44:55 +01003175 return stop_sync_thread(net,
3176 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
Julius Volz9a812192008-08-14 14:08:44 +02003177}
3178
Hans Schillstrom93304192011-01-03 14:44:51 +01003179static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003180{
3181 struct ip_vs_timeout_user t;
3182
Hans Schillstrom93304192011-01-03 14:44:51 +01003183 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003184
3185 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3186 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3187
3188 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3189 t.tcp_fin_timeout =
3190 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3191
3192 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3193 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3194
Hans Schillstrom93304192011-01-03 14:44:51 +01003195 return ip_vs_set_timeout(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003196}
3197
3198static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3199{
3200 struct ip_vs_service *svc = NULL;
Julius Volzc860c6b2008-09-02 15:55:36 +02003201 struct ip_vs_service_user_kern usvc;
3202 struct ip_vs_dest_user_kern udest;
Julius Volz9a812192008-08-14 14:08:44 +02003203 int ret = 0, cmd;
3204 int need_full_svc = 0, need_full_dest = 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003205 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003206 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003207
Hans Schillstromfc723252011-01-03 14:44:43 +01003208 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003209 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003210 cmd = info->genlhdr->cmd;
3211
3212 mutex_lock(&__ip_vs_mutex);
3213
3214 if (cmd == IPVS_CMD_FLUSH) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003215 ret = ip_vs_flush(net);
Julius Volz9a812192008-08-14 14:08:44 +02003216 goto out;
3217 } else if (cmd == IPVS_CMD_SET_CONFIG) {
Hans Schillstrom93304192011-01-03 14:44:51 +01003218 ret = ip_vs_genl_set_config(net, info->attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003219 goto out;
3220 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3221 cmd == IPVS_CMD_DEL_DAEMON) {
3222
3223 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3224
3225 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3226 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3227 info->attrs[IPVS_CMD_ATTR_DAEMON],
3228 ip_vs_daemon_policy)) {
3229 ret = -EINVAL;
3230 goto out;
3231 }
3232
3233 if (cmd == IPVS_CMD_NEW_DAEMON)
Hans Schillstromf1313152011-01-03 14:44:55 +01003234 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003235 else
Hans Schillstromf1313152011-01-03 14:44:55 +01003236 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003237 goto out;
3238 } else if (cmd == IPVS_CMD_ZERO &&
3239 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003240 ret = ip_vs_zero_all(net);
Julius Volz9a812192008-08-14 14:08:44 +02003241 goto out;
3242 }
3243
3244 /* All following commands require a service argument, so check if we
3245 * received a valid one. We need a full service specification when
3246 * adding / editing a service. Only identifying members otherwise. */
3247 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3248 need_full_svc = 1;
3249
Hans Schillstromfc723252011-01-03 14:44:43 +01003250 ret = ip_vs_genl_parse_service(net, &usvc,
Julius Volz9a812192008-08-14 14:08:44 +02003251 info->attrs[IPVS_CMD_ATTR_SERVICE],
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003252 need_full_svc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003253 if (ret)
3254 goto out;
3255
Julius Volz9a812192008-08-14 14:08:44 +02003256 /* Unless we're adding a new service, the service must already exist */
3257 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3258 ret = -ESRCH;
3259 goto out;
3260 }
3261
3262 /* Destination commands require a valid destination argument. For
3263 * adding / editing a destination, we need a full destination
3264 * specification. */
3265 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3266 cmd == IPVS_CMD_DEL_DEST) {
3267 if (cmd != IPVS_CMD_DEL_DEST)
3268 need_full_dest = 1;
3269
3270 ret = ip_vs_genl_parse_dest(&udest,
3271 info->attrs[IPVS_CMD_ATTR_DEST],
3272 need_full_dest);
3273 if (ret)
3274 goto out;
3275 }
3276
3277 switch (cmd) {
3278 case IPVS_CMD_NEW_SERVICE:
3279 if (svc == NULL)
Hans Schillstromfc723252011-01-03 14:44:43 +01003280 ret = ip_vs_add_service(net, &usvc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003281 else
3282 ret = -EEXIST;
3283 break;
3284 case IPVS_CMD_SET_SERVICE:
3285 ret = ip_vs_edit_service(svc, &usvc);
3286 break;
3287 case IPVS_CMD_DEL_SERVICE:
3288 ret = ip_vs_del_service(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003289 /* do not use svc, it can be freed */
Julius Volz9a812192008-08-14 14:08:44 +02003290 break;
3291 case IPVS_CMD_NEW_DEST:
3292 ret = ip_vs_add_dest(svc, &udest);
3293 break;
3294 case IPVS_CMD_SET_DEST:
3295 ret = ip_vs_edit_dest(svc, &udest);
3296 break;
3297 case IPVS_CMD_DEL_DEST:
3298 ret = ip_vs_del_dest(svc, &udest);
3299 break;
3300 case IPVS_CMD_ZERO:
3301 ret = ip_vs_zero_service(svc);
3302 break;
3303 default:
3304 ret = -EINVAL;
3305 }
3306
3307out:
Julius Volz9a812192008-08-14 14:08:44 +02003308 mutex_unlock(&__ip_vs_mutex);
3309
3310 return ret;
3311}
3312
3313static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3314{
3315 struct sk_buff *msg;
3316 void *reply;
3317 int ret, cmd, reply_cmd;
Hans Schillstromfc723252011-01-03 14:44:43 +01003318 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003319 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003320
Hans Schillstromfc723252011-01-03 14:44:43 +01003321 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003322 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003323 cmd = info->genlhdr->cmd;
3324
3325 if (cmd == IPVS_CMD_GET_SERVICE)
3326 reply_cmd = IPVS_CMD_NEW_SERVICE;
3327 else if (cmd == IPVS_CMD_GET_INFO)
3328 reply_cmd = IPVS_CMD_SET_INFO;
3329 else if (cmd == IPVS_CMD_GET_CONFIG)
3330 reply_cmd = IPVS_CMD_SET_CONFIG;
3331 else {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003332 pr_err("unknown Generic Netlink command\n");
Julius Volz9a812192008-08-14 14:08:44 +02003333 return -EINVAL;
3334 }
3335
3336 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3337 if (!msg)
3338 return -ENOMEM;
3339
3340 mutex_lock(&__ip_vs_mutex);
3341
3342 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3343 if (reply == NULL)
3344 goto nla_put_failure;
3345
3346 switch (cmd) {
3347 case IPVS_CMD_GET_SERVICE:
3348 {
3349 struct ip_vs_service *svc;
3350
Hans Schillstromfc723252011-01-03 14:44:43 +01003351 svc = ip_vs_genl_find_service(net,
3352 info->attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003353 if (IS_ERR(svc)) {
3354 ret = PTR_ERR(svc);
3355 goto out_err;
3356 } else if (svc) {
3357 ret = ip_vs_genl_fill_service(msg, svc);
Julius Volz9a812192008-08-14 14:08:44 +02003358 if (ret)
3359 goto nla_put_failure;
3360 } else {
3361 ret = -ESRCH;
3362 goto out_err;
3363 }
3364
3365 break;
3366 }
3367
3368 case IPVS_CMD_GET_CONFIG:
3369 {
3370 struct ip_vs_timeout_user t;
3371
Hans Schillstrom93304192011-01-03 14:44:51 +01003372 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003373#ifdef CONFIG_IP_VS_PROTO_TCP
3374 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3375 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3376 t.tcp_fin_timeout);
3377#endif
3378#ifdef CONFIG_IP_VS_PROTO_UDP
3379 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3380#endif
3381
3382 break;
3383 }
3384
3385 case IPVS_CMD_GET_INFO:
3386 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3387 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01003388 ip_vs_conn_tab_size);
Julius Volz9a812192008-08-14 14:08:44 +02003389 break;
3390 }
3391
3392 genlmsg_end(msg, reply);
Johannes Berg134e6372009-07-10 09:51:34 +00003393 ret = genlmsg_reply(msg, info);
Julius Volz9a812192008-08-14 14:08:44 +02003394 goto out;
3395
3396nla_put_failure:
Hannes Eder1e3e2382009-08-02 11:05:41 +00003397 pr_err("not enough space in Netlink message\n");
Julius Volz9a812192008-08-14 14:08:44 +02003398 ret = -EMSGSIZE;
3399
3400out_err:
3401 nlmsg_free(msg);
3402out:
3403 mutex_unlock(&__ip_vs_mutex);
3404
3405 return ret;
3406}
3407
3408
3409static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3410 {
3411 .cmd = IPVS_CMD_NEW_SERVICE,
3412 .flags = GENL_ADMIN_PERM,
3413 .policy = ip_vs_cmd_policy,
3414 .doit = ip_vs_genl_set_cmd,
3415 },
3416 {
3417 .cmd = IPVS_CMD_SET_SERVICE,
3418 .flags = GENL_ADMIN_PERM,
3419 .policy = ip_vs_cmd_policy,
3420 .doit = ip_vs_genl_set_cmd,
3421 },
3422 {
3423 .cmd = IPVS_CMD_DEL_SERVICE,
3424 .flags = GENL_ADMIN_PERM,
3425 .policy = ip_vs_cmd_policy,
3426 .doit = ip_vs_genl_set_cmd,
3427 },
3428 {
3429 .cmd = IPVS_CMD_GET_SERVICE,
3430 .flags = GENL_ADMIN_PERM,
3431 .doit = ip_vs_genl_get_cmd,
3432 .dumpit = ip_vs_genl_dump_services,
3433 .policy = ip_vs_cmd_policy,
3434 },
3435 {
3436 .cmd = IPVS_CMD_NEW_DEST,
3437 .flags = GENL_ADMIN_PERM,
3438 .policy = ip_vs_cmd_policy,
3439 .doit = ip_vs_genl_set_cmd,
3440 },
3441 {
3442 .cmd = IPVS_CMD_SET_DEST,
3443 .flags = GENL_ADMIN_PERM,
3444 .policy = ip_vs_cmd_policy,
3445 .doit = ip_vs_genl_set_cmd,
3446 },
3447 {
3448 .cmd = IPVS_CMD_DEL_DEST,
3449 .flags = GENL_ADMIN_PERM,
3450 .policy = ip_vs_cmd_policy,
3451 .doit = ip_vs_genl_set_cmd,
3452 },
3453 {
3454 .cmd = IPVS_CMD_GET_DEST,
3455 .flags = GENL_ADMIN_PERM,
3456 .policy = ip_vs_cmd_policy,
3457 .dumpit = ip_vs_genl_dump_dests,
3458 },
3459 {
3460 .cmd = IPVS_CMD_NEW_DAEMON,
3461 .flags = GENL_ADMIN_PERM,
3462 .policy = ip_vs_cmd_policy,
3463 .doit = ip_vs_genl_set_cmd,
3464 },
3465 {
3466 .cmd = IPVS_CMD_DEL_DAEMON,
3467 .flags = GENL_ADMIN_PERM,
3468 .policy = ip_vs_cmd_policy,
3469 .doit = ip_vs_genl_set_cmd,
3470 },
3471 {
3472 .cmd = IPVS_CMD_GET_DAEMON,
3473 .flags = GENL_ADMIN_PERM,
3474 .dumpit = ip_vs_genl_dump_daemons,
3475 },
3476 {
3477 .cmd = IPVS_CMD_SET_CONFIG,
3478 .flags = GENL_ADMIN_PERM,
3479 .policy = ip_vs_cmd_policy,
3480 .doit = ip_vs_genl_set_cmd,
3481 },
3482 {
3483 .cmd = IPVS_CMD_GET_CONFIG,
3484 .flags = GENL_ADMIN_PERM,
3485 .doit = ip_vs_genl_get_cmd,
3486 },
3487 {
3488 .cmd = IPVS_CMD_GET_INFO,
3489 .flags = GENL_ADMIN_PERM,
3490 .doit = ip_vs_genl_get_cmd,
3491 },
3492 {
3493 .cmd = IPVS_CMD_ZERO,
3494 .flags = GENL_ADMIN_PERM,
3495 .policy = ip_vs_cmd_policy,
3496 .doit = ip_vs_genl_set_cmd,
3497 },
3498 {
3499 .cmd = IPVS_CMD_FLUSH,
3500 .flags = GENL_ADMIN_PERM,
3501 .doit = ip_vs_genl_set_cmd,
3502 },
3503};
3504
3505static int __init ip_vs_genl_register(void)
3506{
Michał Mirosław8f698d52009-05-21 10:34:05 +00003507 return genl_register_family_with_ops(&ip_vs_genl_family,
3508 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
Julius Volz9a812192008-08-14 14:08:44 +02003509}
3510
3511static void ip_vs_genl_unregister(void)
3512{
3513 genl_unregister_family(&ip_vs_genl_family);
3514}
3515
3516/* End of Generic Netlink interface definitions */
3517
/*
 * per netns init/exit func.
 */
3521int __net_init __ip_vs_control_init(struct net *net)
3522{
Hans Schillstromfc723252011-01-03 14:44:43 +01003523 int idx;
3524 struct netns_ipvs *ipvs = net_ipvs(net);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003525 struct ctl_table *tbl;
Hans Schillstromfc723252011-01-03 14:44:43 +01003526
Hans Schillstroma0840e22011-01-03 14:44:58 +01003527 atomic_set(&ipvs->dropentry, 0);
3528 spin_lock_init(&ipvs->dropentry_lock);
3529 spin_lock_init(&ipvs->droppacket_lock);
3530 spin_lock_init(&ipvs->securetcp_lock);
3531 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3532
3533 /* Initialize rs_table */
3534 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3535 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3536
Hans Schillstromf2431e62011-01-03 14:45:00 +01003537 INIT_LIST_HEAD(&ipvs->dest_trash);
Hans Schillstrom763f8d02011-01-03 14:45:01 +01003538 atomic_set(&ipvs->ftpsvc_counter, 0);
3539 atomic_set(&ipvs->nullsvc_counter, 0);
Hans Schillstromf2431e62011-01-03 14:45:00 +01003540
Hans Schillstromb17fc992011-01-03 14:44:56 +01003541 /* procfs stats */
Julian Anastasov2a0751a2011-03-04 12:20:35 +02003542 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3543 if (!ipvs->tot_stats.cpustats) {
Hans Schillstromb17fc992011-01-03 14:44:56 +01003544 pr_err("%s() alloc_percpu failed\n", __func__);
3545 goto err_alloc;
3546 }
Julian Anastasov2a0751a2011-03-04 12:20:35 +02003547 spin_lock_init(&ipvs->tot_stats.lock);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003548
3549 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3550 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003551 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3552 &ip_vs_stats_percpu_fops);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003553
3554 if (!net_eq(net, &init_net)) {
3555 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3556 if (tbl == NULL)
3557 goto err_dup;
3558 } else
3559 tbl = vs_vars;
3560 /* Initialize sysctl defaults */
3561 idx = 0;
3562 ipvs->sysctl_amemthresh = 1024;
3563 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3564 ipvs->sysctl_am_droprate = 10;
3565 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3566 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3567 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3568#ifdef CONFIG_IP_VS_NFCT
3569 tbl[idx++].data = &ipvs->sysctl_conntrack;
3570#endif
3571 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3572 ipvs->sysctl_snat_reroute = 1;
3573 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3574 ipvs->sysctl_sync_ver = 1;
3575 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3576 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3577 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3578 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3579 ipvs->sysctl_sync_threshold[0] = 3;
3580 ipvs->sysctl_sync_threshold[1] = 50;
3581 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3582 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3583 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3584
3585
Simon Horman04439292011-02-01 18:29:04 +01003586#ifdef CONFIG_SYSCTL
Hans Schillstroma0840e22011-01-03 14:44:58 +01003587 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
Hans Schillstrom07924702011-01-24 15:14:41 +01003588 tbl);
Simon Horman04439292011-02-01 18:29:04 +01003589 if (ipvs->sysctl_hdr == NULL) {
3590 if (!net_eq(net, &init_net))
3591 kfree(tbl);
3592 goto err_dup;
3593 }
3594#endif
Julian Anastasov2a0751a2011-03-04 12:20:35 +02003595 ip_vs_new_estimator(net, &ipvs->tot_stats);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003596 ipvs->sysctl_tbl = tbl;
Hans Schillstromf6340ee2011-01-03 14:44:59 +01003597 /* Schedule defense work */
3598 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3599 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003600 return 0;
3601
Hans Schillstroma0840e22011-01-03 14:44:58 +01003602err_dup:
Julian Anastasov2a0751a2011-03-04 12:20:35 +02003603 free_percpu(ipvs->tot_stats.cpustats);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003604err_alloc:
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003605 return -ENOMEM;
3606}
3607
3608static void __net_exit __ip_vs_control_cleanup(struct net *net)
3609{
Hans Schillstromb17fc992011-01-03 14:44:56 +01003610 struct netns_ipvs *ipvs = net_ipvs(net);
3611
Hans Schillstromf2431e62011-01-03 14:45:00 +01003612 ip_vs_trash_cleanup(net);
Julian Anastasov2a0751a2011-03-04 12:20:35 +02003613 ip_vs_kill_estimator(net, &ipvs->tot_stats);
Hans Schillstromf2431e62011-01-03 14:45:00 +01003614 cancel_delayed_work_sync(&ipvs->defense_work);
3615 cancel_work_sync(&ipvs->defense_work.work);
Simon Horman04439292011-02-01 18:29:04 +01003616#ifdef CONFIG_SYSCTL
Hans Schillstroma0840e22011-01-03 14:44:58 +01003617 unregister_net_sysctl_table(ipvs->sysctl_hdr);
Simon Horman04439292011-02-01 18:29:04 +01003618#endif
Hans Schillstromb17fc992011-01-03 14:44:56 +01003619 proc_net_remove(net, "ip_vs_stats_percpu");
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003620 proc_net_remove(net, "ip_vs_stats");
3621 proc_net_remove(net, "ip_vs");
Julian Anastasov2a0751a2011-03-04 12:20:35 +02003622 free_percpu(ipvs->tot_stats.cpustats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003623}
3624
3625static struct pernet_operations ipvs_control_ops = {
3626 .init = __ip_vs_control_init,
3627 .exit = __ip_vs_control_cleanup,
3628};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003629
Sven Wegener048cf482008-08-10 18:24:35 +00003630int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003631{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003632 int idx;
Hans Schillstromfc723252011-01-03 14:44:43 +01003633 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003634
3635 EnterFunction(2);
3636
Hans Schillstromfc723252011-01-03 14:44:43 +01003637 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003638 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3639 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3640 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3641 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003642
3643 ret = register_pernet_subsys(&ipvs_control_ops);
3644 if (ret) {
3645 pr_err("cannot register namespace.\n");
3646 goto err;
Eduardo Blancod86bef72010-10-19 10:26:47 +01003647 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003648
3649 smp_wmb(); /* Do we really need it now ? */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003650
Linus Torvalds1da177e2005-04-16 15:20:36 -07003651 ret = nf_register_sockopt(&ip_vs_sockopts);
3652 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003653 pr_err("cannot register sockopt.\n");
Hans Schillstromfc723252011-01-03 14:44:43 +01003654 goto err_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003655 }
3656
Julius Volz9a812192008-08-14 14:08:44 +02003657 ret = ip_vs_genl_register();
3658 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003659 pr_err("cannot register Generic Netlink interface.\n");
Julius Volz9a812192008-08-14 14:08:44 +02003660 nf_unregister_sockopt(&ip_vs_sockopts);
Hans Schillstromfc723252011-01-03 14:44:43 +01003661 goto err_net;
Julius Volz9a812192008-08-14 14:08:44 +02003662 }
3663
Linus Torvalds1da177e2005-04-16 15:20:36 -07003664 LeaveFunction(2);
3665 return 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003666
3667err_net:
3668 unregister_pernet_subsys(&ipvs_control_ops);
3669err:
3670 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003671}
3672
3673
3674void ip_vs_control_cleanup(void)
3675{
3676 EnterFunction(2);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003677 unregister_pernet_subsys(&ipvs_control_ops);
Julius Volz9a812192008-08-14 14:08:44 +02003678 ip_vs_genl_unregister();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003679 nf_unregister_sockopt(&ip_vs_sockopts);
3680 LeaveFunction(2);
3681}