blob: fa6d44c62de3449abea578a598d261f0f674211b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
Hannes Eder9aada7a2009-07-30 14:29:44 -070021#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/seq_file.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090034#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080038#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020040#include <net/net_namespace.h>
Hans Schillstrom93304192011-01-03 14:44:51 +010041#include <linux/nsproxy.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <net/ip.h>
Vince Busam09571c72008-09-02 15:55:52 +020043#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020047#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020049#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
/* mutex for IPVS sockopts; [gs]etsockopt may sleep, hence not a spinlock */
static DEFINE_MUTEX(__ip_vs_mutex);

/* rwlock protecting the virtual service tables */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
Linus Torvalds1da177e2005-04-16 15:20:36 -070061/* sysctl variables */
Linus Torvalds1da177e2005-04-16 15:20:36 -070062
63#ifdef CONFIG_IP_VS_DEBUG
/* current IPVS debug level; statically zero-initialised */
static int sysctl_ip_vs_debug_level;

/* Return the value currently stored in sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	const int level = sysctl_ip_vs_debug_level;

	return level;
}
70#endif
71
Vince Busam09571c72008-09-02 15:55:52 +020072#ifdef CONFIG_IP_VS_IPV6
73/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
Hans Schillstrom4a984802011-01-03 14:45:02 +010074static int __ip_vs_addr_is_local_v6(struct net *net,
75 const struct in6_addr *addr)
Vince Busam09571c72008-09-02 15:55:52 +020076{
77 struct rt6_info *rt;
78 struct flowi fl = {
79 .oif = 0,
Changli Gao58116622010-11-12 18:43:55 +000080 .fl6_dst = *addr,
81 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
Vince Busam09571c72008-09-02 15:55:52 +020082 };
83
Hans Schillstrom4a984802011-01-03 14:45:02 +010084 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
Vince Busam09571c72008-09-02 15:55:52 +020085 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
86 return 1;
87
88 return 0;
89}
90#endif
Simon Horman14e40542011-02-04 18:33:02 +090091
92#ifdef CONFIG_SYSCTL
Linus Torvalds1da177e2005-04-16 15:20:36 -070093/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -070094 * update_defense_level is called from keventd and from sysctl,
95 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -070096 */
Hans Schillstrom93304192011-01-03 14:44:51 +010097static void update_defense_level(struct netns_ipvs *ipvs)
Linus Torvalds1da177e2005-04-16 15:20:36 -070098{
99 struct sysinfo i;
100 static int old_secure_tcp = 0;
101 int availmem;
102 int nomem;
103 int to_change = -1;
104
105 /* we only count free and buffered memory (in pages) */
106 si_meminfo(&i);
107 availmem = i.freeram + i.bufferram;
108 /* however in linux 2.5 the i.bufferram is total page cache size,
109 we need adjust it */
110 /* si_swapinfo(&i); */
111 /* availmem = availmem - (i.totalswap - i.freeswap); */
112
Hans Schillstroma0840e22011-01-03 14:44:58 +0100113 nomem = (availmem < ipvs->sysctl_amemthresh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700115 local_bh_disable();
116
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117 /* drop_entry */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100118 spin_lock(&ipvs->dropentry_lock);
119 switch (ipvs->sysctl_drop_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100121 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122 break;
123 case 1:
124 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100125 atomic_set(&ipvs->dropentry, 1);
126 ipvs->sysctl_drop_entry = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100128 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129 }
130 break;
131 case 2:
132 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100133 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100135 atomic_set(&ipvs->dropentry, 0);
136 ipvs->sysctl_drop_entry = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137 };
138 break;
139 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100140 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 break;
142 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100143 spin_unlock(&ipvs->dropentry_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144
145 /* drop_packet */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100146 spin_lock(&ipvs->droppacket_lock);
147 switch (ipvs->sysctl_drop_packet) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100149 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150 break;
151 case 1:
152 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100153 ipvs->drop_rate = ipvs->drop_counter
154 = ipvs->sysctl_amemthresh /
155 (ipvs->sysctl_amemthresh-availmem);
156 ipvs->sysctl_drop_packet = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100158 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159 }
160 break;
161 case 2:
162 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100163 ipvs->drop_rate = ipvs->drop_counter
164 = ipvs->sysctl_amemthresh /
165 (ipvs->sysctl_amemthresh-availmem);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100167 ipvs->drop_rate = 0;
168 ipvs->sysctl_drop_packet = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 }
170 break;
171 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100172 ipvs->drop_rate = ipvs->sysctl_am_droprate;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173 break;
174 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100175 spin_unlock(&ipvs->droppacket_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176
177 /* secure_tcp */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100178 spin_lock(&ipvs->securetcp_lock);
179 switch (ipvs->sysctl_secure_tcp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180 case 0:
181 if (old_secure_tcp >= 2)
182 to_change = 0;
183 break;
184 case 1:
185 if (nomem) {
186 if (old_secure_tcp < 2)
187 to_change = 1;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100188 ipvs->sysctl_secure_tcp = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 } else {
190 if (old_secure_tcp >= 2)
191 to_change = 0;
192 }
193 break;
194 case 2:
195 if (nomem) {
196 if (old_secure_tcp < 2)
197 to_change = 1;
198 } else {
199 if (old_secure_tcp >= 2)
200 to_change = 0;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100201 ipvs->sysctl_secure_tcp = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202 }
203 break;
204 case 3:
205 if (old_secure_tcp < 2)
206 to_change = 1;
207 break;
208 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100209 old_secure_tcp = ipvs->sysctl_secure_tcp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210 if (to_change >= 0)
Hans Schillstrom93304192011-01-03 14:44:51 +0100211 ip_vs_protocol_timeout_change(ipvs,
Hans Schillstroma0840e22011-01-03 14:44:58 +0100212 ipvs->sysctl_secure_tcp > 1);
213 spin_unlock(&ipvs->securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700214
215 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216}
217
218
/*
 *	Timer for checking the defense: period, in jiffies, between two
 *	runs of the defense work.  Parenthesised so that the expansion
 *	stays one operand next to operators such as '/' or '%'.
 */
#define DEFENSE_TIMER_PERIOD	(1*HZ)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223
David Howellsc4028952006-11-22 14:57:56 +0000224static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225{
Hans Schillstromf6340ee2011-01-03 14:44:59 +0100226 struct netns_ipvs *ipvs =
227 container_of(work, struct netns_ipvs, defense_work.work);
Hans Schillstrom93304192011-01-03 14:44:51 +0100228
229 update_defense_level(ipvs);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100230 if (atomic_read(&ipvs->dropentry))
Hans Schillstromf6340ee2011-01-03 14:44:59 +0100231 ip_vs_random_dropentry(ipvs->net);
232 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233}
Simon Horman14e40542011-02-04 18:33:02 +0900234#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235
236int
237ip_vs_use_count_inc(void)
238{
239 return try_module_get(THIS_MODULE);
240}
241
242void
243ip_vs_use_count_dec(void)
244{
245 module_put(THIS_MODULE);
246}
247
248
/*
 *	Hash table geometry for virtual service lookups:
 *	2^IP_VS_SVC_TAB_BITS buckets per table, with IP_VS_SVC_TAB_MASK
 *	reducing a hash value to a bucket index.
 */
#define IP_VS_SVC_TAB_BITS	8
#define IP_VS_SVC_TAB_SIZE	(1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK	(IP_VS_SVC_TAB_SIZE - 1)
255
256/* the service table hashed by <protocol, addr, port> */
257static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
258/* the service table hashed by fwmark */
259static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
260
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261
262/*
263 * Returns hash value for virtual service
264 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100265static inline unsigned
266ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
267 const union nf_inet_addr *addr, __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268{
269 register unsigned porth = ntohs(port);
Julius Volzb18610d2008-09-02 15:55:37 +0200270 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271
Julius Volzb18610d2008-09-02 15:55:37 +0200272#ifdef CONFIG_IP_VS_IPV6
273 if (af == AF_INET6)
274 addr_fold = addr->ip6[0]^addr->ip6[1]^
275 addr->ip6[2]^addr->ip6[3];
276#endif
Hans Schillstromfc723252011-01-03 14:44:43 +0100277 addr_fold ^= ((size_t)net>>8);
Julius Volzb18610d2008-09-02 15:55:37 +0200278
279 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 & IP_VS_SVC_TAB_MASK;
281}
282
283/*
284 * Returns hash value of fwmark for virtual service lookup
285 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100286static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287{
Hans Schillstromfc723252011-01-03 14:44:43 +0100288 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289}
290
291/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100292 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293 * or in the ip_vs_svc_fwm_table by fwmark.
294 * Should be called with locked tables.
295 */
296static int ip_vs_svc_hash(struct ip_vs_service *svc)
297{
298 unsigned hash;
299
300 if (svc->flags & IP_VS_SVC_F_HASHED) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000301 pr_err("%s(): request for already hashed, called from %pF\n",
302 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 return 0;
304 }
305
306 if (svc->fwmark == 0) {
307 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100308 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100310 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
311 &svc->addr, svc->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
313 } else {
314 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100315 * Hash it by fwmark in svc_fwm_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100317 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
319 }
320
321 svc->flags |= IP_VS_SVC_F_HASHED;
322 /* increase its refcnt because it is referenced by the svc table */
323 atomic_inc(&svc->refcnt);
324 return 1;
325}
326
327
328/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100329 * Unhashes a service from svc_table / svc_fwm_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330 * Should be called with locked tables.
331 */
332static int ip_vs_svc_unhash(struct ip_vs_service *svc)
333{
334 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000335 pr_err("%s(): request for unhash flagged, called from %pF\n",
336 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 return 0;
338 }
339
340 if (svc->fwmark == 0) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100341 /* Remove it from the svc_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342 list_del(&svc->s_list);
343 } else {
Hans Schillstromfc723252011-01-03 14:44:43 +0100344 /* Remove it from the svc_fwm_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 list_del(&svc->f_list);
346 }
347
348 svc->flags &= ~IP_VS_SVC_F_HASHED;
349 atomic_dec(&svc->refcnt);
350 return 1;
351}
352
353
354/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100355 * Get service by {netns, proto,addr,port} in the service table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700356 */
Julius Volzb18610d2008-09-02 15:55:37 +0200357static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100358__ip_vs_service_find(struct net *net, int af, __u16 protocol,
359 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360{
361 unsigned hash;
362 struct ip_vs_service *svc;
363
364 /* Check for "full" addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100365 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366
367 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
Julius Volzb18610d2008-09-02 15:55:37 +0200368 if ((svc->af == af)
369 && ip_vs_addr_equal(af, &svc->addr, vaddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 && (svc->port == vport)
Hans Schillstromfc723252011-01-03 14:44:43 +0100371 && (svc->protocol == protocol)
372 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374 return svc;
375 }
376 }
377
378 return NULL;
379}
380
381
382/*
383 * Get service by {fwmark} in the service table.
384 */
Julius Volzb18610d2008-09-02 15:55:37 +0200385static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100386__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387{
388 unsigned hash;
389 struct ip_vs_service *svc;
390
391 /* Check for fwmark addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100392 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393
394 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100395 if (svc->fwmark == fwmark && svc->af == af
396 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398 return svc;
399 }
400 }
401
402 return NULL;
403}
404
405struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100406ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
Julius Volz3c2e0502008-09-02 15:55:38 +0200407 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408{
409 struct ip_vs_service *svc;
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100410 struct netns_ipvs *ipvs = net_ipvs(net);
Julius Volz3c2e0502008-09-02 15:55:38 +0200411
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 read_lock(&__ip_vs_svc_lock);
413
414 /*
415 * Check the table hashed by fwmark first
416 */
Julian Anastasov097fc762011-03-04 12:26:17 +0200417 if (fwmark) {
418 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
419 if (svc)
420 goto out;
421 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422
423 /*
424 * Check the table hashed by <protocol,addr,port>
425 * for "full" addressed entries
426 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100427 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428
429 if (svc == NULL
430 && protocol == IPPROTO_TCP
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100431 && atomic_read(&ipvs->ftpsvc_counter)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
433 /*
434 * Check if ftp service entry exists, the packet
435 * might belong to FTP data connections.
436 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100437 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 }
439
440 if (svc == NULL
Hans Schillstrom763f8d02011-01-03 14:45:01 +0100441 && atomic_read(&ipvs->nullsvc_counter)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 /*
443 * Check if the catch-all port (port zero) exists
444 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100445 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 }
447
448 out:
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200449 if (svc)
450 atomic_inc(&svc->usecnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 read_unlock(&__ip_vs_svc_lock);
452
Julius Volz3c2e0502008-09-02 15:55:38 +0200453 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
454 fwmark, ip_vs_proto_name(protocol),
455 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
456 svc ? "hit" : "not hit");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457
458 return svc;
459}
460
461
462static inline void
463__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
464{
465 atomic_inc(&svc->refcnt);
466 dest->svc = svc;
467}
468
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200469static void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470__ip_vs_unbind_svc(struct ip_vs_dest *dest)
471{
472 struct ip_vs_service *svc = dest->svc;
473
474 dest->svc = NULL;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200475 if (atomic_dec_and_test(&svc->refcnt)) {
476 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
477 svc->fwmark,
478 IP_VS_DBG_ADDR(svc->af, &svc->addr),
479 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +0100480 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200482 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483}
484
485
486/*
487 * Returns hash value for real service
488 */
Julius Volz7937df12008-09-02 15:55:48 +0200489static inline unsigned ip_vs_rs_hashkey(int af,
490 const union nf_inet_addr *addr,
491 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492{
493 register unsigned porth = ntohs(port);
Julius Volz7937df12008-09-02 15:55:48 +0200494 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495
Julius Volz7937df12008-09-02 15:55:48 +0200496#ifdef CONFIG_IP_VS_IPV6
497 if (af == AF_INET6)
498 addr_fold = addr->ip6[0]^addr->ip6[1]^
499 addr->ip6[2]^addr->ip6[3];
500#endif
501
502 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503 & IP_VS_RTAB_MASK;
504}
505
506/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100507 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508 * should be called with locked tables.
509 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100510static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511{
512 unsigned hash;
513
514 if (!list_empty(&dest->d_list)) {
515 return 0;
516 }
517
518 /*
519 * Hash by proto,addr,port,
520 * which are the parameters of the real service.
521 */
Julius Volz7937df12008-09-02 15:55:48 +0200522 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
523
Hans Schillstromfc723252011-01-03 14:44:43 +0100524 list_add(&dest->d_list, &ipvs->rs_table[hash]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525
526 return 1;
527}
528
529/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100530 * UNhashes ip_vs_dest from rs_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531 * should be called with locked tables.
532 */
533static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
534{
535 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100536 * Remove it from the rs_table table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 */
538 if (!list_empty(&dest->d_list)) {
539 list_del(&dest->d_list);
540 INIT_LIST_HEAD(&dest->d_list);
541 }
542
543 return 1;
544}
545
546/*
547 * Lookup real service by <proto,addr,port> in the real service table.
548 */
549struct ip_vs_dest *
Hans Schillstromfc723252011-01-03 14:44:43 +0100550ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
Julius Volz7937df12008-09-02 15:55:48 +0200551 const union nf_inet_addr *daddr,
552 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553{
Hans Schillstromfc723252011-01-03 14:44:43 +0100554 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 unsigned hash;
556 struct ip_vs_dest *dest;
557
558 /*
559 * Check for "full" addressed entries
560 * Return the first found entry
561 */
Julius Volz7937df12008-09-02 15:55:48 +0200562 hash = ip_vs_rs_hashkey(af, daddr, dport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563
Hans Schillstroma0840e22011-01-03 14:44:58 +0100564 read_lock(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100565 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200566 if ((dest->af == af)
567 && ip_vs_addr_equal(af, &dest->addr, daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568 && (dest->port == dport)
569 && ((dest->protocol == protocol) ||
570 dest->vfwmark)) {
571 /* HIT */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100572 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573 return dest;
574 }
575 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100576 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577
578 return NULL;
579}
580
581/*
582 * Lookup destination by {addr,port} in the given service
583 */
584static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200585ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
586 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587{
588 struct ip_vs_dest *dest;
589
590 /*
591 * Find the destination for the given service
592 */
593 list_for_each_entry(dest, &svc->destinations, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200594 if ((dest->af == svc->af)
595 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
596 && (dest->port == dport)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 /* HIT */
598 return dest;
599 }
600 }
601
602 return NULL;
603}
604
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800605/*
606 * Find destination by {daddr,dport,vaddr,protocol}
607 * Cretaed to be used in ip_vs_process_message() in
608 * the backup synchronization daemon. It finds the
609 * destination to be bound to the received connection
610 * on the backup.
611 *
612 * ip_vs_lookup_real_service() looked promissing, but
613 * seems not working as expected.
614 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100615struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
616 const union nf_inet_addr *daddr,
Julius Volz7937df12008-09-02 15:55:48 +0200617 __be16 dport,
618 const union nf_inet_addr *vaddr,
Hans Schillstrom0e051e62010-11-19 14:25:07 +0100619 __be16 vport, __u16 protocol, __u32 fwmark)
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800620{
621 struct ip_vs_dest *dest;
622 struct ip_vs_service *svc;
623
Hans Schillstromfc723252011-01-03 14:44:43 +0100624 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800625 if (!svc)
626 return NULL;
627 dest = ip_vs_lookup_dest(svc, daddr, dport);
628 if (dest)
629 atomic_inc(&dest->refcnt);
630 ip_vs_service_put(svc);
631 return dest;
632}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633
634/*
635 * Lookup dest by {svc,addr,port} in the destination trash.
636 * The destination trash is used to hold the destinations that are removed
637 * from the service table but are still referenced by some conn entries.
638 * The reason to add the destination trash is when the dest is temporary
639 * down (either by administrator or by monitor program), the dest can be
640 * picked back from the trash, the remaining connections to the dest can
641 * continue, and the counting information of the dest is also useful for
642 * scheduling.
643 */
644static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200645ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
646 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647{
648 struct ip_vs_dest *dest, *nxt;
Hans Schillstromf2431e62011-01-03 14:45:00 +0100649 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650
651 /*
652 * Find the destination in trash
653 */
Hans Schillstromf2431e62011-01-03 14:45:00 +0100654 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200655 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
656 "dest->refcnt=%d\n",
657 dest->vfwmark,
658 IP_VS_DBG_ADDR(svc->af, &dest->addr),
659 ntohs(dest->port),
660 atomic_read(&dest->refcnt));
661 if (dest->af == svc->af &&
662 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663 dest->port == dport &&
664 dest->vfwmark == svc->fwmark &&
665 dest->protocol == svc->protocol &&
666 (svc->fwmark ||
Julius Volz7937df12008-09-02 15:55:48 +0200667 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 dest->vport == svc->port))) {
669 /* HIT */
670 return dest;
671 }
672
673 /*
674 * Try to purge the destination from trash if not referenced
675 */
676 if (atomic_read(&dest->refcnt) == 1) {
Julius Volz7937df12008-09-02 15:55:48 +0200677 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
678 "from trash\n",
679 dest->vfwmark,
680 IP_VS_DBG_ADDR(svc->af, &dest->addr),
681 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682 list_del(&dest->n_list);
683 ip_vs_dst_reset(dest);
684 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100685 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686 kfree(dest);
687 }
688 }
689
690 return NULL;
691}
692
693
694/*
695 * Clean up all the destinations in the trash
696 * Called by the ip_vs_control_cleanup()
697 *
698 * When the ip_vs_control_clearup is activated by ipvs module exit,
699 * the service tables must have been flushed and all the connections
700 * are expired, and the refcnt of each destination in the trash must
701 * be 1, so we simply release them here.
702 */
Hans Schillstromf2431e62011-01-03 14:45:00 +0100703static void ip_vs_trash_cleanup(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704{
705 struct ip_vs_dest *dest, *nxt;
Hans Schillstromf2431e62011-01-03 14:45:00 +0100706 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707
Hans Schillstromf2431e62011-01-03 14:45:00 +0100708 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 list_del(&dest->n_list);
710 ip_vs_dst_reset(dest);
711 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100712 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713 kfree(dest);
714 }
715}
716
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200717static void
718ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
719{
720#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200721
722 spin_lock_bh(&src->lock);
723
724 IP_VS_SHOW_STATS_COUNTER(conns);
725 IP_VS_SHOW_STATS_COUNTER(inpkts);
726 IP_VS_SHOW_STATS_COUNTER(outpkts);
727 IP_VS_SHOW_STATS_COUNTER(inbytes);
728 IP_VS_SHOW_STATS_COUNTER(outbytes);
729
Julian Anastasovea9f22c2011-03-14 01:41:54 +0200730 ip_vs_read_estimator(dst, src);
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200731
732 spin_unlock_bh(&src->lock);
733}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734
735static void
736ip_vs_zero_stats(struct ip_vs_stats *stats)
737{
738 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000739
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200740 /* get current counters as zero point, rates are zeroed */
741
742#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
Julian Anastasov55a3d4e2011-03-14 01:37:49 +0200743
744 IP_VS_ZERO_STATS_COUNTER(conns);
745 IP_VS_ZERO_STATS_COUNTER(inpkts);
746 IP_VS_ZERO_STATS_COUNTER(outpkts);
747 IP_VS_ZERO_STATS_COUNTER(inbytes);
748 IP_VS_ZERO_STATS_COUNTER(outbytes);
749
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000751
Sven Wegener3a14a3132008-08-10 18:24:41 +0000752 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700753}
754
755/*
756 * Update a destination in the given service
757 */
758static void
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200759__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
760 struct ip_vs_dest_user_kern *udest, int add)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761{
Hans Schillstromfc723252011-01-03 14:44:43 +0100762 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763 int conn_flags;
764
765 /* set the weight and the flags */
766 atomic_set(&dest->weight, udest->weight);
Julian Anastasov35757922010-09-17 14:18:16 +0200767 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
768 conn_flags |= IP_VS_CONN_F_INACTIVE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
Julian Anastasov35757922010-09-17 14:18:16 +0200771 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
773 } else {
774 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100775 * Put the real service in rs_table if not present.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 * For now only for NAT!
777 */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100778 write_lock_bh(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100779 ip_vs_rs_hash(ipvs, dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100780 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781 }
782 atomic_set(&dest->conn_flags, conn_flags);
783
784 /* bind the service */
785 if (!dest->svc) {
786 __ip_vs_bind_svc(dest, svc);
787 } else {
788 if (dest->svc != svc) {
789 __ip_vs_unbind_svc(dest);
790 ip_vs_zero_stats(&dest->stats);
791 __ip_vs_bind_svc(dest, svc);
792 }
793 }
794
795 /* set the dest status flags */
796 dest->flags |= IP_VS_DEST_F_AVAILABLE;
797
798 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
799 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
800 dest->u_threshold = udest->u_threshold;
801 dest->l_threshold = udest->l_threshold;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200802
Julian Anastasovfc604762010-10-17 16:38:15 +0300803 spin_lock(&dest->dst_lock);
804 ip_vs_dst_reset(dest);
805 spin_unlock(&dest->dst_lock);
806
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200807 if (add)
Julian Anastasov6ef757f2011-03-14 01:44:28 +0200808 ip_vs_start_estimator(svc->net, &dest->stats);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200809
810 write_lock_bh(&__ip_vs_svc_lock);
811
812 /* Wait until all other svc users go away */
813 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
814
815 if (add) {
816 list_add(&dest->n_list, &svc->destinations);
817 svc->num_dests++;
818 }
819
820 /* call the update_service, because server weight may be changed */
821 if (svc->scheduler->update_service)
822 svc->scheduler->update_service(svc);
823
824 write_unlock_bh(&__ip_vs_svc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825}
826
827
828/*
829 * Create a destination for the given service
830 */
831static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200832ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833 struct ip_vs_dest **dest_p)
834{
835 struct ip_vs_dest *dest;
836 unsigned atype;
837
838 EnterFunction(2);
839
Vince Busam09571c72008-09-02 15:55:52 +0200840#ifdef CONFIG_IP_VS_IPV6
841 if (svc->af == AF_INET6) {
842 atype = ipv6_addr_type(&udest->addr.in6);
Sven Wegener3bfb92f2008-09-05 16:53:49 +0200843 if ((!(atype & IPV6_ADDR_UNICAST) ||
844 atype & IPV6_ADDR_LINKLOCAL) &&
Hans Schillstrom4a984802011-01-03 14:45:02 +0100845 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
Vince Busam09571c72008-09-02 15:55:52 +0200846 return -EINVAL;
847 } else
848#endif
849 {
Hans Schillstrom4a984802011-01-03 14:45:02 +0100850 atype = inet_addr_type(svc->net, udest->addr.ip);
Vince Busam09571c72008-09-02 15:55:52 +0200851 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
852 return -EINVAL;
853 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854
Simon Hormandee06e42010-08-26 02:54:31 +0000855 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000857 pr_err("%s(): no memory.\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858 return -ENOMEM;
859 }
Hans Schillstromb17fc992011-01-03 14:44:56 +0100860 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
861 if (!dest->stats.cpustats) {
862 pr_err("%s() alloc_percpu failed\n", __func__);
863 goto err_alloc;
864 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865
Julius Volzc860c6b2008-09-02 15:55:36 +0200866 dest->af = svc->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 dest->protocol = svc->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +0200868 dest->vaddr = svc->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 dest->vport = svc->port;
870 dest->vfwmark = svc->fwmark;
Julius Volzc860c6b2008-09-02 15:55:36 +0200871 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872 dest->port = udest->port;
873
874 atomic_set(&dest->activeconns, 0);
875 atomic_set(&dest->inactconns, 0);
876 atomic_set(&dest->persistconns, 0);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200877 atomic_set(&dest->refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878
879 INIT_LIST_HEAD(&dest->d_list);
880 spin_lock_init(&dest->dst_lock);
881 spin_lock_init(&dest->stats.lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200882 __ip_vs_update_dest(svc, dest, udest, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883
884 *dest_p = dest;
885
886 LeaveFunction(2);
887 return 0;
Hans Schillstromb17fc992011-01-03 14:44:56 +0100888
889err_alloc:
890 kfree(dest);
891 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700892}
893
894
895/*
896 * Add a destination into an existing service
897 */
898static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200899ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900{
901 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200902 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700903 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 int ret;
905
906 EnterFunction(2);
907
908 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000909 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910 return -ERANGE;
911 }
912
913 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000914 pr_err("%s(): lower threshold is higher than upper threshold\n",
915 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916 return -ERANGE;
917 }
918
Julius Volzc860c6b2008-09-02 15:55:36 +0200919 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
920
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921 /*
922 * Check if the dest already exists in the list
923 */
Julius Volz7937df12008-09-02 15:55:48 +0200924 dest = ip_vs_lookup_dest(svc, &daddr, dport);
925
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926 if (dest != NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000927 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700928 return -EEXIST;
929 }
930
931 /*
932 * Check if the dest already exists in the trash and
933 * is from the same service
934 */
Julius Volz7937df12008-09-02 15:55:48 +0200935 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
936
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937 if (dest != NULL) {
Julius Volzcfc78c52008-09-02 15:55:53 +0200938 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
939 "dest->refcnt=%d, service %u/%s:%u\n",
940 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
941 atomic_read(&dest->refcnt),
942 dest->vfwmark,
943 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
944 ntohs(dest->vport));
945
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 /*
947 * Get the destination from the trash
948 */
949 list_del(&dest->n_list);
950
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200951 __ip_vs_update_dest(svc, dest, udest, 1);
952 ret = 0;
953 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954 /*
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200955 * Allocate and initialize the dest structure
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200957 ret = ip_vs_new_dest(svc, udest, &dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959 LeaveFunction(2);
960
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200961 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962}
963
964
965/*
966 * Edit a destination in the given service
967 */
968static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200969ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970{
971 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200972 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700973 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974
975 EnterFunction(2);
976
977 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000978 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979 return -ERANGE;
980 }
981
982 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000983 pr_err("%s(): lower threshold is higher than upper threshold\n",
984 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985 return -ERANGE;
986 }
987
Julius Volzc860c6b2008-09-02 15:55:36 +0200988 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
989
Linus Torvalds1da177e2005-04-16 15:20:36 -0700990 /*
991 * Lookup the destination list
992 */
Julius Volz7937df12008-09-02 15:55:48 +0200993 dest = ip_vs_lookup_dest(svc, &daddr, dport);
994
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000996 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 return -ENOENT;
998 }
999
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001000 __ip_vs_update_dest(svc, dest, udest, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001001 LeaveFunction(2);
1002
1003 return 0;
1004}
1005
1006
1007/*
1008 * Delete a destination (must be already unlinked from the service)
1009 */
Hans Schillstrom29c20262011-01-03 14:44:54 +01001010static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011{
Hans Schillstroma0840e22011-01-03 14:44:58 +01001012 struct netns_ipvs *ipvs = net_ipvs(net);
1013
Julian Anastasov6ef757f2011-03-14 01:44:28 +02001014 ip_vs_stop_estimator(net, &dest->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015
1016 /*
1017 * Remove it from the d-linked list with the real services.
1018 */
Hans Schillstroma0840e22011-01-03 14:44:58 +01001019 write_lock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020 ip_vs_rs_unhash(dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +01001021 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022
1023 /*
1024 * Decrease the refcnt of the dest, and free the dest
1025 * if nobody refers to it (refcnt=0). Otherwise, throw
1026 * the destination into the trash.
1027 */
1028 if (atomic_dec_and_test(&dest->refcnt)) {
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001029 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1030 dest->vfwmark,
1031 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1032 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033 ip_vs_dst_reset(dest);
1034 /* simply decrease svc->refcnt here, let the caller check
1035 and release the service if nobody refers to it.
1036 Only user context can release destination and service,
1037 and only one user context can update virtual service at a
1038 time, so the operation here is OK */
1039 atomic_dec(&dest->svc->refcnt);
Hans Schillstromb17fc992011-01-03 14:44:56 +01001040 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041 kfree(dest);
1042 } else {
Julius Volzcfc78c52008-09-02 15:55:53 +02001043 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1044 "dest->refcnt=%d\n",
1045 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1046 ntohs(dest->port),
1047 atomic_read(&dest->refcnt));
Hans Schillstromf2431e62011-01-03 14:45:00 +01001048 list_add(&dest->n_list, &ipvs->dest_trash);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049 atomic_inc(&dest->refcnt);
1050 }
1051}
1052
1053
1054/*
1055 * Unlink a destination from the given service
1056 */
1057static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1058 struct ip_vs_dest *dest,
1059 int svcupd)
1060{
1061 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1062
1063 /*
1064 * Remove it from the d-linked destination list.
1065 */
1066 list_del(&dest->n_list);
1067 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001068
1069 /*
1070 * Call the update_service function of its scheduler
1071 */
1072 if (svcupd && svc->scheduler->update_service)
1073 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074}
1075
1076
1077/*
1078 * Delete a destination server in the given service
1079 */
1080static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001081ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082{
1083 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -07001084 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085
1086 EnterFunction(2);
1087
Julius Volz7937df12008-09-02 15:55:48 +02001088 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
Julius Volzc860c6b2008-09-02 15:55:36 +02001089
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001091 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092 return -ENOENT;
1093 }
1094
1095 write_lock_bh(&__ip_vs_svc_lock);
1096
1097 /*
1098 * Wait until all other svc users go away.
1099 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001100 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101
1102 /*
1103 * Unlink dest from the service
1104 */
1105 __ip_vs_unlink_dest(svc, dest, 1);
1106
1107 write_unlock_bh(&__ip_vs_svc_lock);
1108
1109 /*
1110 * Delete the destination
1111 */
Hans Schillstroma0840e22011-01-03 14:44:58 +01001112 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113
1114 LeaveFunction(2);
1115
1116 return 0;
1117}
1118
1119
1120/*
1121 * Add a service into the service hash table
1122 */
1123static int
Hans Schillstromfc723252011-01-03 14:44:43 +01001124ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
Julius Volzc860c6b2008-09-02 15:55:36 +02001125 struct ip_vs_service **svc_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126{
1127 int ret = 0;
1128 struct ip_vs_scheduler *sched = NULL;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001129 struct ip_vs_pe *pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130 struct ip_vs_service *svc = NULL;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001131 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132
1133 /* increase the module use count */
1134 ip_vs_use_count_inc();
1135
1136 /* Lookup the scheduler by 'u->sched_name' */
1137 sched = ip_vs_scheduler_get(u->sched_name);
1138 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001139 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 ret = -ENOENT;
Simon Horman6e08bfb2010-08-22 21:37:52 +09001141 goto out_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142 }
1143
Simon Horman0d1e71b2010-08-22 21:37:54 +09001144 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001145 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001146 if (pe == NULL) {
1147 pr_info("persistence engine module ip_vs_pe_%s "
1148 "not found\n", u->pe_name);
1149 ret = -ENOENT;
1150 goto out_err;
1151 }
1152 }
1153
Julius Volzf94fd042008-09-02 15:55:55 +02001154#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001155 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1156 ret = -EINVAL;
1157 goto out_err;
Julius Volzf94fd042008-09-02 15:55:55 +02001158 }
1159#endif
1160
Simon Hormandee06e42010-08-26 02:54:31 +00001161 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162 if (svc == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001163 IP_VS_DBG(1, "%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164 ret = -ENOMEM;
1165 goto out_err;
1166 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001167 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1168 if (!svc->stats.cpustats) {
1169 pr_err("%s() alloc_percpu failed\n", __func__);
1170 goto out_err;
1171 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172
1173 /* I'm the first user of the service */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001174 atomic_set(&svc->usecnt, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175 atomic_set(&svc->refcnt, 0);
1176
Julius Volzc860c6b2008-09-02 15:55:36 +02001177 svc->af = u->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178 svc->protocol = u->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +02001179 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180 svc->port = u->port;
1181 svc->fwmark = u->fwmark;
1182 svc->flags = u->flags;
1183 svc->timeout = u->timeout * HZ;
1184 svc->netmask = u->netmask;
Hans Schillstromfc723252011-01-03 14:44:43 +01001185 svc->net = net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186
1187 INIT_LIST_HEAD(&svc->destinations);
1188 rwlock_init(&svc->sched_lock);
1189 spin_lock_init(&svc->stats.lock);
1190
1191 /* Bind the scheduler */
1192 ret = ip_vs_bind_scheduler(svc, sched);
1193 if (ret)
1194 goto out_err;
1195 sched = NULL;
1196
Simon Horman0d1e71b2010-08-22 21:37:54 +09001197 /* Bind the ct retriever */
1198 ip_vs_bind_pe(svc, pe);
1199 pe = NULL;
1200
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 /* Update the virtual service counters */
1202 if (svc->port == FTPPORT)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001203 atomic_inc(&ipvs->ftpsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 else if (svc->port == 0)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001205 atomic_inc(&ipvs->nullsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206
Julian Anastasov6ef757f2011-03-14 01:44:28 +02001207 ip_vs_start_estimator(net, &svc->stats);
Julius Volzf94fd042008-09-02 15:55:55 +02001208
1209 /* Count only IPv4 services for old get/setsockopt interface */
1210 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001211 ipvs->num_services++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212
1213 /* Hash the service into the service table */
1214 write_lock_bh(&__ip_vs_svc_lock);
1215 ip_vs_svc_hash(svc);
1216 write_unlock_bh(&__ip_vs_svc_lock);
1217
1218 *svc_p = svc;
1219 return 0;
1220
Hans Schillstromb17fc992011-01-03 14:44:56 +01001221
Simon Horman6e08bfb2010-08-22 21:37:52 +09001222 out_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 if (svc != NULL) {
Simon Horman2fabf352010-08-22 21:37:52 +09001224 ip_vs_unbind_scheduler(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225 if (svc->inc) {
1226 local_bh_disable();
1227 ip_vs_app_inc_put(svc->inc);
1228 local_bh_enable();
1229 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001230 if (svc->stats.cpustats)
1231 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232 kfree(svc);
1233 }
1234 ip_vs_scheduler_put(sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001235 ip_vs_pe_put(pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237 /* decrease the module use count */
1238 ip_vs_use_count_dec();
1239
1240 return ret;
1241}
1242
1243
1244/*
1245 * Edit a service and bind it with a new scheduler
1246 */
1247static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001248ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249{
1250 struct ip_vs_scheduler *sched, *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001251 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252 int ret = 0;
1253
1254 /*
1255 * Lookup the scheduler, by 'u->sched_name'
1256 */
1257 sched = ip_vs_scheduler_get(u->sched_name);
1258 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001259 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 return -ENOENT;
1261 }
1262 old_sched = sched;
1263
Simon Horman0d1e71b2010-08-22 21:37:54 +09001264 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001265 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001266 if (pe == NULL) {
1267 pr_info("persistence engine module ip_vs_pe_%s "
1268 "not found\n", u->pe_name);
1269 ret = -ENOENT;
1270 goto out;
1271 }
1272 old_pe = pe;
1273 }
1274
Julius Volzf94fd042008-09-02 15:55:55 +02001275#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001276 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1277 ret = -EINVAL;
1278 goto out;
Julius Volzf94fd042008-09-02 15:55:55 +02001279 }
1280#endif
1281
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282 write_lock_bh(&__ip_vs_svc_lock);
1283
1284 /*
1285 * Wait until all other svc users go away.
1286 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001287 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288
1289 /*
1290 * Set the flags and timeout value
1291 */
1292 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1293 svc->timeout = u->timeout * HZ;
1294 svc->netmask = u->netmask;
1295
1296 old_sched = svc->scheduler;
1297 if (sched != old_sched) {
1298 /*
1299 * Unbind the old scheduler
1300 */
1301 if ((ret = ip_vs_unbind_scheduler(svc))) {
1302 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001303 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304 }
1305
1306 /*
1307 * Bind the new scheduler
1308 */
1309 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1310 /*
1311 * If ip_vs_bind_scheduler fails, restore the old
1312 * scheduler.
1313 * The main reason of failure is out of memory.
1314 *
1315 * The question is if the old scheduler can be
1316 * restored all the time. TODO: if it cannot be
1317 * restored some time, we must delete the service,
1318 * otherwise the system may crash.
1319 */
1320 ip_vs_bind_scheduler(svc, old_sched);
1321 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001322 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001323 }
1324 }
1325
Simon Horman0d1e71b2010-08-22 21:37:54 +09001326 old_pe = svc->pe;
1327 if (pe != old_pe) {
1328 ip_vs_unbind_pe(svc);
1329 ip_vs_bind_pe(svc, pe);
1330 }
1331
Simon Horman9e691ed2008-09-17 10:10:41 +10001332 out_unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333 write_unlock_bh(&__ip_vs_svc_lock);
Simon Horman9e691ed2008-09-17 10:10:41 +10001334 out:
Simon Horman6e08bfb2010-08-22 21:37:52 +09001335 ip_vs_scheduler_put(old_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001336 ip_vs_pe_put(old_pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337 return ret;
1338}
1339
1340
1341/*
1342 * Delete a service from the service list
1343 * - The service must be unlinked, unlocked and not referenced!
1344 * - We are called under _bh lock
1345 */
1346static void __ip_vs_del_service(struct ip_vs_service *svc)
1347{
1348 struct ip_vs_dest *dest, *nxt;
1349 struct ip_vs_scheduler *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001350 struct ip_vs_pe *old_pe;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001351 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001352
1353 pr_info("%s: enter\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354
Julius Volzf94fd042008-09-02 15:55:55 +02001355 /* Count only IPv4 services for old get/setsockopt interface */
1356 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001357 ipvs->num_services--;
Julius Volzf94fd042008-09-02 15:55:55 +02001358
Julian Anastasov6ef757f2011-03-14 01:44:28 +02001359 ip_vs_stop_estimator(svc->net, &svc->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360
1361 /* Unbind scheduler */
1362 old_sched = svc->scheduler;
1363 ip_vs_unbind_scheduler(svc);
Simon Horman6e08bfb2010-08-22 21:37:52 +09001364 ip_vs_scheduler_put(old_sched);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365
Simon Horman0d1e71b2010-08-22 21:37:54 +09001366 /* Unbind persistence engine */
1367 old_pe = svc->pe;
1368 ip_vs_unbind_pe(svc);
1369 ip_vs_pe_put(old_pe);
1370
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371 /* Unbind app inc */
1372 if (svc->inc) {
1373 ip_vs_app_inc_put(svc->inc);
1374 svc->inc = NULL;
1375 }
1376
1377 /*
1378 * Unlink the whole destination list
1379 */
1380 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1381 __ip_vs_unlink_dest(svc, dest, 0);
Hans Schillstrom29c20262011-01-03 14:44:54 +01001382 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383 }
1384
1385 /*
1386 * Update the virtual service counters
1387 */
1388 if (svc->port == FTPPORT)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001389 atomic_dec(&ipvs->ftpsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390 else if (svc->port == 0)
Hans Schillstrom763f8d02011-01-03 14:45:01 +01001391 atomic_dec(&ipvs->nullsvc_counter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392
1393 /*
1394 * Free the service if nobody refers to it
1395 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001396 if (atomic_read(&svc->refcnt) == 0) {
1397 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1398 svc->fwmark,
1399 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1400 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +01001401 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001403 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404
1405 /* decrease the module use count */
1406 ip_vs_use_count_dec();
1407}
1408
1409/*
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001410 * Unlink a service from list and try to delete it if its refcnt reached 0
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001412static void ip_vs_unlink_service(struct ip_vs_service *svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414 /*
1415 * Unhash it from the service table
1416 */
1417 write_lock_bh(&__ip_vs_svc_lock);
1418
1419 ip_vs_svc_unhash(svc);
1420
1421 /*
1422 * Wait until all the svc users go away.
1423 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001424 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001425
1426 __ip_vs_del_service(svc);
1427
1428 write_unlock_bh(&__ip_vs_svc_lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001429}
1430
1431/*
1432 * Delete a service from the service list
1433 */
1434static int ip_vs_del_service(struct ip_vs_service *svc)
1435{
1436 if (svc == NULL)
1437 return -EEXIST;
1438 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439
1440 return 0;
1441}
1442
1443
1444/*
1445 * Flush all the virtual services
1446 */
Hans Schillstromfc723252011-01-03 14:44:43 +01001447static int ip_vs_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001448{
1449 int idx;
1450 struct ip_vs_service *svc, *nxt;
1451
1452 /*
Hans Schillstromfc723252011-01-03 14:44:43 +01001453 * Flush the service table hashed by <netns,protocol,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454 */
1455 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001456 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1457 s_list) {
1458 if (net_eq(svc->net, net))
1459 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460 }
1461 }
1462
1463 /*
1464 * Flush the service table hashed by fwmark
1465 */
1466 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1467 list_for_each_entry_safe(svc, nxt,
1468 &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001469 if (net_eq(svc->net, net))
1470 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 }
1472 }
1473
1474 return 0;
1475}
1476
1477
1478/*
1479 * Zero counters in a service or all services
1480 */
1481static int ip_vs_zero_service(struct ip_vs_service *svc)
1482{
1483 struct ip_vs_dest *dest;
1484
1485 write_lock_bh(&__ip_vs_svc_lock);
1486 list_for_each_entry(dest, &svc->destinations, n_list) {
1487 ip_vs_zero_stats(&dest->stats);
1488 }
1489 ip_vs_zero_stats(&svc->stats);
1490 write_unlock_bh(&__ip_vs_svc_lock);
1491 return 0;
1492}
1493
Hans Schillstromfc723252011-01-03 14:44:43 +01001494static int ip_vs_zero_all(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495{
1496 int idx;
1497 struct ip_vs_service *svc;
1498
1499 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1500 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001501 if (net_eq(svc->net, net))
1502 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001503 }
1504 }
1505
1506 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1507 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001508 if (net_eq(svc->net, net))
1509 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510 }
1511 }
1512
Julian Anastasov2a0751a2011-03-04 12:20:35 +02001513 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 return 0;
1515}
1516
Simon Horman14e40542011-02-04 18:33:02 +09001517#ifdef CONFIG_SYSCTL
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001519proc_do_defense_mode(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520 void __user *buffer, size_t *lenp, loff_t *ppos)
1521{
Hans Schillstrom93304192011-01-03 14:44:51 +01001522 struct net *net = current->nsproxy->net_ns;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001523 int *valp = table->data;
1524 int val = *valp;
1525 int rc;
1526
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001527 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528 if (write && (*valp != val)) {
1529 if ((*valp < 0) || (*valp > 3)) {
1530 /* Restore the correct value */
1531 *valp = val;
1532 } else {
Hans Schillstrom93304192011-01-03 14:44:51 +01001533 update_defense_level(net_ipvs(net));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534 }
1535 }
1536 return rc;
1537}
1538
Linus Torvalds1da177e2005-04-16 15:20:36 -07001539static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001540proc_do_sync_threshold(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001541 void __user *buffer, size_t *lenp, loff_t *ppos)
1542{
1543 int *valp = table->data;
1544 int val[2];
1545 int rc;
1546
1547 /* backup the value first */
1548 memcpy(val, valp, sizeof(val));
1549
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001550 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1552 /* Restore the correct value */
1553 memcpy(valp, val, sizeof(val));
1554 }
1555 return rc;
1556}
1557
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001558static int
1559proc_do_sync_mode(ctl_table *table, int write,
1560 void __user *buffer, size_t *lenp, loff_t *ppos)
1561{
1562 int *valp = table->data;
1563 int val = *valp;
1564 int rc;
1565
1566 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1567 if (write && (*valp != val)) {
1568 if ((*valp < 0) || (*valp > 1)) {
1569 /* Restore the correct value */
1570 *valp = val;
1571 } else {
Hans Schillstromf1313152011-01-03 14:44:55 +01001572 struct net *net = current->nsproxy->net_ns;
1573 ip_vs_sync_switch_mode(net, val);
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001574 }
1575 }
1576 return rc;
1577}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001578
/*
 *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *	Do not change the order of the entries or insert new ones without
 *	making the matching change to the netns init code in
 *	__ip_vs_control_init().
 */
1584
1585static struct ctl_table vs_vars[] = {
1586 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001587 .procname = "amemthresh",
Hans Schillstroma0840e22011-01-03 14:44:58 +01001588 .maxlen = sizeof(int),
1589 .mode = 0644,
1590 .proc_handler = proc_dointvec,
1591 },
1592 {
1593 .procname = "am_droprate",
1594 .maxlen = sizeof(int),
1595 .mode = 0644,
1596 .proc_handler = proc_dointvec,
1597 },
1598 {
1599 .procname = "drop_entry",
1600 .maxlen = sizeof(int),
1601 .mode = 0644,
1602 .proc_handler = proc_do_defense_mode,
1603 },
1604 {
1605 .procname = "drop_packet",
1606 .maxlen = sizeof(int),
1607 .mode = 0644,
1608 .proc_handler = proc_do_defense_mode,
1609 },
1610#ifdef CONFIG_IP_VS_NFCT
1611 {
1612 .procname = "conntrack",
1613 .maxlen = sizeof(int),
1614 .mode = 0644,
1615 .proc_handler = &proc_dointvec,
1616 },
1617#endif
1618 {
1619 .procname = "secure_tcp",
1620 .maxlen = sizeof(int),
1621 .mode = 0644,
1622 .proc_handler = proc_do_defense_mode,
1623 },
1624 {
1625 .procname = "snat_reroute",
1626 .maxlen = sizeof(int),
1627 .mode = 0644,
1628 .proc_handler = &proc_dointvec,
1629 },
1630 {
1631 .procname = "sync_version",
1632 .maxlen = sizeof(int),
1633 .mode = 0644,
1634 .proc_handler = &proc_do_sync_mode,
1635 },
1636 {
1637 .procname = "cache_bypass",
1638 .maxlen = sizeof(int),
1639 .mode = 0644,
1640 .proc_handler = proc_dointvec,
1641 },
1642 {
1643 .procname = "expire_nodest_conn",
1644 .maxlen = sizeof(int),
1645 .mode = 0644,
1646 .proc_handler = proc_dointvec,
1647 },
1648 {
1649 .procname = "expire_quiescent_template",
1650 .maxlen = sizeof(int),
1651 .mode = 0644,
1652 .proc_handler = proc_dointvec,
1653 },
1654 {
1655 .procname = "sync_threshold",
1656 .maxlen =
1657 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1658 .mode = 0644,
1659 .proc_handler = proc_do_sync_threshold,
1660 },
1661 {
1662 .procname = "nat_icmp_send",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001663 .maxlen = sizeof(int),
1664 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001665 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666 },
1667#ifdef CONFIG_IP_VS_DEBUG
1668 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 .procname = "debug_level",
1670 .data = &sysctl_ip_vs_debug_level,
1671 .maxlen = sizeof(int),
1672 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001673 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674 },
1675#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676#if 0
1677 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678 .procname = "timeout_established",
1679 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1680 .maxlen = sizeof(int),
1681 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001682 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 },
1684 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685 .procname = "timeout_synsent",
1686 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1687 .maxlen = sizeof(int),
1688 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001689 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690 },
1691 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692 .procname = "timeout_synrecv",
1693 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1694 .maxlen = sizeof(int),
1695 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001696 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 },
1698 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699 .procname = "timeout_finwait",
1700 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1701 .maxlen = sizeof(int),
1702 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001703 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704 },
1705 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706 .procname = "timeout_timewait",
1707 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1708 .maxlen = sizeof(int),
1709 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001710 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 },
1712 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713 .procname = "timeout_close",
1714 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1715 .maxlen = sizeof(int),
1716 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001717 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718 },
1719 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720 .procname = "timeout_closewait",
1721 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1722 .maxlen = sizeof(int),
1723 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001724 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 },
1726 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727 .procname = "timeout_lastack",
1728 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1729 .maxlen = sizeof(int),
1730 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001731 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732 },
1733 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734 .procname = "timeout_listen",
1735 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1736 .maxlen = sizeof(int),
1737 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001738 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001739 },
1740 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741 .procname = "timeout_synack",
1742 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1743 .maxlen = sizeof(int),
1744 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001745 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746 },
1747 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748 .procname = "timeout_udp",
1749 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1750 .maxlen = sizeof(int),
1751 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001752 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 },
1754 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755 .procname = "timeout_icmp",
1756 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1757 .maxlen = sizeof(int),
1758 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001759 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001760 },
1761#endif
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001762 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001763};
1764
Sven Wegener5587da52008-08-10 18:24:40 +00001765const struct ctl_path net_vs_ctl_path[] = {
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001766 { .procname = "net", },
1767 { .procname = "ipv4", },
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001768 { .procname = "vs", },
1769 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001771EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Simon Horman14e40542011-02-04 18:33:02 +09001772#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774#ifdef CONFIG_PROC_FS
1775
1776struct ip_vs_iter {
Hans Schillstromfc723252011-01-03 14:44:43 +01001777 struct seq_net_private p; /* Do not move this, netns depends upon it*/
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778 struct list_head *table;
1779 int bucket;
1780};
1781
1782/*
1783 * Write the contents of the VS rule table to a PROCfs file.
1784 * (It is kept just for backward compatibility)
1785 */
1786static inline const char *ip_vs_fwd_name(unsigned flags)
1787{
1788 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1789 case IP_VS_CONN_F_LOCALNODE:
1790 return "Local";
1791 case IP_VS_CONN_F_TUNNEL:
1792 return "Tunnel";
1793 case IP_VS_CONN_F_DROUTE:
1794 return "Route";
1795 default:
1796 return "Masq";
1797 }
1798}
1799
1800
1801/* Get the Nth entry in the two lists */
1802static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1803{
Hans Schillstromfc723252011-01-03 14:44:43 +01001804 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805 struct ip_vs_iter *iter = seq->private;
1806 int idx;
1807 struct ip_vs_service *svc;
1808
1809 /* look in hash by protocol */
1810 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1811 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001812 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813 iter->table = ip_vs_svc_table;
1814 iter->bucket = idx;
1815 return svc;
1816 }
1817 }
1818 }
1819
1820 /* keep looking in fwmark */
1821 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1822 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001823 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824 iter->table = ip_vs_svc_fwm_table;
1825 iter->bucket = idx;
1826 return svc;
1827 }
1828 }
1829 }
1830
1831 return NULL;
1832}
1833
1834static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
Simon Horman563e94f2008-09-17 10:10:42 +10001835__acquires(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836{
1837
1838 read_lock_bh(&__ip_vs_svc_lock);
1839 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1840}
1841
1842
1843static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1844{
1845 struct list_head *e;
1846 struct ip_vs_iter *iter;
1847 struct ip_vs_service *svc;
1848
1849 ++*pos;
1850 if (v == SEQ_START_TOKEN)
1851 return ip_vs_info_array(seq,0);
1852
1853 svc = v;
1854 iter = seq->private;
1855
1856 if (iter->table == ip_vs_svc_table) {
1857 /* next service in table hashed by protocol */
1858 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1859 return list_entry(e, struct ip_vs_service, s_list);
1860
1861
1862 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1863 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1864 s_list) {
1865 return svc;
1866 }
1867 }
1868
1869 iter->table = ip_vs_svc_fwm_table;
1870 iter->bucket = -1;
1871 goto scan_fwmark;
1872 }
1873
1874 /* next service in hashed by fwmark */
1875 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1876 return list_entry(e, struct ip_vs_service, f_list);
1877
1878 scan_fwmark:
1879 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1880 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1881 f_list)
1882 return svc;
1883 }
1884
1885 return NULL;
1886}
1887
1888static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
Simon Horman563e94f2008-09-17 10:10:42 +10001889__releases(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001890{
1891 read_unlock_bh(&__ip_vs_svc_lock);
1892}
1893
1894
1895static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1896{
1897 if (v == SEQ_START_TOKEN) {
1898 seq_printf(seq,
1899 "IP Virtual Server version %d.%d.%d (size=%d)\n",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01001900 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001901 seq_puts(seq,
1902 "Prot LocalAddress:Port Scheduler Flags\n");
1903 seq_puts(seq,
1904 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1905 } else {
1906 const struct ip_vs_service *svc = v;
1907 const struct ip_vs_iter *iter = seq->private;
1908 const struct ip_vs_dest *dest;
1909
Vince Busam667a5f12008-09-02 15:55:49 +02001910 if (iter->table == ip_vs_svc_table) {
1911#ifdef CONFIG_IP_VS_IPV6
1912 if (svc->af == AF_INET6)
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001913 seq_printf(seq, "%s [%pI6]:%04X %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001914 ip_vs_proto_name(svc->protocol),
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001915 &svc->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001916 ntohs(svc->port),
1917 svc->scheduler->name);
1918 else
1919#endif
Nick Chalk26ec0372010-06-22 08:07:01 +02001920 seq_printf(seq, "%s %08X:%04X %s %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001921 ip_vs_proto_name(svc->protocol),
1922 ntohl(svc->addr.ip),
1923 ntohs(svc->port),
Nick Chalk26ec0372010-06-22 08:07:01 +02001924 svc->scheduler->name,
1925 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001926 } else {
Nick Chalk26ec0372010-06-22 08:07:01 +02001927 seq_printf(seq, "FWM %08X %s %s",
1928 svc->fwmark, svc->scheduler->name,
1929 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001930 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931
1932 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1933 seq_printf(seq, "persistent %d %08X\n",
1934 svc->timeout,
1935 ntohl(svc->netmask));
1936 else
1937 seq_putc(seq, '\n');
1938
1939 list_for_each_entry(dest, &svc->destinations, n_list) {
Vince Busam667a5f12008-09-02 15:55:49 +02001940#ifdef CONFIG_IP_VS_IPV6
1941 if (dest->af == AF_INET6)
1942 seq_printf(seq,
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001943 " -> [%pI6]:%04X"
Vince Busam667a5f12008-09-02 15:55:49 +02001944 " %-7s %-6d %-10d %-10d\n",
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001945 &dest->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001946 ntohs(dest->port),
1947 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1948 atomic_read(&dest->weight),
1949 atomic_read(&dest->activeconns),
1950 atomic_read(&dest->inactconns));
1951 else
1952#endif
1953 seq_printf(seq,
1954 " -> %08X:%04X "
1955 "%-7s %-6d %-10d %-10d\n",
1956 ntohl(dest->addr.ip),
1957 ntohs(dest->port),
1958 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1959 atomic_read(&dest->weight),
1960 atomic_read(&dest->activeconns),
1961 atomic_read(&dest->inactconns));
1962
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963 }
1964 }
1965 return 0;
1966}
1967
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001968static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001969 .start = ip_vs_info_seq_start,
1970 .next = ip_vs_info_seq_next,
1971 .stop = ip_vs_info_seq_stop,
1972 .show = ip_vs_info_seq_show,
1973};
1974
1975static int ip_vs_info_open(struct inode *inode, struct file *file)
1976{
Hans Schillstromfc723252011-01-03 14:44:43 +01001977 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001978 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979}
1980
Arjan van de Ven9a321442007-02-12 00:55:35 -08001981static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001982 .owner = THIS_MODULE,
1983 .open = ip_vs_info_open,
1984 .read = seq_read,
1985 .llseek = seq_lseek,
1986 .release = seq_release_private,
1987};
1988
1989#endif
1990
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991#ifdef CONFIG_PROC_FS
1992static int ip_vs_stats_show(struct seq_file *seq, void *v)
1993{
Hans Schillstromb17fc992011-01-03 14:44:56 +01001994 struct net *net = seq_file_single_net(seq);
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02001995 struct ip_vs_stats_user show;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001996
1997/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1998 seq_puts(seq,
1999 " Total Incoming Outgoing Incoming Outgoing\n");
2000 seq_printf(seq,
2001 " Conns Packets Packets Bytes Bytes\n");
2002
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002003 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2004 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2005 show.inpkts, show.outpkts,
2006 (unsigned long long) show.inbytes,
2007 (unsigned long long) show.outbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008
2009/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2010 seq_puts(seq,
2011 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002012 seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2013 show.cps, show.inpps, show.outpps,
2014 show.inbps, show.outbps);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002015
2016 return 0;
2017}
2018
2019static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2020{
Hans Schillstromfc723252011-01-03 14:44:43 +01002021 return single_open_net(inode, file, ip_vs_stats_show);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002022}
2023
Arjan van de Ven9a321442007-02-12 00:55:35 -08002024static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002025 .owner = THIS_MODULE,
2026 .open = ip_vs_stats_seq_open,
2027 .read = seq_read,
2028 .llseek = seq_lseek,
2029 .release = single_release,
2030};
2031
Hans Schillstromb17fc992011-01-03 14:44:56 +01002032static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2033{
2034 struct net *net = seq_file_single_net(seq);
Julian Anastasov2a0751a2011-03-04 12:20:35 +02002035 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2036 struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
Julian Anastasovea9f22c2011-03-14 01:41:54 +02002037 struct ip_vs_stats_user rates;
Hans Schillstromb17fc992011-01-03 14:44:56 +01002038 int i;
2039
2040/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2041 seq_puts(seq,
2042 " Total Incoming Outgoing Incoming Outgoing\n");
2043 seq_printf(seq,
2044 "CPU Conns Packets Packets Bytes Bytes\n");
2045
2046 for_each_possible_cpu(i) {
Julian Anastasov2a0751a2011-03-04 12:20:35 +02002047 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2048 unsigned int start;
2049 __u64 inbytes, outbytes;
2050
2051 do {
2052 start = u64_stats_fetch_begin_bh(&u->syncp);
2053 inbytes = u->ustats.inbytes;
2054 outbytes = u->ustats.outbytes;
2055 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2056
Hans Schillstromb17fc992011-01-03 14:44:56 +01002057 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
Julian Anastasov2a0751a2011-03-04 12:20:35 +02002058 i, u->ustats.conns, u->ustats.inpkts,
2059 u->ustats.outpkts, (__u64)inbytes,
2060 (__u64)outbytes);
Hans Schillstromb17fc992011-01-03 14:44:56 +01002061 }
2062
2063 spin_lock_bh(&tot_stats->lock);
Julian Anastasovea9f22c2011-03-14 01:41:54 +02002064
Hans Schillstromb17fc992011-01-03 14:44:56 +01002065 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2066 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2067 tot_stats->ustats.outpkts,
2068 (unsigned long long) tot_stats->ustats.inbytes,
2069 (unsigned long long) tot_stats->ustats.outbytes);
2070
Julian Anastasovea9f22c2011-03-14 01:41:54 +02002071 ip_vs_read_estimator(&rates, tot_stats);
2072
2073 spin_unlock_bh(&tot_stats->lock);
2074
Hans Schillstromb17fc992011-01-03 14:44:56 +01002075/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2076 seq_puts(seq,
2077 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2078 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
Julian Anastasovea9f22c2011-03-14 01:41:54 +02002079 rates.cps,
2080 rates.inpps,
2081 rates.outpps,
2082 rates.inbps,
2083 rates.outbps);
Hans Schillstromb17fc992011-01-03 14:44:56 +01002084
2085 return 0;
2086}
2087
2088static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2089{
2090 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2091}
2092
2093static const struct file_operations ip_vs_stats_percpu_fops = {
2094 .owner = THIS_MODULE,
2095 .open = ip_vs_stats_percpu_seq_open,
2096 .read = seq_read,
2097 .llseek = seq_lseek,
2098 .release = single_release,
2099};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100#endif
2101
2102/*
2103 * Set timeout values for tcp tcpfin udp in the timeout_table.
2104 */
Hans Schillstrom93304192011-01-03 14:44:51 +01002105static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106{
Changli Gao091bb342011-01-21 18:02:13 +08002107#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
Hans Schillstrom93304192011-01-03 14:44:51 +01002108 struct ip_vs_proto_data *pd;
Changli Gao091bb342011-01-21 18:02:13 +08002109#endif
Hans Schillstrom93304192011-01-03 14:44:51 +01002110
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2112 u->tcp_timeout,
2113 u->tcp_fin_timeout,
2114 u->udp_timeout);
2115
2116#ifdef CONFIG_IP_VS_PROTO_TCP
2117 if (u->tcp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002118 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2119 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120 = u->tcp_timeout * HZ;
2121 }
2122
2123 if (u->tcp_fin_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002124 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2125 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126 = u->tcp_fin_timeout * HZ;
2127 }
2128#endif
2129
2130#ifdef CONFIG_IP_VS_PROTO_UDP
2131 if (u->udp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002132 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2133 pd->timeout_table[IP_VS_UDP_S_NORMAL]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134 = u->udp_timeout * HZ;
2135 }
2136#endif
2137 return 0;
2138}
2139
2140
/* Index of a "set" sockopt command relative to the first IPVS command. */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
/* Expected argument sizes of the "set" sockopt commands. */
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
/* Largest of the sizes above: a service followed by a destination. */
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2148
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002149static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002150 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2151 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2152 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2153 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2154 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2155 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2156 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2157 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2158 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2159 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2160 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2161};
2162
Julius Volzc860c6b2008-09-02 15:55:36 +02002163static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2164 struct ip_vs_service_user *usvc_compat)
2165{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002166 memset(usvc, 0, sizeof(*usvc));
2167
Julius Volzc860c6b2008-09-02 15:55:36 +02002168 usvc->af = AF_INET;
2169 usvc->protocol = usvc_compat->protocol;
2170 usvc->addr.ip = usvc_compat->addr;
2171 usvc->port = usvc_compat->port;
2172 usvc->fwmark = usvc_compat->fwmark;
2173
2174 /* Deep copy of sched_name is not needed here */
2175 usvc->sched_name = usvc_compat->sched_name;
2176
2177 usvc->flags = usvc_compat->flags;
2178 usvc->timeout = usvc_compat->timeout;
2179 usvc->netmask = usvc_compat->netmask;
2180}
2181
2182static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2183 struct ip_vs_dest_user *udest_compat)
2184{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002185 memset(udest, 0, sizeof(*udest));
2186
Julius Volzc860c6b2008-09-02 15:55:36 +02002187 udest->addr.ip = udest_compat->addr;
2188 udest->port = udest_compat->port;
2189 udest->conn_flags = udest_compat->conn_flags;
2190 udest->weight = udest_compat->weight;
2191 udest->u_threshold = udest_compat->u_threshold;
2192 udest->l_threshold = udest_compat->l_threshold;
2193}
2194
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195static int
2196do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2197{
Hans Schillstromfc723252011-01-03 14:44:43 +01002198 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199 int ret;
2200 unsigned char arg[MAX_ARG_LEN];
Julius Volzc860c6b2008-09-02 15:55:36 +02002201 struct ip_vs_service_user *usvc_compat;
2202 struct ip_vs_service_user_kern usvc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203 struct ip_vs_service *svc;
Julius Volzc860c6b2008-09-02 15:55:36 +02002204 struct ip_vs_dest_user *udest_compat;
2205 struct ip_vs_dest_user_kern udest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206
2207 if (!capable(CAP_NET_ADMIN))
2208 return -EPERM;
2209
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002210 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2211 return -EINVAL;
2212 if (len < 0 || len > MAX_ARG_LEN)
2213 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214 if (len != set_arglen[SET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002215 pr_err("set_ctl: len %u != %u\n",
2216 len, set_arglen[SET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217 return -EINVAL;
2218 }
2219
2220 if (copy_from_user(arg, user, len) != 0)
2221 return -EFAULT;
2222
2223 /* increase the module use count */
2224 ip_vs_use_count_inc();
2225
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002226 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002227 ret = -ERESTARTSYS;
2228 goto out_dec;
2229 }
2230
2231 if (cmd == IP_VS_SO_SET_FLUSH) {
2232 /* Flush the virtual service */
Hans Schillstromfc723252011-01-03 14:44:43 +01002233 ret = ip_vs_flush(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234 goto out_unlock;
2235 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2236 /* Set timeout values for (tcp tcpfin udp) */
Hans Schillstrom93304192011-01-03 14:44:51 +01002237 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238 goto out_unlock;
2239 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2240 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002241 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2242 dm->syncid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002243 goto out_unlock;
2244 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2245 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002246 ret = stop_sync_thread(net, dm->state);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002247 goto out_unlock;
2248 }
2249
Julius Volzc860c6b2008-09-02 15:55:36 +02002250 usvc_compat = (struct ip_vs_service_user *)arg;
2251 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2252
2253 /* We only use the new structs internally, so copy userspace compat
2254 * structs to extended internal versions */
2255 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2256 ip_vs_copy_udest_compat(&udest, udest_compat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002257
2258 if (cmd == IP_VS_SO_SET_ZERO) {
2259 /* if no service address is set, zero counters in all */
Julius Volzc860c6b2008-09-02 15:55:36 +02002260 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002261 ret = ip_vs_zero_all(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262 goto out_unlock;
2263 }
2264 }
2265
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +01002266 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2267 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2268 usvc.protocol != IPPROTO_SCTP) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002269 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2270 usvc.protocol, &usvc.addr.ip,
2271 ntohs(usvc.port), usvc.sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002272 ret = -EFAULT;
2273 goto out_unlock;
2274 }
2275
2276 /* Lookup the exact service by <protocol, addr, port> or fwmark */
Julius Volzc860c6b2008-09-02 15:55:36 +02002277 if (usvc.fwmark == 0)
Hans Schillstromfc723252011-01-03 14:44:43 +01002278 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002279 &usvc.addr, usvc.port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002280 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002281 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002282
2283 if (cmd != IP_VS_SO_SET_ADD
Julius Volzc860c6b2008-09-02 15:55:36 +02002284 && (svc == NULL || svc->protocol != usvc.protocol)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285 ret = -ESRCH;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002286 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002287 }
2288
2289 switch (cmd) {
2290 case IP_VS_SO_SET_ADD:
2291 if (svc != NULL)
2292 ret = -EEXIST;
2293 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002294 ret = ip_vs_add_service(net, &usvc, &svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002295 break;
2296 case IP_VS_SO_SET_EDIT:
Julius Volzc860c6b2008-09-02 15:55:36 +02002297 ret = ip_vs_edit_service(svc, &usvc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298 break;
2299 case IP_VS_SO_SET_DEL:
2300 ret = ip_vs_del_service(svc);
2301 if (!ret)
2302 goto out_unlock;
2303 break;
2304 case IP_VS_SO_SET_ZERO:
2305 ret = ip_vs_zero_service(svc);
2306 break;
2307 case IP_VS_SO_SET_ADDDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002308 ret = ip_vs_add_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002309 break;
2310 case IP_VS_SO_SET_EDITDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002311 ret = ip_vs_edit_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312 break;
2313 case IP_VS_SO_SET_DELDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002314 ret = ip_vs_del_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002315 break;
2316 default:
2317 ret = -EINVAL;
2318 }
2319
Linus Torvalds1da177e2005-04-16 15:20:36 -07002320 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002321 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002322 out_dec:
2323 /* decrease the module use count */
2324 ip_vs_use_count_dec();
2325
2326 return ret;
2327}
2328
2329
2330static void
Linus Torvalds1da177e2005-04-16 15:20:36 -07002331ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2332{
2333 dst->protocol = src->protocol;
Julius Volze7ade462008-09-02 15:55:33 +02002334 dst->addr = src->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002335 dst->port = src->port;
2336 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002337 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002338 dst->flags = src->flags;
2339 dst->timeout = src->timeout / HZ;
2340 dst->netmask = src->netmask;
2341 dst->num_dests = src->num_dests;
2342 ip_vs_copy_stats(&dst->stats, &src->stats);
2343}
2344
2345static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002346__ip_vs_get_service_entries(struct net *net,
2347 const struct ip_vs_get_services *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348 struct ip_vs_get_services __user *uptr)
2349{
2350 int idx, count=0;
2351 struct ip_vs_service *svc;
2352 struct ip_vs_service_entry entry;
2353 int ret = 0;
2354
2355 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2356 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002357 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002358 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002359 continue;
2360
Linus Torvalds1da177e2005-04-16 15:20:36 -07002361 if (count >= get->num_services)
2362 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002363 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002364 ip_vs_copy_service(&entry, svc);
2365 if (copy_to_user(&uptr->entrytable[count],
2366 &entry, sizeof(entry))) {
2367 ret = -EFAULT;
2368 goto out;
2369 }
2370 count++;
2371 }
2372 }
2373
2374 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2375 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002376 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002377 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002378 continue;
2379
Linus Torvalds1da177e2005-04-16 15:20:36 -07002380 if (count >= get->num_services)
2381 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002382 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002383 ip_vs_copy_service(&entry, svc);
2384 if (copy_to_user(&uptr->entrytable[count],
2385 &entry, sizeof(entry))) {
2386 ret = -EFAULT;
2387 goto out;
2388 }
2389 count++;
2390 }
2391 }
2392 out:
2393 return ret;
2394}
2395
2396static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002397__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398 struct ip_vs_get_dests __user *uptr)
2399{
2400 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002401 union nf_inet_addr addr = { .ip = get->addr };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002402 int ret = 0;
2403
2404 if (get->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002405 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002407 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002408 get->port);
Julius Volzb18610d2008-09-02 15:55:37 +02002409
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410 if (svc) {
2411 int count = 0;
2412 struct ip_vs_dest *dest;
2413 struct ip_vs_dest_entry entry;
2414
2415 list_for_each_entry(dest, &svc->destinations, n_list) {
2416 if (count >= get->num_dests)
2417 break;
2418
Julius Volze7ade462008-09-02 15:55:33 +02002419 entry.addr = dest->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002420 entry.port = dest->port;
2421 entry.conn_flags = atomic_read(&dest->conn_flags);
2422 entry.weight = atomic_read(&dest->weight);
2423 entry.u_threshold = dest->u_threshold;
2424 entry.l_threshold = dest->l_threshold;
2425 entry.activeconns = atomic_read(&dest->activeconns);
2426 entry.inactconns = atomic_read(&dest->inactconns);
2427 entry.persistconns = atomic_read(&dest->persistconns);
2428 ip_vs_copy_stats(&entry.stats, &dest->stats);
2429 if (copy_to_user(&uptr->entrytable[count],
2430 &entry, sizeof(entry))) {
2431 ret = -EFAULT;
2432 break;
2433 }
2434 count++;
2435 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436 } else
2437 ret = -ESRCH;
2438 return ret;
2439}
2440
2441static inline void
Hans Schillstrom93304192011-01-03 14:44:51 +01002442__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002443{
Changli Gao091bb342011-01-21 18:02:13 +08002444#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
Hans Schillstrom93304192011-01-03 14:44:51 +01002445 struct ip_vs_proto_data *pd;
Changli Gao091bb342011-01-21 18:02:13 +08002446#endif
Hans Schillstrom93304192011-01-03 14:44:51 +01002447
Linus Torvalds1da177e2005-04-16 15:20:36 -07002448#ifdef CONFIG_IP_VS_PROTO_TCP
Hans Schillstrom93304192011-01-03 14:44:51 +01002449 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2450 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2451 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002452#endif
2453#ifdef CONFIG_IP_VS_PROTO_UDP
Hans Schillstrom93304192011-01-03 14:44:51 +01002454 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002455 u->udp_timeout =
Hans Schillstrom93304192011-01-03 14:44:51 +01002456 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002457#endif
2458}
2459
2460
2461#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2462#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2463#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2464#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2465#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2466#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2467#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2468
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002469static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2471 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2472 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2473 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2474 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2475 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2476 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2477};
2478
2479static int
2480do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2481{
2482 unsigned char arg[128];
2483 int ret = 0;
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002484 unsigned int copylen;
Hans Schillstromfc723252011-01-03 14:44:43 +01002485 struct net *net = sock_net(sk);
Hans Schillstromf1313152011-01-03 14:44:55 +01002486 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002487
Hans Schillstromfc723252011-01-03 14:44:43 +01002488 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489 if (!capable(CAP_NET_ADMIN))
2490 return -EPERM;
2491
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002492 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2493 return -EINVAL;
2494
Linus Torvalds1da177e2005-04-16 15:20:36 -07002495 if (*len < get_arglen[GET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002496 pr_err("get_ctl: len %u < %u\n",
2497 *len, get_arglen[GET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498 return -EINVAL;
2499 }
2500
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002501 copylen = get_arglen[GET_CMDID(cmd)];
2502 if (copylen > 128)
2503 return -EINVAL;
2504
2505 if (copy_from_user(arg, user, copylen) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002506 return -EFAULT;
2507
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002508 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002509 return -ERESTARTSYS;
2510
2511 switch (cmd) {
2512 case IP_VS_SO_GET_VERSION:
2513 {
2514 char buf[64];
2515
2516 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002517 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002518 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2519 ret = -EFAULT;
2520 goto out;
2521 }
2522 *len = strlen(buf)+1;
2523 }
2524 break;
2525
2526 case IP_VS_SO_GET_INFO:
2527 {
2528 struct ip_vs_getinfo info;
2529 info.version = IP_VS_VERSION_CODE;
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002530 info.size = ip_vs_conn_tab_size;
Hans Schillstroma0840e22011-01-03 14:44:58 +01002531 info.num_services = ipvs->num_services;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002532 if (copy_to_user(user, &info, sizeof(info)) != 0)
2533 ret = -EFAULT;
2534 }
2535 break;
2536
2537 case IP_VS_SO_GET_SERVICES:
2538 {
2539 struct ip_vs_get_services *get;
2540 int size;
2541
2542 get = (struct ip_vs_get_services *)arg;
2543 size = sizeof(*get) +
2544 sizeof(struct ip_vs_service_entry) * get->num_services;
2545 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002546 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002547 ret = -EINVAL;
2548 goto out;
2549 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002550 ret = __ip_vs_get_service_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551 }
2552 break;
2553
2554 case IP_VS_SO_GET_SERVICE:
2555 {
2556 struct ip_vs_service_entry *entry;
2557 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002558 union nf_inet_addr addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559
2560 entry = (struct ip_vs_service_entry *)arg;
Julius Volzb18610d2008-09-02 15:55:37 +02002561 addr.ip = entry->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562 if (entry->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002563 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002564 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002565 svc = __ip_vs_service_find(net, AF_INET,
2566 entry->protocol, &addr,
2567 entry->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002568 if (svc) {
2569 ip_vs_copy_service(entry, svc);
2570 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2571 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002572 } else
2573 ret = -ESRCH;
2574 }
2575 break;
2576
2577 case IP_VS_SO_GET_DESTS:
2578 {
2579 struct ip_vs_get_dests *get;
2580 int size;
2581
2582 get = (struct ip_vs_get_dests *)arg;
2583 size = sizeof(*get) +
2584 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2585 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002586 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002587 ret = -EINVAL;
2588 goto out;
2589 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002590 ret = __ip_vs_get_dest_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002591 }
2592 break;
2593
2594 case IP_VS_SO_GET_TIMEOUT:
2595 {
2596 struct ip_vs_timeout_user t;
2597
Hans Schillstrom93304192011-01-03 14:44:51 +01002598 __ip_vs_get_timeouts(net, &t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002599 if (copy_to_user(user, &t, sizeof(t)) != 0)
2600 ret = -EFAULT;
2601 }
2602 break;
2603
2604 case IP_VS_SO_GET_DAEMON:
2605 {
2606 struct ip_vs_daemon_user d[2];
2607
2608 memset(&d, 0, sizeof(d));
Hans Schillstromf1313152011-01-03 14:44:55 +01002609 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002610 d[0].state = IP_VS_STATE_MASTER;
Hans Schillstromf1313152011-01-03 14:44:55 +01002611 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2612 sizeof(d[0].mcast_ifn));
2613 d[0].syncid = ipvs->master_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002614 }
Hans Schillstromf1313152011-01-03 14:44:55 +01002615 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002616 d[1].state = IP_VS_STATE_BACKUP;
Hans Schillstromf1313152011-01-03 14:44:55 +01002617 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2618 sizeof(d[1].mcast_ifn));
2619 d[1].syncid = ipvs->backup_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002620 }
2621 if (copy_to_user(user, &d, sizeof(d)) != 0)
2622 ret = -EFAULT;
2623 }
2624 break;
2625
2626 default:
2627 ret = -EINVAL;
2628 }
2629
2630 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002631 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002632 return ret;
2633}
2634
2635
2636static struct nf_sockopt_ops ip_vs_sockopts = {
2637 .pf = PF_INET,
2638 .set_optmin = IP_VS_BASE_CTL,
2639 .set_optmax = IP_VS_SO_SET_MAX+1,
2640 .set = do_ip_vs_set_ctl,
2641 .get_optmin = IP_VS_BASE_CTL,
2642 .get_optmax = IP_VS_SO_GET_MAX+1,
2643 .get = do_ip_vs_get_ctl,
Neil Horman16fcec32007-09-11 11:28:26 +02002644 .owner = THIS_MODULE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002645};
2646
Julius Volz9a812192008-08-14 14:08:44 +02002647/*
2648 * Generic Netlink interface
2649 */
2650
2651/* IPVS genetlink family */
2652static struct genl_family ip_vs_genl_family = {
2653 .id = GENL_ID_GENERATE,
2654 .hdrsize = 0,
2655 .name = IPVS_GENL_NAME,
2656 .version = IPVS_GENL_VERSION,
2657 .maxattr = IPVS_CMD_MAX,
Hans Schillstromc6d2d442011-01-03 14:45:03 +01002658 .netnsok = true, /* Make ipvsadm to work on netns */
Julius Volz9a812192008-08-14 14:08:44 +02002659};
2660
2661/* Policy used for first-level command attributes */
2662static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2663 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2664 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2665 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2666 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2667 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2668 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2669};
2670
2671/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2672static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2673 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2674 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2675 .len = IP_VS_IFNAME_MAXLEN },
2676 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2677};
2678
2679/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2680static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2681 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2682 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2683 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2684 .len = sizeof(union nf_inet_addr) },
2685 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2686 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2687 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2688 .len = IP_VS_SCHEDNAME_MAXLEN },
Simon Horman0d1e71b2010-08-22 21:37:54 +09002689 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2690 .len = IP_VS_PENAME_MAXLEN },
Julius Volz9a812192008-08-14 14:08:44 +02002691 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2692 .len = sizeof(struct ip_vs_flags) },
2693 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2694 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2695 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2696};
2697
2698/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2699static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2700 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2701 .len = sizeof(union nf_inet_addr) },
2702 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2703 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2704 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2705 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2706 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2707 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2708 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2709 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2710 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2711};
2712
2713static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2714 struct ip_vs_stats *stats)
2715{
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002716 struct ip_vs_stats_user ustats;
Julius Volz9a812192008-08-14 14:08:44 +02002717 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2718 if (!nl_stats)
2719 return -EMSGSIZE;
2720
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002721 ip_vs_copy_stats(&ustats, stats);
Julius Volz9a812192008-08-14 14:08:44 +02002722
Julian Anastasov55a3d4e2011-03-14 01:37:49 +02002723 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2724 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2725 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2726 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2727 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2728 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2729 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2730 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2731 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2732 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
Julius Volz9a812192008-08-14 14:08:44 +02002733
2734 nla_nest_end(skb, nl_stats);
2735
2736 return 0;
2737
2738nla_put_failure:
Julius Volz9a812192008-08-14 14:08:44 +02002739 nla_nest_cancel(skb, nl_stats);
2740 return -EMSGSIZE;
2741}
2742
2743static int ip_vs_genl_fill_service(struct sk_buff *skb,
2744 struct ip_vs_service *svc)
2745{
2746 struct nlattr *nl_service;
2747 struct ip_vs_flags flags = { .flags = svc->flags,
2748 .mask = ~0 };
2749
2750 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2751 if (!nl_service)
2752 return -EMSGSIZE;
2753
Julius Volzf94fd042008-09-02 15:55:55 +02002754 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
Julius Volz9a812192008-08-14 14:08:44 +02002755
2756 if (svc->fwmark) {
2757 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2758 } else {
2759 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2760 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2761 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2762 }
2763
2764 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002765 if (svc->pe)
2766 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
Julius Volz9a812192008-08-14 14:08:44 +02002767 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2768 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2769 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2770
2771 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2772 goto nla_put_failure;
2773
2774 nla_nest_end(skb, nl_service);
2775
2776 return 0;
2777
2778nla_put_failure:
2779 nla_nest_cancel(skb, nl_service);
2780 return -EMSGSIZE;
2781}
2782
2783static int ip_vs_genl_dump_service(struct sk_buff *skb,
2784 struct ip_vs_service *svc,
2785 struct netlink_callback *cb)
2786{
2787 void *hdr;
2788
2789 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2790 &ip_vs_genl_family, NLM_F_MULTI,
2791 IPVS_CMD_NEW_SERVICE);
2792 if (!hdr)
2793 return -EMSGSIZE;
2794
2795 if (ip_vs_genl_fill_service(skb, svc) < 0)
2796 goto nla_put_failure;
2797
2798 return genlmsg_end(skb, hdr);
2799
2800nla_put_failure:
2801 genlmsg_cancel(skb, hdr);
2802 return -EMSGSIZE;
2803}
2804
2805static int ip_vs_genl_dump_services(struct sk_buff *skb,
2806 struct netlink_callback *cb)
2807{
2808 int idx = 0, i;
2809 int start = cb->args[0];
2810 struct ip_vs_service *svc;
Hans Schillstromfc723252011-01-03 14:44:43 +01002811 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002812
2813 mutex_lock(&__ip_vs_mutex);
2814 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2815 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002816 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002817 continue;
2818 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2819 idx--;
2820 goto nla_put_failure;
2821 }
2822 }
2823 }
2824
2825 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2826 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002827 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002828 continue;
2829 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2830 idx--;
2831 goto nla_put_failure;
2832 }
2833 }
2834 }
2835
2836nla_put_failure:
2837 mutex_unlock(&__ip_vs_mutex);
2838 cb->args[0] = idx;
2839
2840 return skb->len;
2841}
2842
Hans Schillstromfc723252011-01-03 14:44:43 +01002843static int ip_vs_genl_parse_service(struct net *net,
2844 struct ip_vs_service_user_kern *usvc,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002845 struct nlattr *nla, int full_entry,
2846 struct ip_vs_service **ret_svc)
Julius Volz9a812192008-08-14 14:08:44 +02002847{
2848 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2849 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002850 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002851
2852 /* Parse mandatory identifying service fields first */
2853 if (nla == NULL ||
2854 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2855 return -EINVAL;
2856
2857 nla_af = attrs[IPVS_SVC_ATTR_AF];
2858 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2859 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2860 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2861 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2862
2863 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2864 return -EINVAL;
2865
Simon Horman258c8892009-12-15 17:01:25 +01002866 memset(usvc, 0, sizeof(*usvc));
2867
Julius Volzc860c6b2008-09-02 15:55:36 +02002868 usvc->af = nla_get_u16(nla_af);
Julius Volzf94fd042008-09-02 15:55:55 +02002869#ifdef CONFIG_IP_VS_IPV6
2870 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2871#else
2872 if (usvc->af != AF_INET)
2873#endif
Julius Volz9a812192008-08-14 14:08:44 +02002874 return -EAFNOSUPPORT;
2875
2876 if (nla_fwmark) {
2877 usvc->protocol = IPPROTO_TCP;
2878 usvc->fwmark = nla_get_u32(nla_fwmark);
2879 } else {
2880 usvc->protocol = nla_get_u16(nla_protocol);
2881 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2882 usvc->port = nla_get_u16(nla_port);
2883 usvc->fwmark = 0;
2884 }
2885
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002886 if (usvc->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002887 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002888 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002889 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002890 &usvc->addr, usvc->port);
2891 *ret_svc = svc;
2892
Julius Volz9a812192008-08-14 14:08:44 +02002893 /* If a full entry was requested, check for the additional fields */
2894 if (full_entry) {
Simon Horman0d1e71b2010-08-22 21:37:54 +09002895 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
Julius Volz9a812192008-08-14 14:08:44 +02002896 *nla_netmask;
2897 struct ip_vs_flags flags;
Julius Volz9a812192008-08-14 14:08:44 +02002898
2899 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
Simon Horman0d1e71b2010-08-22 21:37:54 +09002900 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
Julius Volz9a812192008-08-14 14:08:44 +02002901 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2902 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2903 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2904
2905 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2906 return -EINVAL;
2907
2908 nla_memcpy(&flags, nla_flags, sizeof(flags));
2909
2910 /* prefill flags from service if it already exists */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002911 if (svc)
Julius Volz9a812192008-08-14 14:08:44 +02002912 usvc->flags = svc->flags;
Julius Volz9a812192008-08-14 14:08:44 +02002913
2914 /* set new flags from userland */
2915 usvc->flags = (usvc->flags & ~flags.mask) |
2916 (flags.flags & flags.mask);
Julius Volzc860c6b2008-09-02 15:55:36 +02002917 usvc->sched_name = nla_data(nla_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002918 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
Julius Volz9a812192008-08-14 14:08:44 +02002919 usvc->timeout = nla_get_u32(nla_timeout);
2920 usvc->netmask = nla_get_u32(nla_netmask);
2921 }
2922
2923 return 0;
2924}
2925
Hans Schillstromfc723252011-01-03 14:44:43 +01002926static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2927 struct nlattr *nla)
Julius Volz9a812192008-08-14 14:08:44 +02002928{
Julius Volzc860c6b2008-09-02 15:55:36 +02002929 struct ip_vs_service_user_kern usvc;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002930 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002931 int ret;
2932
Hans Schillstromfc723252011-01-03 14:44:43 +01002933 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002934 return ret ? ERR_PTR(ret) : svc;
Julius Volz9a812192008-08-14 14:08:44 +02002935}
2936
2937static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2938{
2939 struct nlattr *nl_dest;
2940
2941 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2942 if (!nl_dest)
2943 return -EMSGSIZE;
2944
2945 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2946 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2947
2948 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2949 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2950 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2951 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2952 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2953 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2954 atomic_read(&dest->activeconns));
2955 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2956 atomic_read(&dest->inactconns));
2957 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2958 atomic_read(&dest->persistconns));
2959
2960 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2961 goto nla_put_failure;
2962
2963 nla_nest_end(skb, nl_dest);
2964
2965 return 0;
2966
2967nla_put_failure:
2968 nla_nest_cancel(skb, nl_dest);
2969 return -EMSGSIZE;
2970}
2971
2972static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2973 struct netlink_callback *cb)
2974{
2975 void *hdr;
2976
2977 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2978 &ip_vs_genl_family, NLM_F_MULTI,
2979 IPVS_CMD_NEW_DEST);
2980 if (!hdr)
2981 return -EMSGSIZE;
2982
2983 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2984 goto nla_put_failure;
2985
2986 return genlmsg_end(skb, hdr);
2987
2988nla_put_failure:
2989 genlmsg_cancel(skb, hdr);
2990 return -EMSGSIZE;
2991}
2992
2993static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2994 struct netlink_callback *cb)
2995{
2996 int idx = 0;
2997 int start = cb->args[0];
2998 struct ip_vs_service *svc;
2999 struct ip_vs_dest *dest;
3000 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
Hans Schillstroma0840e22011-01-03 14:44:58 +01003001 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02003002
3003 mutex_lock(&__ip_vs_mutex);
3004
3005 /* Try to find the service for which to dump destinations */
3006 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3007 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3008 goto out_err;
3009
Hans Schillstroma0840e22011-01-03 14:44:58 +01003010
Hans Schillstromfc723252011-01-03 14:44:43 +01003011 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003012 if (IS_ERR(svc) || svc == NULL)
3013 goto out_err;
3014
3015 /* Dump the destinations */
3016 list_for_each_entry(dest, &svc->destinations, n_list) {
3017 if (++idx <= start)
3018 continue;
3019 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3020 idx--;
3021 goto nla_put_failure;
3022 }
3023 }
3024
3025nla_put_failure:
3026 cb->args[0] = idx;
Julius Volz9a812192008-08-14 14:08:44 +02003027
3028out_err:
3029 mutex_unlock(&__ip_vs_mutex);
3030
3031 return skb->len;
3032}
3033
Julius Volzc860c6b2008-09-02 15:55:36 +02003034static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
Julius Volz9a812192008-08-14 14:08:44 +02003035 struct nlattr *nla, int full_entry)
3036{
3037 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3038 struct nlattr *nla_addr, *nla_port;
3039
3040 /* Parse mandatory identifying destination fields first */
3041 if (nla == NULL ||
3042 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3043 return -EINVAL;
3044
3045 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3046 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3047
3048 if (!(nla_addr && nla_port))
3049 return -EINVAL;
3050
Simon Horman258c8892009-12-15 17:01:25 +01003051 memset(udest, 0, sizeof(*udest));
3052
Julius Volz9a812192008-08-14 14:08:44 +02003053 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3054 udest->port = nla_get_u16(nla_port);
3055
3056 /* If a full entry was requested, check for the additional fields */
3057 if (full_entry) {
3058 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3059 *nla_l_thresh;
3060
3061 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3062 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3063 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3064 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3065
3066 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3067 return -EINVAL;
3068
3069 udest->conn_flags = nla_get_u32(nla_fwd)
3070 & IP_VS_CONN_F_FWD_MASK;
3071 udest->weight = nla_get_u32(nla_weight);
3072 udest->u_threshold = nla_get_u32(nla_u_thresh);
3073 udest->l_threshold = nla_get_u32(nla_l_thresh);
3074 }
3075
3076 return 0;
3077}
3078
3079static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3080 const char *mcast_ifn, __be32 syncid)
3081{
3082 struct nlattr *nl_daemon;
3083
3084 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3085 if (!nl_daemon)
3086 return -EMSGSIZE;
3087
3088 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3089 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3090 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3091
3092 nla_nest_end(skb, nl_daemon);
3093
3094 return 0;
3095
3096nla_put_failure:
3097 nla_nest_cancel(skb, nl_daemon);
3098 return -EMSGSIZE;
3099}
3100
3101static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3102 const char *mcast_ifn, __be32 syncid,
3103 struct netlink_callback *cb)
3104{
3105 void *hdr;
3106 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3107 &ip_vs_genl_family, NLM_F_MULTI,
3108 IPVS_CMD_NEW_DAEMON);
3109 if (!hdr)
3110 return -EMSGSIZE;
3111
3112 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3113 goto nla_put_failure;
3114
3115 return genlmsg_end(skb, hdr);
3116
3117nla_put_failure:
3118 genlmsg_cancel(skb, hdr);
3119 return -EMSGSIZE;
3120}
3121
3122static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3123 struct netlink_callback *cb)
3124{
Hans Schillstromf1313152011-01-03 14:44:55 +01003125 struct net *net = skb_net(skb);
3126 struct netns_ipvs *ipvs = net_ipvs(net);
3127
Julius Volz9a812192008-08-14 14:08:44 +02003128 mutex_lock(&__ip_vs_mutex);
Hans Schillstromf1313152011-01-03 14:44:55 +01003129 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
Julius Volz9a812192008-08-14 14:08:44 +02003130 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
Hans Schillstromf1313152011-01-03 14:44:55 +01003131 ipvs->master_mcast_ifn,
3132 ipvs->master_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003133 goto nla_put_failure;
3134
3135 cb->args[0] = 1;
3136 }
3137
Hans Schillstromf1313152011-01-03 14:44:55 +01003138 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
Julius Volz9a812192008-08-14 14:08:44 +02003139 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
Hans Schillstromf1313152011-01-03 14:44:55 +01003140 ipvs->backup_mcast_ifn,
3141 ipvs->backup_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003142 goto nla_put_failure;
3143
3144 cb->args[1] = 1;
3145 }
3146
3147nla_put_failure:
3148 mutex_unlock(&__ip_vs_mutex);
3149
3150 return skb->len;
3151}
3152
Hans Schillstromf1313152011-01-03 14:44:55 +01003153static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003154{
3155 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3156 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3157 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3158 return -EINVAL;
3159
Hans Schillstromf1313152011-01-03 14:44:55 +01003160 return start_sync_thread(net,
3161 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
Julius Volz9a812192008-08-14 14:08:44 +02003162 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3163 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3164}
3165
Hans Schillstromf1313152011-01-03 14:44:55 +01003166static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003167{
3168 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3169 return -EINVAL;
3170
Hans Schillstromf1313152011-01-03 14:44:55 +01003171 return stop_sync_thread(net,
3172 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
Julius Volz9a812192008-08-14 14:08:44 +02003173}
3174
Hans Schillstrom93304192011-01-03 14:44:51 +01003175static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003176{
3177 struct ip_vs_timeout_user t;
3178
Hans Schillstrom93304192011-01-03 14:44:51 +01003179 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003180
3181 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3182 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3183
3184 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3185 t.tcp_fin_timeout =
3186 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3187
3188 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3189 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3190
Hans Schillstrom93304192011-01-03 14:44:51 +01003191 return ip_vs_set_timeout(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003192}
3193
3194static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3195{
3196 struct ip_vs_service *svc = NULL;
Julius Volzc860c6b2008-09-02 15:55:36 +02003197 struct ip_vs_service_user_kern usvc;
3198 struct ip_vs_dest_user_kern udest;
Julius Volz9a812192008-08-14 14:08:44 +02003199 int ret = 0, cmd;
3200 int need_full_svc = 0, need_full_dest = 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003201 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003202 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003203
Hans Schillstromfc723252011-01-03 14:44:43 +01003204 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003205 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003206 cmd = info->genlhdr->cmd;
3207
3208 mutex_lock(&__ip_vs_mutex);
3209
3210 if (cmd == IPVS_CMD_FLUSH) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003211 ret = ip_vs_flush(net);
Julius Volz9a812192008-08-14 14:08:44 +02003212 goto out;
3213 } else if (cmd == IPVS_CMD_SET_CONFIG) {
Hans Schillstrom93304192011-01-03 14:44:51 +01003214 ret = ip_vs_genl_set_config(net, info->attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003215 goto out;
3216 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3217 cmd == IPVS_CMD_DEL_DAEMON) {
3218
3219 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3220
3221 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3222 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3223 info->attrs[IPVS_CMD_ATTR_DAEMON],
3224 ip_vs_daemon_policy)) {
3225 ret = -EINVAL;
3226 goto out;
3227 }
3228
3229 if (cmd == IPVS_CMD_NEW_DAEMON)
Hans Schillstromf1313152011-01-03 14:44:55 +01003230 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003231 else
Hans Schillstromf1313152011-01-03 14:44:55 +01003232 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003233 goto out;
3234 } else if (cmd == IPVS_CMD_ZERO &&
3235 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003236 ret = ip_vs_zero_all(net);
Julius Volz9a812192008-08-14 14:08:44 +02003237 goto out;
3238 }
3239
3240 /* All following commands require a service argument, so check if we
3241 * received a valid one. We need a full service specification when
3242 * adding / editing a service. Only identifying members otherwise. */
3243 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3244 need_full_svc = 1;
3245
Hans Schillstromfc723252011-01-03 14:44:43 +01003246 ret = ip_vs_genl_parse_service(net, &usvc,
Julius Volz9a812192008-08-14 14:08:44 +02003247 info->attrs[IPVS_CMD_ATTR_SERVICE],
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003248 need_full_svc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003249 if (ret)
3250 goto out;
3251
Julius Volz9a812192008-08-14 14:08:44 +02003252 /* Unless we're adding a new service, the service must already exist */
3253 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3254 ret = -ESRCH;
3255 goto out;
3256 }
3257
3258 /* Destination commands require a valid destination argument. For
3259 * adding / editing a destination, we need a full destination
3260 * specification. */
3261 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3262 cmd == IPVS_CMD_DEL_DEST) {
3263 if (cmd != IPVS_CMD_DEL_DEST)
3264 need_full_dest = 1;
3265
3266 ret = ip_vs_genl_parse_dest(&udest,
3267 info->attrs[IPVS_CMD_ATTR_DEST],
3268 need_full_dest);
3269 if (ret)
3270 goto out;
3271 }
3272
3273 switch (cmd) {
3274 case IPVS_CMD_NEW_SERVICE:
3275 if (svc == NULL)
Hans Schillstromfc723252011-01-03 14:44:43 +01003276 ret = ip_vs_add_service(net, &usvc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003277 else
3278 ret = -EEXIST;
3279 break;
3280 case IPVS_CMD_SET_SERVICE:
3281 ret = ip_vs_edit_service(svc, &usvc);
3282 break;
3283 case IPVS_CMD_DEL_SERVICE:
3284 ret = ip_vs_del_service(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003285 /* do not use svc, it can be freed */
Julius Volz9a812192008-08-14 14:08:44 +02003286 break;
3287 case IPVS_CMD_NEW_DEST:
3288 ret = ip_vs_add_dest(svc, &udest);
3289 break;
3290 case IPVS_CMD_SET_DEST:
3291 ret = ip_vs_edit_dest(svc, &udest);
3292 break;
3293 case IPVS_CMD_DEL_DEST:
3294 ret = ip_vs_del_dest(svc, &udest);
3295 break;
3296 case IPVS_CMD_ZERO:
3297 ret = ip_vs_zero_service(svc);
3298 break;
3299 default:
3300 ret = -EINVAL;
3301 }
3302
3303out:
Julius Volz9a812192008-08-14 14:08:44 +02003304 mutex_unlock(&__ip_vs_mutex);
3305
3306 return ret;
3307}
3308
3309static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3310{
3311 struct sk_buff *msg;
3312 void *reply;
3313 int ret, cmd, reply_cmd;
Hans Schillstromfc723252011-01-03 14:44:43 +01003314 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003315 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003316
Hans Schillstromfc723252011-01-03 14:44:43 +01003317 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003318 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003319 cmd = info->genlhdr->cmd;
3320
3321 if (cmd == IPVS_CMD_GET_SERVICE)
3322 reply_cmd = IPVS_CMD_NEW_SERVICE;
3323 else if (cmd == IPVS_CMD_GET_INFO)
3324 reply_cmd = IPVS_CMD_SET_INFO;
3325 else if (cmd == IPVS_CMD_GET_CONFIG)
3326 reply_cmd = IPVS_CMD_SET_CONFIG;
3327 else {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003328 pr_err("unknown Generic Netlink command\n");
Julius Volz9a812192008-08-14 14:08:44 +02003329 return -EINVAL;
3330 }
3331
3332 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3333 if (!msg)
3334 return -ENOMEM;
3335
3336 mutex_lock(&__ip_vs_mutex);
3337
3338 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3339 if (reply == NULL)
3340 goto nla_put_failure;
3341
3342 switch (cmd) {
3343 case IPVS_CMD_GET_SERVICE:
3344 {
3345 struct ip_vs_service *svc;
3346
Hans Schillstromfc723252011-01-03 14:44:43 +01003347 svc = ip_vs_genl_find_service(net,
3348 info->attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003349 if (IS_ERR(svc)) {
3350 ret = PTR_ERR(svc);
3351 goto out_err;
3352 } else if (svc) {
3353 ret = ip_vs_genl_fill_service(msg, svc);
Julius Volz9a812192008-08-14 14:08:44 +02003354 if (ret)
3355 goto nla_put_failure;
3356 } else {
3357 ret = -ESRCH;
3358 goto out_err;
3359 }
3360
3361 break;
3362 }
3363
3364 case IPVS_CMD_GET_CONFIG:
3365 {
3366 struct ip_vs_timeout_user t;
3367
Hans Schillstrom93304192011-01-03 14:44:51 +01003368 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003369#ifdef CONFIG_IP_VS_PROTO_TCP
3370 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3371 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3372 t.tcp_fin_timeout);
3373#endif
3374#ifdef CONFIG_IP_VS_PROTO_UDP
3375 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3376#endif
3377
3378 break;
3379 }
3380
3381 case IPVS_CMD_GET_INFO:
3382 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3383 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01003384 ip_vs_conn_tab_size);
Julius Volz9a812192008-08-14 14:08:44 +02003385 break;
3386 }
3387
3388 genlmsg_end(msg, reply);
Johannes Berg134e6372009-07-10 09:51:34 +00003389 ret = genlmsg_reply(msg, info);
Julius Volz9a812192008-08-14 14:08:44 +02003390 goto out;
3391
3392nla_put_failure:
Hannes Eder1e3e2382009-08-02 11:05:41 +00003393 pr_err("not enough space in Netlink message\n");
Julius Volz9a812192008-08-14 14:08:44 +02003394 ret = -EMSGSIZE;
3395
3396out_err:
3397 nlmsg_free(msg);
3398out:
3399 mutex_unlock(&__ip_vs_mutex);
3400
3401 return ret;
3402}
3403
3404
/*
 * Generic Netlink operations of the IPVS family, registered as a whole by
 * ip_vs_genl_register().
 *
 * Every command requires GENL_ADMIN_PERM. State-changing commands are
 * handled by ip_vs_genl_set_cmd(), single-object queries by
 * ip_vs_genl_get_cmd(), and listings by the ip_vs_genl_dump_*() dump
 * callbacks. Entries without .policy (GET_DAEMON, GET_CONFIG, GET_INFO,
 * FLUSH) do not attach ip_vs_cmd_policy.
 */
static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
	{
		.cmd	= IPVS_CMD_NEW_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		/* Single service via doit, all services via dumpit */
		.cmd	= IPVS_CMD_GET_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
		.dumpit	= ip_vs_genl_dump_services,
		.policy	= ip_vs_cmd_policy,
	},
	{
		.cmd	= IPVS_CMD_NEW_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		/* Dump only: there is no doit handler for destinations */
		.cmd	= IPVS_CMD_GET_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.dumpit	= ip_vs_genl_dump_dests,
	},
	{
		.cmd	= IPVS_CMD_NEW_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		/* Dump only: lists the running sync daemons */
		.cmd	= IPVS_CMD_GET_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.dumpit	= ip_vs_genl_dump_daemons,
	},
	{
		.cmd	= IPVS_CMD_SET_CONFIG,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_CONFIG,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_INFO,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		.cmd	= IPVS_CMD_ZERO,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_FLUSH,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
};
3500
3501static int __init ip_vs_genl_register(void)
3502{
Michał Mirosław8f698d52009-05-21 10:34:05 +00003503 return genl_register_family_with_ops(&ip_vs_genl_family,
3504 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
Julius Volz9a812192008-08-14 14:08:44 +02003505}
3506
/*
 * Unregister the IPVS Generic Netlink family; counterpart of
 * ip_vs_genl_register().
 */
static void ip_vs_genl_unregister(void)
{
	genl_unregister_family(&ip_vs_genl_family);
}
3511
3512/* End of Generic Netlink interface definitions */
3513
/*
 * per netns init/exit func.
 */
Simon Horman14e40542011-02-04 18:33:02 +09003517#ifdef CONFIG_SYSCTL
/*
 * Per-netns sysctl setup for IPVS (CONFIG_SYSCTL variant).
 *
 * Initializes the defense-related atomic and spinlocks, builds the sysctl
 * table for @net, registers it, starts the estimator for the namespace's
 * total stats and schedules the periodic defense work.
 *
 * init_net uses the static vs_vars table directly; every other namespace
 * gets its own kmemdup() copy, which is freed here if registration fails.
 *
 * Returns 0 on success, -ENOMEM if the table copy or the sysctl
 * registration fails.
 */
int __net_init __ip_vs_control_init_sysctl(struct net *net)
{
	int idx;
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ctl_table *tbl;

	atomic_set(&ipvs->dropentry, 0);
	spin_lock_init(&ipvs->dropentry_lock);
	spin_lock_init(&ipvs->droppacket_lock);
	spin_lock_init(&ipvs->securetcp_lock);

	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
		if (tbl == NULL)
			return -ENOMEM;
	} else
		tbl = vs_vars;
	/*
	 * Set the sysctl defaults and point each table entry at this
	 * namespace's variable. Entries are addressed purely by position
	 * (idx), so the sequence below presumably has to match the order of
	 * the entries in vs_vars, including the CONFIG_IP_VS_NFCT
	 * conditional -- NOTE(review): confirm against the vs_vars definition
	 * when adding or reordering entries.
	 */
	idx = 0;
	ipvs->sysctl_amemthresh = 1024;
	tbl[idx++].data = &ipvs->sysctl_amemthresh;
	ipvs->sysctl_am_droprate = 10;
	tbl[idx++].data = &ipvs->sysctl_am_droprate;
	tbl[idx++].data = &ipvs->sysctl_drop_entry;
	tbl[idx++].data = &ipvs->sysctl_drop_packet;
#ifdef CONFIG_IP_VS_NFCT
	tbl[idx++].data = &ipvs->sysctl_conntrack;
#endif
	tbl[idx++].data = &ipvs->sysctl_secure_tcp;
	ipvs->sysctl_snat_reroute = 1;
	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
	ipvs->sysctl_sync_ver = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ver;
	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
	ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
	/* sync_threshold is an array: set both .data and .maxlen of one entry */
	tbl[idx].data = &ipvs->sysctl_sync_threshold;
	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;


	ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
						     tbl);
	if (ipvs->sysctl_hdr == NULL) {
		/* Only the duplicated table is ours to free, never vs_vars */
		if (!net_eq(net, &init_net))
			kfree(tbl);
		return -ENOMEM;
	}
	ip_vs_start_estimator(net, &ipvs->tot_stats);
	/* Remember which table was registered for this namespace */
	ipvs->sysctl_tbl = tbl;
	/* Schedule defense work: first run after DEFENSE_TIMER_PERIOD */
	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);

	return 0;
}
3576
3577void __net_init __ip_vs_control_cleanup_sysctl(struct net *net)
3578{
3579 struct netns_ipvs *ipvs = net_ipvs(net);
3580
3581 cancel_delayed_work_sync(&ipvs->defense_work);
3582 cancel_work_sync(&ipvs->defense_work.work);
3583 unregister_net_sysctl_table(ipvs->sysctl_hdr);
3584}
3585
3586#else
3587
3588int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
3589void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { }
3590
3591#endif
3592
3593int __net_init __ip_vs_control_init(struct net *net)
3594{
3595 int idx;
3596 struct netns_ipvs *ipvs = net_ipvs(net);
3597
3598 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3599
3600 /* Initialize rs_table */
3601 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3602 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3603
3604 INIT_LIST_HEAD(&ipvs->dest_trash);
3605 atomic_set(&ipvs->ftpsvc_counter, 0);
3606 atomic_set(&ipvs->nullsvc_counter, 0);
3607
3608 /* procfs stats */
3609 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3610 if (ipvs->tot_stats.cpustats) {
3611 pr_err("%s(): alloc_percpu.\n", __func__);
3612 return -ENOMEM;
3613 }
3614 spin_lock_init(&ipvs->tot_stats.lock);
3615
3616 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3617 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3618 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3619 &ip_vs_stats_percpu_fops);
3620
3621 if (__ip_vs_control_init_sysctl(net))
3622 goto err;
3623
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003624 return 0;
3625
Simon Horman14e40542011-02-04 18:33:02 +09003626err:
Julian Anastasov2a0751a2011-03-04 12:20:35 +02003627 free_percpu(ipvs->tot_stats.cpustats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003628 return -ENOMEM;
3629}
3630
/*
 * Per-netns teardown of the IPVS control state (pernet ->exit); releases
 * what __ip_vs_control_init() and the sysctl setup created for @net.
 */
static void __net_exit __ip_vs_control_cleanup(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	ip_vs_trash_cleanup(net);
	/*
	 * NOTE(review): the matching ip_vs_start_estimator() call lives in
	 * the CONFIG_SYSCTL variant of __ip_vs_control_init_sysctl(), while
	 * this stop is unconditional -- confirm that is intended for
	 * !CONFIG_SYSCTL builds.
	 */
	ip_vs_stop_estimator(net, &ipvs->tot_stats);
	__ip_vs_control_cleanup_sysctl(net);
	/* Remove the proc entries in reverse order of their creation */
	proc_net_remove(net, "ip_vs_stats_percpu");
	proc_net_remove(net, "ip_vs_stats");
	proc_net_remove(net, "ip_vs");
	/* Free the per-cpu stats last, after the proc entries are removed */
	free_percpu(ipvs->tot_stats.cpustats);
}
3643
/*
 * Per network namespace hooks for the IPVS control code; registered from
 * ip_vs_control_init() and unregistered from ip_vs_control_cleanup().
 */
static struct pernet_operations ipvs_control_ops = {
	.init = __ip_vs_control_init,
	.exit = __ip_vs_control_cleanup,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003648
Sven Wegener048cf482008-08-10 18:24:35 +00003649int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003650{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003651 int idx;
Hans Schillstromfc723252011-01-03 14:44:43 +01003652 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003653
3654 EnterFunction(2);
3655
Hans Schillstromfc723252011-01-03 14:44:43 +01003656 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003657 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3658 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3659 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3660 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003661
3662 ret = register_pernet_subsys(&ipvs_control_ops);
3663 if (ret) {
3664 pr_err("cannot register namespace.\n");
3665 goto err;
Eduardo Blancod86bef72010-10-19 10:26:47 +01003666 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003667
3668 smp_wmb(); /* Do we really need it now ? */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003669
Linus Torvalds1da177e2005-04-16 15:20:36 -07003670 ret = nf_register_sockopt(&ip_vs_sockopts);
3671 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003672 pr_err("cannot register sockopt.\n");
Hans Schillstromfc723252011-01-03 14:44:43 +01003673 goto err_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003674 }
3675
Julius Volz9a812192008-08-14 14:08:44 +02003676 ret = ip_vs_genl_register();
3677 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003678 pr_err("cannot register Generic Netlink interface.\n");
Julius Volz9a812192008-08-14 14:08:44 +02003679 nf_unregister_sockopt(&ip_vs_sockopts);
Hans Schillstromfc723252011-01-03 14:44:43 +01003680 goto err_net;
Julius Volz9a812192008-08-14 14:08:44 +02003681 }
3682
Linus Torvalds1da177e2005-04-16 15:20:36 -07003683 LeaveFunction(2);
3684 return 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003685
3686err_net:
3687 unregister_pernet_subsys(&ipvs_control_ops);
3688err:
3689 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003690}
3691
3692
/*
 * Module-wide teardown of the IPVS control interface: unregisters the
 * per-netns operations, the Generic Netlink family and the sockopt
 * interface that ip_vs_control_init() registered.
 *
 * NOTE(review): the per-netns state is torn down before the Generic
 * Netlink and sockopt entry points are removed, i.e. not in the reverse
 * order of ip_vs_control_init(). Confirm that no request can still arrive
 * through those interfaces while the per-netns state is going away.
 */
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	unregister_pernet_subsys(&ipvs_control_ops);
	ip_vs_genl_unregister();
	nf_unregister_sockopt(&ip_vs_sockopts);
	LeaveFunction(2);
}