blob: 183ac18bded5433c4613c71c3f057c8eda5dd013 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
Hannes Eder9aada7a2009-07-30 14:29:44 -070021#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/seq_file.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090034#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
Ingo Molnar14cc3e22006-03-26 01:37:14 -080038#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020040#include <net/net_namespace.h>
Hans Schillstrom93304192011-01-03 14:44:51 +010041#include <linux/nsproxy.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <net/ip.h>
Vince Busam09571c72008-09-02 15:55:52 +020043#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020047#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#include <net/sock.h>
Julius Volz9a812192008-08-14 14:08:44 +020049#include <net/genetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
/* Serializes the IPVS sockopt handlers; [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* Read/write lock protecting the service table. */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
Linus Torvalds1da177e2005-04-16 15:20:36 -070061/* sysctl variables */
Linus Torvalds1da177e2005-04-16 15:20:36 -070062
#ifdef CONFIG_IP_VS_DEBUG
/* Debug level; only present when CONFIG_IP_VS_DEBUG is enabled. */
static int sysctl_ip_vs_debug_level = 0;

/* Return the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif /* CONFIG_IP_VS_DEBUG */
71
#ifdef CONFIG_IP_VS_IPV6
/*
 *	Decide whether an IPv6 address is local by looking up a route to it
 *	in init_net and checking whether the output device is a loopback
 *	device.
 *	Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 *
 *	Returns 1 when the route's device has IFF_LOOPBACK set, 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct flowi fl = {
		.oif = 0,
		.fl6_dst = *addr,
		.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
	};
	struct dst_entry *dst = ip6_route_output(&init_net, NULL, &fl);
	int is_local;

	/*
	 * A lookup that ended in an error route says nothing about the
	 * address, so it must not be classified as local.
	 */
	is_local = dst && !dst->error && dst->dev &&
		   (dst->dev->flags & IFF_LOOPBACK);

	/* The lookup returns a referenced dst entry; drop that reference. */
	dst_release(dst);

	return is_local;
}
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -070090/*
Julian Anastasovaf9debd2005-07-11 20:59:57 -070091 * update_defense_level is called from keventd and from sysctl,
92 * so it needs to protect itself from softirqs
Linus Torvalds1da177e2005-04-16 15:20:36 -070093 */
Hans Schillstrom93304192011-01-03 14:44:51 +010094static void update_defense_level(struct netns_ipvs *ipvs)
Linus Torvalds1da177e2005-04-16 15:20:36 -070095{
96 struct sysinfo i;
97 static int old_secure_tcp = 0;
98 int availmem;
99 int nomem;
100 int to_change = -1;
101
102 /* we only count free and buffered memory (in pages) */
103 si_meminfo(&i);
104 availmem = i.freeram + i.bufferram;
105 /* however in linux 2.5 the i.bufferram is total page cache size,
106 we need adjust it */
107 /* si_swapinfo(&i); */
108 /* availmem = availmem - (i.totalswap - i.freeswap); */
109
Hans Schillstroma0840e22011-01-03 14:44:58 +0100110 nomem = (availmem < ipvs->sysctl_amemthresh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700112 local_bh_disable();
113
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114 /* drop_entry */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100115 spin_lock(&ipvs->dropentry_lock);
116 switch (ipvs->sysctl_drop_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100118 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119 break;
120 case 1:
121 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100122 atomic_set(&ipvs->dropentry, 1);
123 ipvs->sysctl_drop_entry = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100125 atomic_set(&ipvs->dropentry, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126 }
127 break;
128 case 2:
129 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100130 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100132 atomic_set(&ipvs->dropentry, 0);
133 ipvs->sysctl_drop_entry = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 };
135 break;
136 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100137 atomic_set(&ipvs->dropentry, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138 break;
139 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100140 spin_unlock(&ipvs->dropentry_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141
142 /* drop_packet */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100143 spin_lock(&ipvs->droppacket_lock);
144 switch (ipvs->sysctl_drop_packet) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145 case 0:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100146 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 break;
148 case 1:
149 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100150 ipvs->drop_rate = ipvs->drop_counter
151 = ipvs->sysctl_amemthresh /
152 (ipvs->sysctl_amemthresh-availmem);
153 ipvs->sysctl_drop_packet = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700154 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100155 ipvs->drop_rate = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156 }
157 break;
158 case 2:
159 if (nomem) {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100160 ipvs->drop_rate = ipvs->drop_counter
161 = ipvs->sysctl_amemthresh /
162 (ipvs->sysctl_amemthresh-availmem);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 } else {
Hans Schillstroma0840e22011-01-03 14:44:58 +0100164 ipvs->drop_rate = 0;
165 ipvs->sysctl_drop_packet = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 }
167 break;
168 case 3:
Hans Schillstroma0840e22011-01-03 14:44:58 +0100169 ipvs->drop_rate = ipvs->sysctl_am_droprate;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700170 break;
171 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100172 spin_unlock(&ipvs->droppacket_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173
174 /* secure_tcp */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100175 spin_lock(&ipvs->securetcp_lock);
176 switch (ipvs->sysctl_secure_tcp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 case 0:
178 if (old_secure_tcp >= 2)
179 to_change = 0;
180 break;
181 case 1:
182 if (nomem) {
183 if (old_secure_tcp < 2)
184 to_change = 1;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100185 ipvs->sysctl_secure_tcp = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186 } else {
187 if (old_secure_tcp >= 2)
188 to_change = 0;
189 }
190 break;
191 case 2:
192 if (nomem) {
193 if (old_secure_tcp < 2)
194 to_change = 1;
195 } else {
196 if (old_secure_tcp >= 2)
197 to_change = 0;
Hans Schillstroma0840e22011-01-03 14:44:58 +0100198 ipvs->sysctl_secure_tcp = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 }
200 break;
201 case 3:
202 if (old_secure_tcp < 2)
203 to_change = 1;
204 break;
205 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100206 old_secure_tcp = ipvs->sysctl_secure_tcp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 if (to_change >= 0)
Hans Schillstrom93304192011-01-03 14:44:51 +0100208 ip_vs_protocol_timeout_change(ipvs,
Hans Schillstroma0840e22011-01-03 14:44:58 +0100209 ipvs->sysctl_secure_tcp > 1);
210 spin_unlock(&ipvs->securetcp_lock);
Julian Anastasovaf9debd2005-07-11 20:59:57 -0700211
212 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213}
214
215
/*
 *	Timer for checking the defense
 *
 *	The period is parenthesized so the macro expands safely inside
 *	any surrounding expression.
 */
#define DEFENSE_TIMER_PERIOD	(1*HZ)
static void defense_work_handler(struct work_struct *work);
static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222
David Howellsc4028952006-11-22 14:57:56 +0000223static void defense_work_handler(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224{
Hans Schillstromb17fc992011-01-03 14:44:56 +0100225 struct netns_ipvs *ipvs = net_ipvs(&init_net);
Hans Schillstrom93304192011-01-03 14:44:51 +0100226
227 update_defense_level(ipvs);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100228 if (atomic_read(&ipvs->dropentry))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229 ip_vs_random_dropentry();
230
231 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
232}
233
234int
235ip_vs_use_count_inc(void)
236{
237 return try_module_get(THIS_MODULE);
238}
239
240void
241ip_vs_use_count_dec(void)
242{
243 module_put(THIS_MODULE);
244}
245
246
247/*
248 * Hash table: for virtual service lookups
249 */
250#define IP_VS_SVC_TAB_BITS 8
251#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
252#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
253
254/* the service table hashed by <protocol, addr, port> */
255static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
256/* the service table hashed by fwmark */
257static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
258
259/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 * Trash for destinations
261 */
262static LIST_HEAD(ip_vs_dest_trash);
263
264/*
265 * FTP & NULL virtual service counters
266 */
267static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
268static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
269
270
271/*
272 * Returns hash value for virtual service
273 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100274static inline unsigned
275ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
276 const union nf_inet_addr *addr, __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277{
278 register unsigned porth = ntohs(port);
Julius Volzb18610d2008-09-02 15:55:37 +0200279 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280
Julius Volzb18610d2008-09-02 15:55:37 +0200281#ifdef CONFIG_IP_VS_IPV6
282 if (af == AF_INET6)
283 addr_fold = addr->ip6[0]^addr->ip6[1]^
284 addr->ip6[2]^addr->ip6[3];
285#endif
Hans Schillstromfc723252011-01-03 14:44:43 +0100286 addr_fold ^= ((size_t)net>>8);
Julius Volzb18610d2008-09-02 15:55:37 +0200287
288 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 & IP_VS_SVC_TAB_MASK;
290}
291
292/*
293 * Returns hash value of fwmark for virtual service lookup
294 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100295static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296{
Hans Schillstromfc723252011-01-03 14:44:43 +0100297 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298}
299
300/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100301 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 * or in the ip_vs_svc_fwm_table by fwmark.
303 * Should be called with locked tables.
304 */
305static int ip_vs_svc_hash(struct ip_vs_service *svc)
306{
307 unsigned hash;
308
309 if (svc->flags & IP_VS_SVC_F_HASHED) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000310 pr_err("%s(): request for already hashed, called from %pF\n",
311 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312 return 0;
313 }
314
315 if (svc->fwmark == 0) {
316 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100317 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100319 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
320 &svc->addr, svc->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
322 } else {
323 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100324 * Hash it by fwmark in svc_fwm_table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100326 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
328 }
329
330 svc->flags |= IP_VS_SVC_F_HASHED;
331 /* increase its refcnt because it is referenced by the svc table */
332 atomic_inc(&svc->refcnt);
333 return 1;
334}
335
336
337/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100338 * Unhashes a service from svc_table / svc_fwm_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 * Should be called with locked tables.
340 */
341static int ip_vs_svc_unhash(struct ip_vs_service *svc)
342{
343 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000344 pr_err("%s(): request for unhash flagged, called from %pF\n",
345 __func__, __builtin_return_address(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346 return 0;
347 }
348
349 if (svc->fwmark == 0) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100350 /* Remove it from the svc_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351 list_del(&svc->s_list);
352 } else {
Hans Schillstromfc723252011-01-03 14:44:43 +0100353 /* Remove it from the svc_fwm_table table */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 list_del(&svc->f_list);
355 }
356
357 svc->flags &= ~IP_VS_SVC_F_HASHED;
358 atomic_dec(&svc->refcnt);
359 return 1;
360}
361
362
363/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100364 * Get service by {netns, proto,addr,port} in the service table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 */
Julius Volzb18610d2008-09-02 15:55:37 +0200366static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100367__ip_vs_service_find(struct net *net, int af, __u16 protocol,
368 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369{
370 unsigned hash;
371 struct ip_vs_service *svc;
372
373 /* Check for "full" addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100374 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375
376 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
Julius Volzb18610d2008-09-02 15:55:37 +0200377 if ((svc->af == af)
378 && ip_vs_addr_equal(af, &svc->addr, vaddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 && (svc->port == vport)
Hans Schillstromfc723252011-01-03 14:44:43 +0100380 && (svc->protocol == protocol)
381 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 return svc;
384 }
385 }
386
387 return NULL;
388}
389
390
391/*
392 * Get service by {fwmark} in the service table.
393 */
Julius Volzb18610d2008-09-02 15:55:37 +0200394static inline struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100395__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396{
397 unsigned hash;
398 struct ip_vs_service *svc;
399
400 /* Check for fwmark addressed entries */
Hans Schillstromfc723252011-01-03 14:44:43 +0100401 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402
403 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +0100404 if (svc->fwmark == fwmark && svc->af == af
405 && net_eq(svc->net, net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406 /* HIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 return svc;
408 }
409 }
410
411 return NULL;
412}
413
414struct ip_vs_service *
Hans Schillstromfc723252011-01-03 14:44:43 +0100415ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
Julius Volz3c2e0502008-09-02 15:55:38 +0200416 const union nf_inet_addr *vaddr, __be16 vport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417{
418 struct ip_vs_service *svc;
Julius Volz3c2e0502008-09-02 15:55:38 +0200419
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 read_lock(&__ip_vs_svc_lock);
421
422 /*
423 * Check the table hashed by fwmark first
424 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100425 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
426 if (fwmark && svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 goto out;
428
429 /*
430 * Check the table hashed by <protocol,addr,port>
431 * for "full" addressed entries
432 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100433 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434
435 if (svc == NULL
436 && protocol == IPPROTO_TCP
437 && atomic_read(&ip_vs_ftpsvc_counter)
438 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
439 /*
440 * Check if ftp service entry exists, the packet
441 * might belong to FTP data connections.
442 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100443 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444 }
445
446 if (svc == NULL
447 && atomic_read(&ip_vs_nullsvc_counter)) {
448 /*
449 * Check if the catch-all port (port zero) exists
450 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100451 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 }
453
454 out:
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200455 if (svc)
456 atomic_inc(&svc->usecnt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457 read_unlock(&__ip_vs_svc_lock);
458
Julius Volz3c2e0502008-09-02 15:55:38 +0200459 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
460 fwmark, ip_vs_proto_name(protocol),
461 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
462 svc ? "hit" : "not hit");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463
464 return svc;
465}
466
467
468static inline void
469__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
470{
471 atomic_inc(&svc->refcnt);
472 dest->svc = svc;
473}
474
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200475static void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476__ip_vs_unbind_svc(struct ip_vs_dest *dest)
477{
478 struct ip_vs_service *svc = dest->svc;
479
480 dest->svc = NULL;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200481 if (atomic_dec_and_test(&svc->refcnt)) {
482 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
483 svc->fwmark,
484 IP_VS_DBG_ADDR(svc->af, &svc->addr),
485 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +0100486 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200488 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700489}
490
491
492/*
493 * Returns hash value for real service
494 */
Julius Volz7937df12008-09-02 15:55:48 +0200495static inline unsigned ip_vs_rs_hashkey(int af,
496 const union nf_inet_addr *addr,
497 __be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498{
499 register unsigned porth = ntohs(port);
Julius Volz7937df12008-09-02 15:55:48 +0200500 __be32 addr_fold = addr->ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501
Julius Volz7937df12008-09-02 15:55:48 +0200502#ifdef CONFIG_IP_VS_IPV6
503 if (af == AF_INET6)
504 addr_fold = addr->ip6[0]^addr->ip6[1]^
505 addr->ip6[2]^addr->ip6[3];
506#endif
507
508 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509 & IP_VS_RTAB_MASK;
510}
511
512/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100513 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514 * should be called with locked tables.
515 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100516static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517{
518 unsigned hash;
519
520 if (!list_empty(&dest->d_list)) {
521 return 0;
522 }
523
524 /*
525 * Hash by proto,addr,port,
526 * which are the parameters of the real service.
527 */
Julius Volz7937df12008-09-02 15:55:48 +0200528 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
529
Hans Schillstromfc723252011-01-03 14:44:43 +0100530 list_add(&dest->d_list, &ipvs->rs_table[hash]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531
532 return 1;
533}
534
535/*
Hans Schillstromfc723252011-01-03 14:44:43 +0100536 * UNhashes ip_vs_dest from rs_table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 * should be called with locked tables.
538 */
539static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
540{
541 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100542 * Remove it from the rs_table table.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543 */
544 if (!list_empty(&dest->d_list)) {
545 list_del(&dest->d_list);
546 INIT_LIST_HEAD(&dest->d_list);
547 }
548
549 return 1;
550}
551
552/*
553 * Lookup real service by <proto,addr,port> in the real service table.
554 */
555struct ip_vs_dest *
Hans Schillstromfc723252011-01-03 14:44:43 +0100556ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
Julius Volz7937df12008-09-02 15:55:48 +0200557 const union nf_inet_addr *daddr,
558 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559{
Hans Schillstromfc723252011-01-03 14:44:43 +0100560 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561 unsigned hash;
562 struct ip_vs_dest *dest;
563
564 /*
565 * Check for "full" addressed entries
566 * Return the first found entry
567 */
Julius Volz7937df12008-09-02 15:55:48 +0200568 hash = ip_vs_rs_hashkey(af, daddr, dport);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569
Hans Schillstroma0840e22011-01-03 14:44:58 +0100570 read_lock(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100571 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200572 if ((dest->af == af)
573 && ip_vs_addr_equal(af, &dest->addr, daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574 && (dest->port == dport)
575 && ((dest->protocol == protocol) ||
576 dest->vfwmark)) {
577 /* HIT */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100578 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 return dest;
580 }
581 }
Hans Schillstroma0840e22011-01-03 14:44:58 +0100582 read_unlock(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583
584 return NULL;
585}
586
587/*
588 * Lookup destination by {addr,port} in the given service
589 */
590static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200591ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
592 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593{
594 struct ip_vs_dest *dest;
595
596 /*
597 * Find the destination for the given service
598 */
599 list_for_each_entry(dest, &svc->destinations, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200600 if ((dest->af == svc->af)
601 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
602 && (dest->port == dport)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 /* HIT */
604 return dest;
605 }
606 }
607
608 return NULL;
609}
610
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800611/*
612 * Find destination by {daddr,dport,vaddr,protocol}
613 * Cretaed to be used in ip_vs_process_message() in
614 * the backup synchronization daemon. It finds the
615 * destination to be bound to the received connection
616 * on the backup.
617 *
618 * ip_vs_lookup_real_service() looked promissing, but
619 * seems not working as expected.
620 */
Hans Schillstromfc723252011-01-03 14:44:43 +0100621struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
622 const union nf_inet_addr *daddr,
Julius Volz7937df12008-09-02 15:55:48 +0200623 __be16 dport,
624 const union nf_inet_addr *vaddr,
Hans Schillstrom0e051e62010-11-19 14:25:07 +0100625 __be16 vport, __u16 protocol, __u32 fwmark)
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800626{
627 struct ip_vs_dest *dest;
628 struct ip_vs_service *svc;
629
Hans Schillstromfc723252011-01-03 14:44:43 +0100630 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
Rumen G. Bogdanovski1e356f92007-11-07 02:35:54 -0800631 if (!svc)
632 return NULL;
633 dest = ip_vs_lookup_dest(svc, daddr, dport);
634 if (dest)
635 atomic_inc(&dest->refcnt);
636 ip_vs_service_put(svc);
637 return dest;
638}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639
640/*
641 * Lookup dest by {svc,addr,port} in the destination trash.
642 * The destination trash is used to hold the destinations that are removed
643 * from the service table but are still referenced by some conn entries.
644 * The reason to add the destination trash is when the dest is temporary
645 * down (either by administrator or by monitor program), the dest can be
646 * picked back from the trash, the remaining connections to the dest can
647 * continue, and the counting information of the dest is also useful for
648 * scheduling.
649 */
650static struct ip_vs_dest *
Julius Volz7937df12008-09-02 15:55:48 +0200651ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
652 __be16 dport)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653{
654 struct ip_vs_dest *dest, *nxt;
655
656 /*
657 * Find the destination in trash
658 */
659 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
Julius Volz7937df12008-09-02 15:55:48 +0200660 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
661 "dest->refcnt=%d\n",
662 dest->vfwmark,
663 IP_VS_DBG_ADDR(svc->af, &dest->addr),
664 ntohs(dest->port),
665 atomic_read(&dest->refcnt));
666 if (dest->af == svc->af &&
667 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 dest->port == dport &&
669 dest->vfwmark == svc->fwmark &&
670 dest->protocol == svc->protocol &&
671 (svc->fwmark ||
Julius Volz7937df12008-09-02 15:55:48 +0200672 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 dest->vport == svc->port))) {
674 /* HIT */
675 return dest;
676 }
677
678 /*
679 * Try to purge the destination from trash if not referenced
680 */
681 if (atomic_read(&dest->refcnt) == 1) {
Julius Volz7937df12008-09-02 15:55:48 +0200682 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
683 "from trash\n",
684 dest->vfwmark,
685 IP_VS_DBG_ADDR(svc->af, &dest->addr),
686 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687 list_del(&dest->n_list);
688 ip_vs_dst_reset(dest);
689 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100690 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691 kfree(dest);
692 }
693 }
694
695 return NULL;
696}
697
698
699/*
700 * Clean up all the destinations in the trash
701 * Called by the ip_vs_control_cleanup()
702 *
703 * When the ip_vs_control_clearup is activated by ipvs module exit,
704 * the service tables must have been flushed and all the connections
705 * are expired, and the refcnt of each destination in the trash must
706 * be 1, so we simply release them here.
707 */
708static void ip_vs_trash_cleanup(void)
709{
710 struct ip_vs_dest *dest, *nxt;
711
712 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
713 list_del(&dest->n_list);
714 ip_vs_dst_reset(dest);
715 __ip_vs_unbind_svc(dest);
Hans Schillstromb17fc992011-01-03 14:44:56 +0100716 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717 kfree(dest);
718 }
719}
720
721
722static void
723ip_vs_zero_stats(struct ip_vs_stats *stats)
724{
725 spin_lock_bh(&stats->lock);
Simon Hormane93615d2008-08-11 17:19:14 +1000726
Sven Wegenere9c0ce22008-09-08 13:39:04 +0200727 memset(&stats->ustats, 0, sizeof(stats->ustats));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728 ip_vs_zero_estimator(stats);
Simon Hormane93615d2008-08-11 17:19:14 +1000729
Sven Wegener3a14a3132008-08-10 18:24:41 +0000730 spin_unlock_bh(&stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731}
732
733/*
734 * Update a destination in the given service
735 */
736static void
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200737__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
738 struct ip_vs_dest_user_kern *udest, int add)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739{
Hans Schillstromfc723252011-01-03 14:44:43 +0100740 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700741 int conn_flags;
742
743 /* set the weight and the flags */
744 atomic_set(&dest->weight, udest->weight);
Julian Anastasov35757922010-09-17 14:18:16 +0200745 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
746 conn_flags |= IP_VS_CONN_F_INACTIVE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
Julian Anastasov35757922010-09-17 14:18:16 +0200749 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
751 } else {
752 /*
Hans Schillstromfc723252011-01-03 14:44:43 +0100753 * Put the real service in rs_table if not present.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 * For now only for NAT!
755 */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100756 write_lock_bh(&ipvs->rs_lock);
Hans Schillstromfc723252011-01-03 14:44:43 +0100757 ip_vs_rs_hash(ipvs, dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100758 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 }
760 atomic_set(&dest->conn_flags, conn_flags);
761
762 /* bind the service */
763 if (!dest->svc) {
764 __ip_vs_bind_svc(dest, svc);
765 } else {
766 if (dest->svc != svc) {
767 __ip_vs_unbind_svc(dest);
768 ip_vs_zero_stats(&dest->stats);
769 __ip_vs_bind_svc(dest, svc);
770 }
771 }
772
773 /* set the dest status flags */
774 dest->flags |= IP_VS_DEST_F_AVAILABLE;
775
776 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
777 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
778 dest->u_threshold = udest->u_threshold;
779 dest->l_threshold = udest->l_threshold;
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200780
Julian Anastasovfc604762010-10-17 16:38:15 +0300781 spin_lock(&dest->dst_lock);
782 ip_vs_dst_reset(dest);
783 spin_unlock(&dest->dst_lock);
784
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200785 if (add)
Hans Schillstrom29c20262011-01-03 14:44:54 +0100786 ip_vs_new_estimator(svc->net, &dest->stats);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200787
788 write_lock_bh(&__ip_vs_svc_lock);
789
790 /* Wait until all other svc users go away */
791 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
792
793 if (add) {
794 list_add(&dest->n_list, &svc->destinations);
795 svc->num_dests++;
796 }
797
798 /* call the update_service, because server weight may be changed */
799 if (svc->scheduler->update_service)
800 svc->scheduler->update_service(svc);
801
802 write_unlock_bh(&__ip_vs_svc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803}
804
805
806/*
807 * Create a destination for the given service
808 */
809static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200810ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811 struct ip_vs_dest **dest_p)
812{
813 struct ip_vs_dest *dest;
814 unsigned atype;
815
816 EnterFunction(2);
817
Vince Busam09571c72008-09-02 15:55:52 +0200818#ifdef CONFIG_IP_VS_IPV6
819 if (svc->af == AF_INET6) {
820 atype = ipv6_addr_type(&udest->addr.in6);
Sven Wegener3bfb92f2008-09-05 16:53:49 +0200821 if ((!(atype & IPV6_ADDR_UNICAST) ||
822 atype & IPV6_ADDR_LINKLOCAL) &&
Vince Busam09571c72008-09-02 15:55:52 +0200823 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
824 return -EINVAL;
825 } else
826#endif
827 {
828 atype = inet_addr_type(&init_net, udest->addr.ip);
829 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
830 return -EINVAL;
831 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832
Simon Hormandee06e42010-08-26 02:54:31 +0000833 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000835 pr_err("%s(): no memory.\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836 return -ENOMEM;
837 }
Hans Schillstromb17fc992011-01-03 14:44:56 +0100838 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
839 if (!dest->stats.cpustats) {
840 pr_err("%s() alloc_percpu failed\n", __func__);
841 goto err_alloc;
842 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843
Julius Volzc860c6b2008-09-02 15:55:36 +0200844 dest->af = svc->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845 dest->protocol = svc->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +0200846 dest->vaddr = svc->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 dest->vport = svc->port;
848 dest->vfwmark = svc->fwmark;
Julius Volzc860c6b2008-09-02 15:55:36 +0200849 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850 dest->port = udest->port;
851
852 atomic_set(&dest->activeconns, 0);
853 atomic_set(&dest->inactconns, 0);
854 atomic_set(&dest->persistconns, 0);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200855 atomic_set(&dest->refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856
857 INIT_LIST_HEAD(&dest->d_list);
858 spin_lock_init(&dest->dst_lock);
859 spin_lock_init(&dest->stats.lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200860 __ip_vs_update_dest(svc, dest, udest, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861
862 *dest_p = dest;
863
864 LeaveFunction(2);
865 return 0;
Hans Schillstromb17fc992011-01-03 14:44:56 +0100866
867err_alloc:
868 kfree(dest);
869 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870}
871
872
873/*
874 * Add a destination into an existing service
875 */
876static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200877ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878{
879 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200880 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700881 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700882 int ret;
883
884 EnterFunction(2);
885
886 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000887 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888 return -ERANGE;
889 }
890
891 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000892 pr_err("%s(): lower threshold is higher than upper threshold\n",
893 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700894 return -ERANGE;
895 }
896
Julius Volzc860c6b2008-09-02 15:55:36 +0200897 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
898
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 /*
900 * Check if the dest already exists in the list
901 */
Julius Volz7937df12008-09-02 15:55:48 +0200902 dest = ip_vs_lookup_dest(svc, &daddr, dport);
903
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 if (dest != NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000905 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906 return -EEXIST;
907 }
908
909 /*
910 * Check if the dest already exists in the trash and
911 * is from the same service
912 */
Julius Volz7937df12008-09-02 15:55:48 +0200913 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
914
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 if (dest != NULL) {
Julius Volzcfc78c52008-09-02 15:55:53 +0200916 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
917 "dest->refcnt=%d, service %u/%s:%u\n",
918 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
919 atomic_read(&dest->refcnt),
920 dest->vfwmark,
921 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
922 ntohs(dest->vport));
923
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924 /*
925 * Get the destination from the trash
926 */
927 list_del(&dest->n_list);
928
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200929 __ip_vs_update_dest(svc, dest, udest, 1);
930 ret = 0;
931 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932 /*
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200933 * Allocate and initialize the dest structure
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200935 ret = ip_vs_new_dest(svc, udest, &dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937 LeaveFunction(2);
938
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200939 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700940}
941
942
943/*
944 * Edit a destination in the given service
945 */
946static int
Julius Volzc860c6b2008-09-02 15:55:36 +0200947ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948{
949 struct ip_vs_dest *dest;
Julius Volzc860c6b2008-09-02 15:55:36 +0200950 union nf_inet_addr daddr;
Al Viro014d7302006-09-28 14:29:52 -0700951 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952
953 EnterFunction(2);
954
955 if (udest->weight < 0) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000956 pr_err("%s(): server weight less than zero\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 return -ERANGE;
958 }
959
960 if (udest->l_threshold > udest->u_threshold) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000961 pr_err("%s(): lower threshold is higher than upper threshold\n",
962 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 return -ERANGE;
964 }
965
Julius Volzc860c6b2008-09-02 15:55:36 +0200966 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
967
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 /*
969 * Lookup the destination list
970 */
Julius Volz7937df12008-09-02 15:55:48 +0200971 dest = ip_vs_lookup_dest(svc, &daddr, dport);
972
Linus Torvalds1da177e2005-04-16 15:20:36 -0700973 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000974 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975 return -ENOENT;
976 }
977
Julian Anastasov26c15cf2010-09-21 18:12:30 +0200978 __ip_vs_update_dest(svc, dest, udest, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979 LeaveFunction(2);
980
981 return 0;
982}
983
984
985/*
986 * Delete a destination (must be already unlinked from the service)
987 */
Hans Schillstrom29c20262011-01-03 14:44:54 +0100988static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989{
Hans Schillstroma0840e22011-01-03 14:44:58 +0100990 struct netns_ipvs *ipvs = net_ipvs(net);
991
Hans Schillstrom29c20262011-01-03 14:44:54 +0100992 ip_vs_kill_estimator(net, &dest->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993
994 /*
995 * Remove it from the d-linked list with the real services.
996 */
Hans Schillstroma0840e22011-01-03 14:44:58 +0100997 write_lock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 ip_vs_rs_unhash(dest);
Hans Schillstroma0840e22011-01-03 14:44:58 +0100999 write_unlock_bh(&ipvs->rs_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000
1001 /*
1002 * Decrease the refcnt of the dest, and free the dest
1003 * if nobody refers to it (refcnt=0). Otherwise, throw
1004 * the destination into the trash.
1005 */
1006 if (atomic_dec_and_test(&dest->refcnt)) {
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001007 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1008 dest->vfwmark,
1009 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1010 ntohs(dest->port));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011 ip_vs_dst_reset(dest);
1012 /* simply decrease svc->refcnt here, let the caller check
1013 and release the service if nobody refers to it.
1014 Only user context can release destination and service,
1015 and only one user context can update virtual service at a
1016 time, so the operation here is OK */
1017 atomic_dec(&dest->svc->refcnt);
Hans Schillstromb17fc992011-01-03 14:44:56 +01001018 free_percpu(dest->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 kfree(dest);
1020 } else {
Julius Volzcfc78c52008-09-02 15:55:53 +02001021 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1022 "dest->refcnt=%d\n",
1023 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1024 ntohs(dest->port),
1025 atomic_read(&dest->refcnt));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 list_add(&dest->n_list, &ip_vs_dest_trash);
1027 atomic_inc(&dest->refcnt);
1028 }
1029}
1030
1031
1032/*
1033 * Unlink a destination from the given service
1034 */
1035static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1036 struct ip_vs_dest *dest,
1037 int svcupd)
1038{
1039 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1040
1041 /*
1042 * Remove it from the d-linked destination list.
1043 */
1044 list_del(&dest->n_list);
1045 svc->num_dests--;
Sven Wegener82dfb6f2008-08-11 19:36:06 +00001046
1047 /*
1048 * Call the update_service function of its scheduler
1049 */
1050 if (svcupd && svc->scheduler->update_service)
1051 svc->scheduler->update_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052}
1053
1054
1055/*
1056 * Delete a destination server in the given service
1057 */
1058static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001059ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060{
1061 struct ip_vs_dest *dest;
Al Viro014d7302006-09-28 14:29:52 -07001062 __be16 dport = udest->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063
1064 EnterFunction(2);
1065
Julius Volz7937df12008-09-02 15:55:48 +02001066 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
Julius Volzc860c6b2008-09-02 15:55:36 +02001067
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 if (dest == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001069 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070 return -ENOENT;
1071 }
1072
1073 write_lock_bh(&__ip_vs_svc_lock);
1074
1075 /*
1076 * Wait until all other svc users go away.
1077 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001078 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079
1080 /*
1081 * Unlink dest from the service
1082 */
1083 __ip_vs_unlink_dest(svc, dest, 1);
1084
1085 write_unlock_bh(&__ip_vs_svc_lock);
1086
1087 /*
1088 * Delete the destination
1089 */
Hans Schillstroma0840e22011-01-03 14:44:58 +01001090 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091
1092 LeaveFunction(2);
1093
1094 return 0;
1095}
1096
1097
1098/*
1099 * Add a service into the service hash table
1100 */
1101static int
Hans Schillstromfc723252011-01-03 14:44:43 +01001102ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
Julius Volzc860c6b2008-09-02 15:55:36 +02001103 struct ip_vs_service **svc_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104{
1105 int ret = 0;
1106 struct ip_vs_scheduler *sched = NULL;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001107 struct ip_vs_pe *pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001108 struct ip_vs_service *svc = NULL;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001109 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110
1111 /* increase the module use count */
1112 ip_vs_use_count_inc();
1113
1114 /* Lookup the scheduler by 'u->sched_name' */
1115 sched = ip_vs_scheduler_get(u->sched_name);
1116 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001117 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 ret = -ENOENT;
Simon Horman6e08bfb2010-08-22 21:37:52 +09001119 goto out_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120 }
1121
Simon Horman0d1e71b2010-08-22 21:37:54 +09001122 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001123 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001124 if (pe == NULL) {
1125 pr_info("persistence engine module ip_vs_pe_%s "
1126 "not found\n", u->pe_name);
1127 ret = -ENOENT;
1128 goto out_err;
1129 }
1130 }
1131
Julius Volzf94fd042008-09-02 15:55:55 +02001132#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001133 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1134 ret = -EINVAL;
1135 goto out_err;
Julius Volzf94fd042008-09-02 15:55:55 +02001136 }
1137#endif
1138
Simon Hormandee06e42010-08-26 02:54:31 +00001139 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 if (svc == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001141 IP_VS_DBG(1, "%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142 ret = -ENOMEM;
1143 goto out_err;
1144 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001145 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1146 if (!svc->stats.cpustats) {
1147 pr_err("%s() alloc_percpu failed\n", __func__);
1148 goto out_err;
1149 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150
1151 /* I'm the first user of the service */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001152 atomic_set(&svc->usecnt, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153 atomic_set(&svc->refcnt, 0);
1154
Julius Volzc860c6b2008-09-02 15:55:36 +02001155 svc->af = u->af;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156 svc->protocol = u->protocol;
Julius Volzc860c6b2008-09-02 15:55:36 +02001157 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158 svc->port = u->port;
1159 svc->fwmark = u->fwmark;
1160 svc->flags = u->flags;
1161 svc->timeout = u->timeout * HZ;
1162 svc->netmask = u->netmask;
Hans Schillstromfc723252011-01-03 14:44:43 +01001163 svc->net = net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164
1165 INIT_LIST_HEAD(&svc->destinations);
1166 rwlock_init(&svc->sched_lock);
1167 spin_lock_init(&svc->stats.lock);
1168
1169 /* Bind the scheduler */
1170 ret = ip_vs_bind_scheduler(svc, sched);
1171 if (ret)
1172 goto out_err;
1173 sched = NULL;
1174
Simon Horman0d1e71b2010-08-22 21:37:54 +09001175 /* Bind the ct retriever */
1176 ip_vs_bind_pe(svc, pe);
1177 pe = NULL;
1178
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179 /* Update the virtual service counters */
1180 if (svc->port == FTPPORT)
1181 atomic_inc(&ip_vs_ftpsvc_counter);
1182 else if (svc->port == 0)
1183 atomic_inc(&ip_vs_nullsvc_counter);
1184
Hans Schillstrom29c20262011-01-03 14:44:54 +01001185 ip_vs_new_estimator(net, &svc->stats);
Julius Volzf94fd042008-09-02 15:55:55 +02001186
1187 /* Count only IPv4 services for old get/setsockopt interface */
1188 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001189 ipvs->num_services++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190
1191 /* Hash the service into the service table */
1192 write_lock_bh(&__ip_vs_svc_lock);
1193 ip_vs_svc_hash(svc);
1194 write_unlock_bh(&__ip_vs_svc_lock);
1195
1196 *svc_p = svc;
1197 return 0;
1198
Hans Schillstromb17fc992011-01-03 14:44:56 +01001199
Simon Horman6e08bfb2010-08-22 21:37:52 +09001200 out_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 if (svc != NULL) {
Simon Horman2fabf352010-08-22 21:37:52 +09001202 ip_vs_unbind_scheduler(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203 if (svc->inc) {
1204 local_bh_disable();
1205 ip_vs_app_inc_put(svc->inc);
1206 local_bh_enable();
1207 }
Hans Schillstromb17fc992011-01-03 14:44:56 +01001208 if (svc->stats.cpustats)
1209 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210 kfree(svc);
1211 }
1212 ip_vs_scheduler_put(sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001213 ip_vs_pe_put(pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 /* decrease the module use count */
1216 ip_vs_use_count_dec();
1217
1218 return ret;
1219}
1220
1221
1222/*
1223 * Edit a service and bind it with a new scheduler
1224 */
1225static int
Julius Volzc860c6b2008-09-02 15:55:36 +02001226ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227{
1228 struct ip_vs_scheduler *sched, *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001229 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230 int ret = 0;
1231
1232 /*
1233 * Lookup the scheduler, by 'u->sched_name'
1234 */
1235 sched = ip_vs_scheduler_get(u->sched_name);
1236 if (sched == NULL) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00001237 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238 return -ENOENT;
1239 }
1240 old_sched = sched;
1241
Simon Horman0d1e71b2010-08-22 21:37:54 +09001242 if (u->pe_name && *u->pe_name) {
Simon Hormane9e5eee2010-11-08 20:05:57 +09001243 pe = ip_vs_pe_getbyname(u->pe_name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001244 if (pe == NULL) {
1245 pr_info("persistence engine module ip_vs_pe_%s "
1246 "not found\n", u->pe_name);
1247 ret = -ENOENT;
1248 goto out;
1249 }
1250 old_pe = pe;
1251 }
1252
Julius Volzf94fd042008-09-02 15:55:55 +02001253#ifdef CONFIG_IP_VS_IPV6
Julius Volz48148932008-11-03 17:08:56 -08001254 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1255 ret = -EINVAL;
1256 goto out;
Julius Volzf94fd042008-09-02 15:55:55 +02001257 }
1258#endif
1259
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 write_lock_bh(&__ip_vs_svc_lock);
1261
1262 /*
1263 * Wait until all other svc users go away.
1264 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001265 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266
1267 /*
1268 * Set the flags and timeout value
1269 */
1270 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1271 svc->timeout = u->timeout * HZ;
1272 svc->netmask = u->netmask;
1273
1274 old_sched = svc->scheduler;
1275 if (sched != old_sched) {
1276 /*
1277 * Unbind the old scheduler
1278 */
1279 if ((ret = ip_vs_unbind_scheduler(svc))) {
1280 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001281 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282 }
1283
1284 /*
1285 * Bind the new scheduler
1286 */
1287 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1288 /*
1289 * If ip_vs_bind_scheduler fails, restore the old
1290 * scheduler.
1291 * The main reason of failure is out of memory.
1292 *
1293 * The question is if the old scheduler can be
1294 * restored all the time. TODO: if it cannot be
1295 * restored some time, we must delete the service,
1296 * otherwise the system may crash.
1297 */
1298 ip_vs_bind_scheduler(svc, old_sched);
1299 old_sched = sched;
Simon Horman9e691ed2008-09-17 10:10:41 +10001300 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 }
1302 }
1303
Simon Horman0d1e71b2010-08-22 21:37:54 +09001304 old_pe = svc->pe;
1305 if (pe != old_pe) {
1306 ip_vs_unbind_pe(svc);
1307 ip_vs_bind_pe(svc, pe);
1308 }
1309
Simon Horman9e691ed2008-09-17 10:10:41 +10001310 out_unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001311 write_unlock_bh(&__ip_vs_svc_lock);
Simon Horman9e691ed2008-09-17 10:10:41 +10001312 out:
Simon Horman6e08bfb2010-08-22 21:37:52 +09001313 ip_vs_scheduler_put(old_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001314 ip_vs_pe_put(old_pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315 return ret;
1316}
1317
1318
1319/*
1320 * Delete a service from the service list
1321 * - The service must be unlinked, unlocked and not referenced!
1322 * - We are called under _bh lock
1323 */
1324static void __ip_vs_del_service(struct ip_vs_service *svc)
1325{
1326 struct ip_vs_dest *dest, *nxt;
1327 struct ip_vs_scheduler *old_sched;
Simon Horman0d1e71b2010-08-22 21:37:54 +09001328 struct ip_vs_pe *old_pe;
Hans Schillstroma0840e22011-01-03 14:44:58 +01001329 struct netns_ipvs *ipvs = net_ipvs(svc->net);
Simon Horman0d1e71b2010-08-22 21:37:54 +09001330
1331 pr_info("%s: enter\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332
Julius Volzf94fd042008-09-02 15:55:55 +02001333 /* Count only IPv4 services for old get/setsockopt interface */
1334 if (svc->af == AF_INET)
Hans Schillstroma0840e22011-01-03 14:44:58 +01001335 ipvs->num_services--;
Julius Volzf94fd042008-09-02 15:55:55 +02001336
Hans Schillstrom29c20262011-01-03 14:44:54 +01001337 ip_vs_kill_estimator(svc->net, &svc->stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338
1339 /* Unbind scheduler */
1340 old_sched = svc->scheduler;
1341 ip_vs_unbind_scheduler(svc);
Simon Horman6e08bfb2010-08-22 21:37:52 +09001342 ip_vs_scheduler_put(old_sched);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343
Simon Horman0d1e71b2010-08-22 21:37:54 +09001344 /* Unbind persistence engine */
1345 old_pe = svc->pe;
1346 ip_vs_unbind_pe(svc);
1347 ip_vs_pe_put(old_pe);
1348
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349 /* Unbind app inc */
1350 if (svc->inc) {
1351 ip_vs_app_inc_put(svc->inc);
1352 svc->inc = NULL;
1353 }
1354
1355 /*
1356 * Unlink the whole destination list
1357 */
1358 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1359 __ip_vs_unlink_dest(svc, dest, 0);
Hans Schillstrom29c20262011-01-03 14:44:54 +01001360 __ip_vs_del_dest(svc->net, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001361 }
1362
1363 /*
1364 * Update the virtual service counters
1365 */
1366 if (svc->port == FTPPORT)
1367 atomic_dec(&ip_vs_ftpsvc_counter);
1368 else if (svc->port == 0)
1369 atomic_dec(&ip_vs_nullsvc_counter);
1370
1371 /*
1372 * Free the service if nobody refers to it
1373 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001374 if (atomic_read(&svc->refcnt) == 0) {
1375 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1376 svc->fwmark,
1377 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1378 ntohs(svc->port), atomic_read(&svc->usecnt));
Hans Schillstromb17fc992011-01-03 14:44:56 +01001379 free_percpu(svc->stats.cpustats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 kfree(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001381 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382
1383 /* decrease the module use count */
1384 ip_vs_use_count_dec();
1385}
1386
1387/*
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001388 * Unlink a service from list and try to delete it if its refcnt reached 0
Linus Torvalds1da177e2005-04-16 15:20:36 -07001389 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001390static void ip_vs_unlink_service(struct ip_vs_service *svc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392 /*
1393 * Unhash it from the service table
1394 */
1395 write_lock_bh(&__ip_vs_svc_lock);
1396
1397 ip_vs_svc_unhash(svc);
1398
1399 /*
1400 * Wait until all the svc users go away.
1401 */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001402 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403
1404 __ip_vs_del_service(svc);
1405
1406 write_unlock_bh(&__ip_vs_svc_lock);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02001407}
1408
1409/*
1410 * Delete a service from the service list
1411 */
1412static int ip_vs_del_service(struct ip_vs_service *svc)
1413{
1414 if (svc == NULL)
1415 return -EEXIST;
1416 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417
1418 return 0;
1419}
1420
1421
1422/*
1423 * Flush all the virtual services
1424 */
Hans Schillstromfc723252011-01-03 14:44:43 +01001425static int ip_vs_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426{
1427 int idx;
1428 struct ip_vs_service *svc, *nxt;
1429
1430 /*
Hans Schillstromfc723252011-01-03 14:44:43 +01001431 * Flush the service table hashed by <netns,protocol,addr,port>
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432 */
1433 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001434 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1435 s_list) {
1436 if (net_eq(svc->net, net))
1437 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438 }
1439 }
1440
1441 /*
1442 * Flush the service table hashed by fwmark
1443 */
1444 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1445 list_for_each_entry_safe(svc, nxt,
1446 &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001447 if (net_eq(svc->net, net))
1448 ip_vs_unlink_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449 }
1450 }
1451
1452 return 0;
1453}
1454
1455
1456/*
1457 * Zero counters in a service or all services
1458 */
1459static int ip_vs_zero_service(struct ip_vs_service *svc)
1460{
1461 struct ip_vs_dest *dest;
1462
1463 write_lock_bh(&__ip_vs_svc_lock);
1464 list_for_each_entry(dest, &svc->destinations, n_list) {
1465 ip_vs_zero_stats(&dest->stats);
1466 }
1467 ip_vs_zero_stats(&svc->stats);
1468 write_unlock_bh(&__ip_vs_svc_lock);
1469 return 0;
1470}
1471
Hans Schillstromfc723252011-01-03 14:44:43 +01001472static int ip_vs_zero_all(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473{
1474 int idx;
1475 struct ip_vs_service *svc;
1476
1477 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1478 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001479 if (net_eq(svc->net, net))
1480 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481 }
1482 }
1483
1484 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1485 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001486 if (net_eq(svc->net, net))
1487 ip_vs_zero_service(svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001488 }
1489 }
1490
Hans Schillstromb17fc992011-01-03 14:44:56 +01001491 ip_vs_zero_stats(net_ipvs(net)->tot_stats);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492 return 0;
1493}
1494
1495
1496static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001497proc_do_defense_mode(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498 void __user *buffer, size_t *lenp, loff_t *ppos)
1499{
Hans Schillstrom93304192011-01-03 14:44:51 +01001500 struct net *net = current->nsproxy->net_ns;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 int *valp = table->data;
1502 int val = *valp;
1503 int rc;
1504
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001505 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506 if (write && (*valp != val)) {
1507 if ((*valp < 0) || (*valp > 3)) {
1508 /* Restore the correct value */
1509 *valp = val;
1510 } else {
Hans Schillstrom93304192011-01-03 14:44:51 +01001511 update_defense_level(net_ipvs(net));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512 }
1513 }
1514 return rc;
1515}
1516
1517
1518static int
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001519proc_do_sync_threshold(ctl_table *table, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520 void __user *buffer, size_t *lenp, loff_t *ppos)
1521{
1522 int *valp = table->data;
1523 int val[2];
1524 int rc;
1525
1526 /* backup the value first */
1527 memcpy(val, valp, sizeof(val));
1528
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07001529 rc = proc_dointvec(table, write, buffer, lenp, ppos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001530 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1531 /* Restore the correct value */
1532 memcpy(valp, val, sizeof(val));
1533 }
1534 return rc;
1535}
1536
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001537static int
1538proc_do_sync_mode(ctl_table *table, int write,
1539 void __user *buffer, size_t *lenp, loff_t *ppos)
1540{
1541 int *valp = table->data;
1542 int val = *valp;
1543 int rc;
1544
1545 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1546 if (write && (*valp != val)) {
1547 if ((*valp < 0) || (*valp > 1)) {
1548 /* Restore the correct value */
1549 *valp = val;
1550 } else {
Hans Schillstromf1313152011-01-03 14:44:55 +01001551 struct net *net = current->nsproxy->net_ns;
1552 ip_vs_sync_switch_mode(net, val);
Hans Schillstromb880c1f2010-11-19 14:25:14 +01001553 }
1554 }
1555 return rc;
1556}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001557
/*
 *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *	Do not change the order or insert new entries without keeping
 *	this table aligned with the netns init in __ip_vs_control_init()
 */
1563
1564static struct ctl_table vs_vars[] = {
1565 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001566 .procname = "amemthresh",
Hans Schillstroma0840e22011-01-03 14:44:58 +01001567 .maxlen = sizeof(int),
1568 .mode = 0644,
1569 .proc_handler = proc_dointvec,
1570 },
1571 {
1572 .procname = "am_droprate",
1573 .maxlen = sizeof(int),
1574 .mode = 0644,
1575 .proc_handler = proc_dointvec,
1576 },
1577 {
1578 .procname = "drop_entry",
1579 .maxlen = sizeof(int),
1580 .mode = 0644,
1581 .proc_handler = proc_do_defense_mode,
1582 },
1583 {
1584 .procname = "drop_packet",
1585 .maxlen = sizeof(int),
1586 .mode = 0644,
1587 .proc_handler = proc_do_defense_mode,
1588 },
1589#ifdef CONFIG_IP_VS_NFCT
1590 {
1591 .procname = "conntrack",
1592 .maxlen = sizeof(int),
1593 .mode = 0644,
1594 .proc_handler = &proc_dointvec,
1595 },
1596#endif
1597 {
1598 .procname = "secure_tcp",
1599 .maxlen = sizeof(int),
1600 .mode = 0644,
1601 .proc_handler = proc_do_defense_mode,
1602 },
1603 {
1604 .procname = "snat_reroute",
1605 .maxlen = sizeof(int),
1606 .mode = 0644,
1607 .proc_handler = &proc_dointvec,
1608 },
1609 {
1610 .procname = "sync_version",
1611 .maxlen = sizeof(int),
1612 .mode = 0644,
1613 .proc_handler = &proc_do_sync_mode,
1614 },
1615 {
1616 .procname = "cache_bypass",
1617 .maxlen = sizeof(int),
1618 .mode = 0644,
1619 .proc_handler = proc_dointvec,
1620 },
1621 {
1622 .procname = "expire_nodest_conn",
1623 .maxlen = sizeof(int),
1624 .mode = 0644,
1625 .proc_handler = proc_dointvec,
1626 },
1627 {
1628 .procname = "expire_quiescent_template",
1629 .maxlen = sizeof(int),
1630 .mode = 0644,
1631 .proc_handler = proc_dointvec,
1632 },
1633 {
1634 .procname = "sync_threshold",
1635 .maxlen =
1636 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1637 .mode = 0644,
1638 .proc_handler = proc_do_sync_threshold,
1639 },
1640 {
1641 .procname = "nat_icmp_send",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642 .maxlen = sizeof(int),
1643 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001644 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001645 },
1646#ifdef CONFIG_IP_VS_DEBUG
1647 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648 .procname = "debug_level",
1649 .data = &sysctl_ip_vs_debug_level,
1650 .maxlen = sizeof(int),
1651 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001652 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653 },
1654#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655#if 0
1656 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001657 .procname = "timeout_established",
1658 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1659 .maxlen = sizeof(int),
1660 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001661 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662 },
1663 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664 .procname = "timeout_synsent",
1665 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1666 .maxlen = sizeof(int),
1667 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001668 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 },
1670 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671 .procname = "timeout_synrecv",
1672 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1673 .maxlen = sizeof(int),
1674 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001675 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 },
1677 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678 .procname = "timeout_finwait",
1679 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1680 .maxlen = sizeof(int),
1681 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001682 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 },
1684 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685 .procname = "timeout_timewait",
1686 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1687 .maxlen = sizeof(int),
1688 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001689 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690 },
1691 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692 .procname = "timeout_close",
1693 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1694 .maxlen = sizeof(int),
1695 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001696 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 },
1698 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699 .procname = "timeout_closewait",
1700 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1701 .maxlen = sizeof(int),
1702 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001703 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704 },
1705 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706 .procname = "timeout_lastack",
1707 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1708 .maxlen = sizeof(int),
1709 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001710 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 },
1712 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713 .procname = "timeout_listen",
1714 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1715 .maxlen = sizeof(int),
1716 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001717 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718 },
1719 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720 .procname = "timeout_synack",
1721 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1722 .maxlen = sizeof(int),
1723 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001724 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 },
1726 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727 .procname = "timeout_udp",
1728 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1729 .maxlen = sizeof(int),
1730 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001731 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732 },
1733 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734 .procname = "timeout_icmp",
1735 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1736 .maxlen = sizeof(int),
1737 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08001738 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001739 },
1740#endif
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001741 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742};
1743
Sven Wegener5587da52008-08-10 18:24:40 +00001744const struct ctl_path net_vs_ctl_path[] = {
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08001745 { .procname = "net", },
1746 { .procname = "ipv4", },
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001747 { .procname = "vs", },
1748 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749};
Pavel Emelyanov90754f82008-01-12 02:33:50 -08001750EXPORT_SYMBOL_GPL(net_vs_ctl_path);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001751
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752#ifdef CONFIG_PROC_FS
1753
1754struct ip_vs_iter {
Hans Schillstromfc723252011-01-03 14:44:43 +01001755 struct seq_net_private p; /* Do not move this, netns depends upon it*/
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756 struct list_head *table;
1757 int bucket;
1758};
1759
1760/*
1761 * Write the contents of the VS rule table to a PROCfs file.
1762 * (It is kept just for backward compatibility)
1763 */
1764static inline const char *ip_vs_fwd_name(unsigned flags)
1765{
1766 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1767 case IP_VS_CONN_F_LOCALNODE:
1768 return "Local";
1769 case IP_VS_CONN_F_TUNNEL:
1770 return "Tunnel";
1771 case IP_VS_CONN_F_DROUTE:
1772 return "Route";
1773 default:
1774 return "Masq";
1775 }
1776}
1777
1778
1779/* Get the Nth entry in the two lists */
1780static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1781{
Hans Schillstromfc723252011-01-03 14:44:43 +01001782 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 struct ip_vs_iter *iter = seq->private;
1784 int idx;
1785 struct ip_vs_service *svc;
1786
1787 /* look in hash by protocol */
1788 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1789 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001790 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001791 iter->table = ip_vs_svc_table;
1792 iter->bucket = idx;
1793 return svc;
1794 }
1795 }
1796 }
1797
1798 /* keep looking in fwmark */
1799 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1800 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01001801 if (net_eq(svc->net, net) && pos-- == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802 iter->table = ip_vs_svc_fwm_table;
1803 iter->bucket = idx;
1804 return svc;
1805 }
1806 }
1807 }
1808
1809 return NULL;
1810}
1811
1812static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
Simon Horman563e94f2008-09-17 10:10:42 +10001813__acquires(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814{
1815
1816 read_lock_bh(&__ip_vs_svc_lock);
1817 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1818}
1819
1820
1821static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1822{
1823 struct list_head *e;
1824 struct ip_vs_iter *iter;
1825 struct ip_vs_service *svc;
1826
1827 ++*pos;
1828 if (v == SEQ_START_TOKEN)
1829 return ip_vs_info_array(seq,0);
1830
1831 svc = v;
1832 iter = seq->private;
1833
1834 if (iter->table == ip_vs_svc_table) {
1835 /* next service in table hashed by protocol */
1836 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1837 return list_entry(e, struct ip_vs_service, s_list);
1838
1839
1840 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1841 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1842 s_list) {
1843 return svc;
1844 }
1845 }
1846
1847 iter->table = ip_vs_svc_fwm_table;
1848 iter->bucket = -1;
1849 goto scan_fwmark;
1850 }
1851
1852 /* next service in hashed by fwmark */
1853 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1854 return list_entry(e, struct ip_vs_service, f_list);
1855
1856 scan_fwmark:
1857 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1858 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1859 f_list)
1860 return svc;
1861 }
1862
1863 return NULL;
1864}
1865
1866static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
Simon Horman563e94f2008-09-17 10:10:42 +10001867__releases(__ip_vs_svc_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868{
1869 read_unlock_bh(&__ip_vs_svc_lock);
1870}
1871
1872
1873static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1874{
1875 if (v == SEQ_START_TOKEN) {
1876 seq_printf(seq,
1877 "IP Virtual Server version %d.%d.%d (size=%d)\n",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01001878 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879 seq_puts(seq,
1880 "Prot LocalAddress:Port Scheduler Flags\n");
1881 seq_puts(seq,
1882 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1883 } else {
1884 const struct ip_vs_service *svc = v;
1885 const struct ip_vs_iter *iter = seq->private;
1886 const struct ip_vs_dest *dest;
1887
Vince Busam667a5f12008-09-02 15:55:49 +02001888 if (iter->table == ip_vs_svc_table) {
1889#ifdef CONFIG_IP_VS_IPV6
1890 if (svc->af == AF_INET6)
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001891 seq_printf(seq, "%s [%pI6]:%04X %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001892 ip_vs_proto_name(svc->protocol),
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001893 &svc->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001894 ntohs(svc->port),
1895 svc->scheduler->name);
1896 else
1897#endif
Nick Chalk26ec0372010-06-22 08:07:01 +02001898 seq_printf(seq, "%s %08X:%04X %s %s ",
Vince Busam667a5f12008-09-02 15:55:49 +02001899 ip_vs_proto_name(svc->protocol),
1900 ntohl(svc->addr.ip),
1901 ntohs(svc->port),
Nick Chalk26ec0372010-06-22 08:07:01 +02001902 svc->scheduler->name,
1903 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001904 } else {
Nick Chalk26ec0372010-06-22 08:07:01 +02001905 seq_printf(seq, "FWM %08X %s %s",
1906 svc->fwmark, svc->scheduler->name,
1907 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
Vince Busam667a5f12008-09-02 15:55:49 +02001908 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001909
1910 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1911 seq_printf(seq, "persistent %d %08X\n",
1912 svc->timeout,
1913 ntohl(svc->netmask));
1914 else
1915 seq_putc(seq, '\n');
1916
1917 list_for_each_entry(dest, &svc->destinations, n_list) {
Vince Busam667a5f12008-09-02 15:55:49 +02001918#ifdef CONFIG_IP_VS_IPV6
1919 if (dest->af == AF_INET6)
1920 seq_printf(seq,
Harvey Harrison5b095d9892008-10-29 12:52:50 -07001921 " -> [%pI6]:%04X"
Vince Busam667a5f12008-09-02 15:55:49 +02001922 " %-7s %-6d %-10d %-10d\n",
Harvey Harrison38ff4fa2008-10-28 16:08:13 -07001923 &dest->addr.in6,
Vince Busam667a5f12008-09-02 15:55:49 +02001924 ntohs(dest->port),
1925 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1926 atomic_read(&dest->weight),
1927 atomic_read(&dest->activeconns),
1928 atomic_read(&dest->inactconns));
1929 else
1930#endif
1931 seq_printf(seq,
1932 " -> %08X:%04X "
1933 "%-7s %-6d %-10d %-10d\n",
1934 ntohl(dest->addr.ip),
1935 ntohs(dest->port),
1936 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1937 atomic_read(&dest->weight),
1938 atomic_read(&dest->activeconns),
1939 atomic_read(&dest->inactconns));
1940
Linus Torvalds1da177e2005-04-16 15:20:36 -07001941 }
1942 }
1943 return 0;
1944}
1945
Philippe De Muyter56b3d972007-07-10 23:07:31 -07001946static const struct seq_operations ip_vs_info_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947 .start = ip_vs_info_seq_start,
1948 .next = ip_vs_info_seq_next,
1949 .stop = ip_vs_info_seq_stop,
1950 .show = ip_vs_info_seq_show,
1951};
1952
1953static int ip_vs_info_open(struct inode *inode, struct file *file)
1954{
Hans Schillstromfc723252011-01-03 14:44:43 +01001955 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001956 sizeof(struct ip_vs_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957}
1958
Arjan van de Ven9a321442007-02-12 00:55:35 -08001959static const struct file_operations ip_vs_info_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 .owner = THIS_MODULE,
1961 .open = ip_vs_info_open,
1962 .read = seq_read,
1963 .llseek = seq_lseek,
1964 .release = seq_release_private,
1965};
1966
1967#endif
1968
Linus Torvalds1da177e2005-04-16 15:20:36 -07001969#ifdef CONFIG_PROC_FS
1970static int ip_vs_stats_show(struct seq_file *seq, void *v)
1971{
Hans Schillstromb17fc992011-01-03 14:44:56 +01001972 struct net *net = seq_file_single_net(seq);
1973 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001974
1975/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1976 seq_puts(seq,
1977 " Total Incoming Outgoing Incoming Outgoing\n");
1978 seq_printf(seq,
1979 " Conns Packets Packets Bytes Bytes\n");
1980
Hans Schillstromb17fc992011-01-03 14:44:56 +01001981 spin_lock_bh(&tot_stats->lock);
1982 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
1983 tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
1984 (unsigned long long) tot_stats->ustats.inbytes,
1985 (unsigned long long) tot_stats->ustats.outbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001986
1987/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1988 seq_puts(seq,
1989 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1990 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
Hans Schillstromb17fc992011-01-03 14:44:56 +01001991 tot_stats->ustats.cps,
1992 tot_stats->ustats.inpps,
1993 tot_stats->ustats.outpps,
1994 tot_stats->ustats.inbps,
1995 tot_stats->ustats.outbps);
1996 spin_unlock_bh(&tot_stats->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997
1998 return 0;
1999}
2000
2001static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2002{
Hans Schillstromfc723252011-01-03 14:44:43 +01002003 return single_open_net(inode, file, ip_vs_stats_show);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004}
2005
Arjan van de Ven9a321442007-02-12 00:55:35 -08002006static const struct file_operations ip_vs_stats_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002007 .owner = THIS_MODULE,
2008 .open = ip_vs_stats_seq_open,
2009 .read = seq_read,
2010 .llseek = seq_lseek,
2011 .release = single_release,
2012};
2013
Hans Schillstromb17fc992011-01-03 14:44:56 +01002014static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2015{
2016 struct net *net = seq_file_single_net(seq);
2017 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
2018 int i;
2019
2020/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2021 seq_puts(seq,
2022 " Total Incoming Outgoing Incoming Outgoing\n");
2023 seq_printf(seq,
2024 "CPU Conns Packets Packets Bytes Bytes\n");
2025
2026 for_each_possible_cpu(i) {
2027 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
2028 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2029 i, u->ustats.conns, u->ustats.inpkts,
2030 u->ustats.outpkts, (__u64)u->ustats.inbytes,
2031 (__u64)u->ustats.outbytes);
2032 }
2033
2034 spin_lock_bh(&tot_stats->lock);
2035 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2036 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2037 tot_stats->ustats.outpkts,
2038 (unsigned long long) tot_stats->ustats.inbytes,
2039 (unsigned long long) tot_stats->ustats.outbytes);
2040
2041/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2042 seq_puts(seq,
2043 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2044 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2045 tot_stats->ustats.cps,
2046 tot_stats->ustats.inpps,
2047 tot_stats->ustats.outpps,
2048 tot_stats->ustats.inbps,
2049 tot_stats->ustats.outbps);
2050 spin_unlock_bh(&tot_stats->lock);
2051
2052 return 0;
2053}
2054
2055static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2056{
2057 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2058}
2059
2060static const struct file_operations ip_vs_stats_percpu_fops = {
2061 .owner = THIS_MODULE,
2062 .open = ip_vs_stats_percpu_seq_open,
2063 .read = seq_read,
2064 .llseek = seq_lseek,
2065 .release = single_release,
2066};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067#endif
2068
2069/*
2070 * Set timeout values for tcp tcpfin udp in the timeout_table.
2071 */
Hans Schillstrom93304192011-01-03 14:44:51 +01002072static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073{
Hans Schillstrom93304192011-01-03 14:44:51 +01002074 struct ip_vs_proto_data *pd;
2075
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2077 u->tcp_timeout,
2078 u->tcp_fin_timeout,
2079 u->udp_timeout);
2080
2081#ifdef CONFIG_IP_VS_PROTO_TCP
2082 if (u->tcp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002083 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2084 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002085 = u->tcp_timeout * HZ;
2086 }
2087
2088 if (u->tcp_fin_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002089 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2090 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002091 = u->tcp_fin_timeout * HZ;
2092 }
2093#endif
2094
2095#ifdef CONFIG_IP_VS_PROTO_UDP
2096 if (u->udp_timeout) {
Hans Schillstrom93304192011-01-03 14:44:51 +01002097 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2098 pd->timeout_table[IP_VS_UDP_S_NORMAL]
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099 = u->udp_timeout * HZ;
2100 }
2101#endif
2102 return 0;
2103}
2104
2105
2106#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2107#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2108#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2109 sizeof(struct ip_vs_dest_user))
2110#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2111#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2112#define MAX_ARG_LEN SVCDEST_ARG_LEN
2113
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002114static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2116 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2117 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2118 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2119 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2120 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2121 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2122 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2123 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2124 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2125 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2126};
2127
Julius Volzc860c6b2008-09-02 15:55:36 +02002128static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2129 struct ip_vs_service_user *usvc_compat)
2130{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002131 memset(usvc, 0, sizeof(*usvc));
2132
Julius Volzc860c6b2008-09-02 15:55:36 +02002133 usvc->af = AF_INET;
2134 usvc->protocol = usvc_compat->protocol;
2135 usvc->addr.ip = usvc_compat->addr;
2136 usvc->port = usvc_compat->port;
2137 usvc->fwmark = usvc_compat->fwmark;
2138
2139 /* Deep copy of sched_name is not needed here */
2140 usvc->sched_name = usvc_compat->sched_name;
2141
2142 usvc->flags = usvc_compat->flags;
2143 usvc->timeout = usvc_compat->timeout;
2144 usvc->netmask = usvc_compat->netmask;
2145}
2146
2147static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2148 struct ip_vs_dest_user *udest_compat)
2149{
Simon Horman0d1e71b2010-08-22 21:37:54 +09002150 memset(udest, 0, sizeof(*udest));
2151
Julius Volzc860c6b2008-09-02 15:55:36 +02002152 udest->addr.ip = udest_compat->addr;
2153 udest->port = udest_compat->port;
2154 udest->conn_flags = udest_compat->conn_flags;
2155 udest->weight = udest_compat->weight;
2156 udest->u_threshold = udest_compat->u_threshold;
2157 udest->l_threshold = udest_compat->l_threshold;
2158}
2159
Linus Torvalds1da177e2005-04-16 15:20:36 -07002160static int
2161do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2162{
Hans Schillstromfc723252011-01-03 14:44:43 +01002163 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164 int ret;
2165 unsigned char arg[MAX_ARG_LEN];
Julius Volzc860c6b2008-09-02 15:55:36 +02002166 struct ip_vs_service_user *usvc_compat;
2167 struct ip_vs_service_user_kern usvc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002168 struct ip_vs_service *svc;
Julius Volzc860c6b2008-09-02 15:55:36 +02002169 struct ip_vs_dest_user *udest_compat;
2170 struct ip_vs_dest_user_kern udest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171
2172 if (!capable(CAP_NET_ADMIN))
2173 return -EPERM;
2174
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002175 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2176 return -EINVAL;
2177 if (len < 0 || len > MAX_ARG_LEN)
2178 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179 if (len != set_arglen[SET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002180 pr_err("set_ctl: len %u != %u\n",
2181 len, set_arglen[SET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002182 return -EINVAL;
2183 }
2184
2185 if (copy_from_user(arg, user, len) != 0)
2186 return -EFAULT;
2187
2188 /* increase the module use count */
2189 ip_vs_use_count_inc();
2190
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002191 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192 ret = -ERESTARTSYS;
2193 goto out_dec;
2194 }
2195
2196 if (cmd == IP_VS_SO_SET_FLUSH) {
2197 /* Flush the virtual service */
Hans Schillstromfc723252011-01-03 14:44:43 +01002198 ret = ip_vs_flush(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199 goto out_unlock;
2200 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2201 /* Set timeout values for (tcp tcpfin udp) */
Hans Schillstrom93304192011-01-03 14:44:51 +01002202 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203 goto out_unlock;
2204 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2205 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002206 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2207 dm->syncid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002208 goto out_unlock;
2209 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2210 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
Hans Schillstromf1313152011-01-03 14:44:55 +01002211 ret = stop_sync_thread(net, dm->state);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212 goto out_unlock;
2213 }
2214
Julius Volzc860c6b2008-09-02 15:55:36 +02002215 usvc_compat = (struct ip_vs_service_user *)arg;
2216 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2217
2218 /* We only use the new structs internally, so copy userspace compat
2219 * structs to extended internal versions */
2220 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2221 ip_vs_copy_udest_compat(&udest, udest_compat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222
2223 if (cmd == IP_VS_SO_SET_ZERO) {
2224 /* if no service address is set, zero counters in all */
Julius Volzc860c6b2008-09-02 15:55:36 +02002225 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002226 ret = ip_vs_zero_all(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002227 goto out_unlock;
2228 }
2229 }
2230
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +01002231 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2232 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2233 usvc.protocol != IPPROTO_SCTP) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002234 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2235 usvc.protocol, &usvc.addr.ip,
2236 ntohs(usvc.port), usvc.sched_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002237 ret = -EFAULT;
2238 goto out_unlock;
2239 }
2240
2241 /* Lookup the exact service by <protocol, addr, port> or fwmark */
Julius Volzc860c6b2008-09-02 15:55:36 +02002242 if (usvc.fwmark == 0)
Hans Schillstromfc723252011-01-03 14:44:43 +01002243 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002244 &usvc.addr, usvc.port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002246 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002247
2248 if (cmd != IP_VS_SO_SET_ADD
Julius Volzc860c6b2008-09-02 15:55:36 +02002249 && (svc == NULL || svc->protocol != usvc.protocol)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002250 ret = -ESRCH;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002251 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002252 }
2253
2254 switch (cmd) {
2255 case IP_VS_SO_SET_ADD:
2256 if (svc != NULL)
2257 ret = -EEXIST;
2258 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002259 ret = ip_vs_add_service(net, &usvc, &svc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260 break;
2261 case IP_VS_SO_SET_EDIT:
Julius Volzc860c6b2008-09-02 15:55:36 +02002262 ret = ip_vs_edit_service(svc, &usvc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002263 break;
2264 case IP_VS_SO_SET_DEL:
2265 ret = ip_vs_del_service(svc);
2266 if (!ret)
2267 goto out_unlock;
2268 break;
2269 case IP_VS_SO_SET_ZERO:
2270 ret = ip_vs_zero_service(svc);
2271 break;
2272 case IP_VS_SO_SET_ADDDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002273 ret = ip_vs_add_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274 break;
2275 case IP_VS_SO_SET_EDITDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002276 ret = ip_vs_edit_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002277 break;
2278 case IP_VS_SO_SET_DELDEST:
Julius Volzc860c6b2008-09-02 15:55:36 +02002279 ret = ip_vs_del_dest(svc, &udest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002280 break;
2281 default:
2282 ret = -EINVAL;
2283 }
2284
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285 out_unlock:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002286 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002287 out_dec:
2288 /* decrease the module use count */
2289 ip_vs_use_count_dec();
2290
2291 return ret;
2292}
2293
2294
2295static void
2296ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2297{
2298 spin_lock_bh(&src->lock);
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002299 memcpy(dst, &src->ustats, sizeof(*dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300 spin_unlock_bh(&src->lock);
2301}
2302
2303static void
2304ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2305{
2306 dst->protocol = src->protocol;
Julius Volze7ade462008-09-02 15:55:33 +02002307 dst->addr = src->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002308 dst->port = src->port;
2309 dst->fwmark = src->fwmark;
pageexec4da62fc2005-06-26 16:00:19 -07002310 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311 dst->flags = src->flags;
2312 dst->timeout = src->timeout / HZ;
2313 dst->netmask = src->netmask;
2314 dst->num_dests = src->num_dests;
2315 ip_vs_copy_stats(&dst->stats, &src->stats);
2316}
2317
2318static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002319__ip_vs_get_service_entries(struct net *net,
2320 const struct ip_vs_get_services *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002321 struct ip_vs_get_services __user *uptr)
2322{
2323 int idx, count=0;
2324 struct ip_vs_service *svc;
2325 struct ip_vs_service_entry entry;
2326 int ret = 0;
2327
2328 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2329 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002330 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002331 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002332 continue;
2333
Linus Torvalds1da177e2005-04-16 15:20:36 -07002334 if (count >= get->num_services)
2335 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002336 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002337 ip_vs_copy_service(&entry, svc);
2338 if (copy_to_user(&uptr->entrytable[count],
2339 &entry, sizeof(entry))) {
2340 ret = -EFAULT;
2341 goto out;
2342 }
2343 count++;
2344 }
2345 }
2346
2347 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2348 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
Julius Volzf94fd042008-09-02 15:55:55 +02002349 /* Only expose IPv4 entries to old interface */
Hans Schillstromfc723252011-01-03 14:44:43 +01002350 if (svc->af != AF_INET || !net_eq(svc->net, net))
Julius Volzf94fd042008-09-02 15:55:55 +02002351 continue;
2352
Linus Torvalds1da177e2005-04-16 15:20:36 -07002353 if (count >= get->num_services)
2354 goto out;
pageexec4da62fc2005-06-26 16:00:19 -07002355 memset(&entry, 0, sizeof(entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002356 ip_vs_copy_service(&entry, svc);
2357 if (copy_to_user(&uptr->entrytable[count],
2358 &entry, sizeof(entry))) {
2359 ret = -EFAULT;
2360 goto out;
2361 }
2362 count++;
2363 }
2364 }
2365 out:
2366 return ret;
2367}
2368
2369static inline int
Hans Schillstromfc723252011-01-03 14:44:43 +01002370__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371 struct ip_vs_get_dests __user *uptr)
2372{
2373 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002374 union nf_inet_addr addr = { .ip = get->addr };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002375 int ret = 0;
2376
2377 if (get->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002378 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002379 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002380 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002381 get->port);
Julius Volzb18610d2008-09-02 15:55:37 +02002382
Linus Torvalds1da177e2005-04-16 15:20:36 -07002383 if (svc) {
2384 int count = 0;
2385 struct ip_vs_dest *dest;
2386 struct ip_vs_dest_entry entry;
2387
2388 list_for_each_entry(dest, &svc->destinations, n_list) {
2389 if (count >= get->num_dests)
2390 break;
2391
Julius Volze7ade462008-09-02 15:55:33 +02002392 entry.addr = dest->addr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002393 entry.port = dest->port;
2394 entry.conn_flags = atomic_read(&dest->conn_flags);
2395 entry.weight = atomic_read(&dest->weight);
2396 entry.u_threshold = dest->u_threshold;
2397 entry.l_threshold = dest->l_threshold;
2398 entry.activeconns = atomic_read(&dest->activeconns);
2399 entry.inactconns = atomic_read(&dest->inactconns);
2400 entry.persistconns = atomic_read(&dest->persistconns);
2401 ip_vs_copy_stats(&entry.stats, &dest->stats);
2402 if (copy_to_user(&uptr->entrytable[count],
2403 &entry, sizeof(entry))) {
2404 ret = -EFAULT;
2405 break;
2406 }
2407 count++;
2408 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002409 } else
2410 ret = -ESRCH;
2411 return ret;
2412}
2413
2414static inline void
Hans Schillstrom93304192011-01-03 14:44:51 +01002415__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416{
Hans Schillstrom93304192011-01-03 14:44:51 +01002417 struct ip_vs_proto_data *pd;
2418
Linus Torvalds1da177e2005-04-16 15:20:36 -07002419#ifdef CONFIG_IP_VS_PROTO_TCP
Hans Schillstrom93304192011-01-03 14:44:51 +01002420 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2421 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2422 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002423#endif
2424#ifdef CONFIG_IP_VS_PROTO_UDP
Hans Schillstrom93304192011-01-03 14:44:51 +01002425 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002426 u->udp_timeout =
Hans Schillstrom93304192011-01-03 14:44:51 +01002427 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002428#endif
2429}
2430
2431
2432#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2433#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2434#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2435#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2436#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2437#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2438#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2439
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08002440static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2442 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2443 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2444 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2445 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2446 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2447 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2448};
2449
2450static int
2451do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2452{
2453 unsigned char arg[128];
2454 int ret = 0;
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002455 unsigned int copylen;
Hans Schillstromfc723252011-01-03 14:44:43 +01002456 struct net *net = sock_net(sk);
Hans Schillstromf1313152011-01-03 14:44:55 +01002457 struct netns_ipvs *ipvs = net_ipvs(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002458
Hans Schillstromfc723252011-01-03 14:44:43 +01002459 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002460 if (!capable(CAP_NET_ADMIN))
2461 return -EPERM;
2462
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002463 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2464 return -EINVAL;
2465
Linus Torvalds1da177e2005-04-16 15:20:36 -07002466 if (*len < get_arglen[GET_CMDID(cmd)]) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002467 pr_err("get_ctl: len %u < %u\n",
2468 *len, get_arglen[GET_CMDID(cmd)]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002469 return -EINVAL;
2470 }
2471
Arjan van de Ven04bcef22010-01-04 16:37:12 +01002472 copylen = get_arglen[GET_CMDID(cmd)];
2473 if (copylen > 128)
2474 return -EINVAL;
2475
2476 if (copy_from_user(arg, user, copylen) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002477 return -EFAULT;
2478
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002479 if (mutex_lock_interruptible(&__ip_vs_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480 return -ERESTARTSYS;
2481
2482 switch (cmd) {
2483 case IP_VS_SO_GET_VERSION:
2484 {
2485 char buf[64];
2486
2487 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002488 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2490 ret = -EFAULT;
2491 goto out;
2492 }
2493 *len = strlen(buf)+1;
2494 }
2495 break;
2496
2497 case IP_VS_SO_GET_INFO:
2498 {
2499 struct ip_vs_getinfo info;
2500 info.version = IP_VS_VERSION_CODE;
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01002501 info.size = ip_vs_conn_tab_size;
Hans Schillstroma0840e22011-01-03 14:44:58 +01002502 info.num_services = ipvs->num_services;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002503 if (copy_to_user(user, &info, sizeof(info)) != 0)
2504 ret = -EFAULT;
2505 }
2506 break;
2507
2508 case IP_VS_SO_GET_SERVICES:
2509 {
2510 struct ip_vs_get_services *get;
2511 int size;
2512
2513 get = (struct ip_vs_get_services *)arg;
2514 size = sizeof(*get) +
2515 sizeof(struct ip_vs_service_entry) * get->num_services;
2516 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002517 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002518 ret = -EINVAL;
2519 goto out;
2520 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002521 ret = __ip_vs_get_service_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522 }
2523 break;
2524
2525 case IP_VS_SO_GET_SERVICE:
2526 {
2527 struct ip_vs_service_entry *entry;
2528 struct ip_vs_service *svc;
Julius Volzb18610d2008-09-02 15:55:37 +02002529 union nf_inet_addr addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530
2531 entry = (struct ip_vs_service_entry *)arg;
Julius Volzb18610d2008-09-02 15:55:37 +02002532 addr.ip = entry->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002533 if (entry->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002534 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002535 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002536 svc = __ip_vs_service_find(net, AF_INET,
2537 entry->protocol, &addr,
2538 entry->port);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002539 if (svc) {
2540 ip_vs_copy_service(entry, svc);
2541 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2542 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543 } else
2544 ret = -ESRCH;
2545 }
2546 break;
2547
2548 case IP_VS_SO_GET_DESTS:
2549 {
2550 struct ip_vs_get_dests *get;
2551 int size;
2552
2553 get = (struct ip_vs_get_dests *)arg;
2554 size = sizeof(*get) +
2555 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2556 if (*len != size) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00002557 pr_err("length: %u != %u\n", *len, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002558 ret = -EINVAL;
2559 goto out;
2560 }
Hans Schillstromfc723252011-01-03 14:44:43 +01002561 ret = __ip_vs_get_dest_entries(net, get, user);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562 }
2563 break;
2564
2565 case IP_VS_SO_GET_TIMEOUT:
2566 {
2567 struct ip_vs_timeout_user t;
2568
Hans Schillstrom93304192011-01-03 14:44:51 +01002569 __ip_vs_get_timeouts(net, &t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002570 if (copy_to_user(user, &t, sizeof(t)) != 0)
2571 ret = -EFAULT;
2572 }
2573 break;
2574
2575 case IP_VS_SO_GET_DAEMON:
2576 {
2577 struct ip_vs_daemon_user d[2];
2578
2579 memset(&d, 0, sizeof(d));
Hans Schillstromf1313152011-01-03 14:44:55 +01002580 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002581 d[0].state = IP_VS_STATE_MASTER;
Hans Schillstromf1313152011-01-03 14:44:55 +01002582 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2583 sizeof(d[0].mcast_ifn));
2584 d[0].syncid = ipvs->master_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585 }
Hans Schillstromf1313152011-01-03 14:44:55 +01002586 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002587 d[1].state = IP_VS_STATE_BACKUP;
Hans Schillstromf1313152011-01-03 14:44:55 +01002588 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2589 sizeof(d[1].mcast_ifn));
2590 d[1].syncid = ipvs->backup_syncid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002591 }
2592 if (copy_to_user(user, &d, sizeof(d)) != 0)
2593 ret = -EFAULT;
2594 }
2595 break;
2596
2597 default:
2598 ret = -EINVAL;
2599 }
2600
2601 out:
Ingo Molnar14cc3e22006-03-26 01:37:14 -08002602 mutex_unlock(&__ip_vs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603 return ret;
2604}
2605
2606
2607static struct nf_sockopt_ops ip_vs_sockopts = {
2608 .pf = PF_INET,
2609 .set_optmin = IP_VS_BASE_CTL,
2610 .set_optmax = IP_VS_SO_SET_MAX+1,
2611 .set = do_ip_vs_set_ctl,
2612 .get_optmin = IP_VS_BASE_CTL,
2613 .get_optmax = IP_VS_SO_GET_MAX+1,
2614 .get = do_ip_vs_get_ctl,
Neil Horman16fcec32007-09-11 11:28:26 +02002615 .owner = THIS_MODULE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002616};
2617
Julius Volz9a812192008-08-14 14:08:44 +02002618/*
2619 * Generic Netlink interface
2620 */
2621
2622/* IPVS genetlink family */
2623static struct genl_family ip_vs_genl_family = {
2624 .id = GENL_ID_GENERATE,
2625 .hdrsize = 0,
2626 .name = IPVS_GENL_NAME,
2627 .version = IPVS_GENL_VERSION,
2628 .maxattr = IPVS_CMD_MAX,
2629};
2630
2631/* Policy used for first-level command attributes */
2632static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2633 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2634 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2635 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2636 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2637 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2638 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2639};
2640
2641/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2642static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2643 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2644 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2645 .len = IP_VS_IFNAME_MAXLEN },
2646 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2647};
2648
2649/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2650static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2651 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2652 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2653 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2654 .len = sizeof(union nf_inet_addr) },
2655 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2656 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2657 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2658 .len = IP_VS_SCHEDNAME_MAXLEN },
Simon Horman0d1e71b2010-08-22 21:37:54 +09002659 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2660 .len = IP_VS_PENAME_MAXLEN },
Julius Volz9a812192008-08-14 14:08:44 +02002661 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2662 .len = sizeof(struct ip_vs_flags) },
2663 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2664 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2665 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2666};
2667
2668/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2669static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2670 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2671 .len = sizeof(union nf_inet_addr) },
2672 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2673 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2674 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2675 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2676 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2677 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2678 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2679 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2680 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2681};
2682
2683static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2684 struct ip_vs_stats *stats)
2685{
2686 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2687 if (!nl_stats)
2688 return -EMSGSIZE;
2689
2690 spin_lock_bh(&stats->lock);
2691
Sven Wegenere9c0ce22008-09-08 13:39:04 +02002692 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2693 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2694 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2695 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2696 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2697 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2698 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2699 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2700 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2701 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
Julius Volz9a812192008-08-14 14:08:44 +02002702
2703 spin_unlock_bh(&stats->lock);
2704
2705 nla_nest_end(skb, nl_stats);
2706
2707 return 0;
2708
2709nla_put_failure:
2710 spin_unlock_bh(&stats->lock);
2711 nla_nest_cancel(skb, nl_stats);
2712 return -EMSGSIZE;
2713}
2714
2715static int ip_vs_genl_fill_service(struct sk_buff *skb,
2716 struct ip_vs_service *svc)
2717{
2718 struct nlattr *nl_service;
2719 struct ip_vs_flags flags = { .flags = svc->flags,
2720 .mask = ~0 };
2721
2722 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2723 if (!nl_service)
2724 return -EMSGSIZE;
2725
Julius Volzf94fd042008-09-02 15:55:55 +02002726 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
Julius Volz9a812192008-08-14 14:08:44 +02002727
2728 if (svc->fwmark) {
2729 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2730 } else {
2731 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2732 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2733 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2734 }
2735
2736 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002737 if (svc->pe)
2738 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
Julius Volz9a812192008-08-14 14:08:44 +02002739 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2740 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2741 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2742
2743 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2744 goto nla_put_failure;
2745
2746 nla_nest_end(skb, nl_service);
2747
2748 return 0;
2749
2750nla_put_failure:
2751 nla_nest_cancel(skb, nl_service);
2752 return -EMSGSIZE;
2753}
2754
2755static int ip_vs_genl_dump_service(struct sk_buff *skb,
2756 struct ip_vs_service *svc,
2757 struct netlink_callback *cb)
2758{
2759 void *hdr;
2760
2761 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2762 &ip_vs_genl_family, NLM_F_MULTI,
2763 IPVS_CMD_NEW_SERVICE);
2764 if (!hdr)
2765 return -EMSGSIZE;
2766
2767 if (ip_vs_genl_fill_service(skb, svc) < 0)
2768 goto nla_put_failure;
2769
2770 return genlmsg_end(skb, hdr);
2771
2772nla_put_failure:
2773 genlmsg_cancel(skb, hdr);
2774 return -EMSGSIZE;
2775}
2776
2777static int ip_vs_genl_dump_services(struct sk_buff *skb,
2778 struct netlink_callback *cb)
2779{
2780 int idx = 0, i;
2781 int start = cb->args[0];
2782 struct ip_vs_service *svc;
Hans Schillstromfc723252011-01-03 14:44:43 +01002783 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002784
2785 mutex_lock(&__ip_vs_mutex);
2786 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2787 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002788 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002789 continue;
2790 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2791 idx--;
2792 goto nla_put_failure;
2793 }
2794 }
2795 }
2796
2797 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2798 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
Hans Schillstromfc723252011-01-03 14:44:43 +01002799 if (++idx <= start || !net_eq(svc->net, net))
Julius Volz9a812192008-08-14 14:08:44 +02002800 continue;
2801 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2802 idx--;
2803 goto nla_put_failure;
2804 }
2805 }
2806 }
2807
2808nla_put_failure:
2809 mutex_unlock(&__ip_vs_mutex);
2810 cb->args[0] = idx;
2811
2812 return skb->len;
2813}
2814
Hans Schillstromfc723252011-01-03 14:44:43 +01002815static int ip_vs_genl_parse_service(struct net *net,
2816 struct ip_vs_service_user_kern *usvc,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002817 struct nlattr *nla, int full_entry,
2818 struct ip_vs_service **ret_svc)
Julius Volz9a812192008-08-14 14:08:44 +02002819{
2820 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2821 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002822 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002823
2824 /* Parse mandatory identifying service fields first */
2825 if (nla == NULL ||
2826 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2827 return -EINVAL;
2828
2829 nla_af = attrs[IPVS_SVC_ATTR_AF];
2830 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2831 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2832 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2833 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2834
2835 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2836 return -EINVAL;
2837
Simon Horman258c8892009-12-15 17:01:25 +01002838 memset(usvc, 0, sizeof(*usvc));
2839
Julius Volzc860c6b2008-09-02 15:55:36 +02002840 usvc->af = nla_get_u16(nla_af);
Julius Volzf94fd042008-09-02 15:55:55 +02002841#ifdef CONFIG_IP_VS_IPV6
2842 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2843#else
2844 if (usvc->af != AF_INET)
2845#endif
Julius Volz9a812192008-08-14 14:08:44 +02002846 return -EAFNOSUPPORT;
2847
2848 if (nla_fwmark) {
2849 usvc->protocol = IPPROTO_TCP;
2850 usvc->fwmark = nla_get_u32(nla_fwmark);
2851 } else {
2852 usvc->protocol = nla_get_u16(nla_protocol);
2853 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2854 usvc->port = nla_get_u16(nla_port);
2855 usvc->fwmark = 0;
2856 }
2857
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002858 if (usvc->fwmark)
Hans Schillstromfc723252011-01-03 14:44:43 +01002859 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002860 else
Hans Schillstromfc723252011-01-03 14:44:43 +01002861 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002862 &usvc->addr, usvc->port);
2863 *ret_svc = svc;
2864
Julius Volz9a812192008-08-14 14:08:44 +02002865 /* If a full entry was requested, check for the additional fields */
2866 if (full_entry) {
Simon Horman0d1e71b2010-08-22 21:37:54 +09002867 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
Julius Volz9a812192008-08-14 14:08:44 +02002868 *nla_netmask;
2869 struct ip_vs_flags flags;
Julius Volz9a812192008-08-14 14:08:44 +02002870
2871 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
Simon Horman0d1e71b2010-08-22 21:37:54 +09002872 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
Julius Volz9a812192008-08-14 14:08:44 +02002873 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2874 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2875 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2876
2877 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2878 return -EINVAL;
2879
2880 nla_memcpy(&flags, nla_flags, sizeof(flags));
2881
2882 /* prefill flags from service if it already exists */
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002883 if (svc)
Julius Volz9a812192008-08-14 14:08:44 +02002884 usvc->flags = svc->flags;
Julius Volz9a812192008-08-14 14:08:44 +02002885
2886 /* set new flags from userland */
2887 usvc->flags = (usvc->flags & ~flags.mask) |
2888 (flags.flags & flags.mask);
Julius Volzc860c6b2008-09-02 15:55:36 +02002889 usvc->sched_name = nla_data(nla_sched);
Simon Horman0d1e71b2010-08-22 21:37:54 +09002890 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
Julius Volz9a812192008-08-14 14:08:44 +02002891 usvc->timeout = nla_get_u32(nla_timeout);
2892 usvc->netmask = nla_get_u32(nla_netmask);
2893 }
2894
2895 return 0;
2896}
2897
Hans Schillstromfc723252011-01-03 14:44:43 +01002898static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2899 struct nlattr *nla)
Julius Volz9a812192008-08-14 14:08:44 +02002900{
Julius Volzc860c6b2008-09-02 15:55:36 +02002901 struct ip_vs_service_user_kern usvc;
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002902 struct ip_vs_service *svc;
Julius Volz9a812192008-08-14 14:08:44 +02002903 int ret;
2904
Hans Schillstromfc723252011-01-03 14:44:43 +01002905 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02002906 return ret ? ERR_PTR(ret) : svc;
Julius Volz9a812192008-08-14 14:08:44 +02002907}
2908
2909static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2910{
2911 struct nlattr *nl_dest;
2912
2913 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2914 if (!nl_dest)
2915 return -EMSGSIZE;
2916
2917 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2918 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2919
2920 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2921 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2922 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2923 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2924 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2925 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2926 atomic_read(&dest->activeconns));
2927 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2928 atomic_read(&dest->inactconns));
2929 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2930 atomic_read(&dest->persistconns));
2931
2932 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2933 goto nla_put_failure;
2934
2935 nla_nest_end(skb, nl_dest);
2936
2937 return 0;
2938
2939nla_put_failure:
2940 nla_nest_cancel(skb, nl_dest);
2941 return -EMSGSIZE;
2942}
2943
2944static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2945 struct netlink_callback *cb)
2946{
2947 void *hdr;
2948
2949 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2950 &ip_vs_genl_family, NLM_F_MULTI,
2951 IPVS_CMD_NEW_DEST);
2952 if (!hdr)
2953 return -EMSGSIZE;
2954
2955 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2956 goto nla_put_failure;
2957
2958 return genlmsg_end(skb, hdr);
2959
2960nla_put_failure:
2961 genlmsg_cancel(skb, hdr);
2962 return -EMSGSIZE;
2963}
2964
2965static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2966 struct netlink_callback *cb)
2967{
2968 int idx = 0;
2969 int start = cb->args[0];
2970 struct ip_vs_service *svc;
2971 struct ip_vs_dest *dest;
2972 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
Hans Schillstroma0840e22011-01-03 14:44:58 +01002973 struct net *net = skb_sknet(skb);
Julius Volz9a812192008-08-14 14:08:44 +02002974
2975 mutex_lock(&__ip_vs_mutex);
2976
2977 /* Try to find the service for which to dump destinations */
2978 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2979 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2980 goto out_err;
2981
Hans Schillstroma0840e22011-01-03 14:44:58 +01002982
Hans Schillstromfc723252011-01-03 14:44:43 +01002983 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02002984 if (IS_ERR(svc) || svc == NULL)
2985 goto out_err;
2986
2987 /* Dump the destinations */
2988 list_for_each_entry(dest, &svc->destinations, n_list) {
2989 if (++idx <= start)
2990 continue;
2991 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2992 idx--;
2993 goto nla_put_failure;
2994 }
2995 }
2996
2997nla_put_failure:
2998 cb->args[0] = idx;
Julius Volz9a812192008-08-14 14:08:44 +02002999
3000out_err:
3001 mutex_unlock(&__ip_vs_mutex);
3002
3003 return skb->len;
3004}
3005
Julius Volzc860c6b2008-09-02 15:55:36 +02003006static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
Julius Volz9a812192008-08-14 14:08:44 +02003007 struct nlattr *nla, int full_entry)
3008{
3009 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3010 struct nlattr *nla_addr, *nla_port;
3011
3012 /* Parse mandatory identifying destination fields first */
3013 if (nla == NULL ||
3014 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3015 return -EINVAL;
3016
3017 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3018 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3019
3020 if (!(nla_addr && nla_port))
3021 return -EINVAL;
3022
Simon Horman258c8892009-12-15 17:01:25 +01003023 memset(udest, 0, sizeof(*udest));
3024
Julius Volz9a812192008-08-14 14:08:44 +02003025 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3026 udest->port = nla_get_u16(nla_port);
3027
3028 /* If a full entry was requested, check for the additional fields */
3029 if (full_entry) {
3030 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3031 *nla_l_thresh;
3032
3033 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3034 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3035 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3036 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3037
3038 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3039 return -EINVAL;
3040
3041 udest->conn_flags = nla_get_u32(nla_fwd)
3042 & IP_VS_CONN_F_FWD_MASK;
3043 udest->weight = nla_get_u32(nla_weight);
3044 udest->u_threshold = nla_get_u32(nla_u_thresh);
3045 udest->l_threshold = nla_get_u32(nla_l_thresh);
3046 }
3047
3048 return 0;
3049}
3050
3051static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3052 const char *mcast_ifn, __be32 syncid)
3053{
3054 struct nlattr *nl_daemon;
3055
3056 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3057 if (!nl_daemon)
3058 return -EMSGSIZE;
3059
3060 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3061 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3062 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3063
3064 nla_nest_end(skb, nl_daemon);
3065
3066 return 0;
3067
3068nla_put_failure:
3069 nla_nest_cancel(skb, nl_daemon);
3070 return -EMSGSIZE;
3071}
3072
3073static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3074 const char *mcast_ifn, __be32 syncid,
3075 struct netlink_callback *cb)
3076{
3077 void *hdr;
3078 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3079 &ip_vs_genl_family, NLM_F_MULTI,
3080 IPVS_CMD_NEW_DAEMON);
3081 if (!hdr)
3082 return -EMSGSIZE;
3083
3084 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3085 goto nla_put_failure;
3086
3087 return genlmsg_end(skb, hdr);
3088
3089nla_put_failure:
3090 genlmsg_cancel(skb, hdr);
3091 return -EMSGSIZE;
3092}
3093
3094static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3095 struct netlink_callback *cb)
3096{
Hans Schillstromf1313152011-01-03 14:44:55 +01003097 struct net *net = skb_net(skb);
3098 struct netns_ipvs *ipvs = net_ipvs(net);
3099
Julius Volz9a812192008-08-14 14:08:44 +02003100 mutex_lock(&__ip_vs_mutex);
Hans Schillstromf1313152011-01-03 14:44:55 +01003101 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
Julius Volz9a812192008-08-14 14:08:44 +02003102 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
Hans Schillstromf1313152011-01-03 14:44:55 +01003103 ipvs->master_mcast_ifn,
3104 ipvs->master_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003105 goto nla_put_failure;
3106
3107 cb->args[0] = 1;
3108 }
3109
Hans Schillstromf1313152011-01-03 14:44:55 +01003110 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
Julius Volz9a812192008-08-14 14:08:44 +02003111 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
Hans Schillstromf1313152011-01-03 14:44:55 +01003112 ipvs->backup_mcast_ifn,
3113 ipvs->backup_syncid, cb) < 0)
Julius Volz9a812192008-08-14 14:08:44 +02003114 goto nla_put_failure;
3115
3116 cb->args[1] = 1;
3117 }
3118
3119nla_put_failure:
3120 mutex_unlock(&__ip_vs_mutex);
3121
3122 return skb->len;
3123}
3124
Hans Schillstromf1313152011-01-03 14:44:55 +01003125static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003126{
3127 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3128 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3129 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3130 return -EINVAL;
3131
Hans Schillstromf1313152011-01-03 14:44:55 +01003132 return start_sync_thread(net,
3133 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
Julius Volz9a812192008-08-14 14:08:44 +02003134 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3135 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3136}
3137
Hans Schillstromf1313152011-01-03 14:44:55 +01003138static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003139{
3140 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3141 return -EINVAL;
3142
Hans Schillstromf1313152011-01-03 14:44:55 +01003143 return stop_sync_thread(net,
3144 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
Julius Volz9a812192008-08-14 14:08:44 +02003145}
3146
Hans Schillstrom93304192011-01-03 14:44:51 +01003147static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
Julius Volz9a812192008-08-14 14:08:44 +02003148{
3149 struct ip_vs_timeout_user t;
3150
Hans Schillstrom93304192011-01-03 14:44:51 +01003151 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003152
3153 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3154 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3155
3156 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3157 t.tcp_fin_timeout =
3158 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3159
3160 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3161 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3162
Hans Schillstrom93304192011-01-03 14:44:51 +01003163 return ip_vs_set_timeout(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003164}
3165
3166static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3167{
3168 struct ip_vs_service *svc = NULL;
Julius Volzc860c6b2008-09-02 15:55:36 +02003169 struct ip_vs_service_user_kern usvc;
3170 struct ip_vs_dest_user_kern udest;
Julius Volz9a812192008-08-14 14:08:44 +02003171 int ret = 0, cmd;
3172 int need_full_svc = 0, need_full_dest = 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003173 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003174 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003175
Hans Schillstromfc723252011-01-03 14:44:43 +01003176 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003177 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003178 cmd = info->genlhdr->cmd;
3179
3180 mutex_lock(&__ip_vs_mutex);
3181
3182 if (cmd == IPVS_CMD_FLUSH) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003183 ret = ip_vs_flush(net);
Julius Volz9a812192008-08-14 14:08:44 +02003184 goto out;
3185 } else if (cmd == IPVS_CMD_SET_CONFIG) {
Hans Schillstrom93304192011-01-03 14:44:51 +01003186 ret = ip_vs_genl_set_config(net, info->attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003187 goto out;
3188 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3189 cmd == IPVS_CMD_DEL_DAEMON) {
3190
3191 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3192
3193 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3194 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3195 info->attrs[IPVS_CMD_ATTR_DAEMON],
3196 ip_vs_daemon_policy)) {
3197 ret = -EINVAL;
3198 goto out;
3199 }
3200
3201 if (cmd == IPVS_CMD_NEW_DAEMON)
Hans Schillstromf1313152011-01-03 14:44:55 +01003202 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003203 else
Hans Schillstromf1313152011-01-03 14:44:55 +01003204 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
Julius Volz9a812192008-08-14 14:08:44 +02003205 goto out;
3206 } else if (cmd == IPVS_CMD_ZERO &&
3207 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
Hans Schillstromfc723252011-01-03 14:44:43 +01003208 ret = ip_vs_zero_all(net);
Julius Volz9a812192008-08-14 14:08:44 +02003209 goto out;
3210 }
3211
3212 /* All following commands require a service argument, so check if we
3213 * received a valid one. We need a full service specification when
3214 * adding / editing a service. Only identifying members otherwise. */
3215 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3216 need_full_svc = 1;
3217
Hans Schillstromfc723252011-01-03 14:44:43 +01003218 ret = ip_vs_genl_parse_service(net, &usvc,
Julius Volz9a812192008-08-14 14:08:44 +02003219 info->attrs[IPVS_CMD_ATTR_SERVICE],
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003220 need_full_svc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003221 if (ret)
3222 goto out;
3223
Julius Volz9a812192008-08-14 14:08:44 +02003224 /* Unless we're adding a new service, the service must already exist */
3225 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3226 ret = -ESRCH;
3227 goto out;
3228 }
3229
3230 /* Destination commands require a valid destination argument. For
3231 * adding / editing a destination, we need a full destination
3232 * specification. */
3233 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3234 cmd == IPVS_CMD_DEL_DEST) {
3235 if (cmd != IPVS_CMD_DEL_DEST)
3236 need_full_dest = 1;
3237
3238 ret = ip_vs_genl_parse_dest(&udest,
3239 info->attrs[IPVS_CMD_ATTR_DEST],
3240 need_full_dest);
3241 if (ret)
3242 goto out;
3243 }
3244
3245 switch (cmd) {
3246 case IPVS_CMD_NEW_SERVICE:
3247 if (svc == NULL)
Hans Schillstromfc723252011-01-03 14:44:43 +01003248 ret = ip_vs_add_service(net, &usvc, &svc);
Julius Volz9a812192008-08-14 14:08:44 +02003249 else
3250 ret = -EEXIST;
3251 break;
3252 case IPVS_CMD_SET_SERVICE:
3253 ret = ip_vs_edit_service(svc, &usvc);
3254 break;
3255 case IPVS_CMD_DEL_SERVICE:
3256 ret = ip_vs_del_service(svc);
Julian Anastasov26c15cf2010-09-21 18:12:30 +02003257 /* do not use svc, it can be freed */
Julius Volz9a812192008-08-14 14:08:44 +02003258 break;
3259 case IPVS_CMD_NEW_DEST:
3260 ret = ip_vs_add_dest(svc, &udest);
3261 break;
3262 case IPVS_CMD_SET_DEST:
3263 ret = ip_vs_edit_dest(svc, &udest);
3264 break;
3265 case IPVS_CMD_DEL_DEST:
3266 ret = ip_vs_del_dest(svc, &udest);
3267 break;
3268 case IPVS_CMD_ZERO:
3269 ret = ip_vs_zero_service(svc);
3270 break;
3271 default:
3272 ret = -EINVAL;
3273 }
3274
3275out:
Julius Volz9a812192008-08-14 14:08:44 +02003276 mutex_unlock(&__ip_vs_mutex);
3277
3278 return ret;
3279}
3280
3281static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3282{
3283 struct sk_buff *msg;
3284 void *reply;
3285 int ret, cmd, reply_cmd;
Hans Schillstromfc723252011-01-03 14:44:43 +01003286 struct net *net;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003287 struct netns_ipvs *ipvs;
Julius Volz9a812192008-08-14 14:08:44 +02003288
Hans Schillstromfc723252011-01-03 14:44:43 +01003289 net = skb_sknet(skb);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003290 ipvs = net_ipvs(net);
Julius Volz9a812192008-08-14 14:08:44 +02003291 cmd = info->genlhdr->cmd;
3292
3293 if (cmd == IPVS_CMD_GET_SERVICE)
3294 reply_cmd = IPVS_CMD_NEW_SERVICE;
3295 else if (cmd == IPVS_CMD_GET_INFO)
3296 reply_cmd = IPVS_CMD_SET_INFO;
3297 else if (cmd == IPVS_CMD_GET_CONFIG)
3298 reply_cmd = IPVS_CMD_SET_CONFIG;
3299 else {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003300 pr_err("unknown Generic Netlink command\n");
Julius Volz9a812192008-08-14 14:08:44 +02003301 return -EINVAL;
3302 }
3303
3304 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3305 if (!msg)
3306 return -ENOMEM;
3307
3308 mutex_lock(&__ip_vs_mutex);
3309
3310 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3311 if (reply == NULL)
3312 goto nla_put_failure;
3313
3314 switch (cmd) {
3315 case IPVS_CMD_GET_SERVICE:
3316 {
3317 struct ip_vs_service *svc;
3318
Hans Schillstromfc723252011-01-03 14:44:43 +01003319 svc = ip_vs_genl_find_service(net,
3320 info->attrs[IPVS_CMD_ATTR_SERVICE]);
Julius Volz9a812192008-08-14 14:08:44 +02003321 if (IS_ERR(svc)) {
3322 ret = PTR_ERR(svc);
3323 goto out_err;
3324 } else if (svc) {
3325 ret = ip_vs_genl_fill_service(msg, svc);
Julius Volz9a812192008-08-14 14:08:44 +02003326 if (ret)
3327 goto nla_put_failure;
3328 } else {
3329 ret = -ESRCH;
3330 goto out_err;
3331 }
3332
3333 break;
3334 }
3335
3336 case IPVS_CMD_GET_CONFIG:
3337 {
3338 struct ip_vs_timeout_user t;
3339
Hans Schillstrom93304192011-01-03 14:44:51 +01003340 __ip_vs_get_timeouts(net, &t);
Julius Volz9a812192008-08-14 14:08:44 +02003341#ifdef CONFIG_IP_VS_PROTO_TCP
3342 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3343 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3344 t.tcp_fin_timeout);
3345#endif
3346#ifdef CONFIG_IP_VS_PROTO_UDP
3347 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3348#endif
3349
3350 break;
3351 }
3352
3353 case IPVS_CMD_GET_INFO:
3354 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3355 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
Catalin(ux) M. BOIE6f7edb42010-01-05 05:50:24 +01003356 ip_vs_conn_tab_size);
Julius Volz9a812192008-08-14 14:08:44 +02003357 break;
3358 }
3359
3360 genlmsg_end(msg, reply);
Johannes Berg134e6372009-07-10 09:51:34 +00003361 ret = genlmsg_reply(msg, info);
Julius Volz9a812192008-08-14 14:08:44 +02003362 goto out;
3363
3364nla_put_failure:
Hannes Eder1e3e2382009-08-02 11:05:41 +00003365 pr_err("not enough space in Netlink message\n");
Julius Volz9a812192008-08-14 14:08:44 +02003366 ret = -EMSGSIZE;
3367
3368out_err:
3369 nlmsg_free(msg);
3370out:
3371 mutex_unlock(&__ip_vs_mutex);
3372
3373 return ret;
3374}
3375
3376
3377static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3378 {
3379 .cmd = IPVS_CMD_NEW_SERVICE,
3380 .flags = GENL_ADMIN_PERM,
3381 .policy = ip_vs_cmd_policy,
3382 .doit = ip_vs_genl_set_cmd,
3383 },
3384 {
3385 .cmd = IPVS_CMD_SET_SERVICE,
3386 .flags = GENL_ADMIN_PERM,
3387 .policy = ip_vs_cmd_policy,
3388 .doit = ip_vs_genl_set_cmd,
3389 },
3390 {
3391 .cmd = IPVS_CMD_DEL_SERVICE,
3392 .flags = GENL_ADMIN_PERM,
3393 .policy = ip_vs_cmd_policy,
3394 .doit = ip_vs_genl_set_cmd,
3395 },
3396 {
3397 .cmd = IPVS_CMD_GET_SERVICE,
3398 .flags = GENL_ADMIN_PERM,
3399 .doit = ip_vs_genl_get_cmd,
3400 .dumpit = ip_vs_genl_dump_services,
3401 .policy = ip_vs_cmd_policy,
3402 },
3403 {
3404 .cmd = IPVS_CMD_NEW_DEST,
3405 .flags = GENL_ADMIN_PERM,
3406 .policy = ip_vs_cmd_policy,
3407 .doit = ip_vs_genl_set_cmd,
3408 },
3409 {
3410 .cmd = IPVS_CMD_SET_DEST,
3411 .flags = GENL_ADMIN_PERM,
3412 .policy = ip_vs_cmd_policy,
3413 .doit = ip_vs_genl_set_cmd,
3414 },
3415 {
3416 .cmd = IPVS_CMD_DEL_DEST,
3417 .flags = GENL_ADMIN_PERM,
3418 .policy = ip_vs_cmd_policy,
3419 .doit = ip_vs_genl_set_cmd,
3420 },
3421 {
3422 .cmd = IPVS_CMD_GET_DEST,
3423 .flags = GENL_ADMIN_PERM,
3424 .policy = ip_vs_cmd_policy,
3425 .dumpit = ip_vs_genl_dump_dests,
3426 },
3427 {
3428 .cmd = IPVS_CMD_NEW_DAEMON,
3429 .flags = GENL_ADMIN_PERM,
3430 .policy = ip_vs_cmd_policy,
3431 .doit = ip_vs_genl_set_cmd,
3432 },
3433 {
3434 .cmd = IPVS_CMD_DEL_DAEMON,
3435 .flags = GENL_ADMIN_PERM,
3436 .policy = ip_vs_cmd_policy,
3437 .doit = ip_vs_genl_set_cmd,
3438 },
3439 {
3440 .cmd = IPVS_CMD_GET_DAEMON,
3441 .flags = GENL_ADMIN_PERM,
3442 .dumpit = ip_vs_genl_dump_daemons,
3443 },
3444 {
3445 .cmd = IPVS_CMD_SET_CONFIG,
3446 .flags = GENL_ADMIN_PERM,
3447 .policy = ip_vs_cmd_policy,
3448 .doit = ip_vs_genl_set_cmd,
3449 },
3450 {
3451 .cmd = IPVS_CMD_GET_CONFIG,
3452 .flags = GENL_ADMIN_PERM,
3453 .doit = ip_vs_genl_get_cmd,
3454 },
3455 {
3456 .cmd = IPVS_CMD_GET_INFO,
3457 .flags = GENL_ADMIN_PERM,
3458 .doit = ip_vs_genl_get_cmd,
3459 },
3460 {
3461 .cmd = IPVS_CMD_ZERO,
3462 .flags = GENL_ADMIN_PERM,
3463 .policy = ip_vs_cmd_policy,
3464 .doit = ip_vs_genl_set_cmd,
3465 },
3466 {
3467 .cmd = IPVS_CMD_FLUSH,
3468 .flags = GENL_ADMIN_PERM,
3469 .doit = ip_vs_genl_set_cmd,
3470 },
3471};
3472
3473static int __init ip_vs_genl_register(void)
3474{
Michał Mirosław8f698d52009-05-21 10:34:05 +00003475 return genl_register_family_with_ops(&ip_vs_genl_family,
3476 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
Julius Volz9a812192008-08-14 14:08:44 +02003477}
3478
3479static void ip_vs_genl_unregister(void)
3480{
3481 genl_unregister_family(&ip_vs_genl_family);
3482}
3483
3484/* End of Generic Netlink interface definitions */
3485
/*
 * per netns init/exit func.
 */
3489int __net_init __ip_vs_control_init(struct net *net)
3490{
Hans Schillstromfc723252011-01-03 14:44:43 +01003491 int idx;
3492 struct netns_ipvs *ipvs = net_ipvs(net);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003493 struct ctl_table *tbl;
Hans Schillstromfc723252011-01-03 14:44:43 +01003494
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003495 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3496 return -EPERM;
Hans Schillstroma0840e22011-01-03 14:44:58 +01003497
3498 atomic_set(&ipvs->dropentry, 0);
3499 spin_lock_init(&ipvs->dropentry_lock);
3500 spin_lock_init(&ipvs->droppacket_lock);
3501 spin_lock_init(&ipvs->securetcp_lock);
3502 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3503
3504 /* Initialize rs_table */
3505 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3506 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3507
Hans Schillstromb17fc992011-01-03 14:44:56 +01003508 /* procfs stats */
3509 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3510 if (ipvs->tot_stats == NULL) {
3511 pr_err("%s(): no memory.\n", __func__);
3512 return -ENOMEM;
3513 }
3514 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3515 if (!ipvs->cpustats) {
3516 pr_err("%s() alloc_percpu failed\n", __func__);
3517 goto err_alloc;
3518 }
3519 spin_lock_init(&ipvs->tot_stats->lock);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003520
Hans Schillstromfc723252011-01-03 14:44:43 +01003521 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3522 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3523
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003524 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3525 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003526 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3527 &ip_vs_stats_percpu_fops);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003528
3529 if (!net_eq(net, &init_net)) {
3530 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3531 if (tbl == NULL)
3532 goto err_dup;
3533 } else
3534 tbl = vs_vars;
3535 /* Initialize sysctl defaults */
3536 idx = 0;
3537 ipvs->sysctl_amemthresh = 1024;
3538 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3539 ipvs->sysctl_am_droprate = 10;
3540 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3541 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3542 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3543#ifdef CONFIG_IP_VS_NFCT
3544 tbl[idx++].data = &ipvs->sysctl_conntrack;
3545#endif
3546 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3547 ipvs->sysctl_snat_reroute = 1;
3548 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3549 ipvs->sysctl_sync_ver = 1;
3550 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3551 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3552 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3553 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3554 ipvs->sysctl_sync_threshold[0] = 3;
3555 ipvs->sysctl_sync_threshold[1] = 50;
3556 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3557 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3558 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3559
3560
3561 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003562 vs_vars);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003563 if (ipvs->sysctl_hdr == NULL)
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003564 goto err_reg;
Hans Schillstromb17fc992011-01-03 14:44:56 +01003565 ip_vs_new_estimator(net, ipvs->tot_stats);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003566 ipvs->sysctl_tbl = tbl;
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003567 return 0;
3568
3569err_reg:
Hans Schillstroma0840e22011-01-03 14:44:58 +01003570 if (!net_eq(net, &init_net))
3571 kfree(tbl);
3572err_dup:
Hans Schillstromb17fc992011-01-03 14:44:56 +01003573 free_percpu(ipvs->cpustats);
3574err_alloc:
3575 kfree(ipvs->tot_stats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003576 return -ENOMEM;
3577}
3578
3579static void __net_exit __ip_vs_control_cleanup(struct net *net)
3580{
Hans Schillstromb17fc992011-01-03 14:44:56 +01003581 struct netns_ipvs *ipvs = net_ipvs(net);
3582
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003583 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3584 return;
3585
Hans Schillstromb17fc992011-01-03 14:44:56 +01003586 ip_vs_kill_estimator(net, ipvs->tot_stats);
Hans Schillstroma0840e22011-01-03 14:44:58 +01003587 unregister_net_sysctl_table(ipvs->sysctl_hdr);
Hans Schillstromb17fc992011-01-03 14:44:56 +01003588 proc_net_remove(net, "ip_vs_stats_percpu");
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003589 proc_net_remove(net, "ip_vs_stats");
3590 proc_net_remove(net, "ip_vs");
Hans Schillstromb17fc992011-01-03 14:44:56 +01003591 free_percpu(ipvs->cpustats);
3592 kfree(ipvs->tot_stats);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003593}
3594
3595static struct pernet_operations ipvs_control_ops = {
3596 .init = __ip_vs_control_init,
3597 .exit = __ip_vs_control_cleanup,
3598};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003599
Sven Wegener048cf482008-08-10 18:24:35 +00003600int __init ip_vs_control_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003601{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003602 int idx;
Hans Schillstromfc723252011-01-03 14:44:43 +01003603 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003604
3605 EnterFunction(2);
3606
Hans Schillstromfc723252011-01-03 14:44:43 +01003607 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003608 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3609 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3610 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3611 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003612
3613 ret = register_pernet_subsys(&ipvs_control_ops);
3614 if (ret) {
3615 pr_err("cannot register namespace.\n");
3616 goto err;
Eduardo Blancod86bef72010-10-19 10:26:47 +01003617 }
Hans Schillstromfc723252011-01-03 14:44:43 +01003618
3619 smp_wmb(); /* Do we really need it now ? */
Eduardo Blancod86bef72010-10-19 10:26:47 +01003620
Linus Torvalds1da177e2005-04-16 15:20:36 -07003621 ret = nf_register_sockopt(&ip_vs_sockopts);
3622 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003623 pr_err("cannot register sockopt.\n");
Hans Schillstromfc723252011-01-03 14:44:43 +01003624 goto err_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003625 }
3626
Julius Volz9a812192008-08-14 14:08:44 +02003627 ret = ip_vs_genl_register();
3628 if (ret) {
Hannes Eder1e3e2382009-08-02 11:05:41 +00003629 pr_err("cannot register Generic Netlink interface.\n");
Julius Volz9a812192008-08-14 14:08:44 +02003630 nf_unregister_sockopt(&ip_vs_sockopts);
Hans Schillstromfc723252011-01-03 14:44:43 +01003631 goto err_net;
Julius Volz9a812192008-08-14 14:08:44 +02003632 }
3633
Linus Torvalds1da177e2005-04-16 15:20:36 -07003634 /* Hook the defense timer */
3635 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3636
3637 LeaveFunction(2);
3638 return 0;
Hans Schillstromfc723252011-01-03 14:44:43 +01003639
3640err_net:
3641 unregister_pernet_subsys(&ipvs_control_ops);
3642err:
3643 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003644}
3645
3646
3647void ip_vs_control_cleanup(void)
3648{
3649 EnterFunction(2);
3650 ip_vs_trash_cleanup();
Tejun Heoafe2c512010-12-14 16:21:17 +01003651 cancel_delayed_work_sync(&defense_work);
Oleg Nesterov28e53bd2007-05-09 02:34:22 -07003652 cancel_work_sync(&defense_work.work);
Hans Schillstrom61b1ab42011-01-03 14:44:42 +01003653 unregister_pernet_subsys(&ipvs_control_ops);
Julius Volz9a812192008-08-14 14:08:44 +02003654 ip_vs_genl_unregister();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003655 nf_unregister_sockopt(&ip_vs_sockopts);
3656 LeaveFunction(2);
3657}