/*
 * ip_vs_proto_udp.c:	UDP load balancing support for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */
15
/*
 * Message prefix for this file: pr_fmt() prepends KMSG_COMPONENT and
 * ": " to every format string that is passed through it.
 */
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020019#include <linux/in.h>
20#include <linux/ip.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/kernel.h>
Herbert Xuaf1e1cf2007-10-14 00:39:33 -070022#include <linux/netfilter.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/netfilter_ipv4.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020024#include <linux/udp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025
26#include <net/ip_vs.h>
Arnaldo Carvalho de Meloc9bdd4b2007-03-12 20:09:15 -030027#include <net/ip.h>
Stephen Rothwell63f2c042008-09-12 23:23:50 -070028#include <net/ip6_checksum.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070029
Linus Torvalds1da177e2005-04-16 15:20:36 -070030static int
Julius Volz51ef3482008-09-02 15:55:40 +020031udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
Linus Torvalds1da177e2005-04-16 15:20:36 -070032 int *verdict, struct ip_vs_conn **cpp)
33{
34 struct ip_vs_service *svc;
35 struct udphdr _udph, *uh;
Julius Volz3c2e0502008-09-02 15:55:38 +020036 struct ip_vs_iphdr iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -070037
Julius Volz51ef3482008-09-02 15:55:40 +020038 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
Julius Volz3c2e0502008-09-02 15:55:38 +020039
40 uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
Linus Torvalds1da177e2005-04-16 15:20:36 -070041 if (uh == NULL) {
42 *verdict = NF_DROP;
43 return 0;
44 }
45
Julius Volz51ef3482008-09-02 15:55:40 +020046 svc = ip_vs_service_get(af, skb->mark, iph.protocol,
Julius Volz3c2e0502008-09-02 15:55:38 +020047 &iph.daddr, uh->dest);
48 if (svc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070049 if (ip_vs_todrop()) {
50 /*
51 * It seems that we are very loaded.
52 * We have to drop this packet :(
53 */
54 ip_vs_service_put(svc);
55 *verdict = NF_DROP;
56 return 0;
57 }
58
59 /*
60 * Let the virtual server select a real server for the
61 * incoming connection, and create a connection entry.
62 */
63 *cpp = ip_vs_schedule(svc, skb);
64 if (!*cpp) {
65 *verdict = ip_vs_leave(svc, skb, pp);
66 return 0;
67 }
68 ip_vs_service_put(svc);
69 }
70 return 1;
71}
72
73
74static inline void
Julius Volz0bbdd422008-09-02 15:55:42 +020075udp_fast_csum_update(int af, struct udphdr *uhdr,
76 const union nf_inet_addr *oldip,
77 const union nf_inet_addr *newip,
Al Viro014d7302006-09-28 14:29:52 -070078 __be16 oldport, __be16 newport)
Linus Torvalds1da177e2005-04-16 15:20:36 -070079{
Julius Volz0bbdd422008-09-02 15:55:42 +020080#ifdef CONFIG_IP_VS_IPV6
81 if (af == AF_INET6)
82 uhdr->check =
83 csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
84 ip_vs_check_diff2(oldport, newport,
85 ~csum_unfold(uhdr->check))));
86 else
87#endif
88 uhdr->check =
89 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
90 ip_vs_check_diff2(oldport, newport,
91 ~csum_unfold(uhdr->check))));
Linus Torvalds1da177e2005-04-16 15:20:36 -070092 if (!uhdr->check)
Al Virof6ab0282006-11-16 02:36:50 -080093 uhdr->check = CSUM_MANGLED_0;
Linus Torvalds1da177e2005-04-16 15:20:36 -070094}
95
Simon Horman503e81f2008-09-08 12:04:21 +100096static inline void
97udp_partial_csum_update(int af, struct udphdr *uhdr,
98 const union nf_inet_addr *oldip,
99 const union nf_inet_addr *newip,
100 __be16 oldlen, __be16 newlen)
101{
102#ifdef CONFIG_IP_VS_IPV6
103 if (af == AF_INET6)
104 uhdr->check =
105 csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
106 ip_vs_check_diff2(oldlen, newlen,
107 ~csum_unfold(uhdr->check))));
108 else
109#endif
110 uhdr->check =
111 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
112 ip_vs_check_diff2(oldlen, newlen,
113 ~csum_unfold(uhdr->check))));
114}
115
116
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117static int
Herbert Xu3db05fe2007-10-15 00:53:15 -0700118udp_snat_handler(struct sk_buff *skb,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
120{
121 struct udphdr *udph;
Julius Volz0bbdd422008-09-02 15:55:42 +0200122 unsigned int udphoff;
Simon Horman503e81f2008-09-08 12:04:21 +1000123 int oldlen;
Julius Volz0bbdd422008-09-02 15:55:42 +0200124
125#ifdef CONFIG_IP_VS_IPV6
126 if (cp->af == AF_INET6)
127 udphoff = sizeof(struct ipv6hdr);
128 else
129#endif
130 udphoff = ip_hdrlen(skb);
Simon Horman503e81f2008-09-08 12:04:21 +1000131 oldlen = skb->len - udphoff;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132
133 /* csum_check requires unshared skb */
Herbert Xu3db05fe2007-10-15 00:53:15 -0700134 if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 return 0;
136
137 if (unlikely(cp->app != NULL)) {
138 /* Some checks before mangling */
Julius Volz0bbdd422008-09-02 15:55:42 +0200139 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140 return 0;
141
142 /*
143 * Call application helper if needed
144 */
Herbert Xu3db05fe2007-10-15 00:53:15 -0700145 if (!ip_vs_app_pkt_out(cp, skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 return 0;
147 }
148
Julius Volz0bbdd422008-09-02 15:55:42 +0200149 udph = (void *)skb_network_header(skb) + udphoff;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150 udph->source = cp->vport;
151
152 /*
153 * Adjust UDP checksums
154 */
Simon Horman503e81f2008-09-08 12:04:21 +1000155 if (skb->ip_summed == CHECKSUM_PARTIAL) {
156 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
Harvey Harrisonca620592008-11-06 23:09:56 -0800157 htons(oldlen),
158 htons(skb->len - udphoff));
Simon Horman503e81f2008-09-08 12:04:21 +1000159 } else if (!cp->app && (udph->check != 0)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160 /* Only port and addr are changed, do fast csum update */
Julius Volz0bbdd422008-09-02 15:55:42 +0200161 udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162 cp->dport, cp->vport);
Herbert Xu3db05fe2007-10-15 00:53:15 -0700163 if (skb->ip_summed == CHECKSUM_COMPLETE)
164 skb->ip_summed = CHECKSUM_NONE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165 } else {
166 /* full checksum calculation */
167 udph->check = 0;
Herbert Xu3db05fe2007-10-15 00:53:15 -0700168 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
Julius Volz0bbdd422008-09-02 15:55:42 +0200169#ifdef CONFIG_IP_VS_IPV6
170 if (cp->af == AF_INET6)
171 udph->check = csum_ipv6_magic(&cp->vaddr.in6,
172 &cp->caddr.in6,
173 skb->len - udphoff,
174 cp->protocol, skb->csum);
175 else
176#endif
177 udph->check = csum_tcpudp_magic(cp->vaddr.ip,
178 cp->caddr.ip,
179 skb->len - udphoff,
180 cp->protocol,
181 skb->csum);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 if (udph->check == 0)
Al Virof6ab0282006-11-16 02:36:50 -0800183 udph->check = CSUM_MANGLED_0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700184 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
185 pp->name, udph->check,
186 (char*)&(udph->check) - (char*)udph);
187 }
188 return 1;
189}
190
191
192static int
Herbert Xu3db05fe2007-10-15 00:53:15 -0700193udp_dnat_handler(struct sk_buff *skb,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
195{
196 struct udphdr *udph;
Julius Volz0bbdd422008-09-02 15:55:42 +0200197 unsigned int udphoff;
Simon Horman503e81f2008-09-08 12:04:21 +1000198 int oldlen;
Julius Volz0bbdd422008-09-02 15:55:42 +0200199
200#ifdef CONFIG_IP_VS_IPV6
201 if (cp->af == AF_INET6)
202 udphoff = sizeof(struct ipv6hdr);
203 else
204#endif
205 udphoff = ip_hdrlen(skb);
Simon Horman503e81f2008-09-08 12:04:21 +1000206 oldlen = skb->len - udphoff;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207
208 /* csum_check requires unshared skb */
Herbert Xu3db05fe2007-10-15 00:53:15 -0700209 if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210 return 0;
211
212 if (unlikely(cp->app != NULL)) {
213 /* Some checks before mangling */
Julius Volz0bbdd422008-09-02 15:55:42 +0200214 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215 return 0;
216
217 /*
218 * Attempt ip_vs_app call.
219 * It will fix ip_vs_conn
220 */
Herbert Xu3db05fe2007-10-15 00:53:15 -0700221 if (!ip_vs_app_pkt_in(cp, skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222 return 0;
223 }
224
Julius Volz0bbdd422008-09-02 15:55:42 +0200225 udph = (void *)skb_network_header(skb) + udphoff;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226 udph->dest = cp->dport;
227
228 /*
229 * Adjust UDP checksums
230 */
Simon Horman503e81f2008-09-08 12:04:21 +1000231 if (skb->ip_summed == CHECKSUM_PARTIAL) {
232 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
Harvey Harrisonca620592008-11-06 23:09:56 -0800233 htons(oldlen),
234 htons(skb->len - udphoff));
Simon Horman503e81f2008-09-08 12:04:21 +1000235 } else if (!cp->app && (udph->check != 0)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236 /* Only port and addr are changed, do fast csum update */
Julius Volz0bbdd422008-09-02 15:55:42 +0200237 udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238 cp->vport, cp->dport);
Herbert Xu3db05fe2007-10-15 00:53:15 -0700239 if (skb->ip_summed == CHECKSUM_COMPLETE)
240 skb->ip_summed = CHECKSUM_NONE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 } else {
242 /* full checksum calculation */
243 udph->check = 0;
Herbert Xu3db05fe2007-10-15 00:53:15 -0700244 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
Julius Volz0bbdd422008-09-02 15:55:42 +0200245#ifdef CONFIG_IP_VS_IPV6
246 if (cp->af == AF_INET6)
247 udph->check = csum_ipv6_magic(&cp->caddr.in6,
248 &cp->daddr.in6,
249 skb->len - udphoff,
250 cp->protocol, skb->csum);
251 else
252#endif
253 udph->check = csum_tcpudp_magic(cp->caddr.ip,
254 cp->daddr.ip,
255 skb->len - udphoff,
256 cp->protocol,
257 skb->csum);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 if (udph->check == 0)
Al Virof6ab0282006-11-16 02:36:50 -0800259 udph->check = CSUM_MANGLED_0;
Herbert Xu3db05fe2007-10-15 00:53:15 -0700260 skb->ip_summed = CHECKSUM_UNNECESSARY;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 }
262 return 1;
263}
264
265
266static int
Julius Volz51ef3482008-09-02 15:55:40 +0200267udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268{
269 struct udphdr _udph, *uh;
Julius Volz51ef3482008-09-02 15:55:40 +0200270 unsigned int udphoff;
271
272#ifdef CONFIG_IP_VS_IPV6
273 if (af == AF_INET6)
274 udphoff = sizeof(struct ipv6hdr);
275 else
276#endif
277 udphoff = ip_hdrlen(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278
279 uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
280 if (uh == NULL)
281 return 0;
282
283 if (uh->check != 0) {
284 switch (skb->ip_summed) {
285 case CHECKSUM_NONE:
286 skb->csum = skb_checksum(skb, udphoff,
287 skb->len - udphoff, 0);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700288 case CHECKSUM_COMPLETE:
Julius Volz51ef3482008-09-02 15:55:40 +0200289#ifdef CONFIG_IP_VS_IPV6
290 if (af == AF_INET6) {
291 if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
292 &ipv6_hdr(skb)->daddr,
293 skb->len - udphoff,
294 ipv6_hdr(skb)->nexthdr,
295 skb->csum)) {
296 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
297 "Failed checksum for");
298 return 0;
299 }
300 } else
301#endif
302 if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
303 ip_hdr(skb)->daddr,
304 skb->len - udphoff,
305 ip_hdr(skb)->protocol,
306 skb->csum)) {
307 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
308 "Failed checksum for");
309 return 0;
310 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311 break;
312 default:
Patrick McHardy84fa7932006-08-29 16:44:56 -0700313 /* No need to checksum. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314 break;
315 }
316 }
317 return 1;
318}
319
320
/*
 *	Note: the caller guarantees that only one of register_app,
 *	unregister_app or app_conn_bind is called at any one time.
 */
325
326#define UDP_APP_TAB_BITS 4
327#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
328#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
329
330static struct list_head udp_apps[UDP_APP_TAB_SIZE];
331static DEFINE_SPINLOCK(udp_app_lock);
332
Al Viro75e7ce62006-11-14 21:13:28 -0800333static inline __u16 udp_app_hashkey(__be16 port)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334{
Al Viro75e7ce62006-11-14 21:13:28 -0800335 return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
336 & UDP_APP_TAB_MASK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337}
338
339
340static int udp_register_app(struct ip_vs_app *inc)
341{
342 struct ip_vs_app *i;
Al Viro75e7ce62006-11-14 21:13:28 -0800343 __u16 hash;
344 __be16 port = inc->port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 int ret = 0;
346
347 hash = udp_app_hashkey(port);
348
349
350 spin_lock_bh(&udp_app_lock);
351 list_for_each_entry(i, &udp_apps[hash], p_list) {
352 if (i->port == port) {
353 ret = -EEXIST;
354 goto out;
355 }
356 }
357 list_add(&inc->p_list, &udp_apps[hash]);
358 atomic_inc(&ip_vs_protocol_udp.appcnt);
359
360 out:
361 spin_unlock_bh(&udp_app_lock);
362 return ret;
363}
364
365
366static void
367udp_unregister_app(struct ip_vs_app *inc)
368{
369 spin_lock_bh(&udp_app_lock);
370 atomic_dec(&ip_vs_protocol_udp.appcnt);
371 list_del(&inc->p_list);
372 spin_unlock_bh(&udp_app_lock);
373}
374
375
376static int udp_app_conn_bind(struct ip_vs_conn *cp)
377{
378 int hash;
379 struct ip_vs_app *inc;
380 int result = 0;
381
382 /* Default binding: bind app only for NAT */
383 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
384 return 0;
385
386 /* Lookup application incarnations and bind the right one */
387 hash = udp_app_hashkey(cp->vport);
388
389 spin_lock(&udp_app_lock);
390 list_for_each_entry(inc, &udp_apps[hash], p_list) {
391 if (inc->port == cp->vport) {
392 if (unlikely(!ip_vs_app_inc_get(inc)))
393 break;
394 spin_unlock(&udp_app_lock);
395
Hannes Eder1e3e2382009-08-02 11:05:41 +0000396 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
Julius Volzcfc78c52008-09-02 15:55:53 +0200397 "%s:%u to app %s on port %u\n",
398 __func__,
399 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
400 ntohs(cp->cport),
401 IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
402 ntohs(cp->vport),
403 inc->name, ntohs(inc->port));
404
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405 cp->app = inc;
406 if (inc->init_conn)
407 result = inc->init_conn(inc, cp);
408 goto out;
409 }
410 }
411 spin_unlock(&udp_app_lock);
412
413 out:
414 return result;
415}
416
417
418static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
419 [IP_VS_UDP_S_NORMAL] = 5*60*HZ,
420 [IP_VS_UDP_S_LAST] = 2*HZ,
421};
422
Jan Engelhardt36cbd3d2009-08-05 10:42:58 -0700423static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424 [IP_VS_UDP_S_NORMAL] = "UDP",
425 [IP_VS_UDP_S_LAST] = "BUG!",
426};
427
428
429static int
430udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
431{
432 return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
433 udp_state_name_table, sname, to);
434}
435
436static const char * udp_state_name(int state)
437{
438 if (state >= IP_VS_UDP_S_LAST)
439 return "ERR!";
440 return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
441}
442
443static int
444udp_state_transition(struct ip_vs_conn *cp, int direction,
445 const struct sk_buff *skb,
446 struct ip_vs_protocol *pp)
447{
448 cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
449 return 1;
450}
451
452static void udp_init(struct ip_vs_protocol *pp)
453{
454 IP_VS_INIT_HASH_TABLE(udp_apps);
455 pp->timeout_table = udp_timeouts;
456}
457
/* Protocol exit hook: UDP has nothing to tear down. */
static void udp_exit(struct ip_vs_protocol *pp)
{
	/* intentionally empty */
}
461
462
463struct ip_vs_protocol ip_vs_protocol_udp = {
464 .name = "UDP",
465 .protocol = IPPROTO_UDP,
Julian Anastasov2ad17de2008-04-29 03:21:23 -0700466 .num_states = IP_VS_UDP_S_LAST,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467 .dont_defrag = 0,
468 .init = udp_init,
469 .exit = udp_exit,
470 .conn_schedule = udp_conn_schedule,
Simon Horman5c0d2372010-08-02 17:12:44 +0200471 .conn_in_get = ip_vs_conn_in_get_proto,
472 .conn_out_get = ip_vs_conn_out_get_proto,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473 .snat_handler = udp_snat_handler,
474 .dnat_handler = udp_dnat_handler,
475 .csum_check = udp_csum_check,
476 .state_transition = udp_state_transition,
477 .state_name = udp_state_name,
478 .register_app = udp_register_app,
479 .unregister_app = udp_unregister_app,
480 .app_conn_bind = udp_app_conn_bind,
481 .debug_packet = ip_vs_tcpudp_debug_packet,
482 .timeout_change = NULL,
483 .set_state_timeout = udp_set_state_timeout,
484};