/*
 * ip_vs_proto_udp.c:	UDP load balancing support for IPVS
 *
 * Version:     $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */
17
18#include <linux/kernel.h>
19#include <linux/netfilter_ipv4.h>
20
21#include <net/ip_vs.h>
22
23
24static struct ip_vs_conn *
25udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
26 const struct iphdr *iph, unsigned int proto_off, int inverse)
27{
28 struct ip_vs_conn *cp;
29 __u16 _ports[2], *pptr;
30
31 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
32 if (pptr == NULL)
33 return NULL;
34
35 if (likely(!inverse)) {
36 cp = ip_vs_conn_in_get(iph->protocol,
37 iph->saddr, pptr[0],
38 iph->daddr, pptr[1]);
39 } else {
40 cp = ip_vs_conn_in_get(iph->protocol,
41 iph->daddr, pptr[1],
42 iph->saddr, pptr[0]);
43 }
44
45 return cp;
46}
47
48
49static struct ip_vs_conn *
50udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
51 const struct iphdr *iph, unsigned int proto_off, int inverse)
52{
53 struct ip_vs_conn *cp;
54 __u16 _ports[2], *pptr;
55
56 pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4,
57 sizeof(_ports), _ports);
58 if (pptr == NULL)
59 return NULL;
60
61 if (likely(!inverse)) {
62 cp = ip_vs_conn_out_get(iph->protocol,
63 iph->saddr, pptr[0],
64 iph->daddr, pptr[1]);
65 } else {
66 cp = ip_vs_conn_out_get(iph->protocol,
67 iph->daddr, pptr[1],
68 iph->saddr, pptr[0]);
69 }
70
71 return cp;
72}
73
74
75static int
76udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
77 int *verdict, struct ip_vs_conn **cpp)
78{
79 struct ip_vs_service *svc;
80 struct udphdr _udph, *uh;
81
82 uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
83 sizeof(_udph), &_udph);
84 if (uh == NULL) {
85 *verdict = NF_DROP;
86 return 0;
87 }
88
89 if ((svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
90 skb->nh.iph->daddr, uh->dest))) {
91 if (ip_vs_todrop()) {
92 /*
93 * It seems that we are very loaded.
94 * We have to drop this packet :(
95 */
96 ip_vs_service_put(svc);
97 *verdict = NF_DROP;
98 return 0;
99 }
100
101 /*
102 * Let the virtual server select a real server for the
103 * incoming connection, and create a connection entry.
104 */
105 *cpp = ip_vs_schedule(svc, skb);
106 if (!*cpp) {
107 *verdict = ip_vs_leave(svc, skb, pp);
108 return 0;
109 }
110 ip_vs_service_put(svc);
111 }
112 return 1;
113}
114
115
116static inline void
117udp_fast_csum_update(struct udphdr *uhdr, u32 oldip, u32 newip,
118 u16 oldport, u16 newport)
119{
120 uhdr->check =
121 ip_vs_check_diff(~oldip, newip,
122 ip_vs_check_diff(oldport ^ 0xFFFF,
123 newport, uhdr->check));
124 if (!uhdr->check)
125 uhdr->check = 0xFFFF;
126}
127
128static int
129udp_snat_handler(struct sk_buff **pskb,
130 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
131{
132 struct udphdr *udph;
133 unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
134
135 /* csum_check requires unshared skb */
136 if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
137 return 0;
138
139 if (unlikely(cp->app != NULL)) {
140 /* Some checks before mangling */
141 if (pp->csum_check && !pp->csum_check(*pskb, pp))
142 return 0;
143
144 /*
145 * Call application helper if needed
146 */
147 if (!ip_vs_app_pkt_out(cp, pskb))
148 return 0;
149 }
150
151 udph = (void *)(*pskb)->nh.iph + udphoff;
152 udph->source = cp->vport;
153
154 /*
155 * Adjust UDP checksums
156 */
157 if (!cp->app && (udph->check != 0)) {
158 /* Only port and addr are changed, do fast csum update */
159 udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
160 cp->dport, cp->vport);
161 if ((*pskb)->ip_summed == CHECKSUM_HW)
162 (*pskb)->ip_summed = CHECKSUM_NONE;
163 } else {
164 /* full checksum calculation */
165 udph->check = 0;
166 (*pskb)->csum = skb_checksum(*pskb, udphoff,
167 (*pskb)->len - udphoff, 0);
168 udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
169 (*pskb)->len - udphoff,
170 cp->protocol,
171 (*pskb)->csum);
172 if (udph->check == 0)
173 udph->check = 0xFFFF;
174 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
175 pp->name, udph->check,
176 (char*)&(udph->check) - (char*)udph);
177 }
178 return 1;
179}
180
181
182static int
183udp_dnat_handler(struct sk_buff **pskb,
184 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
185{
186 struct udphdr *udph;
187 unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
188
189 /* csum_check requires unshared skb */
190 if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
191 return 0;
192
193 if (unlikely(cp->app != NULL)) {
194 /* Some checks before mangling */
195 if (pp->csum_check && !pp->csum_check(*pskb, pp))
196 return 0;
197
198 /*
199 * Attempt ip_vs_app call.
200 * It will fix ip_vs_conn
201 */
202 if (!ip_vs_app_pkt_in(cp, pskb))
203 return 0;
204 }
205
206 udph = (void *)(*pskb)->nh.iph + udphoff;
207 udph->dest = cp->dport;
208
209 /*
210 * Adjust UDP checksums
211 */
212 if (!cp->app && (udph->check != 0)) {
213 /* Only port and addr are changed, do fast csum update */
214 udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
215 cp->vport, cp->dport);
216 if ((*pskb)->ip_summed == CHECKSUM_HW)
217 (*pskb)->ip_summed = CHECKSUM_NONE;
218 } else {
219 /* full checksum calculation */
220 udph->check = 0;
221 (*pskb)->csum = skb_checksum(*pskb, udphoff,
222 (*pskb)->len - udphoff, 0);
223 udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
224 (*pskb)->len - udphoff,
225 cp->protocol,
226 (*pskb)->csum);
227 if (udph->check == 0)
228 udph->check = 0xFFFF;
229 (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
230 }
231 return 1;
232}
233
234
235static int
236udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
237{
238 struct udphdr _udph, *uh;
239 unsigned int udphoff = skb->nh.iph->ihl*4;
240
241 uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
242 if (uh == NULL)
243 return 0;
244
245 if (uh->check != 0) {
246 switch (skb->ip_summed) {
247 case CHECKSUM_NONE:
248 skb->csum = skb_checksum(skb, udphoff,
249 skb->len - udphoff, 0);
250 case CHECKSUM_HW:
251 if (csum_tcpudp_magic(skb->nh.iph->saddr,
252 skb->nh.iph->daddr,
253 skb->len - udphoff,
254 skb->nh.iph->protocol,
255 skb->csum)) {
256 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
257 "Failed checksum for");
258 return 0;
259 }
260 break;
261 default:
262 /* CHECKSUM_UNNECESSARY */
263 break;
264 }
265 }
266 return 1;
267}
268
269
270/*
271 * Note: the caller guarantees that only one of register_app,
272 * unregister_app or app_conn_bind is called each time.
273 */
274
/* Width in bits of an application hash table index. */
#define UDP_APP_TAB_BITS 4
/* Number of buckets in the application hash table. */
#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
/* Mask that reduces a hash value to a valid bucket index. */
#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)

/*
 * Buckets of registered application incarnations, indexed by
 * udp_app_hashkey(port) and linked through ip_vs_app.p_list.
 * Initialised in udp_init().
 */
static struct list_head udp_apps[UDP_APP_TAB_SIZE];
/* Taken around every walk or modification of udp_apps in this file. */
static DEFINE_SPINLOCK(udp_app_lock);
281
282static inline __u16 udp_app_hashkey(__u16 port)
283{
284 return ((port >> UDP_APP_TAB_BITS) ^ port) & UDP_APP_TAB_MASK;
285}
286
287
288static int udp_register_app(struct ip_vs_app *inc)
289{
290 struct ip_vs_app *i;
291 __u16 hash, port = inc->port;
292 int ret = 0;
293
294 hash = udp_app_hashkey(port);
295
296
297 spin_lock_bh(&udp_app_lock);
298 list_for_each_entry(i, &udp_apps[hash], p_list) {
299 if (i->port == port) {
300 ret = -EEXIST;
301 goto out;
302 }
303 }
304 list_add(&inc->p_list, &udp_apps[hash]);
305 atomic_inc(&ip_vs_protocol_udp.appcnt);
306
307 out:
308 spin_unlock_bh(&udp_app_lock);
309 return ret;
310}
311
312
/*
 * Remove the application incarnation @inc from the UDP application
 * table: under udp_app_lock, the protocol's appcnt is decremented and
 * @inc is unlinked from its bucket list.
 */
static void
udp_unregister_app(struct ip_vs_app *inc)
{
	spin_lock_bh(&udp_app_lock);
	atomic_dec(&ip_vs_protocol_udp.appcnt);
	list_del(&inc->p_list);
	spin_unlock_bh(&udp_app_lock);
}
321
322
323static int udp_app_conn_bind(struct ip_vs_conn *cp)
324{
325 int hash;
326 struct ip_vs_app *inc;
327 int result = 0;
328
329 /* Default binding: bind app only for NAT */
330 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
331 return 0;
332
333 /* Lookup application incarnations and bind the right one */
334 hash = udp_app_hashkey(cp->vport);
335
336 spin_lock(&udp_app_lock);
337 list_for_each_entry(inc, &udp_apps[hash], p_list) {
338 if (inc->port == cp->vport) {
339 if (unlikely(!ip_vs_app_inc_get(inc)))
340 break;
341 spin_unlock(&udp_app_lock);
342
343 IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
344 "%u.%u.%u.%u:%u to app %s on port %u\n",
345 __FUNCTION__,
346 NIPQUAD(cp->caddr), ntohs(cp->cport),
347 NIPQUAD(cp->vaddr), ntohs(cp->vport),
348 inc->name, ntohs(inc->port));
349 cp->app = inc;
350 if (inc->init_conn)
351 result = inc->init_conn(inc, cp);
352 goto out;
353 }
354 }
355 spin_unlock(&udp_app_lock);
356
357 out:
358 return result;
359}
360
361
/*
 * Timeout per UDP state, expressed as multiples of HZ.  Installed as
 * pp->timeout_table by udp_init(); udp_state_transition() applies the
 * IP_VS_UDP_S_NORMAL entry to connections.
 */
static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
	[IP_VS_UDP_S_NORMAL] = 5*60*HZ,
	[IP_VS_UDP_S_LAST] = 2*HZ,
};
366
/*
 * Name per UDP state, indexed by state number.  Used by udp_state_name()
 * and passed to ip_vs_set_state_timeout() by udp_set_state_timeout().
 */
static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
	[IP_VS_UDP_S_NORMAL] = "UDP",
	[IP_VS_UDP_S_LAST] = "BUG!",
};
371
372
/*
 * Set the timeout of the state called @sname to @to by forwarding to
 * ip_vs_set_state_timeout() with this protocol's timeout table
 * (pp->timeout_table), IP_VS_UDP_S_LAST and the UDP state name table.
 * Returns whatever ip_vs_set_state_timeout() returns.
 */
static int
udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
{
	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
				       udp_state_name_table, sname, to);
}
379
380static const char * udp_state_name(int state)
381{
382 if (state >= IP_VS_UDP_S_LAST)
383 return "ERR!";
384 return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
385}
386
/*
 * State transition hook for UDP.  Sets cp->timeout to the
 * IP_VS_UDP_S_NORMAL entry of pp->timeout_table and always returns 1.
 * @direction and @skb are not examined.
 */
static int
udp_state_transition(struct ip_vs_conn *cp, int direction,
		     const struct sk_buff *skb,
		     struct ip_vs_protocol *pp)
{
	cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
	return 1;
}
395
/*
 * Protocol init hook: initialises the udp_apps hash table and installs
 * udp_timeouts as the protocol's timeout table.
 */
static void udp_init(struct ip_vs_protocol *pp)
{
	IP_VS_INIT_HASH_TABLE(udp_apps);
	pp->timeout_table = udp_timeouts;
}
401
/* Protocol exit hook: intentionally empty, nothing is torn down here. */
static void udp_exit(struct ip_vs_protocol *pp)
{
}
405
406
/*
 * UDP protocol descriptor: wires the handlers defined in this file into
 * struct ip_vs_protocol.  Not static; udp_register_app() and
 * udp_unregister_app() update its appcnt member.
 */
struct ip_vs_protocol ip_vs_protocol_udp = {
	.name = "UDP",
	.protocol = IPPROTO_UDP,
	.dont_defrag = 0,
	.init = udp_init,
	.exit = udp_exit,
	.conn_schedule = udp_conn_schedule,
	.conn_in_get = udp_conn_in_get,
	.conn_out_get = udp_conn_out_get,
	.snat_handler = udp_snat_handler,
	.dnat_handler = udp_dnat_handler,
	.csum_check = udp_csum_check,
	.state_transition = udp_state_transition,
	.state_name = udp_state_name,
	.register_app = udp_register_app,
	.unregister_app = udp_unregister_app,
	.app_conn_bind = udp_app_conn_bind,
	/* helper defined outside this file */
	.debug_packet = ip_vs_tcpudp_debug_packet,
	/* no timeout_change hook for UDP */
	.timeout_change = NULL,
	.set_state_timeout = udp_set_state_timeout,
};