blob: b0bd8afbf3686088635e70c0b7269cad2a099d9d [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * ip_vs_xmit.c: various packet transmitters for IPVS
3 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 * Julian Anastasov <ja@ssi.bg>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Changes:
13 *
14 */
15
Hannes Eder9aada7a2009-07-30 14:29:44 -070016#define KMSG_COMPONENT "IPVS"
17#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/kernel.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090020#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/tcp.h> /* for tcphdr */
Herbert Xuc439cb22008-01-11 19:14:00 -080022#include <net/ip.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <net/tcp.h> /* for csum_tcpudp_magic */
24#include <net/udp.h>
25#include <net/icmp.h> /* for icmp_send */
26#include <net/route.h> /* for ip_route_output */
Julius Volz38cdcc92008-09-02 15:55:44 +020027#include <net/ipv6.h>
28#include <net/ip6_route.h>
Hans Schillstrom714f0952010-10-19 10:38:48 +020029#include <net/addrconf.h>
Julius Volz38cdcc92008-09-02 15:55:44 +020030#include <linux/icmpv6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/netfilter.h>
32#include <linux/netfilter_ipv4.h>
33
34#include <net/ip_vs.h>
35
36
37/*
38 * Destination cache to speed up outgoing route lookup
39 */
40static inline void
Hans Schillstrom714f0952010-10-19 10:38:48 +020041__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
42 u32 dst_cookie)
Linus Torvalds1da177e2005-04-16 15:20:36 -070043{
44 struct dst_entry *old_dst;
45
46 old_dst = dest->dst_cache;
47 dest->dst_cache = dst;
48 dest->dst_rtos = rtos;
Hans Schillstrom714f0952010-10-19 10:38:48 +020049 dest->dst_cookie = dst_cookie;
Linus Torvalds1da177e2005-04-16 15:20:36 -070050 dst_release(old_dst);
51}
52
53static inline struct dst_entry *
Hans Schillstrom714f0952010-10-19 10:38:48 +020054__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
Linus Torvalds1da177e2005-04-16 15:20:36 -070055{
56 struct dst_entry *dst = dest->dst_cache;
57
58 if (!dst)
59 return NULL;
Hans Schillstrom714f0952010-10-19 10:38:48 +020060 if ((dst->obsolete || rtos != dest->dst_rtos) &&
61 dst->ops->check(dst, dest->dst_cookie) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070062 dest->dst_cache = NULL;
63 dst_release(dst);
64 return NULL;
65 }
66 dst_hold(dst);
67 return dst;
68}
69
Ilpo Järvinenad1b30b2008-01-05 23:12:40 -080070static struct rtable *
Hans Schillstrom714f0952010-10-19 10:38:48 +020071__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos)
Linus Torvalds1da177e2005-04-16 15:20:36 -070072{
Hans Schillstrom714f0952010-10-19 10:38:48 +020073 struct net *net = dev_net(skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -070074 struct rtable *rt; /* Route to the other host */
75 struct ip_vs_dest *dest = cp->dest;
76
77 if (dest) {
78 spin_lock(&dest->dst_lock);
79 if (!(rt = (struct rtable *)
Hans Schillstrom714f0952010-10-19 10:38:48 +020080 __ip_vs_dst_check(dest, rtos))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070081 struct flowi fl = {
82 .oif = 0,
83 .nl_u = {
84 .ip4_u = {
Julius Volze7ade462008-09-02 15:55:33 +020085 .daddr = dest->addr.ip,
Linus Torvalds1da177e2005-04-16 15:20:36 -070086 .saddr = 0,
87 .tos = rtos, } },
88 };
89
Hans Schillstrom714f0952010-10-19 10:38:48 +020090 if (ip_route_output_key(net, &rt, &fl)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070091 spin_unlock(&dest->dst_lock);
Harvey Harrison14d5e8342008-10-31 00:54:29 -070092 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
93 &dest->addr.ip);
Linus Torvalds1da177e2005-04-16 15:20:36 -070094 return NULL;
95 }
Hans Schillstrom714f0952010-10-19 10:38:48 +020096 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
Harvey Harrison14d5e8342008-10-31 00:54:29 -070097 IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
98 &dest->addr.ip,
Changli Gaod8d1f302010-06-10 23:31:35 -070099 atomic_read(&rt->dst.__refcnt), rtos);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100 }
101 spin_unlock(&dest->dst_lock);
102 } else {
103 struct flowi fl = {
104 .oif = 0,
105 .nl_u = {
106 .ip4_u = {
Julius Volze7ade462008-09-02 15:55:33 +0200107 .daddr = cp->daddr.ip,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108 .saddr = 0,
109 .tos = rtos, } },
110 };
111
Hans Schillstrom714f0952010-10-19 10:38:48 +0200112 if (ip_route_output_key(net, &rt, &fl)) {
Harvey Harrison14d5e8342008-10-31 00:54:29 -0700113 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
114 &cp->daddr.ip);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115 return NULL;
116 }
117 }
118
119 return rt;
120}
121
Julius Volz38cdcc92008-09-02 15:55:44 +0200122#ifdef CONFIG_IP_VS_IPV6
Hans Schillstrom714f0952010-10-19 10:38:48 +0200123
124static struct dst_entry *
125__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
126 struct in6_addr *ret_saddr, int do_xfrm)
Julius Volz38cdcc92008-09-02 15:55:44 +0200127{
Hans Schillstrom714f0952010-10-19 10:38:48 +0200128 struct dst_entry *dst;
129 struct flowi fl = {
130 .oif = 0,
131 .nl_u = {
132 .ip6_u = {
133 .daddr = *daddr,
134 },
135 },
136 };
137
138 dst = ip6_route_output(net, NULL, &fl);
139 if (dst->error)
140 goto out_err;
141 if (!ret_saddr)
142 return dst;
143 if (ipv6_addr_any(&fl.fl6_src) &&
144 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
145 &fl.fl6_dst, 0, &fl.fl6_src) < 0)
146 goto out_err;
147 if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0)
148 goto out_err;
149 ipv6_addr_copy(ret_saddr, &fl.fl6_src);
150 return dst;
151
152out_err:
153 dst_release(dst);
154 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
155 return NULL;
156}
157
158static struct rt6_info *
159__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
160 struct in6_addr *ret_saddr, int do_xfrm)
161{
162 struct net *net = dev_net(skb->dev);
Julius Volz38cdcc92008-09-02 15:55:44 +0200163 struct rt6_info *rt; /* Route to the other host */
164 struct ip_vs_dest *dest = cp->dest;
Hans Schillstrom714f0952010-10-19 10:38:48 +0200165 struct dst_entry *dst;
Julius Volz38cdcc92008-09-02 15:55:44 +0200166
167 if (dest) {
168 spin_lock(&dest->dst_lock);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200169 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
Julius Volz38cdcc92008-09-02 15:55:44 +0200170 if (!rt) {
Hans Schillstrom714f0952010-10-19 10:38:48 +0200171 u32 cookie;
Julius Volz38cdcc92008-09-02 15:55:44 +0200172
Hans Schillstrom714f0952010-10-19 10:38:48 +0200173 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
174 &dest->dst_saddr,
175 do_xfrm);
176 if (!dst) {
Julius Volz38cdcc92008-09-02 15:55:44 +0200177 spin_unlock(&dest->dst_lock);
Julius Volz38cdcc92008-09-02 15:55:44 +0200178 return NULL;
179 }
Hans Schillstrom714f0952010-10-19 10:38:48 +0200180 rt = (struct rt6_info *) dst;
181 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
182 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
183 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
184 &dest->addr.in6, &dest->dst_saddr,
Changli Gaod8d1f302010-06-10 23:31:35 -0700185 atomic_read(&rt->dst.__refcnt));
Julius Volz38cdcc92008-09-02 15:55:44 +0200186 }
Hans Schillstrom714f0952010-10-19 10:38:48 +0200187 if (ret_saddr)
188 ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
Julius Volz38cdcc92008-09-02 15:55:44 +0200189 spin_unlock(&dest->dst_lock);
190 } else {
Hans Schillstrom714f0952010-10-19 10:38:48 +0200191 dst = __ip_vs_route_output_v6(net, &cp->daddr.in6, ret_saddr,
192 do_xfrm);
193 if (!dst)
Julius Volz38cdcc92008-09-02 15:55:44 +0200194 return NULL;
Hans Schillstrom714f0952010-10-19 10:38:48 +0200195 rt = (struct rt6_info *) dst;
Julius Volz38cdcc92008-09-02 15:55:44 +0200196 }
197
198 return rt;
199}
200#endif
201
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202
203/*
204 * Release dest->dst_cache before a dest is removed
205 */
206void
207ip_vs_dst_reset(struct ip_vs_dest *dest)
208{
209 struct dst_entry *old_dst;
210
211 old_dst = dest->dst_cache;
212 dest->dst_cache = NULL;
213 dst_release(old_dst);
214}
215
/*
 * Pre-transmit step for tunnelled packets.  Evaluates to a netfilter
 * verdict: NF_ACCEPT unless the connection has IP_VS_CONN_F_NFCT set, in
 * which case the result of ip_vs_confirm_conntrack() is used.  On
 * NF_ACCEPT the skb's netfilter state is reset and its checksum state is
 * set to CHECKSUM_NONE.  The caller acts on the returned verdict.
 */
#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __verdict = NF_ACCEPT;				\
								\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__verdict = ip_vs_confirm_conntrack(skb, cp);	\
	if (__verdict == NF_ACCEPT) {				\
		nf_reset(skb);					\
		(skb)->ip_summed = CHECKSUM_NONE;		\
	}							\
	__verdict;						\
})
228
/*
 * Transmit a NATed packet: connections with IP_VS_CONN_F_NFCT get their
 * conntrack entry updated, all others are marked with ipvs_property.
 * The skb is then sent through the NF_INET_LOCAL_OUT hook of family @pf
 * with dst_output as the continuation.
 */
#define IP_VS_XMIT_NAT(pf, skb, cp)				\
do {								\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		ip_vs_update_conntrack(skb, cp, 1);		\
	else							\
		(skb)->ipvs_property = 1;			\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
239
/*
 * Transmit a packet without address translation: mark it with
 * ipvs_property unless the connection has IP_VS_CONN_F_NFCT set, then
 * send it through the NF_INET_LOCAL_OUT hook of family @pf with
 * dst_output as the continuation.
 */
#define IP_VS_XMIT(pf, skb, cp)					\
do {								\
	if (likely(((cp)->flags & IP_VS_CONN_F_NFCT) == 0))	\
		(skb)->ipvs_property = 1;			\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
248
249
250/*
251 * NULL transmitter (do nothing except return NF_ACCEPT)
252 */
253int
254ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
255 struct ip_vs_protocol *pp)
256{
257 /* we do not touch skb and do not need pskb ptr */
258 return NF_ACCEPT;
259}
260
261
262/*
263 * Bypass transmitter
264 * Let packets bypass the destination when the destination is not
265 * available, it may be only used in transparent cache cluster.
266 */
267int
268ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
269 struct ip_vs_protocol *pp)
270{
Hans Schillstrom714f0952010-10-19 10:38:48 +0200271 struct net *net = dev_net(skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 struct rtable *rt; /* Route to the other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700273 struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274 u8 tos = iph->tos;
275 int mtu;
276 struct flowi fl = {
277 .oif = 0,
278 .nl_u = {
279 .ip4_u = {
280 .daddr = iph->daddr,
281 .saddr = 0,
282 .tos = RT_TOS(tos), } },
283 };
284
285 EnterFunction(10);
286
Hans Schillstrom714f0952010-10-19 10:38:48 +0200287 if (ip_route_output_key(net, &rt, &fl)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000288 IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n",
289 __func__, &iph->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290 goto tx_error_icmp;
291 }
292
293 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700294 mtu = dst_mtu(&rt->dst);
YOSHIFUJI Hideaki4412ec42007-03-07 14:19:10 +0900295 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296 ip_rt_put(rt);
297 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
Hannes Eder1e3e2382009-08-02 11:05:41 +0000298 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 goto tx_error;
300 }
301
302 /*
303 * Call ip_send_check because we are not sure it is called
304 * after ip_defrag. Is copy-on-write needed?
305 */
306 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
307 ip_rt_put(rt);
308 return NF_STOLEN;
309 }
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700310 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311
312 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000313 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700314 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315
316 /* Another hack: avoid icmp_send in ip_fragment */
317 skb->local_df = 1;
318
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200319 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320
321 LeaveFunction(10);
322 return NF_STOLEN;
323
324 tx_error_icmp:
325 dst_link_failure(skb);
326 tx_error:
327 kfree_skb(skb);
328 LeaveFunction(10);
329 return NF_STOLEN;
330}
331
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200332#ifdef CONFIG_IP_VS_IPV6
333int
334ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
335 struct ip_vs_protocol *pp)
336{
Hans Schillstrom714f0952010-10-19 10:38:48 +0200337 struct net *net = dev_net(skb->dev);
338 struct dst_entry *dst;
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200339 struct rt6_info *rt; /* Route to the other host */
340 struct ipv6hdr *iph = ipv6_hdr(skb);
341 int mtu;
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200342
343 EnterFunction(10);
344
Hans Schillstrom714f0952010-10-19 10:38:48 +0200345 dst = __ip_vs_route_output_v6(net, &iph->daddr, NULL, 0);
346 if (!dst)
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200347 goto tx_error_icmp;
Hans Schillstrom714f0952010-10-19 10:38:48 +0200348 rt = (struct rt6_info *) dst;
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200349
350 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700351 mtu = dst_mtu(&rt->dst);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200352 if (skb->len > mtu) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700353 dst_release(&rt->dst);
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000354 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000355 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200356 goto tx_error;
357 }
358
359 /*
360 * Call ip_send_check because we are not sure it is called
361 * after ip_defrag. Is copy-on-write needed?
362 */
363 skb = skb_share_check(skb, GFP_ATOMIC);
364 if (unlikely(skb == NULL)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700365 dst_release(&rt->dst);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200366 return NF_STOLEN;
367 }
368
369 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000370 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700371 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200372
373 /* Another hack: avoid icmp_send in ip_fragment */
374 skb->local_df = 1;
375
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200376 IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200377
378 LeaveFunction(10);
379 return NF_STOLEN;
380
381 tx_error_icmp:
382 dst_link_failure(skb);
383 tx_error:
384 kfree_skb(skb);
385 LeaveFunction(10);
386 return NF_STOLEN;
387}
388#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389
390/*
391 * NAT transmitter (only for outside-to-inside nat forwarding)
392 * Not used for related ICMP
393 */
394int
395ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
396 struct ip_vs_protocol *pp)
397{
398 struct rtable *rt; /* Route to the other host */
399 int mtu;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700400 struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401
402 EnterFunction(10);
403
404 /* check if it is a connection of no-client-port */
405 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
Al Viro014d7302006-09-28 14:29:52 -0700406 __be16 _pt, *p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
408 if (p == NULL)
409 goto tx_error;
410 ip_vs_conn_fill_cport(cp, *p);
411 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
412 }
413
Hans Schillstrom714f0952010-10-19 10:38:48 +0200414 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos))))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415 goto tx_error_icmp;
416
417 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700418 mtu = dst_mtu(&rt->dst);
YOSHIFUJI Hideaki4412ec42007-03-07 14:19:10 +0900419 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 ip_rt_put(rt);
421 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
422 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
423 goto tx_error;
424 }
425
426 /* copy-on-write the packet before mangling it */
Herbert Xuaf1e1cf2007-10-14 00:39:33 -0700427 if (!skb_make_writable(skb, sizeof(struct iphdr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428 goto tx_error_put;
429
Changli Gaod8d1f302010-06-10 23:31:35 -0700430 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431 goto tx_error_put;
432
433 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000434 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700435 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436
437 /* mangle the packet */
Herbert Xu3db05fe2007-10-15 00:53:15 -0700438 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 goto tx_error;
Julius Volze7ade462008-09-02 15:55:33 +0200440 ip_hdr(skb)->daddr = cp->daddr.ip;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700441 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442
443 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
444
445 /* FIXME: when application helper enlarges the packet and the length
446 is larger than the MTU of outgoing device, there will be still
447 MTU problem. */
448
449 /* Another hack: avoid icmp_send in ip_fragment */
450 skb->local_df = 1;
451
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200452 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453
454 LeaveFunction(10);
455 return NF_STOLEN;
456
457 tx_error_icmp:
458 dst_link_failure(skb);
459 tx_error:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 kfree_skb(skb);
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200461 LeaveFunction(10);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 return NF_STOLEN;
463 tx_error_put:
464 ip_rt_put(rt);
465 goto tx_error;
466}
467
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200468#ifdef CONFIG_IP_VS_IPV6
469int
470ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
471 struct ip_vs_protocol *pp)
472{
473 struct rt6_info *rt; /* Route to the other host */
474 int mtu;
475
476 EnterFunction(10);
477
478 /* check if it is a connection of no-client-port */
479 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
480 __be16 _pt, *p;
481 p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
482 sizeof(_pt), &_pt);
483 if (p == NULL)
484 goto tx_error;
485 ip_vs_conn_fill_cport(cp, *p);
486 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
487 }
488
Hans Schillstrom714f0952010-10-19 10:38:48 +0200489 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200490 if (!rt)
491 goto tx_error_icmp;
492
493 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700494 mtu = dst_mtu(&rt->dst);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200495 if (skb->len > mtu) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700496 dst_release(&rt->dst);
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000497 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200498 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
499 "ip_vs_nat_xmit_v6(): frag needed for");
500 goto tx_error;
501 }
502
503 /* copy-on-write the packet before mangling it */
504 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
505 goto tx_error_put;
506
Changli Gaod8d1f302010-06-10 23:31:35 -0700507 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200508 goto tx_error_put;
509
510 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000511 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700512 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200513
514 /* mangle the packet */
515 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
516 goto tx_error;
517 ipv6_hdr(skb)->daddr = cp->daddr.in6;
518
519 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
520
521 /* FIXME: when application helper enlarges the packet and the length
522 is larger than the MTU of outgoing device, there will be still
523 MTU problem. */
524
525 /* Another hack: avoid icmp_send in ip_fragment */
526 skb->local_df = 1;
527
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200528 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200529
530 LeaveFunction(10);
531 return NF_STOLEN;
532
533tx_error_icmp:
534 dst_link_failure(skb);
535tx_error:
536 LeaveFunction(10);
537 kfree_skb(skb);
538 return NF_STOLEN;
539tx_error_put:
Changli Gaod8d1f302010-06-10 23:31:35 -0700540 dst_release(&rt->dst);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200541 goto tx_error;
542}
543#endif
544
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545
546/*
547 * IP Tunneling transmitter
548 *
549 * This function encapsulates the packet in a new IP packet, its
550 * destination will be set to cp->daddr. Most code of this function
551 * is taken from ipip.c.
552 *
553 * It is used in VS/TUN cluster. The load balancer selects a real
554 * server from a cluster based on a scheduling algorithm,
555 * encapsulates the request packet and forwards it to the selected
556 * server. For example, all real servers are configured with
557 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
558 * the encapsulated packet, it will decapsulate the packet, processe
559 * the request and return the response packets directly to the client
560 * without passing the load balancer. This can greatly increase the
561 * scalability of virtual server.
562 *
563 * Used for ANY protocol
564 */
565int
566ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
567 struct ip_vs_protocol *pp)
568{
569 struct rtable *rt; /* Route to the other host */
570 struct net_device *tdev; /* Device to other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700571 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572 u8 tos = old_iph->tos;
Alexey Dobriyan76ab6082006-01-06 13:24:29 -0800573 __be16 df = old_iph->frag_off;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700575 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 int mtu;
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200577 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578
579 EnterFunction(10);
580
YOSHIFUJI Hideaki4412ec42007-03-07 14:19:10 +0900581 if (skb->protocol != htons(ETH_P_IP)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000582 IP_VS_DBG_RL("%s(): protocol error, "
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 "ETH_P_IP: %d, skb protocol: %d\n",
Hannes Eder1e3e2382009-08-02 11:05:41 +0000584 __func__, htons(ETH_P_IP), skb->protocol);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585 goto tx_error;
586 }
587
Hans Schillstrom714f0952010-10-19 10:38:48 +0200588 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(tos))))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589 goto tx_error_icmp;
590
Changli Gaod8d1f302010-06-10 23:31:35 -0700591 tdev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592
Changli Gaod8d1f302010-06-10 23:31:35 -0700593 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 if (mtu < 68) {
595 ip_rt_put(rt);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000596 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 goto tx_error;
598 }
Eric Dumazetadf30902009-06-02 05:19:30 +0000599 if (skb_dst(skb))
600 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601
YOSHIFUJI Hideaki4412ec42007-03-07 14:19:10 +0900602 df |= (old_iph->frag_off & htons(IP_DF));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603
YOSHIFUJI Hideaki4412ec42007-03-07 14:19:10 +0900604 if ((old_iph->frag_off & htons(IP_DF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605 && mtu < ntohs(old_iph->tot_len)) {
606 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
607 ip_rt_put(rt);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000608 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 goto tx_error;
610 }
611
612 /*
613 * Okay, now see if we can stuff it in the buffer as-is.
614 */
615 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
616
617 if (skb_headroom(skb) < max_headroom
618 || skb_cloned(skb) || skb_shared(skb)) {
619 struct sk_buff *new_skb =
620 skb_realloc_headroom(skb, max_headroom);
621 if (!new_skb) {
622 ip_rt_put(rt);
623 kfree_skb(skb);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000624 IP_VS_ERR_RL("%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625 return NF_STOLEN;
626 }
627 kfree_skb(skb);
628 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700629 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 }
631
Hans Schillstrom714f0952010-10-19 10:38:48 +0200632 skb->transport_header = skb->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633
634 /* fix old IP header checksum */
635 ip_send_check(old_iph);
636
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700637 skb_push(skb, sizeof(struct iphdr));
638 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
640
641 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000642 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700643 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644
645 /*
646 * Push down and install the IPIP header.
647 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700648 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649 iph->version = 4;
650 iph->ihl = sizeof(struct iphdr)>>2;
651 iph->frag_off = df;
652 iph->protocol = IPPROTO_IPIP;
653 iph->tos = tos;
654 iph->daddr = rt->rt_dst;
655 iph->saddr = rt->rt_src;
656 iph->ttl = old_iph->ttl;
Changli Gaod8d1f302010-06-10 23:31:35 -0700657 ip_select_ident(iph, &rt->dst, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658
659 /* Another hack: avoid icmp_send in ip_fragment */
660 skb->local_df = 1;
661
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200662 ret = IP_VS_XMIT_TUNNEL(skb, cp);
663 if (ret == NF_ACCEPT)
664 ip_local_out(skb);
665 else if (ret == NF_DROP)
666 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667
668 LeaveFunction(10);
669
670 return NF_STOLEN;
671
672 tx_error_icmp:
673 dst_link_failure(skb);
674 tx_error:
675 kfree_skb(skb);
676 LeaveFunction(10);
677 return NF_STOLEN;
678}
679
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200680#ifdef CONFIG_IP_VS_IPV6
681int
682ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
683 struct ip_vs_protocol *pp)
684{
685 struct rt6_info *rt; /* Route to the other host */
Hans Schillstrom714f0952010-10-19 10:38:48 +0200686 struct in6_addr saddr; /* Source for tunnel */
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200687 struct net_device *tdev; /* Device to other host */
688 struct ipv6hdr *old_iph = ipv6_hdr(skb);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200689 struct ipv6hdr *iph; /* Our new IP header */
690 unsigned int max_headroom; /* The extra header space needed */
691 int mtu;
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200692 int ret;
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200693
694 EnterFunction(10);
695
696 if (skb->protocol != htons(ETH_P_IPV6)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000697 IP_VS_DBG_RL("%s(): protocol error, "
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200698 "ETH_P_IPV6: %d, skb protocol: %d\n",
Hannes Eder1e3e2382009-08-02 11:05:41 +0000699 __func__, htons(ETH_P_IPV6), skb->protocol);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200700 goto tx_error;
701 }
702
Hans Schillstrom714f0952010-10-19 10:38:48 +0200703 rt = __ip_vs_get_out_rt_v6(skb, cp, &saddr, 1);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200704 if (!rt)
705 goto tx_error_icmp;
706
Changli Gaod8d1f302010-06-10 23:31:35 -0700707 tdev = rt->dst.dev;
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200708
Changli Gaod8d1f302010-06-10 23:31:35 -0700709 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200710 if (mtu < IPV6_MIN_MTU) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700711 dst_release(&rt->dst);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200712 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
713 IPV6_MIN_MTU);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200714 goto tx_error;
715 }
Eric Dumazetadf30902009-06-02 05:19:30 +0000716 if (skb_dst(skb))
717 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200718
719 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000720 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Changli Gaod8d1f302010-06-10 23:31:35 -0700721 dst_release(&rt->dst);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000722 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200723 goto tx_error;
724 }
725
726 /*
727 * Okay, now see if we can stuff it in the buffer as-is.
728 */
729 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
730
731 if (skb_headroom(skb) < max_headroom
732 || skb_cloned(skb) || skb_shared(skb)) {
733 struct sk_buff *new_skb =
734 skb_realloc_headroom(skb, max_headroom);
735 if (!new_skb) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700736 dst_release(&rt->dst);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200737 kfree_skb(skb);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000738 IP_VS_ERR_RL("%s(): no memory\n", __func__);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200739 return NF_STOLEN;
740 }
741 kfree_skb(skb);
742 skb = new_skb;
743 old_iph = ipv6_hdr(skb);
744 }
745
Hans Schillstrom714f0952010-10-19 10:38:48 +0200746 skb->transport_header = skb->network_header;
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200747
748 skb_push(skb, sizeof(struct ipv6hdr));
749 skb_reset_network_header(skb);
750 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
751
752 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000753 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700754 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200755
756 /*
757 * Push down and install the IPIP header.
758 */
759 iph = ipv6_hdr(skb);
760 iph->version = 6;
761 iph->nexthdr = IPPROTO_IPV6;
Harvey Harrisonb7b45f42008-11-10 16:46:06 -0800762 iph->payload_len = old_iph->payload_len;
763 be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200764 iph->priority = old_iph->priority;
765 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
Hans Schillstrom714f0952010-10-19 10:38:48 +0200766 ipv6_addr_copy(&iph->daddr, &cp->daddr.in6);
767 ipv6_addr_copy(&iph->saddr, &saddr);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200768 iph->hop_limit = old_iph->hop_limit;
769
770 /* Another hack: avoid icmp_send in ip_fragment */
771 skb->local_df = 1;
772
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200773 ret = IP_VS_XMIT_TUNNEL(skb, cp);
774 if (ret == NF_ACCEPT)
775 ip6_local_out(skb);
776 else if (ret == NF_DROP)
777 kfree_skb(skb);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200778
779 LeaveFunction(10);
780
781 return NF_STOLEN;
782
783tx_error_icmp:
784 dst_link_failure(skb);
785tx_error:
786 kfree_skb(skb);
787 LeaveFunction(10);
788 return NF_STOLEN;
789}
790#endif
791
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792
793/*
794 * Direct Routing transmitter
795 * Used for ANY protocol
796 */
797int
798ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
799 struct ip_vs_protocol *pp)
800{
801 struct rtable *rt; /* Route to the other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700802 struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803 int mtu;
804
805 EnterFunction(10);
806
Hans Schillstrom714f0952010-10-19 10:38:48 +0200807 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos))))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 goto tx_error_icmp;
809
810 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700811 mtu = dst_mtu(&rt->dst);
YOSHIFUJI Hideaki4412ec42007-03-07 14:19:10 +0900812 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
814 ip_rt_put(rt);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000815 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816 goto tx_error;
817 }
818
819 /*
820 * Call ip_send_check because we are not sure it is called
821 * after ip_defrag. Is copy-on-write needed?
822 */
823 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
824 ip_rt_put(rt);
825 return NF_STOLEN;
826 }
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700827 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828
829 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000830 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700831 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832
833 /* Another hack: avoid icmp_send in ip_fragment */
834 skb->local_df = 1;
835
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200836 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837
838 LeaveFunction(10);
839 return NF_STOLEN;
840
841 tx_error_icmp:
842 dst_link_failure(skb);
843 tx_error:
844 kfree_skb(skb);
845 LeaveFunction(10);
846 return NF_STOLEN;
847}
848
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200849#ifdef CONFIG_IP_VS_IPV6
850int
851ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
852 struct ip_vs_protocol *pp)
853{
854 struct rt6_info *rt; /* Route to the other host */
855 int mtu;
856
857 EnterFunction(10);
858
Hans Schillstrom714f0952010-10-19 10:38:48 +0200859 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200860 if (!rt)
861 goto tx_error_icmp;
862
863 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700864 mtu = dst_mtu(&rt->dst);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200865 if (skb->len > mtu) {
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000866 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Changli Gaod8d1f302010-06-10 23:31:35 -0700867 dst_release(&rt->dst);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000868 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200869 goto tx_error;
870 }
871
872 /*
873 * Call ip_send_check because we are not sure it is called
874 * after ip_defrag. Is copy-on-write needed?
875 */
876 skb = skb_share_check(skb, GFP_ATOMIC);
877 if (unlikely(skb == NULL)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700878 dst_release(&rt->dst);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200879 return NF_STOLEN;
880 }
881
882 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000883 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700884 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200885
886 /* Another hack: avoid icmp_send in ip_fragment */
887 skb->local_df = 1;
888
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200889 IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200890
891 LeaveFunction(10);
892 return NF_STOLEN;
893
894tx_error_icmp:
895 dst_link_failure(skb);
896tx_error:
897 kfree_skb(skb);
898 LeaveFunction(10);
899 return NF_STOLEN;
900}
901#endif
902
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903
904/*
905 * ICMP packet transmitter
906 * called by the ip_vs_in_icmp
907 */
908int
909ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
910 struct ip_vs_protocol *pp, int offset)
911{
912 struct rtable *rt; /* Route to the other host */
913 int mtu;
914 int rc;
915
916 EnterFunction(10);
917
918 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
919 forwarded directly here, because there is no need to
920 translate address/port back */
921 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
922 if (cp->packet_xmit)
923 rc = cp->packet_xmit(skb, cp, pp);
924 else
925 rc = NF_ACCEPT;
926 /* do not touch skb anymore */
927 atomic_inc(&cp->in_pkts);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700928 goto out;
929 }
930
931 /*
932 * mangle and send the packet here (only for VS/NAT)
933 */
934
Hans Schillstrom714f0952010-10-19 10:38:48 +0200935 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(ip_hdr(skb)->tos))))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 goto tx_error_icmp;
937
938 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700939 mtu = dst_mtu(&rt->dst);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700940 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941 ip_rt_put(rt);
942 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
Hannes Eder1e3e2382009-08-02 11:05:41 +0000943 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 goto tx_error;
945 }
946
947 /* copy-on-write the packet before mangling it */
Herbert Xuaf1e1cf2007-10-14 00:39:33 -0700948 if (!skb_make_writable(skb, offset))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949 goto tx_error_put;
950
Changli Gaod8d1f302010-06-10 23:31:35 -0700951 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 goto tx_error_put;
953
954 /* drop the old route when skb is not shared */
Eric Dumazetadf30902009-06-02 05:19:30 +0000955 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700956 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957
958 ip_vs_nat_icmp(skb, pp, cp, 0);
959
960 /* Another hack: avoid icmp_send in ip_fragment */
961 skb->local_df = 1;
962
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200963 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964
965 rc = NF_STOLEN;
966 goto out;
967
968 tx_error_icmp:
969 dst_link_failure(skb);
970 tx_error:
971 dev_kfree_skb(skb);
972 rc = NF_STOLEN;
973 out:
974 LeaveFunction(10);
975 return rc;
976 tx_error_put:
977 ip_rt_put(rt);
978 goto tx_error;
979}
Julius Volzb3cdd2a2008-09-02 15:55:45 +0200980
981#ifdef CONFIG_IP_VS_IPV6
982int
983ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
984 struct ip_vs_protocol *pp, int offset)
985{
986 struct rt6_info *rt; /* Route to the other host */
987 int mtu;
988 int rc;
989
990 EnterFunction(10);
991
992 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
993 forwarded directly here, because there is no need to
994 translate address/port back */
995 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
996 if (cp->packet_xmit)
997 rc = cp->packet_xmit(skb, cp, pp);
998 else
999 rc = NF_ACCEPT;
1000 /* do not touch skb anymore */
1001 atomic_inc(&cp->in_pkts);
1002 goto out;
1003 }
1004
1005 /*
1006 * mangle and send the packet here (only for VS/NAT)
1007 */
1008
Hans Schillstrom714f0952010-10-19 10:38:48 +02001009 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
Julius Volzb3cdd2a2008-09-02 15:55:45 +02001010 if (!rt)
1011 goto tx_error_icmp;
1012
1013 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -07001014 mtu = dst_mtu(&rt->dst);
Julius Volzb3cdd2a2008-09-02 15:55:45 +02001015 if (skb->len > mtu) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001016 dst_release(&rt->dst);
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001017 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Hannes Eder1e3e2382009-08-02 11:05:41 +00001018 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julius Volzb3cdd2a2008-09-02 15:55:45 +02001019 goto tx_error;
1020 }
1021
1022 /* copy-on-write the packet before mangling it */
1023 if (!skb_make_writable(skb, offset))
1024 goto tx_error_put;
1025
Changli Gaod8d1f302010-06-10 23:31:35 -07001026 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Julius Volzb3cdd2a2008-09-02 15:55:45 +02001027 goto tx_error_put;
1028
1029 /* drop the old route when skb is not shared */
Eric Dumazetadf30902009-06-02 05:19:30 +00001030 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07001031 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a2008-09-02 15:55:45 +02001032
1033 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1034
1035 /* Another hack: avoid icmp_send in ip_fragment */
1036 skb->local_df = 1;
1037
Julian Anastasovf4bc17c2010-09-21 17:35:41 +02001038 IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
Julius Volzb3cdd2a2008-09-02 15:55:45 +02001039
1040 rc = NF_STOLEN;
1041 goto out;
1042
1043tx_error_icmp:
1044 dst_link_failure(skb);
1045tx_error:
1046 dev_kfree_skb(skb);
1047 rc = NF_STOLEN;
1048out:
1049 LeaveFunction(10);
1050 return rc;
1051tx_error_put:
Changli Gaod8d1f302010-06-10 23:31:35 -07001052 dst_release(&rt->dst);
Julius Volzb3cdd2a2008-09-02 15:55:45 +02001053 goto tx_error;
1054}
1055#endif