blob: 63cc0feaaef60646d2310b8a382581570c8af6ab [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * ip_vs_xmit.c: various packet transmitters for IPVS
3 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 * Julian Anastasov <ja@ssi.bg>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Changes:
13 *
14 */
15
Hannes Eder9aada7a2009-07-30 14:29:44 -070016#define KMSG_COMPONENT "IPVS"
17#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/kernel.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090020#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/tcp.h> /* for tcphdr */
Herbert Xuc439cb22008-01-11 19:14:00 -080022#include <net/ip.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <net/tcp.h> /* for csum_tcpudp_magic */
24#include <net/udp.h>
25#include <net/icmp.h> /* for icmp_send */
26#include <net/route.h> /* for ip_route_output */
Julius Volz38cdcc92008-09-02 15:55:44 +020027#include <net/ipv6.h>
28#include <net/ip6_route.h>
Hans Schillstrom714f0952010-10-19 10:38:48 +020029#include <net/addrconf.h>
Julius Volz38cdcc92008-09-02 15:55:44 +020030#include <linux/icmpv6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/netfilter.h>
32#include <linux/netfilter_ipv4.h>
33
34#include <net/ip_vs.h>
35
36
37/*
38 * Destination cache to speed up outgoing route lookup
39 */
40static inline void
Hans Schillstrom714f0952010-10-19 10:38:48 +020041__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
42 u32 dst_cookie)
Linus Torvalds1da177e2005-04-16 15:20:36 -070043{
44 struct dst_entry *old_dst;
45
46 old_dst = dest->dst_cache;
47 dest->dst_cache = dst;
48 dest->dst_rtos = rtos;
Hans Schillstrom714f0952010-10-19 10:38:48 +020049 dest->dst_cookie = dst_cookie;
Linus Torvalds1da177e2005-04-16 15:20:36 -070050 dst_release(old_dst);
51}
52
53static inline struct dst_entry *
Hans Schillstrom714f0952010-10-19 10:38:48 +020054__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
Linus Torvalds1da177e2005-04-16 15:20:36 -070055{
56 struct dst_entry *dst = dest->dst_cache;
57
58 if (!dst)
59 return NULL;
Hans Schillstrom714f0952010-10-19 10:38:48 +020060 if ((dst->obsolete || rtos != dest->dst_rtos) &&
61 dst->ops->check(dst, dest->dst_cookie) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070062 dest->dst_cache = NULL;
63 dst_release(dst);
64 return NULL;
65 }
66 dst_hold(dst);
67 return dst;
68}
69
Ilpo Järvinenad1b30b2008-01-05 23:12:40 -080070static struct rtable *
Hans Schillstrom714f0952010-10-19 10:38:48 +020071__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos)
Linus Torvalds1da177e2005-04-16 15:20:36 -070072{
Hans Schillstrom714f0952010-10-19 10:38:48 +020073 struct net *net = dev_net(skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -070074 struct rtable *rt; /* Route to the other host */
75 struct ip_vs_dest *dest = cp->dest;
76
77 if (dest) {
78 spin_lock(&dest->dst_lock);
79 if (!(rt = (struct rtable *)
Hans Schillstrom714f0952010-10-19 10:38:48 +020080 __ip_vs_dst_check(dest, rtos))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070081 struct flowi fl = {
82 .oif = 0,
83 .nl_u = {
84 .ip4_u = {
Julius Volze7ade462008-09-02 15:55:33 +020085 .daddr = dest->addr.ip,
Linus Torvalds1da177e2005-04-16 15:20:36 -070086 .saddr = 0,
87 .tos = rtos, } },
88 };
89
Hans Schillstrom714f0952010-10-19 10:38:48 +020090 if (ip_route_output_key(net, &rt, &fl)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070091 spin_unlock(&dest->dst_lock);
Harvey Harrison14d5e832008-10-31 00:54:29 -070092 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
93 &dest->addr.ip);
Linus Torvalds1da177e2005-04-16 15:20:36 -070094 return NULL;
95 }
Hans Schillstrom714f0952010-10-19 10:38:48 +020096 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
Harvey Harrison14d5e832008-10-31 00:54:29 -070097 IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
98 &dest->addr.ip,
Changli Gaod8d1f302010-06-10 23:31:35 -070099 atomic_read(&rt->dst.__refcnt), rtos);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100 }
101 spin_unlock(&dest->dst_lock);
102 } else {
103 struct flowi fl = {
104 .oif = 0,
105 .nl_u = {
106 .ip4_u = {
Julius Volze7ade462008-09-02 15:55:33 +0200107 .daddr = cp->daddr.ip,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108 .saddr = 0,
109 .tos = rtos, } },
110 };
111
Hans Schillstrom714f0952010-10-19 10:38:48 +0200112 if (ip_route_output_key(net, &rt, &fl)) {
Harvey Harrison14d5e832008-10-31 00:54:29 -0700113 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
114 &cp->daddr.ip);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115 return NULL;
116 }
117 }
118
119 return rt;
120}
121
Julius Volz38cdcc92008-09-02 15:55:44 +0200122#ifdef CONFIG_IP_VS_IPV6
Hans Schillstrom714f0952010-10-19 10:38:48 +0200123
124static struct dst_entry *
125__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
126 struct in6_addr *ret_saddr, int do_xfrm)
Julius Volz38cdcc92008-09-02 15:55:44 +0200127{
Hans Schillstrom714f0952010-10-19 10:38:48 +0200128 struct dst_entry *dst;
129 struct flowi fl = {
130 .oif = 0,
131 .nl_u = {
132 .ip6_u = {
133 .daddr = *daddr,
134 },
135 },
136 };
137
138 dst = ip6_route_output(net, NULL, &fl);
139 if (dst->error)
140 goto out_err;
141 if (!ret_saddr)
142 return dst;
143 if (ipv6_addr_any(&fl.fl6_src) &&
144 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
145 &fl.fl6_dst, 0, &fl.fl6_src) < 0)
146 goto out_err;
147 if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0)
148 goto out_err;
149 ipv6_addr_copy(ret_saddr, &fl.fl6_src);
150 return dst;
151
152out_err:
153 dst_release(dst);
154 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
155 return NULL;
156}
157
158static struct rt6_info *
159__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
160 struct in6_addr *ret_saddr, int do_xfrm)
161{
162 struct net *net = dev_net(skb->dev);
Julius Volz38cdcc92008-09-02 15:55:44 +0200163 struct rt6_info *rt; /* Route to the other host */
164 struct ip_vs_dest *dest = cp->dest;
Hans Schillstrom714f0952010-10-19 10:38:48 +0200165 struct dst_entry *dst;
Julius Volz38cdcc92008-09-02 15:55:44 +0200166
167 if (dest) {
168 spin_lock(&dest->dst_lock);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200169 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
Julius Volz38cdcc92008-09-02 15:55:44 +0200170 if (!rt) {
Hans Schillstrom714f0952010-10-19 10:38:48 +0200171 u32 cookie;
Julius Volz38cdcc92008-09-02 15:55:44 +0200172
Hans Schillstrom714f0952010-10-19 10:38:48 +0200173 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
174 &dest->dst_saddr,
175 do_xfrm);
176 if (!dst) {
Julius Volz38cdcc92008-09-02 15:55:44 +0200177 spin_unlock(&dest->dst_lock);
Julius Volz38cdcc92008-09-02 15:55:44 +0200178 return NULL;
179 }
Hans Schillstrom714f0952010-10-19 10:38:48 +0200180 rt = (struct rt6_info *) dst;
181 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
182 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
183 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
184 &dest->addr.in6, &dest->dst_saddr,
Changli Gaod8d1f302010-06-10 23:31:35 -0700185 atomic_read(&rt->dst.__refcnt));
Julius Volz38cdcc92008-09-02 15:55:44 +0200186 }
Hans Schillstrom714f0952010-10-19 10:38:48 +0200187 if (ret_saddr)
188 ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
Julius Volz38cdcc92008-09-02 15:55:44 +0200189 spin_unlock(&dest->dst_lock);
190 } else {
Hans Schillstrom714f0952010-10-19 10:38:48 +0200191 dst = __ip_vs_route_output_v6(net, &cp->daddr.in6, ret_saddr,
192 do_xfrm);
193 if (!dst)
Julius Volz38cdcc92008-09-02 15:55:44 +0200194 return NULL;
Hans Schillstrom714f0952010-10-19 10:38:48 +0200195 rt = (struct rt6_info *) dst;
Julius Volz38cdcc92008-09-02 15:55:44 +0200196 }
197
198 return rt;
199}
200#endif
201
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202
203/*
204 * Release dest->dst_cache before a dest is removed
205 */
206void
207ip_vs_dst_reset(struct ip_vs_dest *dest)
208{
209 struct dst_entry *old_dst;
210
211 old_dst = dest->dst_cache;
212 dest->dst_cache = NULL;
213 dst_release(old_dst);
214}
215
/*
 *	Prepare an encapsulated skb for transmission.  Evaluates to a
 *	netfilter verdict: NF_ACCEPT unless the connection has
 *	IP_VS_CONN_F_NFCT set, in which case the result of
 *	ip_vs_confirm_conntrack() is used.  Only on NF_ACCEPT is the
 *	netfilter state of the skb reset and its checksum state forwarded.
 *	Note: both arguments are evaluated more than once.
 */
#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __verdict;						\
								\
	(skb)->ipvs_property = 1;				\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))		\
		__verdict = NF_ACCEPT;				\
	else							\
		__verdict = ip_vs_confirm_conntrack(skb, cp);	\
	if (__verdict == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__verdict;						\
})
229
/*
 *	Send a NAT-ed skb through the LOCAL_OUT hook of family @pf.
 *	Connections with IP_VS_CONN_F_NFCT get their conntrack entry
 *	updated, all others are marked as not tracked.
 *	Note: @skb and @cp are evaluated more than once.
 */
#define IP_VS_XMIT_NAT(pf, skb, cp)				\
do {								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		ip_vs_update_conntrack(skb, cp, 1);		\
	else							\
		ip_vs_notrack(skb);				\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
241
/*
 *	Send an skb through the LOCAL_OUT hook of family @pf.  Unless the
 *	connection has IP_VS_CONN_F_NFCT set, the skb is first marked as
 *	not tracked.
 *	Note: @skb and @cp are evaluated more than once.
 */
#define IP_VS_XMIT(pf, skb, cp)					\
do {								\
	(skb)->ipvs_property = 1;				\
	if (!unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		ip_vs_notrack(skb);				\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
251
252
253/*
254 * NULL transmitter (do nothing except return NF_ACCEPT)
255 */
256int
257ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
258 struct ip_vs_protocol *pp)
259{
260 /* we do not touch skb and do not need pskb ptr */
261 return NF_ACCEPT;
262}
263
264
265/*
266 * Bypass transmitter
267 * Let packets bypass the destination when the destination is not
268 * available, it may be only used in transparent cache cluster.
269 */
270int
271ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
272 struct ip_vs_protocol *pp)
273{
Hans Schillstrom714f0952010-10-19 10:38:48 +0200274 struct net *net = dev_net(skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 struct rtable *rt; /* Route to the other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700276 struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277 u8 tos = iph->tos;
278 int mtu;
279 struct flowi fl = {
280 .oif = 0,
281 .nl_u = {
282 .ip4_u = {
283 .daddr = iph->daddr,
284 .saddr = 0,
285 .tos = RT_TOS(tos), } },
286 };
287
288 EnterFunction(10);
289
Hans Schillstrom714f0952010-10-19 10:38:48 +0200290 if (ip_route_output_key(net, &rt, &fl)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000291 IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n",
292 __func__, &iph->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293 goto tx_error_icmp;
294 }
295
296 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700297 mtu = dst_mtu(&rt->dst);
YOSHIFUJI Hideaki4412ec42007-03-07 14:19:10 +0900298 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 ip_rt_put(rt);
300 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
Hannes Eder1e3e2382009-08-02 11:05:41 +0000301 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 goto tx_error;
303 }
304
305 /*
306 * Call ip_send_check because we are not sure it is called
307 * after ip_defrag. Is copy-on-write needed?
308 */
309 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
310 ip_rt_put(rt);
311 return NF_STOLEN;
312 }
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700313 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314
315 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000316 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700317 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318
319 /* Another hack: avoid icmp_send in ip_fragment */
320 skb->local_df = 1;
321
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200322 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323
324 LeaveFunction(10);
325 return NF_STOLEN;
326
327 tx_error_icmp:
328 dst_link_failure(skb);
329 tx_error:
330 kfree_skb(skb);
331 LeaveFunction(10);
332 return NF_STOLEN;
333}
334
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200335#ifdef CONFIG_IP_VS_IPV6
336int
337ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
338 struct ip_vs_protocol *pp)
339{
Hans Schillstrom714f0952010-10-19 10:38:48 +0200340 struct net *net = dev_net(skb->dev);
341 struct dst_entry *dst;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200342 struct rt6_info *rt; /* Route to the other host */
343 struct ipv6hdr *iph = ipv6_hdr(skb);
344 int mtu;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200345
346 EnterFunction(10);
347
Hans Schillstrom714f0952010-10-19 10:38:48 +0200348 dst = __ip_vs_route_output_v6(net, &iph->daddr, NULL, 0);
349 if (!dst)
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200350 goto tx_error_icmp;
Hans Schillstrom714f0952010-10-19 10:38:48 +0200351 rt = (struct rt6_info *) dst;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200352
353 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700354 mtu = dst_mtu(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200355 if (skb->len > mtu) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700356 dst_release(&rt->dst);
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000357 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000358 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200359 goto tx_error;
360 }
361
362 /*
363 * Call ip_send_check because we are not sure it is called
364 * after ip_defrag. Is copy-on-write needed?
365 */
366 skb = skb_share_check(skb, GFP_ATOMIC);
367 if (unlikely(skb == NULL)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700368 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200369 return NF_STOLEN;
370 }
371
372 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000373 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700374 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200375
376 /* Another hack: avoid icmp_send in ip_fragment */
377 skb->local_df = 1;
378
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200379 IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200380
381 LeaveFunction(10);
382 return NF_STOLEN;
383
384 tx_error_icmp:
385 dst_link_failure(skb);
386 tx_error:
387 kfree_skb(skb);
388 LeaveFunction(10);
389 return NF_STOLEN;
390}
391#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392
393/*
394 * NAT transmitter (only for outside-to-inside nat forwarding)
395 * Not used for related ICMP
396 */
397int
398ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
399 struct ip_vs_protocol *pp)
400{
401 struct rtable *rt; /* Route to the other host */
402 int mtu;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700403 struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404
405 EnterFunction(10);
406
407 /* check if it is a connection of no-client-port */
408 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
Al Viro014d7302006-09-28 14:29:52 -0700409 __be16 _pt, *p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
411 if (p == NULL)
412 goto tx_error;
413 ip_vs_conn_fill_cport(cp, *p);
414 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
415 }
416
Hans Schillstrom714f0952010-10-19 10:38:48 +0200417 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos))))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 goto tx_error_icmp;
419
420 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700421 mtu = dst_mtu(&rt->dst);
YOSHIFUJI Hideaki4412ec42007-03-07 14:19:10 +0900422 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423 ip_rt_put(rt);
424 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
425 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
426 goto tx_error;
427 }
428
429 /* copy-on-write the packet before mangling it */
Herbert Xuaf1e1cf2007-10-14 00:39:33 -0700430 if (!skb_make_writable(skb, sizeof(struct iphdr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431 goto tx_error_put;
432
Changli Gaod8d1f302010-06-10 23:31:35 -0700433 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434 goto tx_error_put;
435
436 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000437 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700438 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439
440 /* mangle the packet */
Herbert Xu3db05fe2007-10-15 00:53:15 -0700441 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 goto tx_error;
Julius Volze7ade462008-09-02 15:55:33 +0200443 ip_hdr(skb)->daddr = cp->daddr.ip;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700444 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445
446 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
447
448 /* FIXME: when application helper enlarges the packet and the length
449 is larger than the MTU of outgoing device, there will be still
450 MTU problem. */
451
452 /* Another hack: avoid icmp_send in ip_fragment */
453 skb->local_df = 1;
454
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200455 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456
457 LeaveFunction(10);
458 return NF_STOLEN;
459
460 tx_error_icmp:
461 dst_link_failure(skb);
462 tx_error:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 kfree_skb(skb);
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200464 LeaveFunction(10);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 return NF_STOLEN;
466 tx_error_put:
467 ip_rt_put(rt);
468 goto tx_error;
469}
470
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200471#ifdef CONFIG_IP_VS_IPV6
472int
473ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
474 struct ip_vs_protocol *pp)
475{
476 struct rt6_info *rt; /* Route to the other host */
477 int mtu;
478
479 EnterFunction(10);
480
481 /* check if it is a connection of no-client-port */
482 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
483 __be16 _pt, *p;
484 p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
485 sizeof(_pt), &_pt);
486 if (p == NULL)
487 goto tx_error;
488 ip_vs_conn_fill_cport(cp, *p);
489 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
490 }
491
Hans Schillstrom714f0952010-10-19 10:38:48 +0200492 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200493 if (!rt)
494 goto tx_error_icmp;
495
496 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700497 mtu = dst_mtu(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200498 if (skb->len > mtu) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700499 dst_release(&rt->dst);
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000500 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200501 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
502 "ip_vs_nat_xmit_v6(): frag needed for");
503 goto tx_error;
504 }
505
506 /* copy-on-write the packet before mangling it */
507 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
508 goto tx_error_put;
509
Changli Gaod8d1f302010-06-10 23:31:35 -0700510 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200511 goto tx_error_put;
512
513 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000514 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700515 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200516
517 /* mangle the packet */
518 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
519 goto tx_error;
520 ipv6_hdr(skb)->daddr = cp->daddr.in6;
521
522 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
523
524 /* FIXME: when application helper enlarges the packet and the length
525 is larger than the MTU of outgoing device, there will be still
526 MTU problem. */
527
528 /* Another hack: avoid icmp_send in ip_fragment */
529 skb->local_df = 1;
530
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200531 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200532
533 LeaveFunction(10);
534 return NF_STOLEN;
535
536tx_error_icmp:
537 dst_link_failure(skb);
538tx_error:
539 LeaveFunction(10);
540 kfree_skb(skb);
541 return NF_STOLEN;
542tx_error_put:
Changli Gaod8d1f302010-06-10 23:31:35 -0700543 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200544 goto tx_error;
545}
546#endif
547
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548
549/*
550 * IP Tunneling transmitter
551 *
552 * This function encapsulates the packet in a new IP packet, its
553 * destination will be set to cp->daddr. Most code of this function
554 * is taken from ipip.c.
555 *
556 * It is used in VS/TUN cluster. The load balancer selects a real
557 * server from a cluster based on a scheduling algorithm,
558 * encapsulates the request packet and forwards it to the selected
559 * server. For example, all real servers are configured with
560 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, process
562 * the request and return the response packets directly to the client
563 * without passing the load balancer. This can greatly increase the
564 * scalability of virtual server.
565 *
566 * Used for ANY protocol
567 */
568int
569ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
570 struct ip_vs_protocol *pp)
571{
572 struct rtable *rt; /* Route to the other host */
573 struct net_device *tdev; /* Device to other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700574 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575 u8 tos = old_iph->tos;
Alexey Dobriyan76ab6082006-01-06 13:24:29 -0800576 __be16 df = old_iph->frag_off;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700578 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 int mtu;
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200580 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581
582 EnterFunction(10);
583
YOSHIFUJI Hideaki4412ec42007-03-07 14:19:10 +0900584 if (skb->protocol != htons(ETH_P_IP)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000585 IP_VS_DBG_RL("%s(): protocol error, "
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586 "ETH_P_IP: %d, skb protocol: %d\n",
Hannes Eder1e3e2382009-08-02 11:05:41 +0000587 __func__, htons(ETH_P_IP), skb->protocol);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 goto tx_error;
589 }
590
Hans Schillstrom714f0952010-10-19 10:38:48 +0200591 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(tos))))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592 goto tx_error_icmp;
593
Changli Gaod8d1f302010-06-10 23:31:35 -0700594 tdev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595
Changli Gaod8d1f302010-06-10 23:31:35 -0700596 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 if (mtu < 68) {
598 ip_rt_put(rt);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000599 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600 goto tx_error;
601 }
Eric Dumazetadf30902009-06-02 05:19:30 +0000602 if (skb_dst(skb))
603 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
YOSHIFUJI Hideaki4412ec42007-03-07 14:19:10 +0900605 df |= (old_iph->frag_off & htons(IP_DF));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606
YOSHIFUJI Hideaki4412ec42007-03-07 14:19:10 +0900607 if ((old_iph->frag_off & htons(IP_DF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 && mtu < ntohs(old_iph->tot_len)) {
609 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
610 ip_rt_put(rt);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000611 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612 goto tx_error;
613 }
614
615 /*
616 * Okay, now see if we can stuff it in the buffer as-is.
617 */
618 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
619
620 if (skb_headroom(skb) < max_headroom
621 || skb_cloned(skb) || skb_shared(skb)) {
622 struct sk_buff *new_skb =
623 skb_realloc_headroom(skb, max_headroom);
624 if (!new_skb) {
625 ip_rt_put(rt);
626 kfree_skb(skb);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000627 IP_VS_ERR_RL("%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628 return NF_STOLEN;
629 }
630 kfree_skb(skb);
631 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700632 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 }
634
Hans Schillstrom714f0952010-10-19 10:38:48 +0200635 skb->transport_header = skb->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636
637 /* fix old IP header checksum */
638 ip_send_check(old_iph);
639
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700640 skb_push(skb, sizeof(struct iphdr));
641 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
643
644 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000645 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700646 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647
648 /*
649 * Push down and install the IPIP header.
650 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700651 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652 iph->version = 4;
653 iph->ihl = sizeof(struct iphdr)>>2;
654 iph->frag_off = df;
655 iph->protocol = IPPROTO_IPIP;
656 iph->tos = tos;
657 iph->daddr = rt->rt_dst;
658 iph->saddr = rt->rt_src;
659 iph->ttl = old_iph->ttl;
Changli Gaod8d1f302010-06-10 23:31:35 -0700660 ip_select_ident(iph, &rt->dst, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661
662 /* Another hack: avoid icmp_send in ip_fragment */
663 skb->local_df = 1;
664
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200665 ret = IP_VS_XMIT_TUNNEL(skb, cp);
666 if (ret == NF_ACCEPT)
667 ip_local_out(skb);
668 else if (ret == NF_DROP)
669 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670
671 LeaveFunction(10);
672
673 return NF_STOLEN;
674
675 tx_error_icmp:
676 dst_link_failure(skb);
677 tx_error:
678 kfree_skb(skb);
679 LeaveFunction(10);
680 return NF_STOLEN;
681}
682
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200683#ifdef CONFIG_IP_VS_IPV6
684int
685ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
686 struct ip_vs_protocol *pp)
687{
688 struct rt6_info *rt; /* Route to the other host */
Hans Schillstrom714f0952010-10-19 10:38:48 +0200689 struct in6_addr saddr; /* Source for tunnel */
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200690 struct net_device *tdev; /* Device to other host */
691 struct ipv6hdr *old_iph = ipv6_hdr(skb);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200692 struct ipv6hdr *iph; /* Our new IP header */
693 unsigned int max_headroom; /* The extra header space needed */
694 int mtu;
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200695 int ret;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200696
697 EnterFunction(10);
698
699 if (skb->protocol != htons(ETH_P_IPV6)) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000700 IP_VS_DBG_RL("%s(): protocol error, "
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200701 "ETH_P_IPV6: %d, skb protocol: %d\n",
Hannes Eder1e3e2382009-08-02 11:05:41 +0000702 __func__, htons(ETH_P_IPV6), skb->protocol);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200703 goto tx_error;
704 }
705
Hans Schillstrom714f0952010-10-19 10:38:48 +0200706 rt = __ip_vs_get_out_rt_v6(skb, cp, &saddr, 1);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200707 if (!rt)
708 goto tx_error_icmp;
709
Changli Gaod8d1f302010-06-10 23:31:35 -0700710 tdev = rt->dst.dev;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200711
Changli Gaod8d1f302010-06-10 23:31:35 -0700712 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200713 if (mtu < IPV6_MIN_MTU) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700714 dst_release(&rt->dst);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200715 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
716 IPV6_MIN_MTU);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200717 goto tx_error;
718 }
Eric Dumazetadf30902009-06-02 05:19:30 +0000719 if (skb_dst(skb))
720 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200721
722 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000723 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Changli Gaod8d1f302010-06-10 23:31:35 -0700724 dst_release(&rt->dst);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000725 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200726 goto tx_error;
727 }
728
729 /*
730 * Okay, now see if we can stuff it in the buffer as-is.
731 */
732 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
733
734 if (skb_headroom(skb) < max_headroom
735 || skb_cloned(skb) || skb_shared(skb)) {
736 struct sk_buff *new_skb =
737 skb_realloc_headroom(skb, max_headroom);
738 if (!new_skb) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700739 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200740 kfree_skb(skb);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000741 IP_VS_ERR_RL("%s(): no memory\n", __func__);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200742 return NF_STOLEN;
743 }
744 kfree_skb(skb);
745 skb = new_skb;
746 old_iph = ipv6_hdr(skb);
747 }
748
Hans Schillstrom714f0952010-10-19 10:38:48 +0200749 skb->transport_header = skb->network_header;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200750
751 skb_push(skb, sizeof(struct ipv6hdr));
752 skb_reset_network_header(skb);
753 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
754
755 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000756 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700757 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200758
759 /*
760 * Push down and install the IPIP header.
761 */
762 iph = ipv6_hdr(skb);
763 iph->version = 6;
764 iph->nexthdr = IPPROTO_IPV6;
Harvey Harrisonb7b45f42008-11-10 16:46:06 -0800765 iph->payload_len = old_iph->payload_len;
766 be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200767 iph->priority = old_iph->priority;
768 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
Hans Schillstrom714f0952010-10-19 10:38:48 +0200769 ipv6_addr_copy(&iph->daddr, &cp->daddr.in6);
770 ipv6_addr_copy(&iph->saddr, &saddr);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200771 iph->hop_limit = old_iph->hop_limit;
772
773 /* Another hack: avoid icmp_send in ip_fragment */
774 skb->local_df = 1;
775
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200776 ret = IP_VS_XMIT_TUNNEL(skb, cp);
777 if (ret == NF_ACCEPT)
778 ip6_local_out(skb);
779 else if (ret == NF_DROP)
780 kfree_skb(skb);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200781
782 LeaveFunction(10);
783
784 return NF_STOLEN;
785
786tx_error_icmp:
787 dst_link_failure(skb);
788tx_error:
789 kfree_skb(skb);
790 LeaveFunction(10);
791 return NF_STOLEN;
792}
793#endif
794
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795
796/*
797 * Direct Routing transmitter
798 * Used for ANY protocol
799 */
800int
801ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
802 struct ip_vs_protocol *pp)
803{
804 struct rtable *rt; /* Route to the other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700805 struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 int mtu;
807
808 EnterFunction(10);
809
Hans Schillstrom714f0952010-10-19 10:38:48 +0200810 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos))))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811 goto tx_error_icmp;
812
813 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700814 mtu = dst_mtu(&rt->dst);
YOSHIFUJI Hideaki4412ec42007-03-07 14:19:10 +0900815 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
817 ip_rt_put(rt);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000818 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819 goto tx_error;
820 }
821
822 /*
823 * Call ip_send_check because we are not sure it is called
824 * after ip_defrag. Is copy-on-write needed?
825 */
826 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
827 ip_rt_put(rt);
828 return NF_STOLEN;
829 }
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700830 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831
832 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000833 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700834 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835
836 /* Another hack: avoid icmp_send in ip_fragment */
837 skb->local_df = 1;
838
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200839 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840
841 LeaveFunction(10);
842 return NF_STOLEN;
843
844 tx_error_icmp:
845 dst_link_failure(skb);
846 tx_error:
847 kfree_skb(skb);
848 LeaveFunction(10);
849 return NF_STOLEN;
850}
851
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200852#ifdef CONFIG_IP_VS_IPV6
853int
854ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
855 struct ip_vs_protocol *pp)
856{
857 struct rt6_info *rt; /* Route to the other host */
858 int mtu;
859
860 EnterFunction(10);
861
Hans Schillstrom714f0952010-10-19 10:38:48 +0200862 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200863 if (!rt)
864 goto tx_error_icmp;
865
866 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700867 mtu = dst_mtu(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200868 if (skb->len > mtu) {
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000869 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Changli Gaod8d1f302010-06-10 23:31:35 -0700870 dst_release(&rt->dst);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000871 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200872 goto tx_error;
873 }
874
875 /*
876 * Call ip_send_check because we are not sure it is called
877 * after ip_defrag. Is copy-on-write needed?
878 */
879 skb = skb_share_check(skb, GFP_ATOMIC);
880 if (unlikely(skb == NULL)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700881 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200882 return NF_STOLEN;
883 }
884
885 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000886 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700887 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200888
889 /* Another hack: avoid icmp_send in ip_fragment */
890 skb->local_df = 1;
891
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200892 IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200893
894 LeaveFunction(10);
895 return NF_STOLEN;
896
897tx_error_icmp:
898 dst_link_failure(skb);
899tx_error:
900 kfree_skb(skb);
901 LeaveFunction(10);
902 return NF_STOLEN;
903}
904#endif
905
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906
907/*
908 * ICMP packet transmitter
909 * called by the ip_vs_in_icmp
910 */
911int
912ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
913 struct ip_vs_protocol *pp, int offset)
914{
915 struct rtable *rt; /* Route to the other host */
916 int mtu;
917 int rc;
918
919 EnterFunction(10);
920
921 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
922 forwarded directly here, because there is no need to
923 translate address/port back */
924 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
925 if (cp->packet_xmit)
926 rc = cp->packet_xmit(skb, cp, pp);
927 else
928 rc = NF_ACCEPT;
929 /* do not touch skb anymore */
930 atomic_inc(&cp->in_pkts);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 goto out;
932 }
933
934 /*
935 * mangle and send the packet here (only for VS/NAT)
936 */
937
Hans Schillstrom714f0952010-10-19 10:38:48 +0200938 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(ip_hdr(skb)->tos))))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939 goto tx_error_icmp;
940
941 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700942 mtu = dst_mtu(&rt->dst);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700943 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 ip_rt_put(rt);
945 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
Hannes Eder1e3e2382009-08-02 11:05:41 +0000946 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947 goto tx_error;
948 }
949
950 /* copy-on-write the packet before mangling it */
Herbert Xuaf1e1cf2007-10-14 00:39:33 -0700951 if (!skb_make_writable(skb, offset))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 goto tx_error_put;
953
Changli Gaod8d1f302010-06-10 23:31:35 -0700954 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 goto tx_error_put;
956
957 /* drop the old route when skb is not shared */
Eric Dumazetadf30902009-06-02 05:19:30 +0000958 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700959 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960
961 ip_vs_nat_icmp(skb, pp, cp, 0);
962
963 /* Another hack: avoid icmp_send in ip_fragment */
964 skb->local_df = 1;
965
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200966 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967
968 rc = NF_STOLEN;
969 goto out;
970
971 tx_error_icmp:
972 dst_link_failure(skb);
973 tx_error:
974 dev_kfree_skb(skb);
975 rc = NF_STOLEN;
976 out:
977 LeaveFunction(10);
978 return rc;
979 tx_error_put:
980 ip_rt_put(rt);
981 goto tx_error;
982}
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200983
984#ifdef CONFIG_IP_VS_IPV6
985int
986ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
987 struct ip_vs_protocol *pp, int offset)
988{
989 struct rt6_info *rt; /* Route to the other host */
990 int mtu;
991 int rc;
992
993 EnterFunction(10);
994
995 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
996 forwarded directly here, because there is no need to
997 translate address/port back */
998 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
999 if (cp->packet_xmit)
1000 rc = cp->packet_xmit(skb, cp, pp);
1001 else
1002 rc = NF_ACCEPT;
1003 /* do not touch skb anymore */
1004 atomic_inc(&cp->in_pkts);
1005 goto out;
1006 }
1007
1008 /*
1009 * mangle and send the packet here (only for VS/NAT)
1010 */
1011
Hans Schillstrom714f0952010-10-19 10:38:48 +02001012 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001013 if (!rt)
1014 goto tx_error_icmp;
1015
1016 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -07001017 mtu = dst_mtu(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001018 if (skb->len > mtu) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001019 dst_release(&rt->dst);
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001020 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Hannes Eder1e3e2382009-08-02 11:05:41 +00001021 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001022 goto tx_error;
1023 }
1024
1025 /* copy-on-write the packet before mangling it */
1026 if (!skb_make_writable(skb, offset))
1027 goto tx_error_put;
1028
Changli Gaod8d1f302010-06-10 23:31:35 -07001029 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001030 goto tx_error_put;
1031
1032 /* drop the old route when skb is not shared */
Eric Dumazetadf30902009-06-02 05:19:30 +00001033 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07001034 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001035
1036 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1037
1038 /* Another hack: avoid icmp_send in ip_fragment */
1039 skb->local_df = 1;
1040
Julian Anastasovf4bc17c2010-09-21 17:35:41 +02001041 IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001042
1043 rc = NF_STOLEN;
1044 goto out;
1045
1046tx_error_icmp:
1047 dst_link_failure(skb);
1048tx_error:
1049 dev_kfree_skb(skb);
1050 rc = NF_STOLEN;
1051out:
1052 LeaveFunction(10);
1053 return rc;
1054tx_error_put:
Changli Gaod8d1f302010-06-10 23:31:35 -07001055 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001056 goto tx_error;
1057}
1058#endif