blob: 67a39786b0a1efe3f9392e70fda46261c13b2b86 [file] [log] [blame]
/*
 * ip_vs_xmit.c: various packet transmitters for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 * Description of forwarding methods:
 * - all transmitters are called from LOCAL_IN (remote clients) and
 *   LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
 * - not all connections have destination server, for example,
 *   connections in backup server when fwmark is used
 * - bypass connections use daddr from packet
 * LOCAL_OUT rules:
 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
 * - skb->pkt_type is not set yet
 * - the only place where we can see skb->sk != NULL
 */
25
Hannes Eder9aada7a2009-07-30 14:29:44 -070026#define KMSG_COMPONENT "IPVS"
27#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <linux/kernel.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090030#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/tcp.h> /* for tcphdr */
Herbert Xuc439cb22008-01-11 19:14:00 -080032#include <net/ip.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <net/tcp.h> /* for csum_tcpudp_magic */
34#include <net/udp.h>
35#include <net/icmp.h> /* for icmp_send */
36#include <net/route.h> /* for ip_route_output */
Julius Volz38cdcc92008-09-02 15:55:44 +020037#include <net/ipv6.h>
38#include <net/ip6_route.h>
Hans Schillstrom714f0952010-10-19 10:38:48 +020039#include <net/addrconf.h>
Julius Volz38cdcc92008-09-02 15:55:44 +020040#include <linux/icmpv6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/netfilter.h>
42#include <linux/netfilter_ipv4.h>
43
44#include <net/ip_vs.h>
45
/* Bit flags combined into the rt_mode argument of the route lookup helpers */
enum {
	IP_VS_RT_MODE_LOCAL	= 0x1,	/* Allow local dest */
	IP_VS_RT_MODE_NON_LOCAL	= 0x2,	/* Allow non-local dest */
	IP_VS_RT_MODE_RDR	= 0x4,	/* Allow redirect from remote daddr
					 * to local
					 */
	IP_VS_RT_MODE_CONNECT	= 0x8,	/* Always bind route to saddr */
};
Linus Torvalds1da177e2005-04-16 15:20:36 -070054
55/*
56 * Destination cache to speed up outgoing route lookup
57 */
58static inline void
Hans Schillstrom714f0952010-10-19 10:38:48 +020059__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
60 u32 dst_cookie)
Linus Torvalds1da177e2005-04-16 15:20:36 -070061{
62 struct dst_entry *old_dst;
63
64 old_dst = dest->dst_cache;
65 dest->dst_cache = dst;
66 dest->dst_rtos = rtos;
Hans Schillstrom714f0952010-10-19 10:38:48 +020067 dest->dst_cookie = dst_cookie;
Linus Torvalds1da177e2005-04-16 15:20:36 -070068 dst_release(old_dst);
69}
70
71static inline struct dst_entry *
Hans Schillstrom714f0952010-10-19 10:38:48 +020072__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
Linus Torvalds1da177e2005-04-16 15:20:36 -070073{
74 struct dst_entry *dst = dest->dst_cache;
75
76 if (!dst)
77 return NULL;
Hans Schillstrom714f0952010-10-19 10:38:48 +020078 if ((dst->obsolete || rtos != dest->dst_rtos) &&
79 dst->ops->check(dst, dest->dst_cookie) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070080 dest->dst_cache = NULL;
81 dst_release(dst);
82 return NULL;
83 }
84 dst_hold(dst);
85 return dst;
86}
87
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +020088static inline bool
89__mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
90{
91 if (skb->len > mtu && !skb_is_gso(skb)) {
92 return true; /* Packet size violate MTU size */
93 }
94 return false;
95}
96
Julian Anastasovf2edb9f2012-07-20 11:59:52 +030097/* Get route to daddr, update *saddr, optionally bind route to saddr */
98static struct rtable *do_output_route4(struct net *net, __be32 daddr,
99 u32 rtos, int rt_mode, __be32 *saddr)
100{
101 struct flowi4 fl4;
102 struct rtable *rt;
103 int loop = 0;
104
105 memset(&fl4, 0, sizeof(fl4));
106 fl4.daddr = daddr;
107 fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
108 fl4.flowi4_tos = rtos;
109
110retry:
111 rt = ip_route_output_key(net, &fl4);
112 if (IS_ERR(rt)) {
113 /* Invalid saddr ? */
114 if (PTR_ERR(rt) == -EINVAL && *saddr &&
115 rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
116 *saddr = 0;
117 flowi4_update_output(&fl4, 0, rtos, daddr, 0);
118 goto retry;
119 }
120 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
121 return NULL;
122 } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
123 ip_rt_put(rt);
124 *saddr = fl4.saddr;
125 flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
126 loop++;
127 goto retry;
128 }
129 *saddr = fl4.saddr;
130 return rt;
131}
132
Changli Gao17a8f8e2011-02-24 08:19:57 +0800133/* Get route to destination or remote server */
Ilpo Järvinenad1b30b2008-01-05 23:12:40 -0800134static struct rtable *
Julian Anastasovfc604762010-10-17 16:38:15 +0300135__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000136 __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137{
Julian Anastasovfc604762010-10-17 16:38:15 +0300138 struct net *net = dev_net(skb_dst(skb)->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 struct rtable *rt; /* Route to the other host */
Julian Anastasovfc604762010-10-17 16:38:15 +0300140 struct rtable *ort; /* Original route */
141 int local;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142
143 if (dest) {
144 spin_lock(&dest->dst_lock);
145 if (!(rt = (struct rtable *)
Hans Schillstrom714f0952010-10-19 10:38:48 +0200146 __ip_vs_dst_check(dest, rtos))) {
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300147 rt = do_output_route4(net, dest->addr.ip, rtos,
148 rt_mode, &dest->dst_saddr.ip);
149 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150 spin_unlock(&dest->dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151 return NULL;
152 }
Hans Schillstrom714f0952010-10-19 10:38:48 +0200153 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000154 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
155 "rtos=%X\n",
156 &dest->addr.ip, &dest->dst_saddr.ip,
Changli Gaod8d1f302010-06-10 23:31:35 -0700157 atomic_read(&rt->dst.__refcnt), rtos);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158 }
David S. Miller44e31252011-05-09 14:38:06 -0700159 daddr = dest->addr.ip;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000160 if (ret_saddr)
161 *ret_saddr = dest->dst_saddr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162 spin_unlock(&dest->dst_lock);
163 } else {
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300164 __be32 saddr = htonl(INADDR_ANY);
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000165
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300166 /* For such unconfigured boxes avoid many route lookups
167 * for performance reasons because we do not remember saddr
168 */
169 rt_mode &= ~IP_VS_RT_MODE_CONNECT;
170 rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
171 if (!rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172 return NULL;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000173 if (ret_saddr)
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300174 *ret_saddr = saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175 }
176
Julian Anastasovfc604762010-10-17 16:38:15 +0300177 local = rt->rt_flags & RTCF_LOCAL;
Changli Gao17a8f8e2011-02-24 08:19:57 +0800178 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
179 rt_mode)) {
Julian Anastasovfc604762010-10-17 16:38:15 +0300180 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
181 (rt->rt_flags & RTCF_LOCAL) ?
David S. Miller44e31252011-05-09 14:38:06 -0700182 "local":"non-local", &daddr);
Julian Anastasovfc604762010-10-17 16:38:15 +0300183 ip_rt_put(rt);
184 return NULL;
185 }
Changli Gao17a8f8e2011-02-24 08:19:57 +0800186 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
187 !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
Julian Anastasovfc604762010-10-17 16:38:15 +0300188 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
189 "requires NAT method, dest: %pI4\n",
David S. Miller44e31252011-05-09 14:38:06 -0700190 &ip_hdr(skb)->daddr, &daddr);
Julian Anastasovfc604762010-10-17 16:38:15 +0300191 ip_rt_put(rt);
192 return NULL;
193 }
194 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
195 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
196 "to non-local address, dest: %pI4\n",
David S. Miller44e31252011-05-09 14:38:06 -0700197 &ip_hdr(skb)->saddr, &daddr);
Julian Anastasovfc604762010-10-17 16:38:15 +0300198 ip_rt_put(rt);
199 return NULL;
200 }
201
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202 return rt;
203}
204
Julian Anastasovfc604762010-10-17 16:38:15 +0300205/* Reroute packet to local IPv4 stack after DNAT */
206static int
207__ip_vs_reroute_locally(struct sk_buff *skb)
208{
209 struct rtable *rt = skb_rtable(skb);
210 struct net_device *dev = rt->dst.dev;
211 struct net *net = dev_net(dev);
212 struct iphdr *iph = ip_hdr(skb);
213
David S. Millerc7537962010-11-11 17:07:48 -0800214 if (rt_is_input_route(rt)) {
Julian Anastasovfc604762010-10-17 16:38:15 +0300215 unsigned long orefdst = skb->_skb_refdst;
216
217 if (ip_route_input(skb, iph->daddr, iph->saddr,
218 iph->tos, skb->dev))
219 return 0;
220 refdst_drop(orefdst);
221 } else {
David S. Miller9d6ec932011-03-12 01:12:47 -0500222 struct flowi4 fl4 = {
223 .daddr = iph->daddr,
224 .saddr = iph->saddr,
225 .flowi4_tos = RT_TOS(iph->tos),
226 .flowi4_mark = skb->mark,
Julian Anastasovfc604762010-10-17 16:38:15 +0300227 };
Julian Anastasovfc604762010-10-17 16:38:15 +0300228
David S. Miller9d6ec932011-03-12 01:12:47 -0500229 rt = ip_route_output_key(net, &fl4);
David S. Millerb23dd4f2011-03-02 14:31:35 -0800230 if (IS_ERR(rt))
Julian Anastasovfc604762010-10-17 16:38:15 +0300231 return 0;
232 if (!(rt->rt_flags & RTCF_LOCAL)) {
233 ip_rt_put(rt);
234 return 0;
235 }
236 /* Drop old route. */
237 skb_dst_drop(skb);
238 skb_dst_set(skb, &rt->dst);
239 }
240 return 1;
241}
242
Julius Volz38cdcc92008-09-02 15:55:44 +0200243#ifdef CONFIG_IP_VS_IPV6
Hans Schillstrom714f0952010-10-19 10:38:48 +0200244
Julian Anastasovfc604762010-10-17 16:38:15 +0300245static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
246{
David S. Millerd1918542011-12-28 20:19:20 -0500247 return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
Julian Anastasovfc604762010-10-17 16:38:15 +0300248}
249
Hans Schillstrom714f0952010-10-19 10:38:48 +0200250static struct dst_entry *
251__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
252 struct in6_addr *ret_saddr, int do_xfrm)
Julius Volz38cdcc92008-09-02 15:55:44 +0200253{
Hans Schillstrom714f0952010-10-19 10:38:48 +0200254 struct dst_entry *dst;
David S. Miller4c9483b2011-03-12 16:22:43 -0500255 struct flowi6 fl6 = {
256 .daddr = *daddr,
Hans Schillstrom714f0952010-10-19 10:38:48 +0200257 };
258
David S. Miller4c9483b2011-03-12 16:22:43 -0500259 dst = ip6_route_output(net, NULL, &fl6);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200260 if (dst->error)
261 goto out_err;
262 if (!ret_saddr)
263 return dst;
David S. Miller4c9483b2011-03-12 16:22:43 -0500264 if (ipv6_addr_any(&fl6.saddr) &&
Hans Schillstrom714f0952010-10-19 10:38:48 +0200265 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
David S. Miller4c9483b2011-03-12 16:22:43 -0500266 &fl6.daddr, 0, &fl6.saddr) < 0)
Hans Schillstrom714f0952010-10-19 10:38:48 +0200267 goto out_err;
David S. Miller452edd52011-03-02 13:27:41 -0800268 if (do_xfrm) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500269 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
David S. Miller452edd52011-03-02 13:27:41 -0800270 if (IS_ERR(dst)) {
271 dst = NULL;
272 goto out_err;
273 }
274 }
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000275 *ret_saddr = fl6.saddr;
Hans Schillstrom714f0952010-10-19 10:38:48 +0200276 return dst;
277
278out_err:
279 dst_release(dst);
280 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
281 return NULL;
282}
283
Julian Anastasovfc604762010-10-17 16:38:15 +0300284/*
285 * Get route to destination or remote server
Julian Anastasovfc604762010-10-17 16:38:15 +0300286 */
Hans Schillstrom714f0952010-10-19 10:38:48 +0200287static struct rt6_info *
Julian Anastasovfc604762010-10-17 16:38:15 +0300288__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
289 struct in6_addr *daddr, struct in6_addr *ret_saddr,
290 int do_xfrm, int rt_mode)
Hans Schillstrom714f0952010-10-19 10:38:48 +0200291{
Julian Anastasovfc604762010-10-17 16:38:15 +0300292 struct net *net = dev_net(skb_dst(skb)->dev);
Julius Volz38cdcc92008-09-02 15:55:44 +0200293 struct rt6_info *rt; /* Route to the other host */
Julian Anastasovfc604762010-10-17 16:38:15 +0300294 struct rt6_info *ort; /* Original route */
Hans Schillstrom714f0952010-10-19 10:38:48 +0200295 struct dst_entry *dst;
Julian Anastasovfc604762010-10-17 16:38:15 +0300296 int local;
Julius Volz38cdcc92008-09-02 15:55:44 +0200297
298 if (dest) {
299 spin_lock(&dest->dst_lock);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200300 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
Julius Volz38cdcc92008-09-02 15:55:44 +0200301 if (!rt) {
Hans Schillstrom714f0952010-10-19 10:38:48 +0200302 u32 cookie;
Julius Volz38cdcc92008-09-02 15:55:44 +0200303
Hans Schillstrom714f0952010-10-19 10:38:48 +0200304 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000305 &dest->dst_saddr.in6,
Hans Schillstrom714f0952010-10-19 10:38:48 +0200306 do_xfrm);
307 if (!dst) {
Julius Volz38cdcc92008-09-02 15:55:44 +0200308 spin_unlock(&dest->dst_lock);
Julius Volz38cdcc92008-09-02 15:55:44 +0200309 return NULL;
310 }
Hans Schillstrom714f0952010-10-19 10:38:48 +0200311 rt = (struct rt6_info *) dst;
312 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
313 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
314 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000315 &dest->addr.in6, &dest->dst_saddr.in6,
Changli Gaod8d1f302010-06-10 23:31:35 -0700316 atomic_read(&rt->dst.__refcnt));
Julius Volz38cdcc92008-09-02 15:55:44 +0200317 }
Hans Schillstrom714f0952010-10-19 10:38:48 +0200318 if (ret_saddr)
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000319 *ret_saddr = dest->dst_saddr.in6;
Julius Volz38cdcc92008-09-02 15:55:44 +0200320 spin_unlock(&dest->dst_lock);
321 } else {
Julian Anastasovfc604762010-10-17 16:38:15 +0300322 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200323 if (!dst)
Julius Volz38cdcc92008-09-02 15:55:44 +0200324 return NULL;
Hans Schillstrom714f0952010-10-19 10:38:48 +0200325 rt = (struct rt6_info *) dst;
Julius Volz38cdcc92008-09-02 15:55:44 +0200326 }
327
Julian Anastasovfc604762010-10-17 16:38:15 +0300328 local = __ip_vs_is_local_route6(rt);
David S. Millere58b3442011-05-12 18:22:34 -0400329 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
330 rt_mode)) {
Julian Anastasovfc604762010-10-17 16:38:15 +0300331 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
332 local ? "local":"non-local", daddr);
333 dst_release(&rt->dst);
334 return NULL;
335 }
David S. Millere58b3442011-05-12 18:22:34 -0400336 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
Julian Anastasovfc604762010-10-17 16:38:15 +0300337 !((ort = (struct rt6_info *) skb_dst(skb)) &&
338 __ip_vs_is_local_route6(ort))) {
339 IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
340 "requires NAT method, dest: %pI6\n",
341 &ipv6_hdr(skb)->daddr, daddr);
342 dst_release(&rt->dst);
343 return NULL;
344 }
345 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
346 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
347 IPV6_ADDR_LOOPBACK)) {
348 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
349 "to non-local address, dest: %pI6\n",
350 &ipv6_hdr(skb)->saddr, daddr);
351 dst_release(&rt->dst);
352 return NULL;
353 }
354
Julius Volz38cdcc92008-09-02 15:55:44 +0200355 return rt;
356}
357#endif
358
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359
360/*
361 * Release dest->dst_cache before a dest is removed
362 */
363void
364ip_vs_dst_reset(struct ip_vs_dest *dest)
365{
366 struct dst_entry *old_dst;
367
368 old_dst = dest->dst_cache;
369 dest->dst_cache = NULL;
370 dst_release(old_dst);
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300371 dest->dst_saddr.ip = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372}
373
/*
 * Prepare @skb for tunnel transmission and evaluate to a verdict.
 *
 * Sets skb->ipvs_property.  For connections flagged IP_VS_CONN_F_NFCT
 * the verdict comes from ip_vs_confirm_conntrack(), otherwise it is
 * NF_ACCEPT.  Only on NF_ACCEPT are nf_reset() and skb_forward_csum()
 * applied.  Note that @skb and @cp are evaluated more than once.
 */
#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __verdict = NF_ACCEPT;				\
								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__verdict = ip_vs_confirm_conntrack(skb);	\
	if (__verdict == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__verdict;						\
})
387
/*
 * Final transmit step for NAT-ed packets.
 *
 * Sets skb->ipvs_property, then either updates conntrack (connections
 * flagged IP_VS_CONN_F_NFCT) or marks the skb as untracked.
 * CAUTION: when @local is true this macro returns NF_ACCEPT from the
 * *calling function*.  Otherwise the packet is passed to the
 * NF_INET_LOCAL_OUT hook of family @pf with dst_output as the
 * continuation.  @skb and @cp are evaluated more than once.
 */
#define IP_VS_XMIT_NAT(pf, skb, cp, local)			\
do {								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		ip_vs_update_conntrack(skb, cp, 1);		\
	else							\
		ip_vs_notrack(skb);				\
	if (local)						\
		return NF_ACCEPT;				\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
401
/*
 * Final transmit step for packets that are not NAT-ed.
 *
 * Sets skb->ipvs_property and marks the skb as untracked unless the
 * connection is flagged IP_VS_CONN_F_NFCT.
 * CAUTION: when @local is true this macro returns NF_ACCEPT from the
 * *calling function*.  Otherwise the packet is passed to the
 * NF_INET_LOCAL_OUT hook of family @pf with dst_output as the
 * continuation.  @skb and @cp are evaluated more than once.
 */
#define IP_VS_XMIT(pf, skb, cp, local)				\
do {								\
	(skb)->ipvs_property = 1;				\
	if (!unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		ip_vs_notrack(skb);				\
	if (local)						\
		return NF_ACCEPT;				\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
413
414
415/*
416 * NULL transmitter (do nothing except return NF_ACCEPT)
417 */
418int
419ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
420 struct ip_vs_protocol *pp)
421{
422 /* we do not touch skb and do not need pskb ptr */
Julian Anastasovfc604762010-10-17 16:38:15 +0300423 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424}
425
426
427/*
428 * Bypass transmitter
429 * Let packets bypass the destination when the destination is not
430 * available, it may be only used in transparent cache cluster.
431 */
432int
433ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
434 struct ip_vs_protocol *pp)
435{
436 struct rtable *rt; /* Route to the other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700437 struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 int mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439
440 EnterFunction(10);
441
Changli Gao17a8f8e2011-02-24 08:19:57 +0800442 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos),
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000443 IP_VS_RT_MODE_NON_LOCAL, NULL)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444 goto tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445
446 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700447 mtu = dst_mtu(&rt->dst);
Simon Horman8f1b03a2010-11-09 10:08:49 +0900448 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
449 !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 ip_rt_put(rt);
451 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
Hannes Eder1e3e2382009-08-02 11:05:41 +0000452 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453 goto tx_error;
454 }
455
456 /*
457 * Call ip_send_check because we are not sure it is called
458 * after ip_defrag. Is copy-on-write needed?
459 */
460 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
461 ip_rt_put(rt);
462 return NF_STOLEN;
463 }
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700464 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465
466 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000467 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700468 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700469
470 /* Another hack: avoid icmp_send in ip_fragment */
471 skb->local_df = 1;
472
Julian Anastasovfc604762010-10-17 16:38:15 +0300473 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474
475 LeaveFunction(10);
476 return NF_STOLEN;
477
478 tx_error_icmp:
479 dst_link_failure(skb);
480 tx_error:
481 kfree_skb(skb);
482 LeaveFunction(10);
483 return NF_STOLEN;
484}
485
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200486#ifdef CONFIG_IP_VS_IPV6
487int
488ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
489 struct ip_vs_protocol *pp)
490{
491 struct rt6_info *rt; /* Route to the other host */
492 struct ipv6hdr *iph = ipv6_hdr(skb);
493 int mtu;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200494
495 EnterFunction(10);
496
David S. Millere58b3442011-05-12 18:22:34 -0400497 if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0,
498 IP_VS_RT_MODE_NON_LOCAL)))
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200499 goto tx_error_icmp;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200500
501 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700502 mtu = dst_mtu(&rt->dst);
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +0200503 if (__mtu_check_toobig_v6(skb, mtu)) {
Julian Anastasovcb591552010-10-17 16:40:51 +0300504 if (!skb->dev) {
505 struct net *net = dev_net(skb_dst(skb)->dev);
506
507 skb->dev = net->loopback_dev;
508 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000509 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Julian Anastasovcb591552010-10-17 16:40:51 +0300510 dst_release(&rt->dst);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000511 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200512 goto tx_error;
513 }
514
515 /*
516 * Call ip_send_check because we are not sure it is called
517 * after ip_defrag. Is copy-on-write needed?
518 */
519 skb = skb_share_check(skb, GFP_ATOMIC);
520 if (unlikely(skb == NULL)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700521 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200522 return NF_STOLEN;
523 }
524
525 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000526 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700527 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200528
529 /* Another hack: avoid icmp_send in ip_fragment */
530 skb->local_df = 1;
531
Julian Anastasovfc604762010-10-17 16:38:15 +0300532 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200533
534 LeaveFunction(10);
535 return NF_STOLEN;
536
537 tx_error_icmp:
538 dst_link_failure(skb);
539 tx_error:
540 kfree_skb(skb);
541 LeaveFunction(10);
542 return NF_STOLEN;
543}
544#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545
546/*
547 * NAT transmitter (only for outside-to-inside nat forwarding)
548 * Not used for related ICMP
549 */
550int
551ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
552 struct ip_vs_protocol *pp)
553{
554 struct rtable *rt; /* Route to the other host */
555 int mtu;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700556 struct iphdr *iph = ip_hdr(skb);
Julian Anastasovfc604762010-10-17 16:38:15 +0300557 int local;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558
559 EnterFunction(10);
560
561 /* check if it is a connection of no-client-port */
562 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
Al Viro014d7302006-09-28 14:29:52 -0700563 __be16 _pt, *p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700564 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
565 if (p == NULL)
566 goto tx_error;
567 ip_vs_conn_fill_cport(cp, *p);
568 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
569 }
570
Julian Anastasovfc604762010-10-17 16:38:15 +0300571 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
Changli Gao17a8f8e2011-02-24 08:19:57 +0800572 RT_TOS(iph->tos),
573 IP_VS_RT_MODE_LOCAL |
574 IP_VS_RT_MODE_NON_LOCAL |
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000575 IP_VS_RT_MODE_RDR, NULL)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +0300577 local = rt->rt_flags & RTCF_LOCAL;
578 /*
579 * Avoid duplicate tuple in reply direction for NAT traffic
580 * to local address when connection is sync-ed
581 */
Igor Maravićc0cd1152011-12-12 02:58:24 +0000582#if IS_ENABLED(CONFIG_NF_CONNTRACK)
Julian Anastasovfc604762010-10-17 16:38:15 +0300583 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
584 enum ip_conntrack_info ctinfo;
585 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
586
587 if (ct && !nf_ct_is_untracked(ct)) {
Julian Anastasov0d796412010-10-17 16:46:17 +0300588 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
589 "ip_vs_nat_xmit(): "
Julian Anastasovfc604762010-10-17 16:38:15 +0300590 "stopping DNAT to local address");
591 goto tx_error_put;
592 }
593 }
594#endif
595
596 /* From world but DNAT to loopback address? */
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000597 if (local && ipv4_is_loopback(cp->daddr.ip) &&
David S. Millerc7537962010-11-11 17:07:48 -0800598 rt_is_input_route(skb_rtable(skb))) {
Julian Anastasov0d796412010-10-17 16:46:17 +0300599 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
Julian Anastasovfc604762010-10-17 16:38:15 +0300600 "stopping DNAT to loopback address");
601 goto tx_error_put;
602 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603
604 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700605 mtu = dst_mtu(&rt->dst);
Simon Horman8f1b03a2010-11-09 10:08:49 +0900606 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
607 !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
Julian Anastasov0d796412010-10-17 16:46:17 +0300609 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
610 "ip_vs_nat_xmit(): frag needed for");
Julian Anastasovfc604762010-10-17 16:38:15 +0300611 goto tx_error_put;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612 }
613
614 /* copy-on-write the packet before mangling it */
Herbert Xuaf1e1cf2007-10-14 00:39:33 -0700615 if (!skb_make_writable(skb, sizeof(struct iphdr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616 goto tx_error_put;
617
Changli Gaod8d1f302010-06-10 23:31:35 -0700618 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619 goto tx_error_put;
620
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621 /* mangle the packet */
Herbert Xu3db05fe2007-10-15 00:53:15 -0700622 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
Julian Anastasovfc604762010-10-17 16:38:15 +0300623 goto tx_error_put;
Julius Volze7ade462008-09-02 15:55:33 +0200624 ip_hdr(skb)->daddr = cp->daddr.ip;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700625 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626
Julian Anastasovfc604762010-10-17 16:38:15 +0300627 if (!local) {
628 /* drop old route */
629 skb_dst_drop(skb);
630 skb_dst_set(skb, &rt->dst);
631 } else {
632 ip_rt_put(rt);
633 /*
634 * Some IPv4 replies get local address from routes,
635 * not from iph, so while we DNAT after routing
636 * we need this second input/output route.
637 */
638 if (!__ip_vs_reroute_locally(skb))
639 goto tx_error;
640 }
641
Julian Anastasov0d796412010-10-17 16:46:17 +0300642 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643
644 /* FIXME: when application helper enlarges the packet and the length
645 is larger than the MTU of outgoing device, there will be still
646 MTU problem. */
647
648 /* Another hack: avoid icmp_send in ip_fragment */
649 skb->local_df = 1;
650
Julian Anastasovfc604762010-10-17 16:38:15 +0300651 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652
653 LeaveFunction(10);
654 return NF_STOLEN;
655
656 tx_error_icmp:
657 dst_link_failure(skb);
658 tx_error:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659 kfree_skb(skb);
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200660 LeaveFunction(10);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661 return NF_STOLEN;
662 tx_error_put:
663 ip_rt_put(rt);
664 goto tx_error;
665}
666
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200667#ifdef CONFIG_IP_VS_IPV6
668int
669ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
670 struct ip_vs_protocol *pp)
671{
672 struct rt6_info *rt; /* Route to the other host */
673 int mtu;
Julian Anastasovfc604762010-10-17 16:38:15 +0300674 int local;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200675
676 EnterFunction(10);
677
678 /* check if it is a connection of no-client-port */
679 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
680 __be16 _pt, *p;
681 p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
682 sizeof(_pt), &_pt);
683 if (p == NULL)
684 goto tx_error;
685 ip_vs_conn_fill_cport(cp, *p);
686 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
687 }
688
Julian Anastasovfc604762010-10-17 16:38:15 +0300689 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
David S. Millere58b3442011-05-12 18:22:34 -0400690 0, (IP_VS_RT_MODE_LOCAL |
691 IP_VS_RT_MODE_NON_LOCAL |
692 IP_VS_RT_MODE_RDR))))
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200693 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +0300694 local = __ip_vs_is_local_route6(rt);
695 /*
696 * Avoid duplicate tuple in reply direction for NAT traffic
697 * to local address when connection is sync-ed
698 */
Igor Maravićc0cd1152011-12-12 02:58:24 +0000699#if IS_ENABLED(CONFIG_NF_CONNTRACK)
Julian Anastasovfc604762010-10-17 16:38:15 +0300700 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
701 enum ip_conntrack_info ctinfo;
702 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
703
704 if (ct && !nf_ct_is_untracked(ct)) {
Julian Anastasov0d796412010-10-17 16:46:17 +0300705 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
Julian Anastasovfc604762010-10-17 16:38:15 +0300706 "ip_vs_nat_xmit_v6(): "
707 "stopping DNAT to local address");
708 goto tx_error_put;
709 }
710 }
711#endif
712
713 /* From world but DNAT to loopback address? */
714 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
715 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
Julian Anastasov0d796412010-10-17 16:46:17 +0300716 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
Julian Anastasovfc604762010-10-17 16:38:15 +0300717 "ip_vs_nat_xmit_v6(): "
718 "stopping DNAT to loopback address");
719 goto tx_error_put;
720 }
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200721
722 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700723 mtu = dst_mtu(&rt->dst);
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +0200724 if (__mtu_check_toobig_v6(skb, mtu)) {
Julian Anastasovcb591552010-10-17 16:40:51 +0300725 if (!skb->dev) {
726 struct net *net = dev_net(skb_dst(skb)->dev);
727
728 skb->dev = net->loopback_dev;
729 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000730 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Julian Anastasov0d796412010-10-17 16:46:17 +0300731 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200732 "ip_vs_nat_xmit_v6(): frag needed for");
Julian Anastasovfc604762010-10-17 16:38:15 +0300733 goto tx_error_put;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200734 }
735
736 /* copy-on-write the packet before mangling it */
737 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
738 goto tx_error_put;
739
Changli Gaod8d1f302010-06-10 23:31:35 -0700740 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200741 goto tx_error_put;
742
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200743 /* mangle the packet */
744 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
745 goto tx_error;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000746 ipv6_hdr(skb)->daddr = cp->daddr.in6;
Julian Anastasovfc604762010-10-17 16:38:15 +0300747
748 if (!local || !skb->dev) {
749 /* drop the old route when skb is not shared */
750 skb_dst_drop(skb);
751 skb_dst_set(skb, &rt->dst);
752 } else {
753 /* destined to loopback, do we need to change route? */
754 dst_release(&rt->dst);
755 }
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200756
Julian Anastasov0d796412010-10-17 16:46:17 +0300757 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200758
759 /* FIXME: when application helper enlarges the packet and the length
760 is larger than the MTU of outgoing device, there will be still
761 MTU problem. */
762
763 /* Another hack: avoid icmp_send in ip_fragment */
764 skb->local_df = 1;
765
Julian Anastasovfc604762010-10-17 16:38:15 +0300766 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200767
768 LeaveFunction(10);
769 return NF_STOLEN;
770
771tx_error_icmp:
772 dst_link_failure(skb);
773tx_error:
774 LeaveFunction(10);
775 kfree_skb(skb);
776 return NF_STOLEN;
777tx_error_put:
Changli Gaod8d1f302010-06-10 23:31:35 -0700778 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200779 goto tx_error;
780}
781#endif
782
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783
784/*
785 * IP Tunneling transmitter
786 *
787 * This function encapsulates the packet in a new IP packet, its
788 * destination will be set to cp->daddr. Most code of this function
789 * is taken from ipip.c.
790 *
791 * It is used in VS/TUN cluster. The load balancer selects a real
792 * server from a cluster based on a scheduling algorithm,
793 * encapsulates the request packet and forwards it to the selected
794 * server. For example, all real servers are configured with
795 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
796 * the encapsulated packet, it will decapsulate the packet, processe
797 * the request and return the response packets directly to the client
798 * without passing the load balancer. This can greatly increase the
799 * scalability of virtual server.
800 *
801 * Used for ANY protocol
802 */
803int
804ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
805 struct ip_vs_protocol *pp)
806{
Julian Anastasov3654e612012-07-20 11:59:53 +0300807 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 struct rtable *rt; /* Route to the other host */
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000809 __be32 saddr; /* Source for tunnel */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810 struct net_device *tdev; /* Device to other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700811 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812 u8 tos = old_iph->tos;
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300813 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700814 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700815 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816 int mtu;
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200817 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700818
819 EnterFunction(10);
820
Julian Anastasovfc604762010-10-17 16:38:15 +0300821 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
Changli Gao17a8f8e2011-02-24 08:19:57 +0800822 RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300823 IP_VS_RT_MODE_NON_LOCAL |
824 IP_VS_RT_MODE_CONNECT,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000825 &saddr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +0300827 if (rt->rt_flags & RTCF_LOCAL) {
828 ip_rt_put(rt);
829 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
830 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831
Changli Gaod8d1f302010-06-10 23:31:35 -0700832 tdev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833
Changli Gaod8d1f302010-06-10 23:31:35 -0700834 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835 if (mtu < 68) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000836 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +0300837 goto tx_error_put;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700838 }
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300839 if (rt_is_output_route(skb_rtable(skb)))
David S. Miller6700c272012-07-17 03:29:28 -0700840 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300842 /* Copy DF, reset fragment offset and MF */
Julian Anastasov3654e612012-07-20 11:59:53 +0300843 df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844
Julian Anastasov3654e612012-07-20 11:59:53 +0300845 if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
Hannes Eder1e3e2382009-08-02 11:05:41 +0000847 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +0300848 goto tx_error_put;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 }
850
851 /*
852 * Okay, now see if we can stuff it in the buffer as-is.
853 */
854 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
855
856 if (skb_headroom(skb) < max_headroom
857 || skb_cloned(skb) || skb_shared(skb)) {
858 struct sk_buff *new_skb =
859 skb_realloc_headroom(skb, max_headroom);
860 if (!new_skb) {
861 ip_rt_put(rt);
862 kfree_skb(skb);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000863 IP_VS_ERR_RL("%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864 return NF_STOLEN;
865 }
Eric Dumazet5d0ba552012-06-04 01:17:19 +0000866 consume_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700868 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 }
870
Hans Schillstrom714f0952010-10-19 10:38:48 +0200871 skb->transport_header = skb->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872
873 /* fix old IP header checksum */
874 ip_send_check(old_iph);
875
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700876 skb_push(skb, sizeof(struct iphdr));
877 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
879
880 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000881 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700882 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883
884 /*
885 * Push down and install the IPIP header.
886 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700887 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888 iph->version = 4;
889 iph->ihl = sizeof(struct iphdr)>>2;
890 iph->frag_off = df;
891 iph->protocol = IPPROTO_IPIP;
892 iph->tos = tos;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000893 iph->daddr = cp->daddr.ip;
894 iph->saddr = saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 iph->ttl = old_iph->ttl;
Changli Gaod8d1f302010-06-10 23:31:35 -0700896 ip_select_ident(iph, &rt->dst, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897
898 /* Another hack: avoid icmp_send in ip_fragment */
899 skb->local_df = 1;
900
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200901 ret = IP_VS_XMIT_TUNNEL(skb, cp);
902 if (ret == NF_ACCEPT)
903 ip_local_out(skb);
904 else if (ret == NF_DROP)
905 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906
907 LeaveFunction(10);
908
909 return NF_STOLEN;
910
911 tx_error_icmp:
912 dst_link_failure(skb);
913 tx_error:
914 kfree_skb(skb);
915 LeaveFunction(10);
916 return NF_STOLEN;
Julian Anastasovfc604762010-10-17 16:38:15 +0300917tx_error_put:
918 ip_rt_put(rt);
919 goto tx_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700920}
921
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200922#ifdef CONFIG_IP_VS_IPV6
923int
924ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
925 struct ip_vs_protocol *pp)
926{
927 struct rt6_info *rt; /* Route to the other host */
Hans Schillstrom714f0952010-10-19 10:38:48 +0200928 struct in6_addr saddr; /* Source for tunnel */
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200929 struct net_device *tdev; /* Device to other host */
930 struct ipv6hdr *old_iph = ipv6_hdr(skb);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200931 struct ipv6hdr *iph; /* Our new IP header */
932 unsigned int max_headroom; /* The extra header space needed */
933 int mtu;
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200934 int ret;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200935
936 EnterFunction(10);
937
Julian Anastasovfc604762010-10-17 16:38:15 +0300938 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
David S. Millere58b3442011-05-12 18:22:34 -0400939 &saddr, 1, (IP_VS_RT_MODE_LOCAL |
940 IP_VS_RT_MODE_NON_LOCAL))))
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200941 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +0300942 if (__ip_vs_is_local_route6(rt)) {
943 dst_release(&rt->dst);
944 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
945 }
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200946
Changli Gaod8d1f302010-06-10 23:31:35 -0700947 tdev = rt->dst.dev;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200948
Changli Gaod8d1f302010-06-10 23:31:35 -0700949 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200950 if (mtu < IPV6_MIN_MTU) {
Hans Schillstrom714f0952010-10-19 10:38:48 +0200951 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
952 IPV6_MIN_MTU);
Julian Anastasovfc604762010-10-17 16:38:15 +0300953 goto tx_error_put;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200954 }
Eric Dumazetadf30902009-06-02 05:19:30 +0000955 if (skb_dst(skb))
David S. Miller6700c272012-07-17 03:29:28 -0700956 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200957
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +0200958 /* MTU checking: Notice that 'mtu' have been adjusted before hand */
959 if (__mtu_check_toobig_v6(skb, mtu)) {
Julian Anastasovcb591552010-10-17 16:40:51 +0300960 if (!skb->dev) {
961 struct net *net = dev_net(skb_dst(skb)->dev);
962
963 skb->dev = net->loopback_dev;
964 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000965 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000966 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +0300967 goto tx_error_put;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200968 }
969
970 /*
971 * Okay, now see if we can stuff it in the buffer as-is.
972 */
973 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
974
975 if (skb_headroom(skb) < max_headroom
976 || skb_cloned(skb) || skb_shared(skb)) {
977 struct sk_buff *new_skb =
978 skb_realloc_headroom(skb, max_headroom);
979 if (!new_skb) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700980 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200981 kfree_skb(skb);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000982 IP_VS_ERR_RL("%s(): no memory\n", __func__);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200983 return NF_STOLEN;
984 }
Eric Dumazet5d0ba552012-06-04 01:17:19 +0000985 consume_skb(skb);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200986 skb = new_skb;
987 old_iph = ipv6_hdr(skb);
988 }
989
Hans Schillstrom714f0952010-10-19 10:38:48 +0200990 skb->transport_header = skb->network_header;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200991
992 skb_push(skb, sizeof(struct ipv6hdr));
993 skb_reset_network_header(skb);
994 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
995
996 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000997 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700998 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200999
1000 /*
1001 * Push down and install the IPIP header.
1002 */
1003 iph = ipv6_hdr(skb);
1004 iph->version = 6;
1005 iph->nexthdr = IPPROTO_IPV6;
Harvey Harrisonb7b45f42008-11-10 16:46:06 -08001006 iph->payload_len = old_iph->payload_len;
1007 be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001008 iph->priority = old_iph->priority;
1009 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001010 iph->daddr = cp->daddr.in6;
1011 iph->saddr = saddr;
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001012 iph->hop_limit = old_iph->hop_limit;
1013
1014 /* Another hack: avoid icmp_send in ip_fragment */
1015 skb->local_df = 1;
1016
Julian Anastasovf4bc17c2010-09-21 17:35:41 +02001017 ret = IP_VS_XMIT_TUNNEL(skb, cp);
1018 if (ret == NF_ACCEPT)
1019 ip6_local_out(skb);
1020 else if (ret == NF_DROP)
1021 kfree_skb(skb);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001022
1023 LeaveFunction(10);
1024
1025 return NF_STOLEN;
1026
1027tx_error_icmp:
1028 dst_link_failure(skb);
1029tx_error:
1030 kfree_skb(skb);
1031 LeaveFunction(10);
1032 return NF_STOLEN;
Julian Anastasovfc604762010-10-17 16:38:15 +03001033tx_error_put:
1034 dst_release(&rt->dst);
1035 goto tx_error;
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001036}
1037#endif
1038
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039
1040/*
1041 * Direct Routing transmitter
1042 * Used for ANY protocol
1043 */
1044int
1045ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1046 struct ip_vs_protocol *pp)
1047{
1048 struct rtable *rt; /* Route to the other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001049 struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050 int mtu;
1051
1052 EnterFunction(10);
1053
Julian Anastasovfc604762010-10-17 16:38:15 +03001054 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
Changli Gao17a8f8e2011-02-24 08:19:57 +08001055 RT_TOS(iph->tos),
1056 IP_VS_RT_MODE_LOCAL |
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001057 IP_VS_RT_MODE_NON_LOCAL, NULL)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +03001059 if (rt->rt_flags & RTCF_LOCAL) {
1060 ip_rt_put(rt);
1061 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
1062 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063
1064 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -07001065 mtu = dst_mtu(&rt->dst);
Simon Horman8f1b03a2010-11-09 10:08:49 +09001066 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
1067 !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1069 ip_rt_put(rt);
Hannes Eder1e3e2382009-08-02 11:05:41 +00001070 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071 goto tx_error;
1072 }
1073
1074 /*
1075 * Call ip_send_check because we are not sure it is called
1076 * after ip_defrag. Is copy-on-write needed?
1077 */
1078 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
1079 ip_rt_put(rt);
1080 return NF_STOLEN;
1081 }
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001082 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083
1084 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +00001085 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07001086 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087
1088 /* Another hack: avoid icmp_send in ip_fragment */
1089 skb->local_df = 1;
1090
Julian Anastasovfc604762010-10-17 16:38:15 +03001091 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092
1093 LeaveFunction(10);
1094 return NF_STOLEN;
1095
1096 tx_error_icmp:
1097 dst_link_failure(skb);
1098 tx_error:
1099 kfree_skb(skb);
1100 LeaveFunction(10);
1101 return NF_STOLEN;
1102}
1103
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001104#ifdef CONFIG_IP_VS_IPV6
1105int
1106ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1107 struct ip_vs_protocol *pp)
1108{
1109 struct rt6_info *rt; /* Route to the other host */
1110 int mtu;
1111
1112 EnterFunction(10);
1113
Julian Anastasovfc604762010-10-17 16:38:15 +03001114 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
David S. Millere58b3442011-05-12 18:22:34 -04001115 0, (IP_VS_RT_MODE_LOCAL |
1116 IP_VS_RT_MODE_NON_LOCAL))))
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001117 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +03001118 if (__ip_vs_is_local_route6(rt)) {
1119 dst_release(&rt->dst);
1120 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
1121 }
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001122
1123 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -07001124 mtu = dst_mtu(&rt->dst);
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +02001125 if (__mtu_check_toobig_v6(skb, mtu)) {
Julian Anastasovcb591552010-10-17 16:40:51 +03001126 if (!skb->dev) {
1127 struct net *net = dev_net(skb_dst(skb)->dev);
1128
1129 skb->dev = net->loopback_dev;
1130 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001131 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Changli Gaod8d1f302010-06-10 23:31:35 -07001132 dst_release(&rt->dst);
Hannes Eder1e3e2382009-08-02 11:05:41 +00001133 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001134 goto tx_error;
1135 }
1136
1137 /*
1138 * Call ip_send_check because we are not sure it is called
1139 * after ip_defrag. Is copy-on-write needed?
1140 */
1141 skb = skb_share_check(skb, GFP_ATOMIC);
1142 if (unlikely(skb == NULL)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001143 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001144 return NF_STOLEN;
1145 }
1146
1147 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +00001148 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07001149 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001150
1151 /* Another hack: avoid icmp_send in ip_fragment */
1152 skb->local_df = 1;
1153
Julian Anastasovfc604762010-10-17 16:38:15 +03001154 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001155
1156 LeaveFunction(10);
1157 return NF_STOLEN;
1158
1159tx_error_icmp:
1160 dst_link_failure(skb);
1161tx_error:
1162 kfree_skb(skb);
1163 LeaveFunction(10);
1164 return NF_STOLEN;
1165}
1166#endif
1167
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168
1169/*
1170 * ICMP packet transmitter
1171 * called by the ip_vs_in_icmp
1172 */
1173int
1174ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001175 struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176{
1177 struct rtable *rt; /* Route to the other host */
1178 int mtu;
1179 int rc;
Julian Anastasovfc604762010-10-17 16:38:15 +03001180 int local;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001181 int rt_mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182
1183 EnterFunction(10);
1184
1185 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1186 forwarded directly here, because there is no need to
1187 translate address/port back */
1188 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1189 if (cp->packet_xmit)
1190 rc = cp->packet_xmit(skb, cp, pp);
1191 else
1192 rc = NF_ACCEPT;
1193 /* do not touch skb anymore */
1194 atomic_inc(&cp->in_pkts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 goto out;
1196 }
1197
1198 /*
1199 * mangle and send the packet here (only for VS/NAT)
1200 */
1201
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001202 /* LOCALNODE from FORWARD hook is not supported */
1203 rt_mode = (hooknum != NF_INET_FORWARD) ?
1204 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1205 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
Julian Anastasovfc604762010-10-17 16:38:15 +03001206 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
Changli Gao17a8f8e2011-02-24 08:19:57 +08001207 RT_TOS(ip_hdr(skb)->tos),
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001208 rt_mode, NULL)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +03001210 local = rt->rt_flags & RTCF_LOCAL;
1211
1212 /*
1213 * Avoid duplicate tuple in reply direction for NAT traffic
1214 * to local address when connection is sync-ed
1215 */
Igor Maravićc0cd1152011-12-12 02:58:24 +00001216#if IS_ENABLED(CONFIG_NF_CONNTRACK)
Julian Anastasovfc604762010-10-17 16:38:15 +03001217 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1218 enum ip_conntrack_info ctinfo;
1219 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1220
1221 if (ct && !nf_ct_is_untracked(ct)) {
1222 IP_VS_DBG(10, "%s(): "
1223 "stopping DNAT to local address %pI4\n",
1224 __func__, &cp->daddr.ip);
1225 goto tx_error_put;
1226 }
1227 }
1228#endif
1229
1230 /* From world but DNAT to loopback address? */
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001231 if (local && ipv4_is_loopback(cp->daddr.ip) &&
David S. Millerc7537962010-11-11 17:07:48 -08001232 rt_is_input_route(skb_rtable(skb))) {
Julian Anastasovfc604762010-10-17 16:38:15 +03001233 IP_VS_DBG(1, "%s(): "
1234 "stopping DNAT to loopback %pI4\n",
1235 __func__, &cp->daddr.ip);
1236 goto tx_error_put;
1237 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238
1239 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -07001240 mtu = dst_mtu(&rt->dst);
Simon Horman8f1b03a2010-11-09 10:08:49 +09001241 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1242 !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
Hannes Eder1e3e2382009-08-02 11:05:41 +00001244 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +03001245 goto tx_error_put;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246 }
1247
1248 /* copy-on-write the packet before mangling it */
Herbert Xuaf1e1cf2007-10-14 00:39:33 -07001249 if (!skb_make_writable(skb, offset))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250 goto tx_error_put;
1251
Changli Gaod8d1f302010-06-10 23:31:35 -07001252 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 goto tx_error_put;
1254
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255 ip_vs_nat_icmp(skb, pp, cp, 0);
1256
Julian Anastasovfc604762010-10-17 16:38:15 +03001257 if (!local) {
1258 /* drop the old route when skb is not shared */
1259 skb_dst_drop(skb);
1260 skb_dst_set(skb, &rt->dst);
1261 } else {
1262 ip_rt_put(rt);
1263 /*
1264 * Some IPv4 replies get local address from routes,
1265 * not from iph, so while we DNAT after routing
1266 * we need this second input/output route.
1267 */
1268 if (!__ip_vs_reroute_locally(skb))
1269 goto tx_error;
1270 }
1271
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272 /* Another hack: avoid icmp_send in ip_fragment */
1273 skb->local_df = 1;
1274
Julian Anastasovfc604762010-10-17 16:38:15 +03001275 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001276
1277 rc = NF_STOLEN;
1278 goto out;
1279
1280 tx_error_icmp:
1281 dst_link_failure(skb);
1282 tx_error:
1283 dev_kfree_skb(skb);
1284 rc = NF_STOLEN;
1285 out:
1286 LeaveFunction(10);
1287 return rc;
1288 tx_error_put:
1289 ip_rt_put(rt);
1290 goto tx_error;
1291}
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001292
1293#ifdef CONFIG_IP_VS_IPV6
1294int
1295ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001296 struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001297{
1298 struct rt6_info *rt; /* Route to the other host */
1299 int mtu;
1300 int rc;
Julian Anastasovfc604762010-10-17 16:38:15 +03001301 int local;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001302 int rt_mode;
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001303
1304 EnterFunction(10);
1305
1306 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1307 forwarded directly here, because there is no need to
1308 translate address/port back */
1309 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1310 if (cp->packet_xmit)
1311 rc = cp->packet_xmit(skb, cp, pp);
1312 else
1313 rc = NF_ACCEPT;
1314 /* do not touch skb anymore */
1315 atomic_inc(&cp->in_pkts);
1316 goto out;
1317 }
1318
1319 /*
1320 * mangle and send the packet here (only for VS/NAT)
1321 */
1322
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001323 /* LOCALNODE from FORWARD hook is not supported */
1324 rt_mode = (hooknum != NF_INET_FORWARD) ?
1325 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1326 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
Julian Anastasovfc604762010-10-17 16:38:15 +03001327 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001328 0, rt_mode)))
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001329 goto tx_error_icmp;
1330
Julian Anastasovfc604762010-10-17 16:38:15 +03001331 local = __ip_vs_is_local_route6(rt);
1332 /*
1333 * Avoid duplicate tuple in reply direction for NAT traffic
1334 * to local address when connection is sync-ed
1335 */
Igor Maravićc0cd1152011-12-12 02:58:24 +00001336#if IS_ENABLED(CONFIG_NF_CONNTRACK)
Julian Anastasovfc604762010-10-17 16:38:15 +03001337 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1338 enum ip_conntrack_info ctinfo;
1339 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1340
1341 if (ct && !nf_ct_is_untracked(ct)) {
1342 IP_VS_DBG(10, "%s(): "
1343 "stopping DNAT to local address %pI6\n",
1344 __func__, &cp->daddr.in6);
1345 goto tx_error_put;
1346 }
1347 }
1348#endif
1349
1350 /* From world but DNAT to loopback address? */
1351 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1352 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
1353 IP_VS_DBG(1, "%s(): "
1354 "stopping DNAT to loopback %pI6\n",
1355 __func__, &cp->daddr.in6);
1356 goto tx_error_put;
1357 }
1358
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001359 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -07001360 mtu = dst_mtu(&rt->dst);
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +02001361 if (__mtu_check_toobig_v6(skb, mtu)) {
Julian Anastasovcb591552010-10-17 16:40:51 +03001362 if (!skb->dev) {
1363 struct net *net = dev_net(skb_dst(skb)->dev);
1364
1365 skb->dev = net->loopback_dev;
1366 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001367 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Hannes Eder1e3e2382009-08-02 11:05:41 +00001368 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +03001369 goto tx_error_put;
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001370 }
1371
1372 /* copy-on-write the packet before mangling it */
1373 if (!skb_make_writable(skb, offset))
1374 goto tx_error_put;
1375
Changli Gaod8d1f302010-06-10 23:31:35 -07001376 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001377 goto tx_error_put;
1378
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001379 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1380
Julian Anastasovfc604762010-10-17 16:38:15 +03001381 if (!local || !skb->dev) {
1382 /* drop the old route when skb is not shared */
1383 skb_dst_drop(skb);
1384 skb_dst_set(skb, &rt->dst);
1385 } else {
1386 /* destined to loopback, do we need to change route? */
1387 dst_release(&rt->dst);
1388 }
1389
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001390 /* Another hack: avoid icmp_send in ip_fragment */
1391 skb->local_df = 1;
1392
Julian Anastasovfc604762010-10-17 16:38:15 +03001393 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001394
1395 rc = NF_STOLEN;
1396 goto out;
1397
1398tx_error_icmp:
1399 dst_link_failure(skb);
1400tx_error:
1401 dev_kfree_skb(skb);
1402 rc = NF_STOLEN;
1403out:
1404 LeaveFunction(10);
1405 return rc;
1406tx_error_put:
Changli Gaod8d1f302010-06-10 23:31:35 -07001407 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001408 goto tx_error;
1409}
1410#endif