blob: c942d36e38a57a24a9485220e8969120c6f70bcc [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * ip_vs_xmit.c: various packet transmitters for IPVS
3 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 * Julian Anastasov <ja@ssi.bg>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Changes:
13 *
Julian Anastasovcb591552010-10-17 16:40:51 +030014 * Description of forwarding methods:
15 * - all transmitters are called from LOCAL_IN (remote clients) and
16 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
17 * - not all connections have destination server, for example,
18 * connections in backup server when fwmark is used
19 * - bypass connections use daddr from packet
20 * LOCAL_OUT rules:
21 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
22 * - skb->pkt_type is not set yet
23 * - the only place where we can see skb->sk != NULL
Linus Torvalds1da177e2005-04-16 15:20:36 -070024 */
25
Hannes Eder9aada7a2009-07-30 14:29:44 -070026#define KMSG_COMPONENT "IPVS"
27#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <linux/kernel.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090030#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/tcp.h> /* for tcphdr */
Herbert Xuc439cb22008-01-11 19:14:00 -080032#include <net/ip.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <net/tcp.h> /* for csum_tcpudp_magic */
34#include <net/udp.h>
35#include <net/icmp.h> /* for icmp_send */
36#include <net/route.h> /* for ip_route_output */
Julius Volz38cdcc92008-09-02 15:55:44 +020037#include <net/ipv6.h>
38#include <net/ip6_route.h>
Hans Schillstrom714f0952010-10-19 10:38:48 +020039#include <net/addrconf.h>
Julius Volz38cdcc92008-09-02 15:55:44 +020040#include <linux/icmpv6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/netfilter.h>
42#include <linux/netfilter_ipv4.h>
43
44#include <net/ip_vs.h>
45
/* Route lookup policy bits passed as rt_mode to the route helpers below */
enum {
	IP_VS_RT_MODE_LOCAL	= 1 << 0,	/* Allow local dest */
	IP_VS_RT_MODE_NON_LOCAL	= 1 << 1,	/* Allow non-local dest */
	IP_VS_RT_MODE_RDR	= 1 << 2,	/* Allow redirect from remote
						 * daddr to local
						 */
	IP_VS_RT_MODE_CONNECT	= 1 << 3,	/* Always bind route to saddr */
	IP_VS_RT_MODE_KNOWN_NH	= 1 << 4,	/* Route via remote addr */
};
Linus Torvalds1da177e2005-04-16 15:20:36 -070055
56/*
57 * Destination cache to speed up outgoing route lookup
58 */
59static inline void
Julian Anastasovc90558d2013-03-21 11:57:59 +020060__ip_vs_dst_set(struct ip_vs_dest *dest, struct dst_entry *dst, u32 dst_cookie)
Linus Torvalds1da177e2005-04-16 15:20:36 -070061{
62 struct dst_entry *old_dst;
63
64 old_dst = dest->dst_cache;
65 dest->dst_cache = dst;
Hans Schillstrom714f0952010-10-19 10:38:48 +020066 dest->dst_cookie = dst_cookie;
Linus Torvalds1da177e2005-04-16 15:20:36 -070067 dst_release(old_dst);
68}
69
70static inline struct dst_entry *
Julian Anastasovc90558d2013-03-21 11:57:59 +020071__ip_vs_dst_check(struct ip_vs_dest *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -070072{
73 struct dst_entry *dst = dest->dst_cache;
74
75 if (!dst)
76 return NULL;
Julian Anastasovc90558d2013-03-21 11:57:59 +020077 if (dst->obsolete && dst->ops->check(dst, dest->dst_cookie) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070078 dest->dst_cache = NULL;
79 dst_release(dst);
80 return NULL;
81 }
82 dst_hold(dst);
83 return dst;
84}
85
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +020086static inline bool
87__mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
88{
Patrick McHardy4cdd34082012-08-26 19:13:58 +020089 if (IP6CB(skb)->frag_max_size) {
90 /* frag_max_size tell us that, this packet have been
91 * defragmented by netfilter IPv6 conntrack module.
92 */
93 if (IP6CB(skb)->frag_max_size > mtu)
94 return true; /* largest fragment violate MTU */
95 }
96 else if (skb->len > mtu && !skb_is_gso(skb)) {
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +020097 return true; /* Packet size violate MTU size */
98 }
99 return false;
100}
101
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300102/* Get route to daddr, update *saddr, optionally bind route to saddr */
103static struct rtable *do_output_route4(struct net *net, __be32 daddr,
Julian Anastasovc90558d2013-03-21 11:57:59 +0200104 int rt_mode, __be32 *saddr)
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300105{
106 struct flowi4 fl4;
107 struct rtable *rt;
108 int loop = 0;
109
110 memset(&fl4, 0, sizeof(fl4));
111 fl4.daddr = daddr;
112 fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
Julian Anastasovad4d3ef2012-10-08 11:41:20 +0000113 fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
114 FLOWI_FLAG_KNOWN_NH : 0;
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300115
116retry:
117 rt = ip_route_output_key(net, &fl4);
118 if (IS_ERR(rt)) {
119 /* Invalid saddr ? */
120 if (PTR_ERR(rt) == -EINVAL && *saddr &&
121 rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
122 *saddr = 0;
Julian Anastasovc90558d2013-03-21 11:57:59 +0200123 flowi4_update_output(&fl4, 0, 0, daddr, 0);
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300124 goto retry;
125 }
126 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
127 return NULL;
128 } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
129 ip_rt_put(rt);
130 *saddr = fl4.saddr;
Julian Anastasovc90558d2013-03-21 11:57:59 +0200131 flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300132 loop++;
133 goto retry;
134 }
135 *saddr = fl4.saddr;
136 return rt;
137}
138
Changli Gao17a8f8e2011-02-24 08:19:57 +0800139/* Get route to destination or remote server */
Ilpo Järvinenad1b30b2008-01-05 23:12:40 -0800140static struct rtable *
Julian Anastasovfc604762010-10-17 16:38:15 +0300141__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
Julian Anastasovc90558d2013-03-21 11:57:59 +0200142 __be32 daddr, int rt_mode, __be32 *ret_saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143{
Julian Anastasovfc604762010-10-17 16:38:15 +0300144 struct net *net = dev_net(skb_dst(skb)->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145 struct rtable *rt; /* Route to the other host */
Julian Anastasovfc604762010-10-17 16:38:15 +0300146 struct rtable *ort; /* Original route */
147 int local;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148
149 if (dest) {
150 spin_lock(&dest->dst_lock);
Julian Anastasovc90558d2013-03-21 11:57:59 +0200151 rt = (struct rtable *) __ip_vs_dst_check(dest);
152 if (!rt) {
153 rt = do_output_route4(net, dest->addr.ip, rt_mode,
154 &dest->dst_saddr.ip);
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300155 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156 spin_unlock(&dest->dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157 return NULL;
158 }
Julian Anastasovc90558d2013-03-21 11:57:59 +0200159 __ip_vs_dst_set(dest, dst_clone(&rt->dst), 0);
160 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000161 &dest->addr.ip, &dest->dst_saddr.ip,
Julian Anastasovc90558d2013-03-21 11:57:59 +0200162 atomic_read(&rt->dst.__refcnt));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 }
David S. Miller44e31252011-05-09 14:38:06 -0700164 daddr = dest->addr.ip;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000165 if (ret_saddr)
166 *ret_saddr = dest->dst_saddr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 spin_unlock(&dest->dst_lock);
168 } else {
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300169 __be32 saddr = htonl(INADDR_ANY);
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000170
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300171 /* For such unconfigured boxes avoid many route lookups
172 * for performance reasons because we do not remember saddr
173 */
174 rt_mode &= ~IP_VS_RT_MODE_CONNECT;
Julian Anastasovc90558d2013-03-21 11:57:59 +0200175 rt = do_output_route4(net, daddr, rt_mode, &saddr);
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300176 if (!rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 return NULL;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000178 if (ret_saddr)
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300179 *ret_saddr = saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180 }
181
Julian Anastasovfc604762010-10-17 16:38:15 +0300182 local = rt->rt_flags & RTCF_LOCAL;
Changli Gao17a8f8e2011-02-24 08:19:57 +0800183 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
184 rt_mode)) {
Julian Anastasovfc604762010-10-17 16:38:15 +0300185 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
186 (rt->rt_flags & RTCF_LOCAL) ?
David S. Miller44e31252011-05-09 14:38:06 -0700187 "local":"non-local", &daddr);
Julian Anastasovfc604762010-10-17 16:38:15 +0300188 ip_rt_put(rt);
189 return NULL;
190 }
Changli Gao17a8f8e2011-02-24 08:19:57 +0800191 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
192 !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
Julian Anastasovfc604762010-10-17 16:38:15 +0300193 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
194 "requires NAT method, dest: %pI4\n",
David S. Miller44e31252011-05-09 14:38:06 -0700195 &ip_hdr(skb)->daddr, &daddr);
Julian Anastasovfc604762010-10-17 16:38:15 +0300196 ip_rt_put(rt);
197 return NULL;
198 }
199 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
200 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
201 "to non-local address, dest: %pI4\n",
David S. Miller44e31252011-05-09 14:38:06 -0700202 &ip_hdr(skb)->saddr, &daddr);
Julian Anastasovfc604762010-10-17 16:38:15 +0300203 ip_rt_put(rt);
204 return NULL;
205 }
206
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 return rt;
208}
209
Julius Volz38cdcc92008-09-02 15:55:44 +0200210#ifdef CONFIG_IP_VS_IPV6
Hans Schillstrom714f0952010-10-19 10:38:48 +0200211
Julian Anastasovfc604762010-10-17 16:38:15 +0300212static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
213{
David S. Millerd1918542011-12-28 20:19:20 -0500214 return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
Julian Anastasovfc604762010-10-17 16:38:15 +0300215}
216
Hans Schillstrom714f0952010-10-19 10:38:48 +0200217static struct dst_entry *
218__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
219 struct in6_addr *ret_saddr, int do_xfrm)
Julius Volz38cdcc92008-09-02 15:55:44 +0200220{
Hans Schillstrom714f0952010-10-19 10:38:48 +0200221 struct dst_entry *dst;
David S. Miller4c9483b2011-03-12 16:22:43 -0500222 struct flowi6 fl6 = {
223 .daddr = *daddr,
Hans Schillstrom714f0952010-10-19 10:38:48 +0200224 };
225
David S. Miller4c9483b2011-03-12 16:22:43 -0500226 dst = ip6_route_output(net, NULL, &fl6);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200227 if (dst->error)
228 goto out_err;
229 if (!ret_saddr)
230 return dst;
David S. Miller4c9483b2011-03-12 16:22:43 -0500231 if (ipv6_addr_any(&fl6.saddr) &&
Hans Schillstrom714f0952010-10-19 10:38:48 +0200232 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
David S. Miller4c9483b2011-03-12 16:22:43 -0500233 &fl6.daddr, 0, &fl6.saddr) < 0)
Hans Schillstrom714f0952010-10-19 10:38:48 +0200234 goto out_err;
David S. Miller452edd52011-03-02 13:27:41 -0800235 if (do_xfrm) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500236 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
David S. Miller452edd52011-03-02 13:27:41 -0800237 if (IS_ERR(dst)) {
238 dst = NULL;
239 goto out_err;
240 }
241 }
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000242 *ret_saddr = fl6.saddr;
Hans Schillstrom714f0952010-10-19 10:38:48 +0200243 return dst;
244
245out_err:
246 dst_release(dst);
247 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
248 return NULL;
249}
250
Julian Anastasovfc604762010-10-17 16:38:15 +0300251/*
252 * Get route to destination or remote server
Julian Anastasovfc604762010-10-17 16:38:15 +0300253 */
Hans Schillstrom714f0952010-10-19 10:38:48 +0200254static struct rt6_info *
Julian Anastasovfc604762010-10-17 16:38:15 +0300255__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
256 struct in6_addr *daddr, struct in6_addr *ret_saddr,
257 int do_xfrm, int rt_mode)
Hans Schillstrom714f0952010-10-19 10:38:48 +0200258{
Julian Anastasovfc604762010-10-17 16:38:15 +0300259 struct net *net = dev_net(skb_dst(skb)->dev);
Julius Volz38cdcc92008-09-02 15:55:44 +0200260 struct rt6_info *rt; /* Route to the other host */
Julian Anastasovfc604762010-10-17 16:38:15 +0300261 struct rt6_info *ort; /* Original route */
Hans Schillstrom714f0952010-10-19 10:38:48 +0200262 struct dst_entry *dst;
Julian Anastasovfc604762010-10-17 16:38:15 +0300263 int local;
Julius Volz38cdcc92008-09-02 15:55:44 +0200264
265 if (dest) {
266 spin_lock(&dest->dst_lock);
Julian Anastasovc90558d2013-03-21 11:57:59 +0200267 rt = (struct rt6_info *)__ip_vs_dst_check(dest);
Julius Volz38cdcc92008-09-02 15:55:44 +0200268 if (!rt) {
Hans Schillstrom714f0952010-10-19 10:38:48 +0200269 u32 cookie;
Julius Volz38cdcc92008-09-02 15:55:44 +0200270
Hans Schillstrom714f0952010-10-19 10:38:48 +0200271 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000272 &dest->dst_saddr.in6,
Hans Schillstrom714f0952010-10-19 10:38:48 +0200273 do_xfrm);
274 if (!dst) {
Julius Volz38cdcc92008-09-02 15:55:44 +0200275 spin_unlock(&dest->dst_lock);
Julius Volz38cdcc92008-09-02 15:55:44 +0200276 return NULL;
277 }
Hans Schillstrom714f0952010-10-19 10:38:48 +0200278 rt = (struct rt6_info *) dst;
279 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
Julian Anastasovc90558d2013-03-21 11:57:59 +0200280 __ip_vs_dst_set(dest, dst_clone(&rt->dst), cookie);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200281 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000282 &dest->addr.in6, &dest->dst_saddr.in6,
Changli Gaod8d1f302010-06-10 23:31:35 -0700283 atomic_read(&rt->dst.__refcnt));
Julius Volz38cdcc92008-09-02 15:55:44 +0200284 }
Hans Schillstrom714f0952010-10-19 10:38:48 +0200285 if (ret_saddr)
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000286 *ret_saddr = dest->dst_saddr.in6;
Julius Volz38cdcc92008-09-02 15:55:44 +0200287 spin_unlock(&dest->dst_lock);
288 } else {
Julian Anastasovfc604762010-10-17 16:38:15 +0300289 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200290 if (!dst)
Julius Volz38cdcc92008-09-02 15:55:44 +0200291 return NULL;
Hans Schillstrom714f0952010-10-19 10:38:48 +0200292 rt = (struct rt6_info *) dst;
Julius Volz38cdcc92008-09-02 15:55:44 +0200293 }
294
Julian Anastasovfc604762010-10-17 16:38:15 +0300295 local = __ip_vs_is_local_route6(rt);
David S. Millere58b3442011-05-12 18:22:34 -0400296 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
297 rt_mode)) {
Jesper Dangaard Brouer120b9c12012-09-26 14:05:53 +0200298 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
Julian Anastasovfc604762010-10-17 16:38:15 +0300299 local ? "local":"non-local", daddr);
300 dst_release(&rt->dst);
301 return NULL;
302 }
David S. Millere58b3442011-05-12 18:22:34 -0400303 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
Julian Anastasovfc604762010-10-17 16:38:15 +0300304 !((ort = (struct rt6_info *) skb_dst(skb)) &&
305 __ip_vs_is_local_route6(ort))) {
Jesper Dangaard Brouer120b9c12012-09-26 14:05:53 +0200306 IP_VS_DBG_RL("Redirect from non-local address %pI6c to local "
307 "requires NAT method, dest: %pI6c\n",
Julian Anastasovfc604762010-10-17 16:38:15 +0300308 &ipv6_hdr(skb)->daddr, daddr);
309 dst_release(&rt->dst);
310 return NULL;
311 }
312 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
313 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
314 IPV6_ADDR_LOOPBACK)) {
Jesper Dangaard Brouer120b9c12012-09-26 14:05:53 +0200315 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c "
316 "to non-local address, dest: %pI6c\n",
Julian Anastasovfc604762010-10-17 16:38:15 +0300317 &ipv6_hdr(skb)->saddr, daddr);
318 dst_release(&rt->dst);
319 return NULL;
320 }
321
Julius Volz38cdcc92008-09-02 15:55:44 +0200322 return rt;
323}
324#endif
325
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200327/* return NF_ACCEPT to allow forwarding or other NF_xxx on error */
328static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
329 struct ip_vs_conn *cp)
330{
331 int ret = NF_ACCEPT;
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200332
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200333 skb->ipvs_property = 1;
334 if (unlikely(cp->flags & IP_VS_CONN_F_NFCT))
335 ret = ip_vs_confirm_conntrack(skb);
336 if (ret == NF_ACCEPT) {
337 nf_reset(skb);
338 skb_forward_csum(skb);
339 }
340 return ret;
341}
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200342
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200343/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
344static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
345 struct ip_vs_conn *cp, int local)
346{
347 int ret = NF_STOLEN;
348
349 skb->ipvs_property = 1;
350 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
351 ip_vs_notrack(skb);
352 else
353 ip_vs_update_conntrack(skb, cp, 1);
354 if (!local) {
355 skb_forward_csum(skb);
356 NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
357 dst_output);
358 } else
359 ret = NF_ACCEPT;
360 return ret;
361}
362
363/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
364static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
365 struct ip_vs_conn *cp, int local)
366{
367 int ret = NF_STOLEN;
368
369 skb->ipvs_property = 1;
370 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
371 ip_vs_notrack(skb);
372 if (!local) {
373 skb_forward_csum(skb);
374 NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
375 dst_output);
376 } else
377 ret = NF_ACCEPT;
378 return ret;
379}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380
381
382/*
383 * NULL transmitter (do nothing except return NF_ACCEPT)
384 */
385int
386ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200387 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700388{
389 /* we do not touch skb and do not need pskb ptr */
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200390 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391}
392
393
394/*
395 * Bypass transmitter
396 * Let packets bypass the destination when the destination is not
397 * available, it may be only used in transparent cache cluster.
398 */
399int
400ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200401 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402{
403 struct rtable *rt; /* Route to the other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700404 struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405 int mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406
407 EnterFunction(10);
408
Julian Anastasovc90558d2013-03-21 11:57:59 +0200409 rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL,
410 NULL);
411 if (!rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 goto tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413
414 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700415 mtu = dst_mtu(&rt->dst);
Simon Horman8f1b03a2010-11-09 10:08:49 +0900416 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
417 !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 ip_rt_put(rt);
419 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
Hannes Eder1e3e2382009-08-02 11:05:41 +0000420 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 goto tx_error;
422 }
423
424 /*
425 * Call ip_send_check because we are not sure it is called
426 * after ip_defrag. Is copy-on-write needed?
427 */
428 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
429 ip_rt_put(rt);
430 return NF_STOLEN;
431 }
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700432 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433
434 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000435 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700436 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437
438 /* Another hack: avoid icmp_send in ip_fragment */
439 skb->local_df = 1;
440
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200441 ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442
443 LeaveFunction(10);
444 return NF_STOLEN;
445
446 tx_error_icmp:
447 dst_link_failure(skb);
448 tx_error:
449 kfree_skb(skb);
450 LeaveFunction(10);
451 return NF_STOLEN;
452}
453
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200454#ifdef CONFIG_IP_VS_IPV6
455int
456ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200457 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200458{
459 struct rt6_info *rt; /* Route to the other host */
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200460 int mtu;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200461
462 EnterFunction(10);
463
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200464 rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0,
Jesper Dangaard Brouer2f747132012-09-26 14:06:59 +0200465 IP_VS_RT_MODE_NON_LOCAL);
466 if (!rt)
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200467 goto tx_error_icmp;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200468
469 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700470 mtu = dst_mtu(&rt->dst);
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +0200471 if (__mtu_check_toobig_v6(skb, mtu)) {
Julian Anastasovcb591552010-10-17 16:40:51 +0300472 if (!skb->dev) {
473 struct net *net = dev_net(skb_dst(skb)->dev);
474
475 skb->dev = net->loopback_dev;
476 }
Jesper Dangaard Brouer2f747132012-09-26 14:06:59 +0200477 /* only send ICMP too big on first fragment */
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200478 if (!iph->fragoffs)
Jesper Dangaard Brouer2f747132012-09-26 14:06:59 +0200479 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Julian Anastasovcb591552010-10-17 16:40:51 +0300480 dst_release(&rt->dst);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000481 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200482 goto tx_error;
483 }
484
485 /*
486 * Call ip_send_check because we are not sure it is called
487 * after ip_defrag. Is copy-on-write needed?
488 */
489 skb = skb_share_check(skb, GFP_ATOMIC);
490 if (unlikely(skb == NULL)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700491 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200492 return NF_STOLEN;
493 }
494
495 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000496 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700497 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200498
499 /* Another hack: avoid icmp_send in ip_fragment */
500 skb->local_df = 1;
501
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200502 ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200503
504 LeaveFunction(10);
505 return NF_STOLEN;
506
507 tx_error_icmp:
508 dst_link_failure(skb);
509 tx_error:
510 kfree_skb(skb);
511 LeaveFunction(10);
512 return NF_STOLEN;
513}
514#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515
516/*
517 * NAT transmitter (only for outside-to-inside nat forwarding)
518 * Not used for related ICMP
519 */
520int
521ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200522 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523{
524 struct rtable *rt; /* Route to the other host */
525 int mtu;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700526 struct iphdr *iph = ip_hdr(skb);
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200527 int local, rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528
529 EnterFunction(10);
530
531 /* check if it is a connection of no-client-port */
532 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
Al Viro014d7302006-09-28 14:29:52 -0700533 __be16 _pt, *p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
535 if (p == NULL)
536 goto tx_error;
537 ip_vs_conn_fill_cport(cp, *p);
538 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
539 }
540
Julian Anastasovfc604762010-10-17 16:38:15 +0300541 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
Changli Gao17a8f8e2011-02-24 08:19:57 +0800542 IP_VS_RT_MODE_LOCAL |
Julian Anastasovc90558d2013-03-21 11:57:59 +0200543 IP_VS_RT_MODE_NON_LOCAL |
544 IP_VS_RT_MODE_RDR, NULL)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +0300546 local = rt->rt_flags & RTCF_LOCAL;
547 /*
548 * Avoid duplicate tuple in reply direction for NAT traffic
549 * to local address when connection is sync-ed
550 */
Igor Maravićc0cd1152011-12-12 02:58:24 +0000551#if IS_ENABLED(CONFIG_NF_CONNTRACK)
Julian Anastasovfc604762010-10-17 16:38:15 +0300552 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
553 enum ip_conntrack_info ctinfo;
Alan Cox05b4b062012-10-26 00:13:27 +0000554 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
Julian Anastasovfc604762010-10-17 16:38:15 +0300555
556 if (ct && !nf_ct_is_untracked(ct)) {
Julian Anastasov0d796412010-10-17 16:46:17 +0300557 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
558 "ip_vs_nat_xmit(): "
Julian Anastasovfc604762010-10-17 16:38:15 +0300559 "stopping DNAT to local address");
560 goto tx_error_put;
561 }
562 }
563#endif
564
565 /* From world but DNAT to loopback address? */
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000566 if (local && ipv4_is_loopback(cp->daddr.ip) &&
David S. Millerc7537962010-11-11 17:07:48 -0800567 rt_is_input_route(skb_rtable(skb))) {
Julian Anastasov0d796412010-10-17 16:46:17 +0300568 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
Julian Anastasovfc604762010-10-17 16:38:15 +0300569 "stopping DNAT to loopback address");
570 goto tx_error_put;
571 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572
573 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700574 mtu = dst_mtu(&rt->dst);
Simon Horman8f1b03a2010-11-09 10:08:49 +0900575 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
576 !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
Julian Anastasov0d796412010-10-17 16:46:17 +0300578 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
579 "ip_vs_nat_xmit(): frag needed for");
Julian Anastasovfc604762010-10-17 16:38:15 +0300580 goto tx_error_put;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 }
582
583 /* copy-on-write the packet before mangling it */
Herbert Xuaf1e1cf2007-10-14 00:39:33 -0700584 if (!skb_make_writable(skb, sizeof(struct iphdr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585 goto tx_error_put;
586
Changli Gaod8d1f302010-06-10 23:31:35 -0700587 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 goto tx_error_put;
589
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590 /* mangle the packet */
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200591 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
Julian Anastasovfc604762010-10-17 16:38:15 +0300592 goto tx_error_put;
Julius Volze7ade462008-09-02 15:55:33 +0200593 ip_hdr(skb)->daddr = cp->daddr.ip;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700594 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595
Julian Anastasovfc604762010-10-17 16:38:15 +0300596 if (!local) {
597 /* drop old route */
598 skb_dst_drop(skb);
599 skb_dst_set(skb, &rt->dst);
Julian Anastasov183dce52013-03-21 11:58:03 +0200600 } else
Julian Anastasovfc604762010-10-17 16:38:15 +0300601 ip_rt_put(rt);
Julian Anastasovfc604762010-10-17 16:38:15 +0300602
Julian Anastasov0d796412010-10-17 16:46:17 +0300603 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
605 /* FIXME: when application helper enlarges the packet and the length
606 is larger than the MTU of outgoing device, there will be still
607 MTU problem. */
608
609 /* Another hack: avoid icmp_send in ip_fragment */
610 skb->local_df = 1;
611
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200612 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613
614 LeaveFunction(10);
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200615 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616
617 tx_error_icmp:
618 dst_link_failure(skb);
619 tx_error:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 kfree_skb(skb);
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200621 LeaveFunction(10);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622 return NF_STOLEN;
623 tx_error_put:
624 ip_rt_put(rt);
625 goto tx_error;
626}
627
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200628#ifdef CONFIG_IP_VS_IPV6
629int
630ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200631 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200632{
633 struct rt6_info *rt; /* Route to the other host */
634 int mtu;
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200635 int local, rc;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200636
637 EnterFunction(10);
638
639 /* check if it is a connection of no-client-port */
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200640 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) {
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200641 __be16 _pt, *p;
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200642 p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200643 if (p == NULL)
644 goto tx_error;
645 ip_vs_conn_fill_cport(cp, *p);
646 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
647 }
648
Julian Anastasovfc604762010-10-17 16:38:15 +0300649 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
David S. Millere58b3442011-05-12 18:22:34 -0400650 0, (IP_VS_RT_MODE_LOCAL |
651 IP_VS_RT_MODE_NON_LOCAL |
652 IP_VS_RT_MODE_RDR))))
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200653 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +0300654 local = __ip_vs_is_local_route6(rt);
655 /*
656 * Avoid duplicate tuple in reply direction for NAT traffic
657 * to local address when connection is sync-ed
658 */
Igor Maravićc0cd1152011-12-12 02:58:24 +0000659#if IS_ENABLED(CONFIG_NF_CONNTRACK)
Julian Anastasovfc604762010-10-17 16:38:15 +0300660 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
661 enum ip_conntrack_info ctinfo;
Alan Cox05b4b062012-10-26 00:13:27 +0000662 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
Julian Anastasovfc604762010-10-17 16:38:15 +0300663
664 if (ct && !nf_ct_is_untracked(ct)) {
Julian Anastasov0d796412010-10-17 16:46:17 +0300665 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
Julian Anastasovfc604762010-10-17 16:38:15 +0300666 "ip_vs_nat_xmit_v6(): "
667 "stopping DNAT to local address");
668 goto tx_error_put;
669 }
670 }
671#endif
672
673 /* From world but DNAT to loopback address? */
674 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
675 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
Julian Anastasov0d796412010-10-17 16:46:17 +0300676 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
Julian Anastasovfc604762010-10-17 16:38:15 +0300677 "ip_vs_nat_xmit_v6(): "
678 "stopping DNAT to loopback address");
679 goto tx_error_put;
680 }
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200681
682 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700683 mtu = dst_mtu(&rt->dst);
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +0200684 if (__mtu_check_toobig_v6(skb, mtu)) {
Julian Anastasovcb591552010-10-17 16:40:51 +0300685 if (!skb->dev) {
686 struct net *net = dev_net(skb_dst(skb)->dev);
687
688 skb->dev = net->loopback_dev;
689 }
Jesper Dangaard Brouer2f747132012-09-26 14:06:59 +0200690 /* only send ICMP too big on first fragment */
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200691 if (!iph->fragoffs)
Jesper Dangaard Brouer2f747132012-09-26 14:06:59 +0200692 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Julian Anastasov0d796412010-10-17 16:46:17 +0300693 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200694 "ip_vs_nat_xmit_v6(): frag needed for");
Julian Anastasovfc604762010-10-17 16:38:15 +0300695 goto tx_error_put;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200696 }
697
698 /* copy-on-write the packet before mangling it */
699 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
700 goto tx_error_put;
701
Changli Gaod8d1f302010-06-10 23:31:35 -0700702 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200703 goto tx_error_put;
704
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200705 /* mangle the packet */
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200706 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph))
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200707 goto tx_error;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000708 ipv6_hdr(skb)->daddr = cp->daddr.in6;
Julian Anastasovfc604762010-10-17 16:38:15 +0300709
710 if (!local || !skb->dev) {
711 /* drop the old route when skb is not shared */
712 skb_dst_drop(skb);
713 skb_dst_set(skb, &rt->dst);
714 } else {
715 /* destined to loopback, do we need to change route? */
716 dst_release(&rt->dst);
717 }
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200718
Julian Anastasov0d796412010-10-17 16:46:17 +0300719 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200720
721 /* FIXME: when application helper enlarges the packet and the length
722 is larger than the MTU of outgoing device, there will be still
723 MTU problem. */
724
725 /* Another hack: avoid icmp_send in ip_fragment */
726 skb->local_df = 1;
727
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200728 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200729
730 LeaveFunction(10);
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200731 return rc;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200732
733tx_error_icmp:
734 dst_link_failure(skb);
735tx_error:
736 LeaveFunction(10);
737 kfree_skb(skb);
738 return NF_STOLEN;
739tx_error_put:
Changli Gaod8d1f302010-06-10 23:31:35 -0700740 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200741 goto tx_error;
742}
743#endif
744
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745
746/*
747 * IP Tunneling transmitter
748 *
749 * This function encapsulates the packet in a new IP packet, its
750 * destination will be set to cp->daddr. Most code of this function
751 * is taken from ipip.c.
752 *
753 * It is used in VS/TUN cluster. The load balancer selects a real
754 * server from a cluster based on a scheduling algorithm,
755 * encapsulates the request packet and forwards it to the selected
756 * server. For example, all real servers are configured with
757 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
758 * the encapsulated packet, it will decapsulate the packet, processe
759 * the request and return the response packets directly to the client
760 * without passing the load balancer. This can greatly increase the
761 * scalability of virtual server.
762 *
763 * Used for ANY protocol
764 */
765int
766ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200767 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768{
Julian Anastasov3654e612012-07-20 11:59:53 +0300769 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770 struct rtable *rt; /* Route to the other host */
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000771 __be32 saddr; /* Source for tunnel */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 struct net_device *tdev; /* Device to other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700773 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774 u8 tos = old_iph->tos;
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300775 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700777 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778 int mtu;
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200779 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780
781 EnterFunction(10);
782
Julian Anastasovfc604762010-10-17 16:38:15 +0300783 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
Julian Anastasovc90558d2013-03-21 11:57:59 +0200784 IP_VS_RT_MODE_LOCAL |
785 IP_VS_RT_MODE_NON_LOCAL |
786 IP_VS_RT_MODE_CONNECT, &saddr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +0300788 if (rt->rt_flags & RTCF_LOCAL) {
789 ip_rt_put(rt);
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200790 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
Julian Anastasovfc604762010-10-17 16:38:15 +0300791 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792
Changli Gaod8d1f302010-06-10 23:31:35 -0700793 tdev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794
Changli Gaod8d1f302010-06-10 23:31:35 -0700795 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796 if (mtu < 68) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000797 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +0300798 goto tx_error_put;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799 }
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300800 if (rt_is_output_route(skb_rtable(skb)))
David S. Miller6700c272012-07-17 03:29:28 -0700801 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300803 /* Copy DF, reset fragment offset and MF */
Julian Anastasov3654e612012-07-20 11:59:53 +0300804 df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805
Julian Anastasov3654e612012-07-20 11:59:53 +0300806 if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
Hannes Eder1e3e2382009-08-02 11:05:41 +0000808 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +0300809 goto tx_error_put;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810 }
811
812 /*
813 * Okay, now see if we can stuff it in the buffer as-is.
814 */
815 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
816
817 if (skb_headroom(skb) < max_headroom
818 || skb_cloned(skb) || skb_shared(skb)) {
819 struct sk_buff *new_skb =
820 skb_realloc_headroom(skb, max_headroom);
821 if (!new_skb) {
822 ip_rt_put(rt);
823 kfree_skb(skb);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000824 IP_VS_ERR_RL("%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825 return NF_STOLEN;
826 }
Eric Dumazet5d0ba552012-06-04 01:17:19 +0000827 consume_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700829 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830 }
831
Hans Schillstrom714f0952010-10-19 10:38:48 +0200832 skb->transport_header = skb->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833
834 /* fix old IP header checksum */
835 ip_send_check(old_iph);
836
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700837 skb_push(skb, sizeof(struct iphdr));
838 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
840
841 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000842 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700843 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844
845 /*
846 * Push down and install the IPIP header.
847 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700848 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 iph->version = 4;
850 iph->ihl = sizeof(struct iphdr)>>2;
851 iph->frag_off = df;
852 iph->protocol = IPPROTO_IPIP;
853 iph->tos = tos;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000854 iph->daddr = cp->daddr.ip;
855 iph->saddr = saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 iph->ttl = old_iph->ttl;
Changli Gaod8d1f302010-06-10 23:31:35 -0700857 ip_select_ident(iph, &rt->dst, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858
859 /* Another hack: avoid icmp_send in ip_fragment */
860 skb->local_df = 1;
861
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200862 ret = ip_vs_tunnel_xmit_prepare(skb, cp);
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200863 if (ret == NF_ACCEPT)
864 ip_local_out(skb);
865 else if (ret == NF_DROP)
866 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867
868 LeaveFunction(10);
869
870 return NF_STOLEN;
871
872 tx_error_icmp:
873 dst_link_failure(skb);
874 tx_error:
875 kfree_skb(skb);
876 LeaveFunction(10);
877 return NF_STOLEN;
Julian Anastasovfc604762010-10-17 16:38:15 +0300878tx_error_put:
879 ip_rt_put(rt);
880 goto tx_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881}
882
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200883#ifdef CONFIG_IP_VS_IPV6
884int
885ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200886 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200887{
888 struct rt6_info *rt; /* Route to the other host */
Hans Schillstrom714f0952010-10-19 10:38:48 +0200889 struct in6_addr saddr; /* Source for tunnel */
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200890 struct net_device *tdev; /* Device to other host */
891 struct ipv6hdr *old_iph = ipv6_hdr(skb);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200892 struct ipv6hdr *iph; /* Our new IP header */
893 unsigned int max_headroom; /* The extra header space needed */
894 int mtu;
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200895 int ret;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200896
897 EnterFunction(10);
898
Julian Anastasovfc604762010-10-17 16:38:15 +0300899 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
David S. Millere58b3442011-05-12 18:22:34 -0400900 &saddr, 1, (IP_VS_RT_MODE_LOCAL |
901 IP_VS_RT_MODE_NON_LOCAL))))
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200902 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +0300903 if (__ip_vs_is_local_route6(rt)) {
904 dst_release(&rt->dst);
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200905 return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
Julian Anastasovfc604762010-10-17 16:38:15 +0300906 }
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200907
Changli Gaod8d1f302010-06-10 23:31:35 -0700908 tdev = rt->dst.dev;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200909
Changli Gaod8d1f302010-06-10 23:31:35 -0700910 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200911 if (mtu < IPV6_MIN_MTU) {
Hans Schillstrom714f0952010-10-19 10:38:48 +0200912 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
913 IPV6_MIN_MTU);
Julian Anastasovfc604762010-10-17 16:38:15 +0300914 goto tx_error_put;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200915 }
Eric Dumazetadf30902009-06-02 05:19:30 +0000916 if (skb_dst(skb))
David S. Miller6700c272012-07-17 03:29:28 -0700917 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200918
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +0200919 /* MTU checking: Notice that 'mtu' have been adjusted before hand */
920 if (__mtu_check_toobig_v6(skb, mtu)) {
Julian Anastasovcb591552010-10-17 16:40:51 +0300921 if (!skb->dev) {
922 struct net *net = dev_net(skb_dst(skb)->dev);
923
924 skb->dev = net->loopback_dev;
925 }
Jesper Dangaard Brouer2f747132012-09-26 14:06:59 +0200926 /* only send ICMP too big on first fragment */
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200927 if (!ipvsh->fragoffs)
Jesper Dangaard Brouer2f747132012-09-26 14:06:59 +0200928 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000929 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +0300930 goto tx_error_put;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200931 }
932
933 /*
934 * Okay, now see if we can stuff it in the buffer as-is.
935 */
936 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
937
938 if (skb_headroom(skb) < max_headroom
939 || skb_cloned(skb) || skb_shared(skb)) {
940 struct sk_buff *new_skb =
941 skb_realloc_headroom(skb, max_headroom);
942 if (!new_skb) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700943 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200944 kfree_skb(skb);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000945 IP_VS_ERR_RL("%s(): no memory\n", __func__);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200946 return NF_STOLEN;
947 }
Eric Dumazet5d0ba552012-06-04 01:17:19 +0000948 consume_skb(skb);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200949 skb = new_skb;
950 old_iph = ipv6_hdr(skb);
951 }
952
Hans Schillstrom714f0952010-10-19 10:38:48 +0200953 skb->transport_header = skb->network_header;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200954
955 skb_push(skb, sizeof(struct ipv6hdr));
956 skb_reset_network_header(skb);
957 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
958
959 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000960 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700961 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200962
963 /*
964 * Push down and install the IPIP header.
965 */
966 iph = ipv6_hdr(skb);
967 iph->version = 6;
968 iph->nexthdr = IPPROTO_IPV6;
Harvey Harrisonb7b45f42008-11-10 16:46:06 -0800969 iph->payload_len = old_iph->payload_len;
970 be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200971 iph->priority = old_iph->priority;
972 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000973 iph->daddr = cp->daddr.in6;
974 iph->saddr = saddr;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200975 iph->hop_limit = old_iph->hop_limit;
976
977 /* Another hack: avoid icmp_send in ip_fragment */
978 skb->local_df = 1;
979
Julian Anastasovb8abdf02013-03-21 11:58:01 +0200980 ret = ip_vs_tunnel_xmit_prepare(skb, cp);
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200981 if (ret == NF_ACCEPT)
982 ip6_local_out(skb);
983 else if (ret == NF_DROP)
984 kfree_skb(skb);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200985
986 LeaveFunction(10);
987
988 return NF_STOLEN;
989
990tx_error_icmp:
991 dst_link_failure(skb);
992tx_error:
993 kfree_skb(skb);
994 LeaveFunction(10);
995 return NF_STOLEN;
Julian Anastasovfc604762010-10-17 16:38:15 +0300996tx_error_put:
997 dst_release(&rt->dst);
998 goto tx_error;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200999}
1000#endif
1001
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002
1003/*
1004 * Direct Routing transmitter
1005 * Used for ANY protocol
1006 */
1007int
1008ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +02001009 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010{
1011 struct rtable *rt; /* Route to the other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001012 struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 int mtu;
1014
1015 EnterFunction(10);
1016
Julian Anastasovfc604762010-10-17 16:38:15 +03001017 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
Changli Gao17a8f8e2011-02-24 08:19:57 +08001018 IP_VS_RT_MODE_LOCAL |
Julian Anastasovad4d3ef2012-10-08 11:41:20 +00001019 IP_VS_RT_MODE_NON_LOCAL |
1020 IP_VS_RT_MODE_KNOWN_NH, NULL)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +03001022 if (rt->rt_flags & RTCF_LOCAL) {
1023 ip_rt_put(rt);
Julian Anastasovb8abdf02013-03-21 11:58:01 +02001024 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
Julian Anastasovfc604762010-10-17 16:38:15 +03001025 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026
1027 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -07001028 mtu = dst_mtu(&rt->dst);
Simon Horman8f1b03a2010-11-09 10:08:49 +09001029 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
1030 !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1032 ip_rt_put(rt);
Hannes Eder1e3e2382009-08-02 11:05:41 +00001033 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034 goto tx_error;
1035 }
1036
1037 /*
1038 * Call ip_send_check because we are not sure it is called
1039 * after ip_defrag. Is copy-on-write needed?
1040 */
1041 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
1042 ip_rt_put(rt);
1043 return NF_STOLEN;
1044 }
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001045 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046
1047 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +00001048 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07001049 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050
1051 /* Another hack: avoid icmp_send in ip_fragment */
1052 skb->local_df = 1;
1053
Julian Anastasovb8abdf02013-03-21 11:58:01 +02001054 ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055
1056 LeaveFunction(10);
1057 return NF_STOLEN;
1058
1059 tx_error_icmp:
1060 dst_link_failure(skb);
1061 tx_error:
1062 kfree_skb(skb);
1063 LeaveFunction(10);
1064 return NF_STOLEN;
1065}
1066
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001067#ifdef CONFIG_IP_VS_IPV6
1068int
1069ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +02001070 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001071{
1072 struct rt6_info *rt; /* Route to the other host */
1073 int mtu;
1074
1075 EnterFunction(10);
1076
Julian Anastasovfc604762010-10-17 16:38:15 +03001077 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
David S. Millere58b3442011-05-12 18:22:34 -04001078 0, (IP_VS_RT_MODE_LOCAL |
1079 IP_VS_RT_MODE_NON_LOCAL))))
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001080 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +03001081 if (__ip_vs_is_local_route6(rt)) {
1082 dst_release(&rt->dst);
Julian Anastasovb8abdf02013-03-21 11:58:01 +02001083 return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
Julian Anastasovfc604762010-10-17 16:38:15 +03001084 }
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001085
1086 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -07001087 mtu = dst_mtu(&rt->dst);
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +02001088 if (__mtu_check_toobig_v6(skb, mtu)) {
Julian Anastasovcb591552010-10-17 16:40:51 +03001089 if (!skb->dev) {
1090 struct net *net = dev_net(skb_dst(skb)->dev);
1091
1092 skb->dev = net->loopback_dev;
1093 }
Jesper Dangaard Brouer2f747132012-09-26 14:06:59 +02001094 /* only send ICMP too big on first fragment */
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +02001095 if (!iph->fragoffs)
Jesper Dangaard Brouer2f747132012-09-26 14:06:59 +02001096 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Changli Gaod8d1f302010-06-10 23:31:35 -07001097 dst_release(&rt->dst);
Hannes Eder1e3e2382009-08-02 11:05:41 +00001098 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001099 goto tx_error;
1100 }
1101
1102 /*
1103 * Call ip_send_check because we are not sure it is called
1104 * after ip_defrag. Is copy-on-write needed?
1105 */
1106 skb = skb_share_check(skb, GFP_ATOMIC);
1107 if (unlikely(skb == NULL)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001108 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001109 return NF_STOLEN;
1110 }
1111
1112 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +00001113 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07001114 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001115
1116 /* Another hack: avoid icmp_send in ip_fragment */
1117 skb->local_df = 1;
1118
Julian Anastasovb8abdf02013-03-21 11:58:01 +02001119 ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001120
1121 LeaveFunction(10);
1122 return NF_STOLEN;
1123
1124tx_error_icmp:
1125 dst_link_failure(skb);
1126tx_error:
1127 kfree_skb(skb);
1128 LeaveFunction(10);
1129 return NF_STOLEN;
1130}
1131#endif
1132
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133
1134/*
1135 * ICMP packet transmitter
1136 * called by the ip_vs_in_icmp
1137 */
1138int
1139ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +02001140 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1141 struct ip_vs_iphdr *iph)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142{
1143 struct rtable *rt; /* Route to the other host */
1144 int mtu;
1145 int rc;
Julian Anastasovfc604762010-10-17 16:38:15 +03001146 int local;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001147 int rt_mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148
1149 EnterFunction(10);
1150
1151 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1152 forwarded directly here, because there is no need to
1153 translate address/port back */
1154 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1155 if (cp->packet_xmit)
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +02001156 rc = cp->packet_xmit(skb, cp, pp, iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157 else
1158 rc = NF_ACCEPT;
1159 /* do not touch skb anymore */
1160 atomic_inc(&cp->in_pkts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161 goto out;
1162 }
1163
1164 /*
1165 * mangle and send the packet here (only for VS/NAT)
1166 */
1167
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001168 /* LOCALNODE from FORWARD hook is not supported */
1169 rt_mode = (hooknum != NF_INET_FORWARD) ?
1170 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1171 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
Julian Anastasovfc604762010-10-17 16:38:15 +03001172 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001173 rt_mode, NULL)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +03001175 local = rt->rt_flags & RTCF_LOCAL;
1176
1177 /*
1178 * Avoid duplicate tuple in reply direction for NAT traffic
1179 * to local address when connection is sync-ed
1180 */
Igor Maravićc0cd1152011-12-12 02:58:24 +00001181#if IS_ENABLED(CONFIG_NF_CONNTRACK)
Julian Anastasovfc604762010-10-17 16:38:15 +03001182 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1183 enum ip_conntrack_info ctinfo;
Alan Cox05b4b062012-10-26 00:13:27 +00001184 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
Julian Anastasovfc604762010-10-17 16:38:15 +03001185
1186 if (ct && !nf_ct_is_untracked(ct)) {
1187 IP_VS_DBG(10, "%s(): "
1188 "stopping DNAT to local address %pI4\n",
1189 __func__, &cp->daddr.ip);
1190 goto tx_error_put;
1191 }
1192 }
1193#endif
1194
1195 /* From world but DNAT to loopback address? */
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001196 if (local && ipv4_is_loopback(cp->daddr.ip) &&
David S. Millerc7537962010-11-11 17:07:48 -08001197 rt_is_input_route(skb_rtable(skb))) {
Julian Anastasovfc604762010-10-17 16:38:15 +03001198 IP_VS_DBG(1, "%s(): "
1199 "stopping DNAT to loopback %pI4\n",
1200 __func__, &cp->daddr.ip);
1201 goto tx_error_put;
1202 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203
1204 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -07001205 mtu = dst_mtu(&rt->dst);
Simon Horman8f1b03a2010-11-09 10:08:49 +09001206 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1207 !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
Hannes Eder1e3e2382009-08-02 11:05:41 +00001209 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +03001210 goto tx_error_put;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211 }
1212
1213 /* copy-on-write the packet before mangling it */
Herbert Xuaf1e1cf2007-10-14 00:39:33 -07001214 if (!skb_make_writable(skb, offset))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 goto tx_error_put;
1216
Changli Gaod8d1f302010-06-10 23:31:35 -07001217 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218 goto tx_error_put;
1219
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 ip_vs_nat_icmp(skb, pp, cp, 0);
1221
Julian Anastasovfc604762010-10-17 16:38:15 +03001222 if (!local) {
1223 /* drop the old route when skb is not shared */
1224 skb_dst_drop(skb);
1225 skb_dst_set(skb, &rt->dst);
Julian Anastasov183dce52013-03-21 11:58:03 +02001226 } else
Julian Anastasovfc604762010-10-17 16:38:15 +03001227 ip_rt_put(rt);
Julian Anastasovfc604762010-10-17 16:38:15 +03001228
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229 /* Another hack: avoid icmp_send in ip_fragment */
1230 skb->local_df = 1;
1231
Julian Anastasovb8abdf02013-03-21 11:58:01 +02001232 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233 goto out;
1234
1235 tx_error_icmp:
1236 dst_link_failure(skb);
1237 tx_error:
1238 dev_kfree_skb(skb);
1239 rc = NF_STOLEN;
1240 out:
1241 LeaveFunction(10);
1242 return rc;
1243 tx_error_put:
1244 ip_rt_put(rt);
1245 goto tx_error;
1246}
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001247
1248#ifdef CONFIG_IP_VS_IPV6
1249int
1250ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +02001251 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1252 struct ip_vs_iphdr *iph)
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001253{
1254 struct rt6_info *rt; /* Route to the other host */
1255 int mtu;
1256 int rc;
Julian Anastasovfc604762010-10-17 16:38:15 +03001257 int local;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001258 int rt_mode;
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001259
1260 EnterFunction(10);
1261
1262 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1263 forwarded directly here, because there is no need to
1264 translate address/port back */
1265 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1266 if (cp->packet_xmit)
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +02001267 rc = cp->packet_xmit(skb, cp, pp, iph);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001268 else
1269 rc = NF_ACCEPT;
1270 /* do not touch skb anymore */
1271 atomic_inc(&cp->in_pkts);
1272 goto out;
1273 }
1274
1275 /*
1276 * mangle and send the packet here (only for VS/NAT)
1277 */
1278
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001279 /* LOCALNODE from FORWARD hook is not supported */
1280 rt_mode = (hooknum != NF_INET_FORWARD) ?
1281 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1282 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
Julian Anastasovfc604762010-10-17 16:38:15 +03001283 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001284 0, rt_mode)))
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001285 goto tx_error_icmp;
1286
Julian Anastasovfc604762010-10-17 16:38:15 +03001287 local = __ip_vs_is_local_route6(rt);
1288 /*
1289 * Avoid duplicate tuple in reply direction for NAT traffic
1290 * to local address when connection is sync-ed
1291 */
Igor Maravićc0cd1152011-12-12 02:58:24 +00001292#if IS_ENABLED(CONFIG_NF_CONNTRACK)
Julian Anastasovfc604762010-10-17 16:38:15 +03001293 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1294 enum ip_conntrack_info ctinfo;
Alan Cox05b4b062012-10-26 00:13:27 +00001295 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
Julian Anastasovfc604762010-10-17 16:38:15 +03001296
1297 if (ct && !nf_ct_is_untracked(ct)) {
1298 IP_VS_DBG(10, "%s(): "
1299 "stopping DNAT to local address %pI6\n",
1300 __func__, &cp->daddr.in6);
1301 goto tx_error_put;
1302 }
1303 }
1304#endif
1305
1306 /* From world but DNAT to loopback address? */
1307 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1308 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
1309 IP_VS_DBG(1, "%s(): "
1310 "stopping DNAT to loopback %pI6\n",
1311 __func__, &cp->daddr.in6);
1312 goto tx_error_put;
1313 }
1314
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001315 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -07001316 mtu = dst_mtu(&rt->dst);
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +02001317 if (__mtu_check_toobig_v6(skb, mtu)) {
Julian Anastasovcb591552010-10-17 16:40:51 +03001318 if (!skb->dev) {
1319 struct net *net = dev_net(skb_dst(skb)->dev);
1320
1321 skb->dev = net->loopback_dev;
1322 }
Jesper Dangaard Brouer2f747132012-09-26 14:06:59 +02001323 /* only send ICMP too big on first fragment */
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +02001324 if (!iph->fragoffs)
Jesper Dangaard Brouer2f747132012-09-26 14:06:59 +02001325 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Hannes Eder1e3e2382009-08-02 11:05:41 +00001326 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +03001327 goto tx_error_put;
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001328 }
1329
1330 /* copy-on-write the packet before mangling it */
1331 if (!skb_make_writable(skb, offset))
1332 goto tx_error_put;
1333
Changli Gaod8d1f302010-06-10 23:31:35 -07001334 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001335 goto tx_error_put;
1336
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001337 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1338
Julian Anastasovfc604762010-10-17 16:38:15 +03001339 if (!local || !skb->dev) {
1340 /* drop the old route when skb is not shared */
1341 skb_dst_drop(skb);
1342 skb_dst_set(skb, &rt->dst);
1343 } else {
1344 /* destined to loopback, do we need to change route? */
1345 dst_release(&rt->dst);
1346 }
1347
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001348 /* Another hack: avoid icmp_send in ip_fragment */
1349 skb->local_df = 1;
1350
Julian Anastasovb8abdf02013-03-21 11:58:01 +02001351 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001352 goto out;
1353
1354tx_error_icmp:
1355 dst_link_failure(skb);
1356tx_error:
1357 dev_kfree_skb(skb);
1358 rc = NF_STOLEN;
1359out:
1360 LeaveFunction(10);
1361 return rc;
1362tx_error_put:
Changli Gaod8d1f302010-06-10 23:31:35 -07001363 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001364 goto tx_error;
1365}
1366#endif