blob: 1060bd52a16540a81bd09d0ed49546bf378dc4da [file] [log] [blame]
/*
 * ip_vs_xmit.c: various packet transmitters for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 * Description of forwarding methods:
 * - all transmitters are called from LOCAL_IN (remote clients) and
 *   LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
 * - not all connections have destination server, for example,
 *   connections in backup server when fwmark is used
 * - bypass connections use daddr from packet
 * LOCAL_OUT rules:
 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
 * - skb->pkt_type is not set yet
 * - the only place where we can see skb->sk != NULL
 */
25
Hannes Eder9aada7a2009-07-30 14:29:44 -070026#define KMSG_COMPONENT "IPVS"
27#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <linux/kernel.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090030#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/tcp.h> /* for tcphdr */
Herbert Xuc439cb22008-01-11 19:14:00 -080032#include <net/ip.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <net/tcp.h> /* for csum_tcpudp_magic */
34#include <net/udp.h>
35#include <net/icmp.h> /* for icmp_send */
36#include <net/route.h> /* for ip_route_output */
Julius Volz38cdcc92008-09-02 15:55:44 +020037#include <net/ipv6.h>
38#include <net/ip6_route.h>
Hans Schillstrom714f0952010-10-19 10:38:48 +020039#include <net/addrconf.h>
Julius Volz38cdcc92008-09-02 15:55:44 +020040#include <linux/icmpv6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/netfilter.h>
42#include <linux/netfilter_ipv4.h>
43
44#include <net/ip_vs.h>
45
/* Route lookup mode flags; combined and tested as a bitmask by the callers */
enum {
	IP_VS_RT_MODE_LOCAL	= 1 << 0,	/* Allow local dest */
	IP_VS_RT_MODE_NON_LOCAL	= 1 << 1,	/* Allow non-local dest */
	IP_VS_RT_MODE_RDR	= 1 << 2,	/* Allow redirect from remote
						 * daddr to local
						 */
	IP_VS_RT_MODE_CONNECT	= 1 << 3,	/* Always bind route to saddr */
};
Linus Torvalds1da177e2005-04-16 15:20:36 -070054
55/*
56 * Destination cache to speed up outgoing route lookup
57 */
58static inline void
Hans Schillstrom714f0952010-10-19 10:38:48 +020059__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
60 u32 dst_cookie)
Linus Torvalds1da177e2005-04-16 15:20:36 -070061{
62 struct dst_entry *old_dst;
63
64 old_dst = dest->dst_cache;
65 dest->dst_cache = dst;
66 dest->dst_rtos = rtos;
Hans Schillstrom714f0952010-10-19 10:38:48 +020067 dest->dst_cookie = dst_cookie;
Linus Torvalds1da177e2005-04-16 15:20:36 -070068 dst_release(old_dst);
69}
70
71static inline struct dst_entry *
Hans Schillstrom714f0952010-10-19 10:38:48 +020072__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
Linus Torvalds1da177e2005-04-16 15:20:36 -070073{
74 struct dst_entry *dst = dest->dst_cache;
75
76 if (!dst)
77 return NULL;
Hans Schillstrom714f0952010-10-19 10:38:48 +020078 if ((dst->obsolete || rtos != dest->dst_rtos) &&
79 dst->ops->check(dst, dest->dst_cookie) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070080 dest->dst_cache = NULL;
81 dst_release(dst);
82 return NULL;
83 }
84 dst_hold(dst);
85 return dst;
86}
87
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +020088static inline bool
89__mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
90{
Patrick McHardy4cdd34082012-08-26 19:13:58 +020091 if (IP6CB(skb)->frag_max_size) {
92 /* frag_max_size tell us that, this packet have been
93 * defragmented by netfilter IPv6 conntrack module.
94 */
95 if (IP6CB(skb)->frag_max_size > mtu)
96 return true; /* largest fragment violate MTU */
97 }
98 else if (skb->len > mtu && !skb_is_gso(skb)) {
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +020099 return true; /* Packet size violate MTU size */
100 }
101 return false;
102}
103
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300104/* Get route to daddr, update *saddr, optionally bind route to saddr */
105static struct rtable *do_output_route4(struct net *net, __be32 daddr,
106 u32 rtos, int rt_mode, __be32 *saddr)
107{
108 struct flowi4 fl4;
109 struct rtable *rt;
110 int loop = 0;
111
112 memset(&fl4, 0, sizeof(fl4));
113 fl4.daddr = daddr;
114 fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
115 fl4.flowi4_tos = rtos;
116
117retry:
118 rt = ip_route_output_key(net, &fl4);
119 if (IS_ERR(rt)) {
120 /* Invalid saddr ? */
121 if (PTR_ERR(rt) == -EINVAL && *saddr &&
122 rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
123 *saddr = 0;
124 flowi4_update_output(&fl4, 0, rtos, daddr, 0);
125 goto retry;
126 }
127 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
128 return NULL;
129 } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
130 ip_rt_put(rt);
131 *saddr = fl4.saddr;
132 flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
133 loop++;
134 goto retry;
135 }
136 *saddr = fl4.saddr;
137 return rt;
138}
139
Changli Gao17a8f8e2011-02-24 08:19:57 +0800140/* Get route to destination or remote server */
Ilpo Järvinenad1b30b2008-01-05 23:12:40 -0800141static struct rtable *
Julian Anastasovfc604762010-10-17 16:38:15 +0300142__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000143 __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144{
Julian Anastasovfc604762010-10-17 16:38:15 +0300145 struct net *net = dev_net(skb_dst(skb)->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 struct rtable *rt; /* Route to the other host */
Julian Anastasovfc604762010-10-17 16:38:15 +0300147 struct rtable *ort; /* Original route */
148 int local;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149
150 if (dest) {
151 spin_lock(&dest->dst_lock);
152 if (!(rt = (struct rtable *)
Hans Schillstrom714f0952010-10-19 10:38:48 +0200153 __ip_vs_dst_check(dest, rtos))) {
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300154 rt = do_output_route4(net, dest->addr.ip, rtos,
155 rt_mode, &dest->dst_saddr.ip);
156 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157 spin_unlock(&dest->dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158 return NULL;
159 }
Hans Schillstrom714f0952010-10-19 10:38:48 +0200160 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000161 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
162 "rtos=%X\n",
163 &dest->addr.ip, &dest->dst_saddr.ip,
Changli Gaod8d1f302010-06-10 23:31:35 -0700164 atomic_read(&rt->dst.__refcnt), rtos);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165 }
David S. Miller44e31252011-05-09 14:38:06 -0700166 daddr = dest->addr.ip;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000167 if (ret_saddr)
168 *ret_saddr = dest->dst_saddr.ip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 spin_unlock(&dest->dst_lock);
170 } else {
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300171 __be32 saddr = htonl(INADDR_ANY);
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000172
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300173 /* For such unconfigured boxes avoid many route lookups
174 * for performance reasons because we do not remember saddr
175 */
176 rt_mode &= ~IP_VS_RT_MODE_CONNECT;
177 rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
178 if (!rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700179 return NULL;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000180 if (ret_saddr)
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300181 *ret_saddr = saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 }
183
Julian Anastasovfc604762010-10-17 16:38:15 +0300184 local = rt->rt_flags & RTCF_LOCAL;
Changli Gao17a8f8e2011-02-24 08:19:57 +0800185 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
186 rt_mode)) {
Julian Anastasovfc604762010-10-17 16:38:15 +0300187 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
188 (rt->rt_flags & RTCF_LOCAL) ?
David S. Miller44e31252011-05-09 14:38:06 -0700189 "local":"non-local", &daddr);
Julian Anastasovfc604762010-10-17 16:38:15 +0300190 ip_rt_put(rt);
191 return NULL;
192 }
Changli Gao17a8f8e2011-02-24 08:19:57 +0800193 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
194 !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
Julian Anastasovfc604762010-10-17 16:38:15 +0300195 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
196 "requires NAT method, dest: %pI4\n",
David S. Miller44e31252011-05-09 14:38:06 -0700197 &ip_hdr(skb)->daddr, &daddr);
Julian Anastasovfc604762010-10-17 16:38:15 +0300198 ip_rt_put(rt);
199 return NULL;
200 }
201 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
202 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
203 "to non-local address, dest: %pI4\n",
David S. Miller44e31252011-05-09 14:38:06 -0700204 &ip_hdr(skb)->saddr, &daddr);
Julian Anastasovfc604762010-10-17 16:38:15 +0300205 ip_rt_put(rt);
206 return NULL;
207 }
208
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209 return rt;
210}
211
Julian Anastasovfc604762010-10-17 16:38:15 +0300212/* Reroute packet to local IPv4 stack after DNAT */
213static int
214__ip_vs_reroute_locally(struct sk_buff *skb)
215{
216 struct rtable *rt = skb_rtable(skb);
217 struct net_device *dev = rt->dst.dev;
218 struct net *net = dev_net(dev);
219 struct iphdr *iph = ip_hdr(skb);
220
David S. Millerc7537962010-11-11 17:07:48 -0800221 if (rt_is_input_route(rt)) {
Julian Anastasovfc604762010-10-17 16:38:15 +0300222 unsigned long orefdst = skb->_skb_refdst;
223
224 if (ip_route_input(skb, iph->daddr, iph->saddr,
225 iph->tos, skb->dev))
226 return 0;
227 refdst_drop(orefdst);
228 } else {
David S. Miller9d6ec932011-03-12 01:12:47 -0500229 struct flowi4 fl4 = {
230 .daddr = iph->daddr,
231 .saddr = iph->saddr,
232 .flowi4_tos = RT_TOS(iph->tos),
233 .flowi4_mark = skb->mark,
Julian Anastasovfc604762010-10-17 16:38:15 +0300234 };
Julian Anastasovfc604762010-10-17 16:38:15 +0300235
David S. Miller9d6ec932011-03-12 01:12:47 -0500236 rt = ip_route_output_key(net, &fl4);
David S. Millerb23dd4f2011-03-02 14:31:35 -0800237 if (IS_ERR(rt))
Julian Anastasovfc604762010-10-17 16:38:15 +0300238 return 0;
239 if (!(rt->rt_flags & RTCF_LOCAL)) {
240 ip_rt_put(rt);
241 return 0;
242 }
243 /* Drop old route. */
244 skb_dst_drop(skb);
245 skb_dst_set(skb, &rt->dst);
246 }
247 return 1;
248}
249
Julius Volz38cdcc92008-09-02 15:55:44 +0200250#ifdef CONFIG_IP_VS_IPV6
Hans Schillstrom714f0952010-10-19 10:38:48 +0200251
Julian Anastasovfc604762010-10-17 16:38:15 +0300252static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
253{
David S. Millerd1918542011-12-28 20:19:20 -0500254 return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
Julian Anastasovfc604762010-10-17 16:38:15 +0300255}
256
Hans Schillstrom714f0952010-10-19 10:38:48 +0200257static struct dst_entry *
258__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
259 struct in6_addr *ret_saddr, int do_xfrm)
Julius Volz38cdcc92008-09-02 15:55:44 +0200260{
Hans Schillstrom714f0952010-10-19 10:38:48 +0200261 struct dst_entry *dst;
David S. Miller4c9483b2011-03-12 16:22:43 -0500262 struct flowi6 fl6 = {
263 .daddr = *daddr,
Hans Schillstrom714f0952010-10-19 10:38:48 +0200264 };
265
David S. Miller4c9483b2011-03-12 16:22:43 -0500266 dst = ip6_route_output(net, NULL, &fl6);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200267 if (dst->error)
268 goto out_err;
269 if (!ret_saddr)
270 return dst;
David S. Miller4c9483b2011-03-12 16:22:43 -0500271 if (ipv6_addr_any(&fl6.saddr) &&
Hans Schillstrom714f0952010-10-19 10:38:48 +0200272 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
David S. Miller4c9483b2011-03-12 16:22:43 -0500273 &fl6.daddr, 0, &fl6.saddr) < 0)
Hans Schillstrom714f0952010-10-19 10:38:48 +0200274 goto out_err;
David S. Miller452edd52011-03-02 13:27:41 -0800275 if (do_xfrm) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500276 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
David S. Miller452edd52011-03-02 13:27:41 -0800277 if (IS_ERR(dst)) {
278 dst = NULL;
279 goto out_err;
280 }
281 }
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000282 *ret_saddr = fl6.saddr;
Hans Schillstrom714f0952010-10-19 10:38:48 +0200283 return dst;
284
285out_err:
286 dst_release(dst);
287 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
288 return NULL;
289}
290
Julian Anastasovfc604762010-10-17 16:38:15 +0300291/*
292 * Get route to destination or remote server
Julian Anastasovfc604762010-10-17 16:38:15 +0300293 */
Hans Schillstrom714f0952010-10-19 10:38:48 +0200294static struct rt6_info *
Julian Anastasovfc604762010-10-17 16:38:15 +0300295__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
296 struct in6_addr *daddr, struct in6_addr *ret_saddr,
297 int do_xfrm, int rt_mode)
Hans Schillstrom714f0952010-10-19 10:38:48 +0200298{
Julian Anastasovfc604762010-10-17 16:38:15 +0300299 struct net *net = dev_net(skb_dst(skb)->dev);
Julius Volz38cdcc92008-09-02 15:55:44 +0200300 struct rt6_info *rt; /* Route to the other host */
Julian Anastasovfc604762010-10-17 16:38:15 +0300301 struct rt6_info *ort; /* Original route */
Hans Schillstrom714f0952010-10-19 10:38:48 +0200302 struct dst_entry *dst;
Julian Anastasovfc604762010-10-17 16:38:15 +0300303 int local;
Julius Volz38cdcc92008-09-02 15:55:44 +0200304
305 if (dest) {
306 spin_lock(&dest->dst_lock);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200307 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
Julius Volz38cdcc92008-09-02 15:55:44 +0200308 if (!rt) {
Hans Schillstrom714f0952010-10-19 10:38:48 +0200309 u32 cookie;
Julius Volz38cdcc92008-09-02 15:55:44 +0200310
Hans Schillstrom714f0952010-10-19 10:38:48 +0200311 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000312 &dest->dst_saddr.in6,
Hans Schillstrom714f0952010-10-19 10:38:48 +0200313 do_xfrm);
314 if (!dst) {
Julius Volz38cdcc92008-09-02 15:55:44 +0200315 spin_unlock(&dest->dst_lock);
Julius Volz38cdcc92008-09-02 15:55:44 +0200316 return NULL;
317 }
Hans Schillstrom714f0952010-10-19 10:38:48 +0200318 rt = (struct rt6_info *) dst;
319 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
320 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
321 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000322 &dest->addr.in6, &dest->dst_saddr.in6,
Changli Gaod8d1f302010-06-10 23:31:35 -0700323 atomic_read(&rt->dst.__refcnt));
Julius Volz38cdcc92008-09-02 15:55:44 +0200324 }
Hans Schillstrom714f0952010-10-19 10:38:48 +0200325 if (ret_saddr)
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000326 *ret_saddr = dest->dst_saddr.in6;
Julius Volz38cdcc92008-09-02 15:55:44 +0200327 spin_unlock(&dest->dst_lock);
328 } else {
Julian Anastasovfc604762010-10-17 16:38:15 +0300329 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200330 if (!dst)
Julius Volz38cdcc92008-09-02 15:55:44 +0200331 return NULL;
Hans Schillstrom714f0952010-10-19 10:38:48 +0200332 rt = (struct rt6_info *) dst;
Julius Volz38cdcc92008-09-02 15:55:44 +0200333 }
334
Julian Anastasovfc604762010-10-17 16:38:15 +0300335 local = __ip_vs_is_local_route6(rt);
David S. Millere58b3442011-05-12 18:22:34 -0400336 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
337 rt_mode)) {
Jesper Dangaard Brouer120b9c12012-09-26 14:05:53 +0200338 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
Julian Anastasovfc604762010-10-17 16:38:15 +0300339 local ? "local":"non-local", daddr);
340 dst_release(&rt->dst);
341 return NULL;
342 }
David S. Millere58b3442011-05-12 18:22:34 -0400343 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
Julian Anastasovfc604762010-10-17 16:38:15 +0300344 !((ort = (struct rt6_info *) skb_dst(skb)) &&
345 __ip_vs_is_local_route6(ort))) {
Jesper Dangaard Brouer120b9c12012-09-26 14:05:53 +0200346 IP_VS_DBG_RL("Redirect from non-local address %pI6c to local "
347 "requires NAT method, dest: %pI6c\n",
Julian Anastasovfc604762010-10-17 16:38:15 +0300348 &ipv6_hdr(skb)->daddr, daddr);
349 dst_release(&rt->dst);
350 return NULL;
351 }
352 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
353 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
354 IPV6_ADDR_LOOPBACK)) {
Jesper Dangaard Brouer120b9c12012-09-26 14:05:53 +0200355 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c "
356 "to non-local address, dest: %pI6c\n",
Julian Anastasovfc604762010-10-17 16:38:15 +0300357 &ipv6_hdr(skb)->saddr, daddr);
358 dst_release(&rt->dst);
359 return NULL;
360 }
361
Julius Volz38cdcc92008-09-02 15:55:44 +0200362 return rt;
363}
364#endif
365
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366
367/*
368 * Release dest->dst_cache before a dest is removed
369 */
370void
371ip_vs_dst_reset(struct ip_vs_dest *dest)
372{
373 struct dst_entry *old_dst;
374
375 old_dst = dest->dst_cache;
376 dest->dst_cache = NULL;
377 dst_release(old_dst);
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300378 dest->dst_saddr.ip = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379}
380
/*
 * Mark the skb as handled by IPVS and, for connections flagged with
 * IP_VS_CONN_F_NFCT, confirm the conntrack entry. When the verdict is
 * NF_ACCEPT the netfilter state of the skb is reset and its checksum is
 * prepared for forwarding. The macro evaluates to the verdict.
 */
#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __verdict = NF_ACCEPT;				\
								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__verdict = ip_vs_confirm_conntrack(skb);	\
	if (__verdict == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__verdict;						\
})
394
/*
 * Transmit a NAT-ed skb. Connections flagged with IP_VS_CONN_F_NFCT get
 * their conntrack updated, all others are marked as not tracked.
 * NOTE: when 'local' is true this macro makes the calling function
 * return NF_ACCEPT; otherwise the skb is passed to the LOCAL_OUT hook
 * with dst_output as the continuation.
 */
#define IP_VS_XMIT_NAT(pf, skb, cp, local)			\
do {								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		ip_vs_update_conntrack(skb, cp, 1);		\
	else							\
		ip_vs_notrack(skb);				\
	if (local)						\
		return NF_ACCEPT;				\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
408
/*
 * Transmit an skb that was not NAT-ed. Connections without the
 * IP_VS_CONN_F_NFCT flag are marked as not tracked.
 * NOTE: when 'local' is true this macro makes the calling function
 * return NF_ACCEPT; otherwise the skb is passed to the LOCAL_OUT hook
 * with dst_output as the continuation.
 */
#define IP_VS_XMIT(pf, skb, cp, local)				\
do {								\
	(skb)->ipvs_property = 1;				\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))		\
		ip_vs_notrack(skb);				\
	if (local)						\
		return NF_ACCEPT;				\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
420
421
422/*
423 * NULL transmitter (do nothing except return NF_ACCEPT)
424 */
425int
426ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
427 struct ip_vs_protocol *pp)
428{
429 /* we do not touch skb and do not need pskb ptr */
Julian Anastasovfc604762010-10-17 16:38:15 +0300430 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431}
432
433
434/*
435 * Bypass transmitter
436 * Let packets bypass the destination when the destination is not
437 * available, it may be only used in transparent cache cluster.
438 */
439int
440ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
441 struct ip_vs_protocol *pp)
442{
443 struct rtable *rt; /* Route to the other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700444 struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445 int mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446
447 EnterFunction(10);
448
Changli Gao17a8f8e2011-02-24 08:19:57 +0800449 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos),
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000450 IP_VS_RT_MODE_NON_LOCAL, NULL)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 goto tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452
453 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700454 mtu = dst_mtu(&rt->dst);
Simon Horman8f1b03a2010-11-09 10:08:49 +0900455 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
456 !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457 ip_rt_put(rt);
458 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
Hannes Eder1e3e2382009-08-02 11:05:41 +0000459 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 goto tx_error;
461 }
462
463 /*
464 * Call ip_send_check because we are not sure it is called
465 * after ip_defrag. Is copy-on-write needed?
466 */
467 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
468 ip_rt_put(rt);
469 return NF_STOLEN;
470 }
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700471 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472
473 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000474 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700475 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476
477 /* Another hack: avoid icmp_send in ip_fragment */
478 skb->local_df = 1;
479
Julian Anastasovfc604762010-10-17 16:38:15 +0300480 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481
482 LeaveFunction(10);
483 return NF_STOLEN;
484
485 tx_error_icmp:
486 dst_link_failure(skb);
487 tx_error:
488 kfree_skb(skb);
489 LeaveFunction(10);
490 return NF_STOLEN;
491}
492
#ifdef CONFIG_IP_VS_IPV6
/*
 *      Bypass transmitter, IPv6 variant: the packet is routed to the
 *      daddr from its own IPv6 header.
 *      Always returns NF_STOLEN: the skb is either sent or freed here.
 */
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct rt6_info *rt;			/* Route to the other host */
	int mtu;

	EnterFunction(10);

	rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0,
				   IP_VS_RT_MODE_NON_LOCAL);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (__mtu_check_toobig_v6(skb, mtu)) {
		/* No device set on the skb: fall back to loopback */
		if (!skb->dev)
			skb->dev = dev_net(skb_dst(skb)->dev)->loopback_dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		dst_release(&rt->dst);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/* Make sure the skb is not shared before attaching the new route */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->dst);
		return NF_STOLEN;
	}

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error_icmp:
	dst_link_failure(skb);
 tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552
553/*
554 * NAT transmitter (only for outside-to-inside nat forwarding)
555 * Not used for related ICMP
556 */
557int
558ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
559 struct ip_vs_protocol *pp)
560{
561 struct rtable *rt; /* Route to the other host */
562 int mtu;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700563 struct iphdr *iph = ip_hdr(skb);
Julian Anastasovfc604762010-10-17 16:38:15 +0300564 int local;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565
566 EnterFunction(10);
567
568 /* check if it is a connection of no-client-port */
569 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
Al Viro014d7302006-09-28 14:29:52 -0700570 __be16 _pt, *p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
572 if (p == NULL)
573 goto tx_error;
574 ip_vs_conn_fill_cport(cp, *p);
575 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
576 }
577
Julian Anastasovfc604762010-10-17 16:38:15 +0300578 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
Changli Gao17a8f8e2011-02-24 08:19:57 +0800579 RT_TOS(iph->tos),
580 IP_VS_RT_MODE_LOCAL |
581 IP_VS_RT_MODE_NON_LOCAL |
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000582 IP_VS_RT_MODE_RDR, NULL)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +0300584 local = rt->rt_flags & RTCF_LOCAL;
585 /*
586 * Avoid duplicate tuple in reply direction for NAT traffic
587 * to local address when connection is sync-ed
588 */
Igor Maravićc0cd1152011-12-12 02:58:24 +0000589#if IS_ENABLED(CONFIG_NF_CONNTRACK)
Julian Anastasovfc604762010-10-17 16:38:15 +0300590 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
591 enum ip_conntrack_info ctinfo;
592 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
593
594 if (ct && !nf_ct_is_untracked(ct)) {
Julian Anastasov0d796412010-10-17 16:46:17 +0300595 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
596 "ip_vs_nat_xmit(): "
Julian Anastasovfc604762010-10-17 16:38:15 +0300597 "stopping DNAT to local address");
598 goto tx_error_put;
599 }
600 }
601#endif
602
603 /* From world but DNAT to loopback address? */
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000604 if (local && ipv4_is_loopback(cp->daddr.ip) &&
David S. Millerc7537962010-11-11 17:07:48 -0800605 rt_is_input_route(skb_rtable(skb))) {
Julian Anastasov0d796412010-10-17 16:46:17 +0300606 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
Julian Anastasovfc604762010-10-17 16:38:15 +0300607 "stopping DNAT to loopback address");
608 goto tx_error_put;
609 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610
611 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700612 mtu = dst_mtu(&rt->dst);
Simon Horman8f1b03a2010-11-09 10:08:49 +0900613 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
614 !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
Julian Anastasov0d796412010-10-17 16:46:17 +0300616 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
617 "ip_vs_nat_xmit(): frag needed for");
Julian Anastasovfc604762010-10-17 16:38:15 +0300618 goto tx_error_put;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619 }
620
621 /* copy-on-write the packet before mangling it */
Herbert Xuaf1e1cf2007-10-14 00:39:33 -0700622 if (!skb_make_writable(skb, sizeof(struct iphdr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 goto tx_error_put;
624
Changli Gaod8d1f302010-06-10 23:31:35 -0700625 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626 goto tx_error_put;
627
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628 /* mangle the packet */
Herbert Xu3db05fe2007-10-15 00:53:15 -0700629 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
Julian Anastasovfc604762010-10-17 16:38:15 +0300630 goto tx_error_put;
Julius Volze7ade462008-09-02 15:55:33 +0200631 ip_hdr(skb)->daddr = cp->daddr.ip;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700632 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633
Julian Anastasovfc604762010-10-17 16:38:15 +0300634 if (!local) {
635 /* drop old route */
636 skb_dst_drop(skb);
637 skb_dst_set(skb, &rt->dst);
638 } else {
639 ip_rt_put(rt);
640 /*
641 * Some IPv4 replies get local address from routes,
642 * not from iph, so while we DNAT after routing
643 * we need this second input/output route.
644 */
645 if (!__ip_vs_reroute_locally(skb))
646 goto tx_error;
647 }
648
Julian Anastasov0d796412010-10-17 16:46:17 +0300649 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650
651 /* FIXME: when application helper enlarges the packet and the length
652 is larger than the MTU of outgoing device, there will be still
653 MTU problem. */
654
655 /* Another hack: avoid icmp_send in ip_fragment */
656 skb->local_df = 1;
657
Julian Anastasovfc604762010-10-17 16:38:15 +0300658 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659
660 LeaveFunction(10);
661 return NF_STOLEN;
662
663 tx_error_icmp:
664 dst_link_failure(skb);
665 tx_error:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666 kfree_skb(skb);
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200667 LeaveFunction(10);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 return NF_STOLEN;
669 tx_error_put:
670 ip_rt_put(rt);
671 goto tx_error;
672}
673
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200674#ifdef CONFIG_IP_VS_IPV6
675int
676ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
677 struct ip_vs_protocol *pp)
678{
679 struct rt6_info *rt; /* Route to the other host */
680 int mtu;
Julian Anastasovfc604762010-10-17 16:38:15 +0300681 int local;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200682
683 EnterFunction(10);
684
685 /* check if it is a connection of no-client-port */
686 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
687 __be16 _pt, *p;
688 p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
689 sizeof(_pt), &_pt);
690 if (p == NULL)
691 goto tx_error;
692 ip_vs_conn_fill_cport(cp, *p);
693 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
694 }
695
Julian Anastasovfc604762010-10-17 16:38:15 +0300696 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
David S. Millere58b3442011-05-12 18:22:34 -0400697 0, (IP_VS_RT_MODE_LOCAL |
698 IP_VS_RT_MODE_NON_LOCAL |
699 IP_VS_RT_MODE_RDR))))
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200700 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +0300701 local = __ip_vs_is_local_route6(rt);
702 /*
703 * Avoid duplicate tuple in reply direction for NAT traffic
704 * to local address when connection is sync-ed
705 */
Igor Maravićc0cd1152011-12-12 02:58:24 +0000706#if IS_ENABLED(CONFIG_NF_CONNTRACK)
Julian Anastasovfc604762010-10-17 16:38:15 +0300707 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
708 enum ip_conntrack_info ctinfo;
709 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
710
711 if (ct && !nf_ct_is_untracked(ct)) {
Julian Anastasov0d796412010-10-17 16:46:17 +0300712 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
Julian Anastasovfc604762010-10-17 16:38:15 +0300713 "ip_vs_nat_xmit_v6(): "
714 "stopping DNAT to local address");
715 goto tx_error_put;
716 }
717 }
718#endif
719
720 /* From world but DNAT to loopback address? */
721 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
722 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
Julian Anastasov0d796412010-10-17 16:46:17 +0300723 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
Julian Anastasovfc604762010-10-17 16:38:15 +0300724 "ip_vs_nat_xmit_v6(): "
725 "stopping DNAT to loopback address");
726 goto tx_error_put;
727 }
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200728
729 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -0700730 mtu = dst_mtu(&rt->dst);
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +0200731 if (__mtu_check_toobig_v6(skb, mtu)) {
Julian Anastasovcb591552010-10-17 16:40:51 +0300732 if (!skb->dev) {
733 struct net *net = dev_net(skb_dst(skb)->dev);
734
735 skb->dev = net->loopback_dev;
736 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000737 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Julian Anastasov0d796412010-10-17 16:46:17 +0300738 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200739 "ip_vs_nat_xmit_v6(): frag needed for");
Julian Anastasovfc604762010-10-17 16:38:15 +0300740 goto tx_error_put;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200741 }
742
743 /* copy-on-write the packet before mangling it */
744 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
745 goto tx_error_put;
746
Changli Gaod8d1f302010-06-10 23:31:35 -0700747 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200748 goto tx_error_put;
749
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200750 /* mangle the packet */
751 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
752 goto tx_error;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000753 ipv6_hdr(skb)->daddr = cp->daddr.in6;
Julian Anastasovfc604762010-10-17 16:38:15 +0300754
755 if (!local || !skb->dev) {
756 /* drop the old route when skb is not shared */
757 skb_dst_drop(skb);
758 skb_dst_set(skb, &rt->dst);
759 } else {
760 /* destined to loopback, do we need to change route? */
761 dst_release(&rt->dst);
762 }
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200763
Julian Anastasov0d796412010-10-17 16:46:17 +0300764 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200765
766 /* FIXME: when application helper enlarges the packet and the length
767 is larger than the MTU of outgoing device, there will be still
768 MTU problem. */
769
770 /* Another hack: avoid icmp_send in ip_fragment */
771 skb->local_df = 1;
772
Julian Anastasovfc604762010-10-17 16:38:15 +0300773 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200774
775 LeaveFunction(10);
776 return NF_STOLEN;
777
778tx_error_icmp:
779 dst_link_failure(skb);
780tx_error:
781 LeaveFunction(10);
782 kfree_skb(skb);
783 return NF_STOLEN;
784tx_error_put:
Changli Gaod8d1f302010-06-10 23:31:35 -0700785 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200786 goto tx_error;
787}
788#endif
789
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790
791/*
792 * IP Tunneling transmitter
793 *
794 * This function encapsulates the packet in a new IP packet, its
795 * destination will be set to cp->daddr. Most code of this function
796 * is taken from ipip.c.
797 *
798 * It is used in VS/TUN cluster. The load balancer selects a real
799 * server from a cluster based on a scheduling algorithm,
800 * encapsulates the request packet and forwards it to the selected
801 * server. For example, all real servers are configured with
802 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
803 * the encapsulated packet, it will decapsulate the packet, processe
804 * the request and return the response packets directly to the client
805 * without passing the load balancer. This can greatly increase the
806 * scalability of virtual server.
807 *
808 * Used for ANY protocol
809 */
810int
811ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
812 struct ip_vs_protocol *pp)
813{
Julian Anastasov3654e612012-07-20 11:59:53 +0300814 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 struct rtable *rt; /* Route to the other host */
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000816 __be32 saddr; /* Source for tunnel */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817 struct net_device *tdev; /* Device to other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700818 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819 u8 tos = old_iph->tos;
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300820 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700822 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 int mtu;
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200824 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825
826 EnterFunction(10);
827
Julian Anastasovfc604762010-10-17 16:38:15 +0300828 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
Changli Gao17a8f8e2011-02-24 08:19:57 +0800829 RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300830 IP_VS_RT_MODE_NON_LOCAL |
831 IP_VS_RT_MODE_CONNECT,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000832 &saddr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +0300834 if (rt->rt_flags & RTCF_LOCAL) {
835 ip_rt_put(rt);
836 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
837 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700838
Changli Gaod8d1f302010-06-10 23:31:35 -0700839 tdev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840
Changli Gaod8d1f302010-06-10 23:31:35 -0700841 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842 if (mtu < 68) {
Hannes Eder1e3e2382009-08-02 11:05:41 +0000843 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +0300844 goto tx_error_put;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845 }
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300846 if (rt_is_output_route(skb_rtable(skb)))
David S. Miller6700c272012-07-17 03:29:28 -0700847 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848
Julian Anastasovf2edb9f2012-07-20 11:59:52 +0300849 /* Copy DF, reset fragment offset and MF */
Julian Anastasov3654e612012-07-20 11:59:53 +0300850 df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851
Julian Anastasov3654e612012-07-20 11:59:53 +0300852 if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
Hannes Eder1e3e2382009-08-02 11:05:41 +0000854 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +0300855 goto tx_error_put;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 }
857
858 /*
859 * Okay, now see if we can stuff it in the buffer as-is.
860 */
861 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
862
863 if (skb_headroom(skb) < max_headroom
864 || skb_cloned(skb) || skb_shared(skb)) {
865 struct sk_buff *new_skb =
866 skb_realloc_headroom(skb, max_headroom);
867 if (!new_skb) {
868 ip_rt_put(rt);
869 kfree_skb(skb);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000870 IP_VS_ERR_RL("%s(): no memory\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871 return NF_STOLEN;
872 }
Eric Dumazet5d0ba552012-06-04 01:17:19 +0000873 consume_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700875 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 }
877
Hans Schillstrom714f0952010-10-19 10:38:48 +0200878 skb->transport_header = skb->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879
880 /* fix old IP header checksum */
881 ip_send_check(old_iph);
882
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700883 skb_push(skb, sizeof(struct iphdr));
884 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
886
887 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +0000888 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700889 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890
891 /*
892 * Push down and install the IPIP header.
893 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700894 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 iph->version = 4;
896 iph->ihl = sizeof(struct iphdr)>>2;
897 iph->frag_off = df;
898 iph->protocol = IPPROTO_IPIP;
899 iph->tos = tos;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +0000900 iph->daddr = cp->daddr.ip;
901 iph->saddr = saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902 iph->ttl = old_iph->ttl;
Changli Gaod8d1f302010-06-10 23:31:35 -0700903 ip_select_ident(iph, &rt->dst, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904
905 /* Another hack: avoid icmp_send in ip_fragment */
906 skb->local_df = 1;
907
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200908 ret = IP_VS_XMIT_TUNNEL(skb, cp);
909 if (ret == NF_ACCEPT)
910 ip_local_out(skb);
911 else if (ret == NF_DROP)
912 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913
914 LeaveFunction(10);
915
916 return NF_STOLEN;
917
918 tx_error_icmp:
919 dst_link_failure(skb);
920 tx_error:
921 kfree_skb(skb);
922 LeaveFunction(10);
923 return NF_STOLEN;
Julian Anastasovfc604762010-10-17 16:38:15 +0300924tx_error_put:
925 ip_rt_put(rt);
926 goto tx_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927}
928
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200929#ifdef CONFIG_IP_VS_IPV6
930int
931ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
932 struct ip_vs_protocol *pp)
933{
934 struct rt6_info *rt; /* Route to the other host */
Hans Schillstrom714f0952010-10-19 10:38:48 +0200935 struct in6_addr saddr; /* Source for tunnel */
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200936 struct net_device *tdev; /* Device to other host */
937 struct ipv6hdr *old_iph = ipv6_hdr(skb);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200938 struct ipv6hdr *iph; /* Our new IP header */
939 unsigned int max_headroom; /* The extra header space needed */
940 int mtu;
Julian Anastasovf4bc17c2010-09-21 17:35:41 +0200941 int ret;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200942
943 EnterFunction(10);
944
Julian Anastasovfc604762010-10-17 16:38:15 +0300945 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
David S. Millere58b3442011-05-12 18:22:34 -0400946 &saddr, 1, (IP_VS_RT_MODE_LOCAL |
947 IP_VS_RT_MODE_NON_LOCAL))))
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200948 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +0300949 if (__ip_vs_is_local_route6(rt)) {
950 dst_release(&rt->dst);
951 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
952 }
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200953
Changli Gaod8d1f302010-06-10 23:31:35 -0700954 tdev = rt->dst.dev;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200955
Changli Gaod8d1f302010-06-10 23:31:35 -0700956 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
Hans Schillstrom714f0952010-10-19 10:38:48 +0200957 if (mtu < IPV6_MIN_MTU) {
Hans Schillstrom714f0952010-10-19 10:38:48 +0200958 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
959 IPV6_MIN_MTU);
Julian Anastasovfc604762010-10-17 16:38:15 +0300960 goto tx_error_put;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200961 }
Eric Dumazetadf30902009-06-02 05:19:30 +0000962 if (skb_dst(skb))
David S. Miller6700c272012-07-17 03:29:28 -0700963 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200964
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +0200965 /* MTU checking: Notice that 'mtu' have been adjusted before hand */
966 if (__mtu_check_toobig_v6(skb, mtu)) {
Julian Anastasovcb591552010-10-17 16:40:51 +0300967 if (!skb->dev) {
968 struct net *net = dev_net(skb_dst(skb)->dev);
969
970 skb->dev = net->loopback_dev;
971 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000972 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000973 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +0300974 goto tx_error_put;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200975 }
976
977 /*
978 * Okay, now see if we can stuff it in the buffer as-is.
979 */
980 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
981
982 if (skb_headroom(skb) < max_headroom
983 || skb_cloned(skb) || skb_shared(skb)) {
984 struct sk_buff *new_skb =
985 skb_realloc_headroom(skb, max_headroom);
986 if (!new_skb) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700987 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200988 kfree_skb(skb);
Hannes Eder1e3e2382009-08-02 11:05:41 +0000989 IP_VS_ERR_RL("%s(): no memory\n", __func__);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200990 return NF_STOLEN;
991 }
Eric Dumazet5d0ba552012-06-04 01:17:19 +0000992 consume_skb(skb);
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200993 skb = new_skb;
994 old_iph = ipv6_hdr(skb);
995 }
996
Hans Schillstrom714f0952010-10-19 10:38:48 +0200997 skb->transport_header = skb->network_header;
Julius Volzb3cdd2a72008-09-02 15:55:45 +0200998
999 skb_push(skb, sizeof(struct ipv6hdr));
1000 skb_reset_network_header(skb);
1001 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1002
1003 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +00001004 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07001005 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001006
1007 /*
1008 * Push down and install the IPIP header.
1009 */
1010 iph = ipv6_hdr(skb);
1011 iph->version = 6;
1012 iph->nexthdr = IPPROTO_IPV6;
Harvey Harrisonb7b45f42008-11-10 16:46:06 -08001013 iph->payload_len = old_iph->payload_len;
1014 be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001015 iph->priority = old_iph->priority;
1016 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001017 iph->daddr = cp->daddr.in6;
1018 iph->saddr = saddr;
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001019 iph->hop_limit = old_iph->hop_limit;
1020
1021 /* Another hack: avoid icmp_send in ip_fragment */
1022 skb->local_df = 1;
1023
Julian Anastasovf4bc17c2010-09-21 17:35:41 +02001024 ret = IP_VS_XMIT_TUNNEL(skb, cp);
1025 if (ret == NF_ACCEPT)
1026 ip6_local_out(skb);
1027 else if (ret == NF_DROP)
1028 kfree_skb(skb);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001029
1030 LeaveFunction(10);
1031
1032 return NF_STOLEN;
1033
1034tx_error_icmp:
1035 dst_link_failure(skb);
1036tx_error:
1037 kfree_skb(skb);
1038 LeaveFunction(10);
1039 return NF_STOLEN;
Julian Anastasovfc604762010-10-17 16:38:15 +03001040tx_error_put:
1041 dst_release(&rt->dst);
1042 goto tx_error;
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001043}
1044#endif
1045
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046
1047/*
1048 * Direct Routing transmitter
1049 * Used for ANY protocol
1050 */
1051int
1052ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1053 struct ip_vs_protocol *pp)
1054{
1055 struct rtable *rt; /* Route to the other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001056 struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057 int mtu;
1058
1059 EnterFunction(10);
1060
Julian Anastasovfc604762010-10-17 16:38:15 +03001061 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
Changli Gao17a8f8e2011-02-24 08:19:57 +08001062 RT_TOS(iph->tos),
1063 IP_VS_RT_MODE_LOCAL |
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001064 IP_VS_RT_MODE_NON_LOCAL, NULL)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +03001066 if (rt->rt_flags & RTCF_LOCAL) {
1067 ip_rt_put(rt);
1068 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
1069 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070
1071 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -07001072 mtu = dst_mtu(&rt->dst);
Simon Horman8f1b03a2010-11-09 10:08:49 +09001073 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
1074 !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1076 ip_rt_put(rt);
Hannes Eder1e3e2382009-08-02 11:05:41 +00001077 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 goto tx_error;
1079 }
1080
1081 /*
1082 * Call ip_send_check because we are not sure it is called
1083 * after ip_defrag. Is copy-on-write needed?
1084 */
1085 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
1086 ip_rt_put(rt);
1087 return NF_STOLEN;
1088 }
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001089 ip_send_check(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090
1091 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +00001092 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07001093 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094
1095 /* Another hack: avoid icmp_send in ip_fragment */
1096 skb->local_df = 1;
1097
Julian Anastasovfc604762010-10-17 16:38:15 +03001098 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099
1100 LeaveFunction(10);
1101 return NF_STOLEN;
1102
1103 tx_error_icmp:
1104 dst_link_failure(skb);
1105 tx_error:
1106 kfree_skb(skb);
1107 LeaveFunction(10);
1108 return NF_STOLEN;
1109}
1110
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001111#ifdef CONFIG_IP_VS_IPV6
1112int
1113ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1114 struct ip_vs_protocol *pp)
1115{
1116 struct rt6_info *rt; /* Route to the other host */
1117 int mtu;
1118
1119 EnterFunction(10);
1120
Julian Anastasovfc604762010-10-17 16:38:15 +03001121 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
David S. Millere58b3442011-05-12 18:22:34 -04001122 0, (IP_VS_RT_MODE_LOCAL |
1123 IP_VS_RT_MODE_NON_LOCAL))))
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001124 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +03001125 if (__ip_vs_is_local_route6(rt)) {
1126 dst_release(&rt->dst);
1127 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
1128 }
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001129
1130 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -07001131 mtu = dst_mtu(&rt->dst);
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +02001132 if (__mtu_check_toobig_v6(skb, mtu)) {
Julian Anastasovcb591552010-10-17 16:40:51 +03001133 if (!skb->dev) {
1134 struct net *net = dev_net(skb_dst(skb)->dev);
1135
1136 skb->dev = net->loopback_dev;
1137 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001138 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Changli Gaod8d1f302010-06-10 23:31:35 -07001139 dst_release(&rt->dst);
Hannes Eder1e3e2382009-08-02 11:05:41 +00001140 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001141 goto tx_error;
1142 }
1143
1144 /*
1145 * Call ip_send_check because we are not sure it is called
1146 * after ip_defrag. Is copy-on-write needed?
1147 */
1148 skb = skb_share_check(skb, GFP_ATOMIC);
1149 if (unlikely(skb == NULL)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001150 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001151 return NF_STOLEN;
1152 }
1153
1154 /* drop old route */
Eric Dumazetadf30902009-06-02 05:19:30 +00001155 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07001156 skb_dst_set(skb, &rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001157
1158 /* Another hack: avoid icmp_send in ip_fragment */
1159 skb->local_df = 1;
1160
Julian Anastasovfc604762010-10-17 16:38:15 +03001161 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001162
1163 LeaveFunction(10);
1164 return NF_STOLEN;
1165
1166tx_error_icmp:
1167 dst_link_failure(skb);
1168tx_error:
1169 kfree_skb(skb);
1170 LeaveFunction(10);
1171 return NF_STOLEN;
1172}
1173#endif
1174
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175
1176/*
1177 * ICMP packet transmitter
1178 * called by the ip_vs_in_icmp
1179 */
1180int
1181ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001182 struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183{
1184 struct rtable *rt; /* Route to the other host */
1185 int mtu;
1186 int rc;
Julian Anastasovfc604762010-10-17 16:38:15 +03001187 int local;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001188 int rt_mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189
1190 EnterFunction(10);
1191
1192 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1193 forwarded directly here, because there is no need to
1194 translate address/port back */
1195 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1196 if (cp->packet_xmit)
1197 rc = cp->packet_xmit(skb, cp, pp);
1198 else
1199 rc = NF_ACCEPT;
1200 /* do not touch skb anymore */
1201 atomic_inc(&cp->in_pkts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001202 goto out;
1203 }
1204
1205 /*
1206 * mangle and send the packet here (only for VS/NAT)
1207 */
1208
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001209 /* LOCALNODE from FORWARD hook is not supported */
1210 rt_mode = (hooknum != NF_INET_FORWARD) ?
1211 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1212 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
Julian Anastasovfc604762010-10-17 16:38:15 +03001213 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
Changli Gao17a8f8e2011-02-24 08:19:57 +08001214 RT_TOS(ip_hdr(skb)->tos),
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001215 rt_mode, NULL)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216 goto tx_error_icmp;
Julian Anastasovfc604762010-10-17 16:38:15 +03001217 local = rt->rt_flags & RTCF_LOCAL;
1218
1219 /*
1220 * Avoid duplicate tuple in reply direction for NAT traffic
1221 * to local address when connection is sync-ed
1222 */
Igor Maravićc0cd1152011-12-12 02:58:24 +00001223#if IS_ENABLED(CONFIG_NF_CONNTRACK)
Julian Anastasovfc604762010-10-17 16:38:15 +03001224 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1225 enum ip_conntrack_info ctinfo;
1226 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1227
1228 if (ct && !nf_ct_is_untracked(ct)) {
1229 IP_VS_DBG(10, "%s(): "
1230 "stopping DNAT to local address %pI4\n",
1231 __func__, &cp->daddr.ip);
1232 goto tx_error_put;
1233 }
1234 }
1235#endif
1236
1237 /* From world but DNAT to loopback address? */
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001238 if (local && ipv4_is_loopback(cp->daddr.ip) &&
David S. Millerc7537962010-11-11 17:07:48 -08001239 rt_is_input_route(skb_rtable(skb))) {
Julian Anastasovfc604762010-10-17 16:38:15 +03001240 IP_VS_DBG(1, "%s(): "
1241 "stopping DNAT to loopback %pI4\n",
1242 __func__, &cp->daddr.ip);
1243 goto tx_error_put;
1244 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245
1246 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -07001247 mtu = dst_mtu(&rt->dst);
Simon Horman8f1b03a2010-11-09 10:08:49 +09001248 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1249 !skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
Hannes Eder1e3e2382009-08-02 11:05:41 +00001251 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +03001252 goto tx_error_put;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 }
1254
1255 /* copy-on-write the packet before mangling it */
Herbert Xuaf1e1cf2007-10-14 00:39:33 -07001256 if (!skb_make_writable(skb, offset))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257 goto tx_error_put;
1258
Changli Gaod8d1f302010-06-10 23:31:35 -07001259 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 goto tx_error_put;
1261
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262 ip_vs_nat_icmp(skb, pp, cp, 0);
1263
Julian Anastasovfc604762010-10-17 16:38:15 +03001264 if (!local) {
1265 /* drop the old route when skb is not shared */
1266 skb_dst_drop(skb);
1267 skb_dst_set(skb, &rt->dst);
1268 } else {
1269 ip_rt_put(rt);
1270 /*
1271 * Some IPv4 replies get local address from routes,
1272 * not from iph, so while we DNAT after routing
1273 * we need this second input/output route.
1274 */
1275 if (!__ip_vs_reroute_locally(skb))
1276 goto tx_error;
1277 }
1278
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279 /* Another hack: avoid icmp_send in ip_fragment */
1280 skb->local_df = 1;
1281
Julian Anastasovfc604762010-10-17 16:38:15 +03001282 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283
1284 rc = NF_STOLEN;
1285 goto out;
1286
1287 tx_error_icmp:
1288 dst_link_failure(skb);
1289 tx_error:
1290 dev_kfree_skb(skb);
1291 rc = NF_STOLEN;
1292 out:
1293 LeaveFunction(10);
1294 return rc;
1295 tx_error_put:
1296 ip_rt_put(rt);
1297 goto tx_error;
1298}
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001299
1300#ifdef CONFIG_IP_VS_IPV6
1301int
1302ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001303 struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001304{
1305 struct rt6_info *rt; /* Route to the other host */
1306 int mtu;
1307 int rc;
Julian Anastasovfc604762010-10-17 16:38:15 +03001308 int local;
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001309 int rt_mode;
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001310
1311 EnterFunction(10);
1312
1313 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1314 forwarded directly here, because there is no need to
1315 translate address/port back */
1316 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1317 if (cp->packet_xmit)
1318 rc = cp->packet_xmit(skb, cp, pp);
1319 else
1320 rc = NF_ACCEPT;
1321 /* do not touch skb anymore */
1322 atomic_inc(&cp->in_pkts);
1323 goto out;
1324 }
1325
1326 /*
1327 * mangle and send the packet here (only for VS/NAT)
1328 */
1329
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001330 /* LOCALNODE from FORWARD hook is not supported */
1331 rt_mode = (hooknum != NF_INET_FORWARD) ?
1332 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1333 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
Julian Anastasovfc604762010-10-17 16:38:15 +03001334 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
Julian Anastasovc92f5ca2011-05-10 12:46:05 +00001335 0, rt_mode)))
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001336 goto tx_error_icmp;
1337
Julian Anastasovfc604762010-10-17 16:38:15 +03001338 local = __ip_vs_is_local_route6(rt);
1339 /*
1340 * Avoid duplicate tuple in reply direction for NAT traffic
1341 * to local address when connection is sync-ed
1342 */
Igor Maravićc0cd1152011-12-12 02:58:24 +00001343#if IS_ENABLED(CONFIG_NF_CONNTRACK)
Julian Anastasovfc604762010-10-17 16:38:15 +03001344 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1345 enum ip_conntrack_info ctinfo;
1346 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1347
1348 if (ct && !nf_ct_is_untracked(ct)) {
1349 IP_VS_DBG(10, "%s(): "
1350 "stopping DNAT to local address %pI6\n",
1351 __func__, &cp->daddr.in6);
1352 goto tx_error_put;
1353 }
1354 }
1355#endif
1356
1357 /* From world but DNAT to loopback address? */
1358 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1359 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
1360 IP_VS_DBG(1, "%s(): "
1361 "stopping DNAT to loopback %pI6\n",
1362 __func__, &cp->daddr.in6);
1363 goto tx_error_put;
1364 }
1365
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001366 /* MTU checking */
Changli Gaod8d1f302010-06-10 23:31:35 -07001367 mtu = dst_mtu(&rt->dst);
Jesper Dangaard Brouer590e3f72012-08-28 22:05:51 +02001368 if (__mtu_check_toobig_v6(skb, mtu)) {
Julian Anastasovcb591552010-10-17 16:40:51 +03001369 if (!skb->dev) {
1370 struct net *net = dev_net(skb_dst(skb)->dev);
1371
1372 skb->dev = net->loopback_dev;
1373 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001374 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Hannes Eder1e3e2382009-08-02 11:05:41 +00001375 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
Julian Anastasovfc604762010-10-17 16:38:15 +03001376 goto tx_error_put;
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001377 }
1378
1379 /* copy-on-write the packet before mangling it */
1380 if (!skb_make_writable(skb, offset))
1381 goto tx_error_put;
1382
Changli Gaod8d1f302010-06-10 23:31:35 -07001383 if (skb_cow(skb, rt->dst.dev->hard_header_len))
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001384 goto tx_error_put;
1385
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001386 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1387
Julian Anastasovfc604762010-10-17 16:38:15 +03001388 if (!local || !skb->dev) {
1389 /* drop the old route when skb is not shared */
1390 skb_dst_drop(skb);
1391 skb_dst_set(skb, &rt->dst);
1392 } else {
1393 /* destined to loopback, do we need to change route? */
1394 dst_release(&rt->dst);
1395 }
1396
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001397 /* Another hack: avoid icmp_send in ip_fragment */
1398 skb->local_df = 1;
1399
Julian Anastasovfc604762010-10-17 16:38:15 +03001400 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001401
1402 rc = NF_STOLEN;
1403 goto out;
1404
1405tx_error_icmp:
1406 dst_link_failure(skb);
1407tx_error:
1408 dev_kfree_skb(skb);
1409 rc = NF_STOLEN;
1410out:
1411 LeaveFunction(10);
1412 return rc;
1413tx_error_put:
Changli Gaod8d1f302010-06-10 23:31:35 -07001414 dst_release(&rt->dst);
Julius Volzb3cdd2a72008-09-02 15:55:45 +02001415 goto tx_error;
1416}
1417#endif