Merge branch 'lwt_arp'
Jiri Benc says:
====================
lwtunnel: make it really work, for IPv4
One of the selling points of lwtunnel was the ability to specify the tunnel
destination using routes. However, this doesn't really work currently, as
ARP and ndisc replies are not handled correctly. ARP and ndisc replies won't
have tunnel metadata attached, thus they will be sent out with the default
parameters or not sent at all, either way never reaching the requester.
Most of the egress tunnel parameters can be inferred from the ingress
metada. The only and important exception is UDP ports. This patchset infers
the egress data from the ingress data and disallow settings of UDP ports in
tunnel routes. If there's a need for different UDP ports, a new interface
needs to be created for each port combination. Note that it's still possible
to specify the UDP ports to use, it just needs to be done while creating the
vxlan/geneve interface.
This covers only ARPs. IPv6 ndisc has the same problem but is harder to
solve, as there's already dst attached to outgoing skbs. Ideas to solve this
are welcome.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 9a6a3ba..f6dafec 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -276,6 +276,8 @@
int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, u8 proto,
u8 tos, u8 ttl, __be16 df, bool xnet);
+struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
+ gfp_t flags);
struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum,
int gso_type_mask);
diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index 34141a5..f8b0188 100644
--- a/include/uapi/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
@@ -21,8 +21,6 @@
LWTUNNEL_IP_SRC,
LWTUNNEL_IP_TTL,
LWTUNNEL_IP_TOS,
- LWTUNNEL_IP_SPORT,
- LWTUNNEL_IP_DPORT,
LWTUNNEL_IP_FLAGS,
__LWTUNNEL_IP_MAX,
};
@@ -36,8 +34,6 @@
LWTUNNEL_IP6_SRC,
LWTUNNEL_IP6_HOPLIMIT,
LWTUNNEL_IP6_TC,
- LWTUNNEL_IP6_SPORT,
- LWTUNNEL_IP6_DPORT,
LWTUNNEL_IP6_FLAGS,
__LWTUNNEL_IP6_MAX,
};
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 30409b7..f03db8b 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -113,6 +113,8 @@
#include <net/arp.h>
#include <net/ax25.h>
#include <net/netrom.h>
+#include <net/dst_metadata.h>
+#include <net/ip_tunnels.h>
#include <linux/uaccess.h>
@@ -296,7 +298,8 @@
struct net_device *dev, __be32 src_ip,
const unsigned char *dest_hw,
const unsigned char *src_hw,
- const unsigned char *target_hw, struct sk_buff *oskb)
+ const unsigned char *target_hw,
+ struct dst_entry *dst)
{
struct sk_buff *skb;
@@ -309,9 +312,7 @@
if (!skb)
return;
- if (oskb)
- skb_dst_copy(skb, oskb);
-
+ skb_dst_set(skb, dst);
arp_xmit(skb);
}
@@ -333,6 +334,7 @@
__be32 target = *(__be32 *)neigh->primary_key;
int probes = atomic_read(&neigh->probes);
struct in_device *in_dev;
+ struct dst_entry *dst = NULL;
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
@@ -381,9 +383,10 @@
}
}
+ if (skb && !(dev->priv_flags & IFF_XMIT_DST_RELEASE))
+ dst = dst_clone(skb_dst(skb));
arp_send_dst(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
- dst_hw, dev->dev_addr, NULL,
- dev->priv_flags & IFF_XMIT_DST_RELEASE ? NULL : skb);
+ dst_hw, dev->dev_addr, NULL, dst);
}
static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
@@ -649,6 +652,7 @@
int addr_type;
struct neighbour *n;
struct net *net = dev_net(dev);
+ struct dst_entry *reply_dst = NULL;
bool is_garp = false;
/* arp_rcv below verifies the ARP header and verifies the device
@@ -749,13 +753,18 @@
* cache.
*/
+ if (arp->ar_op == htons(ARPOP_REQUEST) && skb_metadata_dst(skb))
+ reply_dst = (struct dst_entry *)
+ iptunnel_metadata_reply(skb_metadata_dst(skb),
+ GFP_ATOMIC);
+
/* Special case: IPv4 duplicate address detection packet (RFC2131) */
if (sip == 0) {
if (arp->ar_op == htons(ARPOP_REQUEST) &&
inet_addr_type_dev_table(net, dev, tip) == RTN_LOCAL &&
!arp_ignore(in_dev, sip, tip))
- arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
- dev->dev_addr, sha);
+ arp_send_dst(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip,
+ sha, dev->dev_addr, sha, reply_dst);
goto out;
}
@@ -774,9 +783,10 @@
if (!dont_send) {
n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
if (n) {
- arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
- dev, tip, sha, dev->dev_addr,
- sha);
+ arp_send_dst(ARPOP_REPLY, ETH_P_ARP,
+ sip, dev, tip, sha,
+ dev->dev_addr, sha,
+ reply_dst);
neigh_release(n);
}
}
@@ -794,9 +804,10 @@
if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
skb->pkt_type == PACKET_HOST ||
NEIGH_VAR(in_dev->arp_parms, PROXY_DELAY) == 0) {
- arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
- dev, tip, sha, dev->dev_addr,
- sha);
+ arp_send_dst(ARPOP_REPLY, ETH_P_ARP,
+ sip, dev, tip, sha,
+ dev->dev_addr, sha,
+ reply_dst);
} else {
pneigh_enqueue(&arp_tbl,
in_dev->arp_parms, skb);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 9b97204..84dce6a 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -46,6 +46,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
+#include <net/dst_metadata.h>
int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
@@ -119,6 +120,33 @@
}
EXPORT_SYMBOL_GPL(iptunnel_pull_header);
+struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
+ gfp_t flags)
+{
+ struct metadata_dst *res;
+ struct ip_tunnel_info *dst, *src;
+
+ if (!md || md->u.tun_info.mode & IP_TUNNEL_INFO_TX)
+ return NULL;
+
+ res = metadata_dst_alloc(0, flags);
+ if (!res)
+ return NULL;
+
+ dst = &res->u.tun_info;
+ src = &md->u.tun_info;
+ dst->key.tun_id = src->key.tun_id;
+ if (src->mode & IP_TUNNEL_INFO_IPV6)
+ memcpy(&dst->key.u.ipv6.dst, &src->key.u.ipv6.src,
+ sizeof(struct in6_addr));
+ else
+ dst->key.u.ipv4.dst = src->key.u.ipv4.src;
+ dst->mode = src->mode | IP_TUNNEL_INFO_TX;
+
+ return res;
+}
+EXPORT_SYMBOL_GPL(iptunnel_metadata_reply);
+
struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
bool csum_help,
int gso_type_mask)
@@ -198,8 +226,6 @@
[LWTUNNEL_IP_SRC] = { .type = NLA_U32 },
[LWTUNNEL_IP_TTL] = { .type = NLA_U8 },
[LWTUNNEL_IP_TOS] = { .type = NLA_U8 },
- [LWTUNNEL_IP_SPORT] = { .type = NLA_U16 },
- [LWTUNNEL_IP_DPORT] = { .type = NLA_U16 },
[LWTUNNEL_IP_FLAGS] = { .type = NLA_U16 },
};
@@ -239,12 +265,6 @@
if (tb[LWTUNNEL_IP_TOS])
tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]);
- if (tb[LWTUNNEL_IP_SPORT])
- tun_info->key.tp_src = nla_get_be16(tb[LWTUNNEL_IP_SPORT]);
-
- if (tb[LWTUNNEL_IP_DPORT])
- tun_info->key.tp_dst = nla_get_be16(tb[LWTUNNEL_IP_DPORT]);
-
if (tb[LWTUNNEL_IP_FLAGS])
tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP_FLAGS]);
@@ -266,8 +286,6 @@
nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) ||
nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) ||
- nla_put_u16(skb, LWTUNNEL_IP_SPORT, tun_info->key.tp_src) ||
- nla_put_u16(skb, LWTUNNEL_IP_DPORT, tun_info->key.tp_dst) ||
nla_put_u16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags))
return -ENOMEM;
@@ -281,8 +299,6 @@
+ nla_total_size(4) /* LWTUNNEL_IP_SRC */
+ nla_total_size(1) /* LWTUNNEL_IP_TOS */
+ nla_total_size(1) /* LWTUNNEL_IP_TTL */
- + nla_total_size(2) /* LWTUNNEL_IP_SPORT */
- + nla_total_size(2) /* LWTUNNEL_IP_DPORT */
+ nla_total_size(2); /* LWTUNNEL_IP_FLAGS */
}
@@ -305,8 +321,6 @@
[LWTUNNEL_IP6_SRC] = { .len = sizeof(struct in6_addr) },
[LWTUNNEL_IP6_HOPLIMIT] = { .type = NLA_U8 },
[LWTUNNEL_IP6_TC] = { .type = NLA_U8 },
- [LWTUNNEL_IP6_SPORT] = { .type = NLA_U16 },
- [LWTUNNEL_IP6_DPORT] = { .type = NLA_U16 },
[LWTUNNEL_IP6_FLAGS] = { .type = NLA_U16 },
};
@@ -346,12 +360,6 @@
if (tb[LWTUNNEL_IP6_TC])
tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]);
- if (tb[LWTUNNEL_IP6_SPORT])
- tun_info->key.tp_src = nla_get_be16(tb[LWTUNNEL_IP6_SPORT]);
-
- if (tb[LWTUNNEL_IP6_DPORT])
- tun_info->key.tp_dst = nla_get_be16(tb[LWTUNNEL_IP6_DPORT]);
-
if (tb[LWTUNNEL_IP6_FLAGS])
tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP6_FLAGS]);
@@ -373,8 +381,6 @@
nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.ttl) ||
- nla_put_u16(skb, LWTUNNEL_IP6_SPORT, tun_info->key.tp_src) ||
- nla_put_u16(skb, LWTUNNEL_IP6_DPORT, tun_info->key.tp_dst) ||
nla_put_u16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags))
return -ENOMEM;
@@ -388,8 +394,6 @@
+ nla_total_size(16) /* LWTUNNEL_IP6_SRC */
+ nla_total_size(1) /* LWTUNNEL_IP6_HOPLIMIT */
+ nla_total_size(1) /* LWTUNNEL_IP6_TC */
- + nla_total_size(2) /* LWTUNNEL_IP6_SPORT */
- + nla_total_size(2) /* LWTUNNEL_IP6_DPORT */
+ nla_total_size(2); /* LWTUNNEL_IP6_FLAGS */
}