Merge branch 'ipv6-gre-offloads'

Alexander Duyck says:

====================
Add support for offloads with IPv6 GRE tunnels

This patch series enables the use of segmentation and checksum offloads
with IPv6-based GRE tunnels.
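
At a high level, "enabling" these offloads is a matter of advertising the
right feature bits on the ip6gre/ip6gretap netdev at link creation time.
Below is a minimal sketch of that, assuming a small helper
(ip6gre_set_offloads is a hypothetical name; the feature bits are the ones
the ip6_gre.c hunk further down actually sets):

#include <linux/netdevice.h>
#include <linux/if_tunnel.h>

#define GRE6_FEATURES (NETIF_F_SG |		\
		       NETIF_F_FRAGLIST |	\
		       NETIF_F_HIGHDMA |	\
		       NETIF_F_HW_CSUM)

/* Hypothetical helper mirroring what the ip6_gre.c newlink path does
 * in this series.
 */
static void ip6gre_set_offloads(struct net_device *dev, __be16 o_flags)
{
	dev->features		|= GRE6_FEATURES;
	dev->hw_features	|= GRE6_FEATURES;

	/* TCP segmentation offload is not supported when we generate
	 * output sequences, so GSO is only advertised without GRE_SEQ.
	 */
	if (!(o_flags & GRE_SEQ)) {
		dev->features    |= NETIF_F_GSO_SOFTWARE;
		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
	}
}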

In order to enable this series I had to change iptunnel_handle_offloads so
that it no longer frees the skb.  This was necessary because several paths
in the IPv6 GRE code still need the skb after a failure so that they can
free it themselves.  As it turns out, I believe this also fixes a bug that
was present in the FOU/GUE based tunnels.
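
To make that concrete, the calling convention changes from an
ERR_PTR-returning helper that consumed the skb on failure to an
int-returning helper that leaves the skb alone.  The fragment below is
lifted from the ipip.c hunk further down and trimmed for illustration:

	/* Before this series: the helper freed the skb on error, so the
	 * error path had to bypass the normal kfree_skb() cleanup.
	 */
	skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP);
	if (IS_ERR(skb))
		goto out;		/* skb is already gone */

	/* After this series: the helper returns 0 or a negative errno and
	 * never frees the skb, so the error path can share the existing
	 * tx_error label, which does kfree_skb(skb) itself.
	 */
	if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP))
		goto tx_error;		/* caller still owns the skb */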

Below is a quick breakdown of the performance gains seen with a simple
netperf test passing traffic through an ip6gretap tunnel and then an i40e
interface:

Throughput   Throughput   Local CPU   Local Service   Result
             Units        Util (%)    Demand          Tag
3544.93      10^6bits/s   6.30        4.656           "before"
13081.75     10^6bits/s   3.75        0.752           "after"
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index a9fbf17..efbc7ce 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -696,16 +696,12 @@
 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
 			+ GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr);
 	err = skb_cow_head(skb, min_headroom);
-	if (unlikely(err)) {
-		kfree_skb(skb);
+	if (unlikely(err))
 		goto free_rt;
-	}
 
-	skb = udp_tunnel_handle_offloads(skb, udp_sum);
-	if (IS_ERR(skb)) {
-		err = PTR_ERR(skb);
+	err = udp_tunnel_handle_offloads(skb, udp_sum);
+	if (err)
 		goto free_rt;
-	}
 
 	gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
 	geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
@@ -733,16 +729,12 @@
 	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
 			+ GENEVE_BASE_HLEN + opt_len + sizeof(struct ipv6hdr);
 	err = skb_cow_head(skb, min_headroom);
-	if (unlikely(err)) {
-		kfree_skb(skb);
+	if (unlikely(err))
 		goto free_dst;
-	}
 
-	skb = udp_tunnel_handle_offloads(skb, udp_sum);
-	if (IS_ERR(skb)) {
-		err = PTR_ERR(skb);
+	err = udp_tunnel_handle_offloads(skb, udp_sum);
+	if (err)
 		goto free_dst;
-	}
 
 	gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
 	geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
@@ -937,7 +929,7 @@
 		err = geneve_build_skb(rt, skb, key->tun_flags, vni,
 				       info->options_len, opts, flags, xnet);
 		if (unlikely(err))
-			goto err;
+			goto tx_error;
 
 		tos = ip_tunnel_ecn_encap(key->tos, iip, skb);
 		ttl = key->ttl;
@@ -946,7 +938,7 @@
 		err = geneve_build_skb(rt, skb, 0, geneve->vni,
 				       0, NULL, flags, xnet);
 		if (unlikely(err))
-			goto err;
+			goto tx_error;
 
 		tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb);
 		ttl = geneve->ttl;
@@ -964,7 +956,7 @@
 
 tx_error:
 	dev_kfree_skb(skb);
-err:
+
 	if (err == -ELOOP)
 		dev->stats.collisions++;
 	else if (err == -ENETUNREACH)
@@ -1026,7 +1018,7 @@
 					info->options_len, opts,
 					flags, xnet);
 		if (unlikely(err))
-			goto err;
+			goto tx_error;
 
 		prio = ip_tunnel_ecn_encap(key->tos, iip, skb);
 		ttl = key->ttl;
@@ -1035,7 +1027,7 @@
 		err = geneve6_build_skb(dst, skb, 0, geneve->vni,
 					0, NULL, flags, xnet);
 		if (unlikely(err))
-			goto err;
+			goto tx_error;
 
 		prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
 					   iip, skb);
@@ -1054,7 +1046,7 @@
 
 tx_error:
 	dev_kfree_skb(skb);
-err:
+
 	if (err == -ELOOP)
 		dev->stats.collisions++;
 	else if (err == -ENETUNREACH)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index a7112b3..c2e22c2 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1797,9 +1797,9 @@
 	if (WARN_ON(!skb))
 		return -ENOMEM;
 
-	skb = iptunnel_handle_offloads(skb, type);
-	if (IS_ERR(skb))
-		return PTR_ERR(skb);
+	err = iptunnel_handle_offloads(skb, type);
+	if (err)
+		goto out_free;
 
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh->vx_flags = VXLAN_HF_VNI;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 9ae9fbb..6d79091 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -309,7 +309,7 @@
 struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
 					     gfp_t flags);
 
-struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask);
+int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask);
 
 static inline int iptunnel_pull_offloads(struct sk_buff *skb)
 {
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 2dcf1de..4f54326 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -105,8 +105,7 @@
 				    __be16 flags, __be64 tunnel_id,
 				    int md_size);
 
-static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
-							 bool udp_csum)
+static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum)
 {
 	int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
 
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index d039f8f..7ac5ec8 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -802,11 +802,11 @@
 	int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
 						       SKB_GSO_UDP_TUNNEL;
 	__be16 sport;
+	int err;
 
-	skb = iptunnel_handle_offloads(skb, type);
-
-	if (IS_ERR(skb))
-		return PTR_ERR(skb);
+	err = iptunnel_handle_offloads(skb, type);
+	if (err)
+		return err;
 
 	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
 					       skb, 0, 0, false);
@@ -826,6 +826,7 @@
 	__be16 sport;
 	void *data;
 	bool need_priv = false;
+	int err;
 
 	if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
 	    skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -836,10 +837,9 @@
 
 	optlen += need_priv ? GUE_LEN_PRIV : 0;
 
-	skb = iptunnel_handle_offloads(skb, type);
-
-	if (IS_ERR(skb))
-		return PTR_ERR(skb);
+	err = iptunnel_handle_offloads(skb, type);
+	if (err)
+		return err;
 
 	/* Get source port (based on flow hash) before skb_push */
 	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 20557f2..e88190a 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -292,6 +292,18 @@
 
 static int __init gre_offload_init(void)
 {
-	return inet_add_offload(&gre_offload, IPPROTO_GRE);
+	int err;
+
+	err = inet_add_offload(&gre_offload, IPPROTO_GRE);
+#if IS_ENABLED(CONFIG_IPV6)
+	if (err)
+		return err;
+
+	err = inet6_add_offload(&gre_offload, IPPROTO_GRE);
+	if (err)
+		inet_del_offload(&gre_offload, IPPROTO_GRE);
+#endif
+
+	return err;
 }
 device_initcall(gre_offload_init);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index af5d1f3..eedd829 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -500,8 +500,7 @@
 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
 }
 
-static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
-					   bool csum)
+static int gre_handle_offloads(struct sk_buff *skb, bool csum)
 {
 	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 }
@@ -568,11 +567,8 @@
 	}
 
 	/* Push Tunnel header. */
-	skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
-	if (IS_ERR(skb)) {
-		skb = NULL;
+	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
 		goto err_free_rt;
-	}
 
 	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
 	build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
@@ -640,16 +636,14 @@
 		tnl_params = &tunnel->parms.iph;
 	}
 
-	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
-	if (IS_ERR(skb))
-		goto out;
+	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
+		goto free_skb;
 
 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
 	return NETDEV_TX_OK;
 
 free_skb:
 	kfree_skb(skb);
-out:
 	dev->stats.tx_dropped++;
 	return NETDEV_TX_OK;
 }
@@ -664,9 +658,8 @@
 		return NETDEV_TX_OK;
 	}
 
-	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
-	if (IS_ERR(skb))
-		goto out;
+	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
+		goto free_skb;
 
 	if (skb_cow_head(skb, dev->needed_headroom))
 		goto free_skb;
@@ -676,7 +669,6 @@
 
 free_skb:
 	kfree_skb(skb);
-out:
 	dev->stats.tx_dropped++;
 	return NETDEV_TX_OK;
 }
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 43445df..f46c5c8 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -146,8 +146,8 @@
 }
 EXPORT_SYMBOL_GPL(iptunnel_metadata_reply);
 
-struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
-					 int gso_type_mask)
+int iptunnel_handle_offloads(struct sk_buff *skb,
+			     int gso_type_mask)
 {
 	int err;
 
@@ -159,9 +159,9 @@
 	if (skb_is_gso(skb)) {
 		err = skb_unclone(skb, GFP_ATOMIC);
 		if (unlikely(err))
-			goto error;
+			return err;
 		skb_shinfo(skb)->gso_type |= gso_type_mask;
-		return skb;
+		return 0;
 	}
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
@@ -174,10 +174,7 @@
 		skb->encapsulation = 0;
 	}
 
-	return skb;
-error:
-	kfree_skb(skb);
-	return ERR_PTR(err);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(iptunnel_handle_offloads);
 
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ec51d02..9282748 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -219,9 +219,8 @@
 	if (unlikely(skb->protocol != htons(ETH_P_IP)))
 		goto tx_error;
 
-	skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP);
-	if (IS_ERR(skb))
-		goto out;
+	if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP))
+		goto tx_error;
 
 	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
 
@@ -230,7 +229,7 @@
 
 tx_error:
 	kfree_skb(skb);
-out:
+
 	dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
 }
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4e636e6..ca5a2c5 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -598,6 +598,18 @@
 	opt->ops.opt_nflen = 8;
 }
 
+static __sum16 gre6_checksum(struct sk_buff *skb)
+{
+	__wsum csum;
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		csum = lco_csum(skb);
+	else
+		csum = skb_checksum(skb, sizeof(struct ipv6hdr),
+				    skb->len - sizeof(struct ipv6hdr), 0);
+	return csum_fold(csum);
+}
+
 static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 			 struct net_device *dev,
 			 __u8 dsfield,
@@ -609,7 +621,7 @@
 	struct net *net = tunnel->net;
 	struct net_device *tdev;    /* Device to other host */
 	struct ipv6hdr  *ipv6h;     /* Our new IP header */
-	unsigned int max_headroom = 0; /* The extra header space needed */
+	unsigned int min_headroom = 0; /* The extra header space needed */
 	int    gre_hlen;
 	struct ipv6_tel_txoption opt;
 	int    mtu;
@@ -617,7 +629,6 @@
 	struct net_device_stats *stats = &tunnel->dev->stats;
 	int err = -1;
 	u8 proto;
-	struct sk_buff *new_skb;
 	__be16 protocol;
 
 	if (dev->type == ARPHRD_ETHER)
@@ -660,14 +671,14 @@
 
 	mtu = dst_mtu(dst) - sizeof(*ipv6h);
 	if (encap_limit >= 0) {
-		max_headroom += 8;
+		min_headroom += 8;
 		mtu -= 8;
 	}
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
 	if (skb_dst(skb))
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
-	if (skb->len > mtu) {
+	if (skb->len > mtu && !skb_is_gso(skb)) {
 		*pmtu = mtu;
 		err = -EMSGSIZE;
 		goto tx_err_dst_release;
@@ -685,20 +696,19 @@
 
 	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
 
-	max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+	min_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
 
-	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
-	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
-		new_skb = skb_realloc_headroom(skb, max_headroom);
-		if (max_headroom > dev->needed_headroom)
-			dev->needed_headroom = max_headroom;
-		if (!new_skb)
+	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+		int head_delta = SKB_DATA_ALIGN(min_headroom -
+						skb_headroom(skb) +
+						16);
+
+		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+				       0, GFP_ATOMIC);
+		if (min_headroom > dev->needed_headroom)
+			dev->needed_headroom = min_headroom;
+		if (unlikely(err))
 			goto tx_err_dst_release;
-
-		if (skb->sk)
-			skb_set_owner_w(new_skb, skb->sk);
-		consume_skb(skb);
-		skb = new_skb;
 	}
 
 	if (!fl6->flowi6_mark && ndst)
@@ -711,10 +721,11 @@
 		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
 	}
 
-	if (likely(!skb->encapsulation)) {
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
-	}
+	err = iptunnel_handle_offloads(skb,
+				       (tunnel->parms.o_flags & GRE_CSUM) ?
+				       SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
+	if (err)
+		goto tx_err_dst_release;
 
 	skb_push(skb, gre_hlen);
 	skb_reset_network_header(skb);
@@ -748,10 +759,11 @@
 			*ptr = tunnel->parms.o_key;
 			ptr--;
 		}
-		if (tunnel->parms.o_flags&GRE_CSUM) {
+		if ((tunnel->parms.o_flags & GRE_CSUM) &&
+		    !(skb_shinfo(skb)->gso_type &
+		      (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
 			*ptr = 0;
-			*(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1),
-				skb->len - sizeof(struct ipv6hdr));
+			*(__sum16 *)ptr = gre6_checksum(skb);
 		}
 	}
 
@@ -987,6 +999,8 @@
 				dev->mtu = rt->dst.dev->mtu - addend;
 				if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 					dev->mtu -= 8;
+				if (dev->type == ARPHRD_ETHER)
+					dev->mtu -= ETH_HLEN;
 
 				if (dev->mtu < IPV6_MIN_MTU)
 					dev->mtu = IPV6_MIN_MTU;
@@ -1505,6 +1519,11 @@
 	.ndo_get_iflink = ip6_tnl_get_iflink,
 };
 
+#define GRE6_FEATURES (NETIF_F_SG |		\
+		       NETIF_F_FRAGLIST |	\
+		       NETIF_F_HIGHDMA |		\
+		       NETIF_F_HW_CSUM)
+
 static void ip6gre_tap_setup(struct net_device *dev)
 {
 
@@ -1538,9 +1557,21 @@
 	nt->net = dev_net(dev);
 	ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
 
-	/* Can use a lockless transmit, unless we generate output sequences */
-	if (!(nt->parms.o_flags & GRE_SEQ))
+	dev->features		|= GRE6_FEATURES;
+	dev->hw_features	|= GRE6_FEATURES;
+
+	if (!(nt->parms.o_flags & GRE_SEQ)) {
+		/* TCP segmentation offload is not supported when we
+		 * generate output sequences.
+		 */
+		dev->features    |= NETIF_F_GSO_SOFTWARE;
+		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+
+		/* Can use a lockless transmit, unless we generate
+		 * output sequences
+		 */
 		dev->features |= NETIF_F_LLTX;
+	}
 
 	err = register_netdevice(dev);
 	if (err)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8338430..a13d8c1 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -913,10 +913,9 @@
 		goto tx_error;
 	}
 
-	skb = iptunnel_handle_offloads(skb, SKB_GSO_SIT);
-	if (IS_ERR(skb)) {
+	if (iptunnel_handle_offloads(skb, SKB_GSO_SIT)) {
 		ip_rt_put(rt);
-		goto out;
+		goto tx_error;
 	}
 
 	if (df) {
@@ -992,7 +991,6 @@
 	dst_link_failure(skb);
 tx_error:
 	kfree_skb(skb);
-out:
 	dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
 }
@@ -1002,15 +1000,15 @@
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr  *tiph = &tunnel->parms.iph;
 
-	skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP);
-	if (IS_ERR(skb))
-		goto out;
+	if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP))
+		goto tx_error;
 
 	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
 
 	ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
 	return NETDEV_TX_OK;
-out:
+tx_error:
+	kfree_skb(skb);
 	dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
 }
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index dc196a0..6d19d2e 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -1013,8 +1013,7 @@
 	if (IS_ERR(skb))
 		goto tx_error;
 
-	skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af));
-	if (IS_ERR(skb))
+	if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af)))
 		goto tx_error;
 
 	skb->transport_header = skb->network_header;
@@ -1105,8 +1104,7 @@
 	if (IS_ERR(skb))
 		goto tx_error;
 
-	skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af));
-	if (IS_ERR(skb))
+	if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af)))
 		goto tx_error;
 
 	skb->transport_header = skb->network_header;