GRE: Disable segmentation offloads w/ CSUM and we are encapsulated via FOU

This patch fixes an issue I found in which we were dropping frames if we
had enabled checksums on GRE headers that were encapsulated by either FOU
or GUE.  Without this patch I was barely able to get 1 Gb/s of throughput.
With this patch applied I am now at least getting around 6 Gb/s.

The issue is due to the fact that with FOU or GUE applied we do not provide
a transport offset pointing to the GRE header, nor do we offload it in
software as the GRE header is completely skipped by GSO and treated like a
VXLAN or GENEVE type header.  As such we need to prevent the stack from
generating it and also prevent GRE from generating it via any interface we
create.

Fixes: c3483384ee511 ("gro: Allow tunnel stacking in the case of FOU/GUE")
Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cb0d5d0..8395308 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2120,7 +2120,10 @@
 	/* Used in foo-over-udp, set in udp[46]_gro_receive */
 	u8	is_ipv6:1;
 
-	/* 7 bit hole */
+	/* Used in GRE, set in fou/gue_gro_receive */
+	u8	is_fou:1;
+
+	/* 6 bit hole */
 
 	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
 	__wsum	csum;
diff --git a/net/core/dev.c b/net/core/dev.c
index b9bcbe7..77a71cd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4439,6 +4439,7 @@
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
 		NAPI_GRO_CB(skb)->encap_mark = 0;
+		NAPI_GRO_CB(skb)->is_fou = 0;
 		NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
 
 		/* Setup for GRO checksum validation */
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 5a94aea..a39068b 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -203,6 +203,9 @@
 	 */
 	NAPI_GRO_CB(skb)->encap_mark = 0;
 
+	/* Flag this frame as already having an outer encap header */
+	NAPI_GRO_CB(skb)->is_fou = 1;
+
 	rcu_read_lock();
 	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
 	ops = rcu_dereference(offloads[proto]);
@@ -368,6 +371,9 @@
 	 */
 	NAPI_GRO_CB(skb)->encap_mark = 0;
 
+	/* Flag this frame as already having an outer encap header */
+	NAPI_GRO_CB(skb)->is_fou = 1;
+
 	rcu_read_lock();
 	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
 	ops = rcu_dereference(offloads[guehdr->proto_ctype]);
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index c47539d..6a5bd43 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -150,6 +150,14 @@
 	if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0)
 		goto out;
 
+	/* We can only support GRE_CSUM if we can track the location of
+	 * the GRE header.  In the case of FOU/GUE we cannot because the
+	 * outer UDP header displaces the GRE header leaving us in a state
+	 * of limbo.
+	 */
+	if ((greh->flags & GRE_CSUM) && NAPI_GRO_CB(skb)->is_fou)
+		goto out;
+
 	type = greh->protocol;
 
 	rcu_read_lock();
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 31936d3..af5d1f3 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -862,9 +862,16 @@
 	dev->hw_features	|= GRE_FEATURES;
 
 	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
-		/* TCP offload with GRE SEQ is not supported. */
-		dev->features    |= NETIF_F_GSO_SOFTWARE;
-		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+		/* TCP offload with GRE SEQ is not supported, nor
+		 * can we support 2 levels of outer headers requiring
+		 * an update.
+		 */
+		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
+		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
+			dev->features    |= NETIF_F_GSO_SOFTWARE;
+			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+		}
+
 		/* Can use a lockless transmit, unless we generate
 		 * output sequences
 		 */