Merge branch 'gro_tunnels'
Tom Herbert says:
====================
gro: Fixes for tunnels and GRO
This patch set addresses some issues related to tunneling and GRO:
- Fix remote checksum offload to properly deal with frag0 in GRO.
- Add support for GRO at the VXLAN tunnel (by calling gro_cells)
Testing: Ran one netperf TCP_STREAM to highlight impact of different
configurations:
GUE
Zero UDP checksum
4628.42 MBps
UDP checksums enabled
6800.51 MBps
UDP checksums and remote checksum offload
7663.82 MBps
UDP checksums and remote checksum offload using no-partial
7287.25 MBps
VXLAN
Zero UDP checksum
4112.02 MBps
UDP checksums enabled
6785.80 MBps
UDP checksums and remote checksum offload
7075.56 MBps
v2:
- Drop "gro: Pull headers into skb head for 1st skb in gro list"
from patch set
- In vxlan_remcsum and gue_remcsum return immediately if remcsum
processing was already done
- Add gro callbacks for sit offload
- Use WARN_ON_ONCE if we get a GUE protocol that does not have
GRO offload support
v3:
- Don't restore gro callbacks for sit offload
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 54615bb..61b457b 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -519,10 +519,10 @@
u32 data, struct gro_remcsum *grc,
bool nopartial)
{
- size_t start, offset, plen;
+ size_t start, offset;
if (skb->remcsum_offload)
- return NULL;
+ return vh;
if (!NAPI_GRO_CB(skb)->csum_valid)
return NULL;
@@ -532,17 +532,8 @@
offsetof(struct udphdr, check) :
offsetof(struct tcphdr, check));
- plen = hdrlen + offset + sizeof(u16);
-
- /* Pull checksum that will be written */
- if (skb_gro_header_hard(skb, off + plen)) {
- vh = skb_gro_header_slow(skb, off + plen, off);
- if (!vh)
- return NULL;
- }
-
- skb_gro_remcsum_process(skb, (void *)vh + hdrlen,
- start, offset, grc, nopartial);
+ vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen,
+ start, offset, grc, nopartial);
skb->remcsum_offload = 1;
@@ -573,7 +564,6 @@
goto out;
}
- skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
flags = ntohl(vh->vx_flags);
@@ -588,6 +578,8 @@
goto out;
}
+ skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
+
flush = 0;
for (p = *head; p; p = p->next) {
@@ -1110,6 +1102,9 @@
{
size_t start, offset, plen;
+ if (skb->remcsum_offload)
+ return vh;
+
start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
offset = start + ((data & VXLAN_RCO_UDP) ?
offsetof(struct udphdr, check) :
@@ -1213,7 +1208,7 @@
stats->rx_bytes += skb->len;
u64_stats_update_end(&stats->syncp);
- netif_rx(skb);
+ gro_cells_receive(&vxlan->gro_cells, skb);
return;
drop:
@@ -2451,6 +2446,8 @@
vxlan->dev = dev;
+ gro_cells_init(&vxlan->gro_cells, dev);
+
for (h = 0; h < FDB_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
}
@@ -2890,6 +2887,7 @@
hlist_del_rcu(&vxlan->hlist);
spin_unlock(&vn->sock_lock);
+ gro_cells_destroy(&vxlan->gro_cells);
list_del(&vxlan->next);
unregister_netdevice_queue(dev, head);
}
@@ -3098,8 +3096,10 @@
/* If vxlan->dev is in the same netns, it has already been added
* to the list by the previous loop.
*/
- if (!net_eq(dev_net(vxlan->dev), net))
+ if (!net_eq(dev_net(vxlan->dev), net)) {
+ gro_cells_destroy(&vxlan->gro_cells);
unregister_netdevice_queue(vxlan->dev, &list);
+ }
}
unregister_netdevice_many(&list);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4bd177f..6abe0d6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2311,8 +2311,7 @@
static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb)
{
- return (NAPI_GRO_CB(skb)->gro_remcsum_start - skb_headroom(skb) ==
- skb_gro_offset(skb));
+ return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb));
}
static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
@@ -2408,37 +2407,58 @@
grc->delta = 0;
}
-static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
- int start, int offset,
- struct gro_remcsum *grc,
- bool nopartial)
+static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
+ unsigned int off, size_t hdrlen,
+ int start, int offset,
+ struct gro_remcsum *grc,
+ bool nopartial)
{
__wsum delta;
+ size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
BUG_ON(!NAPI_GRO_CB(skb)->csum_valid);
if (!nopartial) {
- NAPI_GRO_CB(skb)->gro_remcsum_start =
- ((unsigned char *)ptr + start) - skb->head;
- return;
+ NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start;
+ return ptr;
}
- delta = remcsum_adjust(ptr, NAPI_GRO_CB(skb)->csum, start, offset);
+ ptr = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, off + plen)) {
+ ptr = skb_gro_header_slow(skb, off + plen, off);
+ if (!ptr)
+ return NULL;
+ }
+
+ delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum,
+ start, offset);
/* Adjust skb->csum since we changed the packet */
NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
- grc->offset = (ptr + offset) - (void *)skb->head;
+ grc->offset = off + hdrlen + offset;
grc->delta = delta;
+
+ return ptr;
}
static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
struct gro_remcsum *grc)
{
+ void *ptr;
+ size_t plen = grc->offset + sizeof(u16);
+
if (!grc->delta)
return;
- remcsum_unadjust((__sum16 *)(skb->head + grc->offset), grc->delta);
+ ptr = skb_gro_header_fast(skb, grc->offset);
+ if (skb_gro_header_hard(skb, grc->offset + sizeof(u16))) {
+ ptr = skb_gro_header_slow(skb, plen, grc->offset);
+ if (!ptr)
+ return;
+ }
+
+ remcsum_unadjust((__sum16 *)ptr, grc->delta);
}
static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 43677e6..6b32345 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -161,6 +161,7 @@
struct timer_list age_timer;
spinlock_t hash_lock;
unsigned int addrcnt;
+ struct gro_cells gro_cells;
struct vxlan_config cfg;
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 34968cd..2d1646c 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -79,7 +79,11 @@
__be16 *pd = data;
size_t start = ntohs(pd[0]);
size_t offset = ntohs(pd[1]);
- size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
+ size_t plen = sizeof(struct udphdr) + hdrlen +
+ max_t(size_t, offset + sizeof(u16), start);
+
+ if (skb->remcsum_offload)
+ return guehdr;
if (!pskb_may_pull(skb, plen))
return NULL;
@@ -221,29 +225,21 @@
static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
struct guehdr *guehdr, void *data,
- size_t hdrlen, u8 ipproto,
- struct gro_remcsum *grc, bool nopartial)
+ size_t hdrlen, struct gro_remcsum *grc,
+ bool nopartial)
{
__be16 *pd = data;
size_t start = ntohs(pd[0]);
size_t offset = ntohs(pd[1]);
- size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
if (skb->remcsum_offload)
- return NULL;
+ return guehdr;
if (!NAPI_GRO_CB(skb)->csum_valid)
return NULL;
- /* Pull checksum that will be written */
- if (skb_gro_header_hard(skb, off + plen)) {
- guehdr = skb_gro_header_slow(skb, off + plen, off);
- if (!guehdr)
- return NULL;
- }
-
- skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen,
- start, offset, grc, nopartial);
+ guehdr = skb_gro_remcsum_process(skb, (void *)guehdr, off, hdrlen,
+ start, offset, grc, nopartial);
skb->remcsum_offload = 1;
@@ -307,10 +303,10 @@
if (flags & GUE_PFLAG_REMCSUM) {
guehdr = gue_gro_remcsum(skb, off, guehdr,
- data + doffset, hdrlen,
- guehdr->proto_ctype, &grc,
+ data + doffset, hdrlen, &grc,
!!(fou->flags &
FOU_F_REMCSUM_NOPARTIAL));
+
if (!guehdr)
goto out;
@@ -351,7 +347,7 @@
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[guehdr->proto_ctype]);
- if (WARN_ON(!ops || !ops->callbacks.gro_receive))
+ if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
goto out_unlock;
pp = ops->callbacks.gro_receive(head, skb);