net: __alloc_skb() speedup
With following patch I can reach maximum rate of my pktgen+udpsink
simulator :
- 'old' machine : dual quad core E5450 @3.00GHz
- 64 UDP rx flows (only differ by destination port)
- RPS enabled, NIC interrupts serviced on cpu0
- rps dispatched on 7 other cores. (~130.000 IPI per second)
- SLAB allocator (faster than SLUB in this workload)
- tg3 NIC
- 1.080.000 pps without a single drop at NIC level.
Idea is to add two prefetchw() calls in __alloc_skb(), one to prefetch
first sk_buff cache line, the second to prefetch the shinfo part.
Also using one memset() to initialize all skb_shared_info fields instead
of one by one to reduce number of instructions, using long word moves.
All skb_shared_info fields before 'dataref' are cleared in
__alloc_skb().
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8b9c109..a9b0e1f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -181,12 +181,14 @@
skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
if (!skb)
goto out;
+ prefetchw(skb);
size = SKB_DATA_ALIGN(size);
data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
gfp_mask, node);
if (!data)
goto nodata;
+ prefetchw(data + size);
/*
* Only clear those fields we need to clear, not those that we will
@@ -208,15 +210,8 @@
/* make sure we initialize shinfo sequentially */
shinfo = skb_shinfo(skb);
+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
- shinfo->nr_frags = 0;
- shinfo->gso_size = 0;
- shinfo->gso_segs = 0;
- shinfo->gso_type = 0;
- shinfo->ip6_frag_id = 0;
- shinfo->tx_flags.flags = 0;
- skb_frag_list_init(skb);
- memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
if (fclone) {
struct sk_buff *child = skb + 1;
@@ -505,16 +500,10 @@
return 0;
skb_release_head_state(skb);
+
shinfo = skb_shinfo(skb);
+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
- shinfo->nr_frags = 0;
- shinfo->gso_size = 0;
- shinfo->gso_segs = 0;
- shinfo->gso_type = 0;
- shinfo->ip6_frag_id = 0;
- shinfo->tx_flags.flags = 0;
- skb_frag_list_init(skb);
- memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
memset(skb, 0, offsetof(struct sk_buff, tail));
skb->data = skb->head + NET_SKB_PAD;