igb: Only DMA sync frame length
On some platforms, syncing a buffer for DMA is expensive. Rather than
sync the whole 2K receive buffer, only synchronise the length of the
frame, which will typically be the MTU, or a much smaller TCP ACK.
For an IMX6Q, this gives around 6% increased TCP receive performance,
which is cache operations bound and reduces CPU load for TCP transmit.
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index a15f826..9bcba42 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6856,12 +6856,12 @@
**/
static bool igb_add_rx_frag(struct igb_ring *rx_ring,
struct igb_rx_buffer *rx_buffer,
+ unsigned int size,
union e1000_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
struct page *page = rx_buffer->page;
unsigned char *va = page_address(page) + rx_buffer->page_offset;
- unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
#if (PAGE_SIZE < 8192)
unsigned int truesize = IGB_RX_BUFSZ;
#else
@@ -6913,6 +6913,7 @@
union e1000_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
+ unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
struct igb_rx_buffer *rx_buffer;
struct page *page;
@@ -6948,11 +6949,11 @@
dma_sync_single_range_for_cpu(rx_ring->dev,
rx_buffer->dma,
rx_buffer->page_offset,
- IGB_RX_BUFSZ,
+ size,
DMA_FROM_DEVICE);
/* pull page into skb */
- if (igb_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+ if (igb_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
/* hand second half of page back to the ring */
igb_reuse_rx_page(rx_ring, rx_buffer);
} else {