vxge: prefetch skb->data

This patch implements prefetching of skb->data from a copy of the pointer
in the descriptor (which is already in the L1 cache at this point).  This
improves netperf rx performance (netperf -L 0,0 -c -H 192.168.254.2 -- -M
131072 -m 131072) by 4.9% on a P4 Xeon host.

Signed-off-by: Benjamin LaHaise <ben.lahaise@neterion.com>
Signed-off-by: Sreenivasa Honnur <sreenivasa.honnur@neterion.com>
Signed-off-by: Ramkrishna Vepa <ram.vepa@neterion.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index e93651c..094d155 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -286,6 +286,7 @@
 	skb_reserve(skb, VXGE_HW_HEADER_ETHERNET_II_802_3_ALIGN);
 
 	rx_priv->skb = skb;
+	rx_priv->skb_data = NULL;
 	rx_priv->data_size = skb_size;
 	vxge_debug_entryexit(VXGE_TRACE,
 		"%s: %s:%d Exiting...", ring->ndev->name, __func__, __LINE__);
@@ -305,7 +306,8 @@
 		ring->ndev->name, __func__, __LINE__);
 	rx_priv = vxge_hw_ring_rxd_private_get(dtrh);
 
-	dma_addr = pci_map_single(ring->pdev, rx_priv->skb->data,
+	rx_priv->skb_data = rx_priv->skb->data;
+	dma_addr = pci_map_single(ring->pdev, rx_priv->skb_data,
 				rx_priv->data_size, PCI_DMA_FROMDEVICE);
 
 	if (dma_addr == 0) {
@@ -450,6 +452,7 @@
 		skb = rx_priv->skb;
 		data_size = rx_priv->data_size;
 		data_dma = rx_priv->data_dma;
+		prefetch(rx_priv->skb_data);
 
 		vxge_debug_rx(VXGE_TRACE,
 			"%s: %s:%d  skb = 0x%p",
@@ -1056,6 +1059,7 @@
 		rx_priv->data_size, PCI_DMA_FROMDEVICE);
 
 	dev_kfree_skb(rx_priv->skb);
+	rx_priv->skb_data = NULL;
 
 	vxge_debug_entryexit(VXGE_TRACE,
 		"%s: %s:%d  Exiting...",