bnx2: switch to build_skb() infrastructure

This is very similar to bnx2x conversion, but bnx2 only requires 16bytes
alignement at start of the received frame to store its l2_fhdr, so goal
was not to reduce skb truesize (in fact it should not change after this
patch)

Using build_skb() reduces cache line misses in the driver, since we
use cache hot skb instead of cold ones. Number of in-flight sk_buff
structures is lower, they are more likely recycled in SLUB caches
while still hot.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Michael Chan <mchan@broadcom.com>
CC: Eilon Greenstein <eilong@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/broadcom/bnx2.h b/drivers/net/ethernet/broadcom/bnx2.h
index 99d31a7..1db2d51 100644
--- a/drivers/net/ethernet/broadcom/bnx2.h
+++ b/drivers/net/ethernet/broadcom/bnx2.h
@@ -6563,12 +6563,25 @@
 #define MB_TX_CID_ADDR	MB_GET_CID_ADDR(TX_CID)
 #define MB_RX_CID_ADDR	MB_GET_CID_ADDR(RX_CID)
 
+/*
+ * This driver uses new build_skb() API :
+ * RX ring buffer contains pointer to kmalloc() data only,
+ * skb are built only after Hardware filled the frame.
+ */
 struct sw_bd {
-	struct sk_buff		*skb;
-	struct l2_fhdr		*desc;
+	u8			*data;
 	DEFINE_DMA_UNMAP_ADDR(mapping);
 };
 
+/* Its faster to compute this from data than storing it in sw_bd
+ * (less cache misses)
+ */
+static inline struct l2_fhdr *get_l2_fhdr(u8 *data)
+{
+	return (struct l2_fhdr *)(PTR_ALIGN(data, BNX2_RX_ALIGN) + NET_SKB_PAD);
+}
+
+
 struct sw_pg {
 	struct page		*page;
 	DEFINE_DMA_UNMAP_ADDR(mapping);