net/mlx4_en: Saving mem access on data path

Localized the pdev->dev, and using dma_map instead of pci_map
There are multiple map/unmap operations on data path,
optimizing those by saving redundant pointer access.
Those places were identified as hot-spots when running kernel profiling
during some benchmarks.
The fixes had most impact when testing packet rate with small packets,
reducing several % from CPU load, and in some case being the difference
between reaching wire speed or being CPU bound.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 50b3fa5..008f0af 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -198,7 +198,6 @@
 				struct mlx4_en_tx_ring *ring,
 				int index, u8 owner)
 {
-	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
 	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
 	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
@@ -214,7 +213,7 @@
 	if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
 		if (!tx_info->inl) {
 			if (tx_info->linear) {
-				pci_unmap_single(mdev->pdev,
+				dma_unmap_single(priv->ddev,
 					(dma_addr_t) be64_to_cpu(data->addr),
 					 be32_to_cpu(data->byte_count),
 					 PCI_DMA_TODEVICE);
@@ -223,7 +222,7 @@
 
 			for (i = 0; i < frags; i++) {
 				frag = &skb_shinfo(skb)->frags[i];
-				pci_unmap_page(mdev->pdev,
+				dma_unmap_page(priv->ddev,
 					(dma_addr_t) be64_to_cpu(data[i].addr),
 					skb_frag_size(frag), PCI_DMA_TODEVICE);
 			}
@@ -241,7 +240,7 @@
 			}
 
 			if (tx_info->linear) {
-				pci_unmap_single(mdev->pdev,
+				dma_unmap_single(priv->ddev,
 					(dma_addr_t) be64_to_cpu(data->addr),
 					 be32_to_cpu(data->byte_count),
 					 PCI_DMA_TODEVICE);
@@ -253,7 +252,7 @@
 				if ((void *) data >= end)
 					data = ring->buf;
 				frag = &skb_shinfo(skb)->frags[i];
-				pci_unmap_page(mdev->pdev,
+				dma_unmap_page(priv->ddev,
 					(dma_addr_t) be64_to_cpu(data->addr),
 					 skb_frag_size(frag), PCI_DMA_TODEVICE);
 				++data;
@@ -733,7 +732,7 @@
 		/* Map fragments */
 		for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
 			frag = &skb_shinfo(skb)->frags[i];
-			dma = skb_frag_dma_map(&mdev->dev->pdev->dev, frag,
+			dma = skb_frag_dma_map(priv->ddev, frag,
 					       0, skb_frag_size(frag),
 					       DMA_TO_DEVICE);
 			data->addr = cpu_to_be64(dma);
@@ -745,7 +744,7 @@
 
 		/* Map linear part */
 		if (tx_info->linear) {
-			dma = pci_map_single(mdev->dev->pdev, skb->data + lso_header_size,
+			dma = dma_map_single(priv->ddev, skb->data + lso_header_size,
 					     skb_headlen(skb) - lso_header_size, PCI_DMA_TODEVICE);
 			data->addr = cpu_to_be64(dma);
 			data->lkey = cpu_to_be32(mdev->mr.key);