mlx4_core: Fix dma_sync_single_for_cpu() with matching for_device() calls

Commit 5d23a1d2 ("net: replace dma_sync_single with
dma_sync_single_for_cpu") replaced uses of the deprectated function
dma_sync_single() with calls to dma_sync_single_for_cpu().  However,
to be correct, the code should do a sync for_cpu() before touching the
memory and for_device() after it's done.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 5887e47..f96948b 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -399,11 +399,14 @@
 	if (!mtts)
 		return -ENOMEM;
 
+	dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
+				npages * sizeof (u64), DMA_TO_DEVICE);
+
 	for (i = 0; i < npages; ++i)
 		mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
 
-	dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
-				npages * sizeof (u64), DMA_TO_DEVICE);
+	dma_sync_single_for_device(&dev->pdev->dev, dma_handle,
+				   npages * sizeof (u64), DMA_TO_DEVICE);
 
 	return 0;
 }
@@ -547,11 +550,14 @@
 	/* Make sure MPT status is visible before writing MTT entries */
 	wmb();
 
+	dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle,
+				npages * sizeof(u64), DMA_TO_DEVICE);
+
 	for (i = 0; i < npages; ++i)
 		fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
 
-	dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle,
-				npages * sizeof(u64), DMA_TO_DEVICE);
+	dma_sync_single_for_device(&dev->pdev->dev, fmr->dma_handle,
+				   npages * sizeof(u64), DMA_TO_DEVICE);
 
 	fmr->mpt->key    = cpu_to_be32(key);
 	fmr->mpt->lkey   = cpu_to_be32(key);