Merge branch 'topic/pl330' into for-linus
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 79b1390..c77f214 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -341,12 +341,13 @@
 
 config MXS_DMA
 	bool "MXS DMA support"
-	depends on SOC_IMX23 || SOC_IMX28 || SOC_IMX6Q
+	depends on SOC_IMX23 || SOC_IMX28 || SOC_IMX6Q || SOC_IMX6UL
 	select STMP_DEVICE
 	select DMA_ENGINE
 	help
 	  Support the MXS DMA engine. This engine including APBH-DMA
-	  and APBX-DMA is integrated into Freescale i.MX23/28/MX6Q/MX6DL chips.
+	  and APBX-DMA is integrated into Freescale
+	  i.MX23/28/MX6Q/MX6DL/MX6UL chips.
 
 config MX3_IPU
 	bool "MX3x Image Processing Unit support"
diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c
index eed6bda..4a748c3 100644
--- a/drivers/dma/acpi-dma.c
+++ b/drivers/dma/acpi-dma.c
@@ -438,7 +438,7 @@
 			return ERR_PTR(-ENODEV);
 	}
 
-	dev_dbg(dev, "found DMA channel \"%s\" at index %d\n", name, index);
+	dev_dbg(dev, "Looking for DMA channel \"%s\" at index %d...\n", name, index);
 	return acpi_dma_request_slave_chan_by_index(dev, index);
 }
 EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_name);
diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
index 241ff2b..0a50c18 100644
--- a/drivers/dma/dw/regs.h
+++ b/drivers/dma/dw/regs.h
@@ -150,7 +150,7 @@
 #define DWC_CTLL_DST_INC	(0<<7)		/* DAR update/not */
 #define DWC_CTLL_DST_DEC	(1<<7)
 #define DWC_CTLL_DST_FIX	(2<<7)
-#define DWC_CTLL_SRC_INC	(0<<7)		/* SAR update/not */
+#define DWC_CTLL_SRC_INC	(0<<9)		/* SAR update/not */
 #define DWC_CTLL_SRC_DEC	(1<<9)
 #define DWC_CTLL_SRC_FIX	(2<<9)
 #define DWC_CTLL_DST_MSIZE(n)	((n)<<11)	/* burst, #elements */
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index d92d655..29a7723 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -866,6 +866,13 @@
 	return 0;
 }
 
+static void edma_synchronize(struct dma_chan *chan)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+
+	vchan_synchronize(&echan->vchan);
+}
+
 static int edma_slave_config(struct dma_chan *chan,
 	struct dma_slave_config *cfg)
 {
@@ -1362,36 +1369,36 @@
 static void edma_completion_handler(struct edma_chan *echan)
 {
 	struct device *dev = echan->vchan.chan.device->dev;
-	struct edma_desc *edesc = echan->edesc;
-
-	if (!edesc)
-		return;
+	struct edma_desc *edesc;
 
 	spin_lock(&echan->vchan.lock);
-	if (edesc->cyclic) {
-		vchan_cyclic_callback(&edesc->vdesc);
-		spin_unlock(&echan->vchan.lock);
-		return;
-	} else if (edesc->processed == edesc->pset_nr) {
-		edesc->residue = 0;
-		edma_stop(echan);
-		vchan_cookie_complete(&edesc->vdesc);
-		echan->edesc = NULL;
+	edesc = echan->edesc;
+	if (edesc) {
+		if (edesc->cyclic) {
+			vchan_cyclic_callback(&edesc->vdesc);
+			spin_unlock(&echan->vchan.lock);
+			return;
+		} else if (edesc->processed == edesc->pset_nr) {
+			edesc->residue = 0;
+			edma_stop(echan);
+			vchan_cookie_complete(&edesc->vdesc);
+			echan->edesc = NULL;
 
-		dev_dbg(dev, "Transfer completed on channel %d\n",
-			echan->ch_num);
-	} else {
-		dev_dbg(dev, "Sub transfer completed on channel %d\n",
-			echan->ch_num);
+			dev_dbg(dev, "Transfer completed on channel %d\n",
+				echan->ch_num);
+		} else {
+			dev_dbg(dev, "Sub transfer completed on channel %d\n",
+				echan->ch_num);
 
-		edma_pause(echan);
+			edma_pause(echan);
 
-		/* Update statistics for tx_status */
-		edesc->residue -= edesc->sg_len;
-		edesc->residue_stat = edesc->residue;
-		edesc->processed_stat = edesc->processed;
+			/* Update statistics for tx_status */
+			edesc->residue -= edesc->sg_len;
+			edesc->residue_stat = edesc->residue;
+			edesc->processed_stat = edesc->processed;
+		}
+		edma_execute(echan);
 	}
-	edma_execute(echan);
 
 	spin_unlock(&echan->vchan.lock);
 }
@@ -1798,6 +1805,7 @@
 	s_ddev->device_pause = edma_dma_pause;
 	s_ddev->device_resume = edma_dma_resume;
 	s_ddev->device_terminate_all = edma_terminate_all;
+	s_ddev->device_synchronize = edma_synchronize;
 
 	s_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS;
 	s_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS;
@@ -1823,6 +1831,7 @@
 		m_ddev->device_pause = edma_dma_pause;
 		m_ddev->device_resume = edma_dma_resume;
 		m_ddev->device_terminate_all = edma_terminate_all;
+		m_ddev->device_synchronize = edma_synchronize;
 
 		m_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS;
 		m_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS;
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index 57ff462..21f08cc 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -421,23 +421,25 @@
 			desc->size);
 	}
 
-	switch (irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB)) {
-	case M2P_INTERRUPT_STALL:
-		/* Disable interrupts */
-		control = readl(edmac->regs + M2P_CONTROL);
-		control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT);
-		m2p_set_control(edmac, control);
+	/*
+	 * Even latest E2 silicon revision sometimes assert STALL interrupt
+	 * instead of NFB. Therefore we treat them equally, basing on the
+	 * amount of data we still have to transfer.
+	 */
+	if (!(irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB)))
+		return INTERRUPT_UNKNOWN;
 
-		return INTERRUPT_DONE;
-
-	case M2P_INTERRUPT_NFB:
-		if (ep93xx_dma_advance_active(edmac))
-			m2p_fill_desc(edmac);
-
+	if (ep93xx_dma_advance_active(edmac)) {
+		m2p_fill_desc(edmac);
 		return INTERRUPT_NEXT_BUFFER;
 	}
 
-	return INTERRUPT_UNKNOWN;
+	/* Disable interrupts */
+	control = readl(edmac->regs + M2P_CONTROL);
+	control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT);
+	m2p_set_control(edmac, control);
+
+	return INTERRUPT_DONE;
 }
 
 /*
diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c
index 3cb7b2c..1953e57 100644
--- a/drivers/dma/idma64.c
+++ b/drivers/dma/idma64.c
@@ -289,6 +289,9 @@
 
 	/* Trigger an interrupt after the last block is transfered */
 	lli->ctllo |= IDMA64C_CTLL_INT_EN;
+
+	/* Disable LLP transfer in the last block */
+	lli->ctllo &= ~(IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN);
 }
 
 static struct dma_async_tx_descriptor *idma64_prep_slave_sg(
diff --git a/drivers/dma/idma64.h b/drivers/dma/idma64.h
index 8423f13..dc68744 100644
--- a/drivers/dma/idma64.h
+++ b/drivers/dma/idma64.h
@@ -71,7 +71,7 @@
 #define IDMA64C_CFGH_SRC_PER(x)		((x) << 0)	/* src peripheral */
 #define IDMA64C_CFGH_DST_PER(x)		((x) << 4)	/* dst peripheral */
 #define IDMA64C_CFGH_RD_ISSUE_THD(x)	((x) << 8)
-#define IDMA64C_CFGH_RW_ISSUE_THD(x)	((x) << 18)
+#define IDMA64C_CFGH_WR_ISSUE_THD(x)	((x) << 18)
 
 /* Interrupt registers */
 
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 1d5df2e..5428746 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -31,6 +31,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/workqueue.h>
 #include <linux/prefetch.h>
+#include <linux/sizes.h>
 #include "dma.h"
 #include "registers.h"
 #include "hw.h"
@@ -290,24 +291,30 @@
 }
 
 static struct ioat_ring_ent *
-ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
+ioat_alloc_ring_ent(struct dma_chan *chan, int idx, gfp_t flags)
 {
 	struct ioat_dma_descriptor *hw;
 	struct ioat_ring_ent *desc;
 	struct ioatdma_device *ioat_dma;
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+	int chunk;
 	dma_addr_t phys;
+	u8 *pos;
+	off_t offs;
 
 	ioat_dma = to_ioatdma_device(chan->device);
-	hw = pci_pool_alloc(ioat_dma->dma_pool, flags, &phys);
-	if (!hw)
-		return NULL;
+
+	chunk = idx / IOAT_DESCS_PER_2M;
+	idx &= (IOAT_DESCS_PER_2M - 1);
+	offs = idx * IOAT_DESC_SZ;
+	pos = (u8 *)ioat_chan->descs[chunk].virt + offs;
+	phys = ioat_chan->descs[chunk].hw + offs;
+	hw = (struct ioat_dma_descriptor *)pos;
 	memset(hw, 0, sizeof(*hw));
 
 	desc = kmem_cache_zalloc(ioat_cache, flags);
-	if (!desc) {
-		pci_pool_free(ioat_dma->dma_pool, hw, phys);
+	if (!desc)
 		return NULL;
-	}
 
 	dma_async_tx_descriptor_init(&desc->txd, chan);
 	desc->txd.tx_submit = ioat_tx_submit_unlock;
@@ -318,32 +325,63 @@
 
 void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
 {
-	struct ioatdma_device *ioat_dma;
-
-	ioat_dma = to_ioatdma_device(chan->device);
-	pci_pool_free(ioat_dma->dma_pool, desc->hw, desc->txd.phys);
 	kmem_cache_free(ioat_cache, desc);
 }
 
 struct ioat_ring_ent **
 ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
 {
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
 	struct ioat_ring_ent **ring;
-	int descs = 1 << order;
-	int i;
-
-	if (order > ioat_get_max_alloc_order())
-		return NULL;
+	int total_descs = 1 << order;
+	int i, chunks;
 
 	/* allocate the array to hold the software ring */
-	ring = kcalloc(descs, sizeof(*ring), flags);
+	ring = kcalloc(total_descs, sizeof(*ring), flags);
 	if (!ring)
 		return NULL;
-	for (i = 0; i < descs; i++) {
-		ring[i] = ioat_alloc_ring_ent(c, flags);
+
+	ioat_chan->desc_chunks = chunks = (total_descs * IOAT_DESC_SZ) / SZ_2M;
+
+	for (i = 0; i < chunks; i++) {
+		struct ioat_descs *descs = &ioat_chan->descs[i];
+
+		descs->virt = dma_alloc_coherent(to_dev(ioat_chan),
+						 SZ_2M, &descs->hw, flags);
+		if (!descs->virt && (i > 0)) {
+			int idx;
+
+			for (idx = 0; idx < i; idx++) {
+				dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+						  descs->virt, descs->hw);
+				descs->virt = NULL;
+				descs->hw = 0;
+			}
+
+			ioat_chan->desc_chunks = 0;
+			kfree(ring);
+			return NULL;
+		}
+	}
+
+	for (i = 0; i < total_descs; i++) {
+		ring[i] = ioat_alloc_ring_ent(c, i, flags);
 		if (!ring[i]) {
+			int idx;
+
 			while (i--)
 				ioat_free_ring_ent(ring[i], c);
+
+			for (idx = 0; idx < ioat_chan->desc_chunks; idx++) {
+				dma_free_coherent(to_dev(ioat_chan),
+						  SZ_2M,
+						  ioat_chan->descs[idx].virt,
+						  ioat_chan->descs[idx].hw);
+				ioat_chan->descs[idx].virt = NULL;
+				ioat_chan->descs[idx].hw = 0;
+			}
+
+			ioat_chan->desc_chunks = 0;
 			kfree(ring);
 			return NULL;
 		}
@@ -351,7 +389,7 @@
 	}
 
 	/* link descs */
-	for (i = 0; i < descs-1; i++) {
+	for (i = 0; i < total_descs-1; i++) {
 		struct ioat_ring_ent *next = ring[i+1];
 		struct ioat_dma_descriptor *hw = ring[i]->hw;
 
@@ -362,114 +400,6 @@
 	return ring;
 }
 
-static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order)
-{
-	/* reshape differs from normal ring allocation in that we want
-	 * to allocate a new software ring while only
-	 * extending/truncating the hardware ring
-	 */
-	struct dma_chan *c = &ioat_chan->dma_chan;
-	const u32 curr_size = ioat_ring_size(ioat_chan);
-	const u16 active = ioat_ring_active(ioat_chan);
-	const u32 new_size = 1 << order;
-	struct ioat_ring_ent **ring;
-	u32 i;
-
-	if (order > ioat_get_max_alloc_order())
-		return false;
-
-	/* double check that we have at least 1 free descriptor */
-	if (active == curr_size)
-		return false;
-
-	/* when shrinking, verify that we can hold the current active
-	 * set in the new ring
-	 */
-	if (active >= new_size)
-		return false;
-
-	/* allocate the array to hold the software ring */
-	ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
-	if (!ring)
-		return false;
-
-	/* allocate/trim descriptors as needed */
-	if (new_size > curr_size) {
-		/* copy current descriptors to the new ring */
-		for (i = 0; i < curr_size; i++) {
-			u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
-			u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
-			ring[new_idx] = ioat_chan->ring[curr_idx];
-			set_desc_id(ring[new_idx], new_idx);
-		}
-
-		/* add new descriptors to the ring */
-		for (i = curr_size; i < new_size; i++) {
-			u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
-			ring[new_idx] = ioat_alloc_ring_ent(c, GFP_NOWAIT);
-			if (!ring[new_idx]) {
-				while (i--) {
-					u16 new_idx = (ioat_chan->tail+i) &
-						       (new_size-1);
-
-					ioat_free_ring_ent(ring[new_idx], c);
-				}
-				kfree(ring);
-				return false;
-			}
-			set_desc_id(ring[new_idx], new_idx);
-		}
-
-		/* hw link new descriptors */
-		for (i = curr_size-1; i < new_size; i++) {
-			u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-			struct ioat_ring_ent *next =
-				ring[(new_idx+1) & (new_size-1)];
-			struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
-
-			hw->next = next->txd.phys;
-		}
-	} else {
-		struct ioat_dma_descriptor *hw;
-		struct ioat_ring_ent *next;
-
-		/* copy current descriptors to the new ring, dropping the
-		 * removed descriptors
-		 */
-		for (i = 0; i < new_size; i++) {
-			u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
-			u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
-			ring[new_idx] = ioat_chan->ring[curr_idx];
-			set_desc_id(ring[new_idx], new_idx);
-		}
-
-		/* free deleted descriptors */
-		for (i = new_size; i < curr_size; i++) {
-			struct ioat_ring_ent *ent;
-
-			ent = ioat_get_ring_ent(ioat_chan, ioat_chan->tail+i);
-			ioat_free_ring_ent(ent, c);
-		}
-
-		/* fix up hardware ring */
-		hw = ring[(ioat_chan->tail+new_size-1) & (new_size-1)]->hw;
-		next = ring[(ioat_chan->tail+new_size) & (new_size-1)];
-		hw->next = next->txd.phys;
-	}
-
-	dev_dbg(to_dev(ioat_chan), "%s: allocated %d descriptors\n",
-		__func__, new_size);
-
-	kfree(ioat_chan->ring);
-	ioat_chan->ring = ring;
-	ioat_chan->alloc_order = order;
-
-	return true;
-}
-
 /**
  * ioat_check_space_lock - verify space and grab ring producer lock
  * @ioat: ioat,3 channel (ring) to operate on
@@ -478,9 +408,6 @@
 int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
 	__acquires(&ioat_chan->prep_lock)
 {
-	bool retry;
-
- retry:
 	spin_lock_bh(&ioat_chan->prep_lock);
 	/* never allow the last descriptor to be consumed, we need at
 	 * least one free at all times to allow for on-the-fly ring
@@ -493,24 +420,8 @@
 		ioat_chan->produce = num_descs;
 		return 0;  /* with ioat->prep_lock held */
 	}
-	retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
 	spin_unlock_bh(&ioat_chan->prep_lock);
 
-	/* is another cpu already trying to expand the ring? */
-	if (retry)
-		goto retry;
-
-	spin_lock_bh(&ioat_chan->cleanup_lock);
-	spin_lock_bh(&ioat_chan->prep_lock);
-	retry = reshape_ring(ioat_chan, ioat_chan->alloc_order + 1);
-	clear_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
-	spin_unlock_bh(&ioat_chan->prep_lock);
-	spin_unlock_bh(&ioat_chan->cleanup_lock);
-
-	/* if we were able to expand the ring retry the allocation */
-	if (retry)
-		goto retry;
-
 	dev_dbg_ratelimited(to_dev(ioat_chan),
 			    "%s: ring full! num_descs: %d (%x:%x:%x)\n",
 			    __func__, num_descs, ioat_chan->head,
@@ -823,19 +734,6 @@
 
 	if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
 		mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
-	else if (ioat_chan->alloc_order > ioat_get_alloc_order()) {
-		/* if the ring is idle, empty, and oversized try to step
-		 * down the size
-		 */
-		reshape_ring(ioat_chan, ioat_chan->alloc_order - 1);
-
-		/* keep shrinking until we get back to our minimum
-		 * default size
-		 */
-		if (ioat_chan->alloc_order > ioat_get_alloc_order())
-			mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
-	}
-
 }
 
 void ioat_timer_event(unsigned long data)
@@ -906,40 +804,6 @@
 	return dma_cookie_status(c, cookie, txstate);
 }
 
-static int ioat_irq_reinit(struct ioatdma_device *ioat_dma)
-{
-	struct pci_dev *pdev = ioat_dma->pdev;
-	int irq = pdev->irq, i;
-
-	if (!is_bwd_ioat(pdev))
-		return 0;
-
-	switch (ioat_dma->irq_mode) {
-	case IOAT_MSIX:
-		for (i = 0; i < ioat_dma->dma_dev.chancnt; i++) {
-			struct msix_entry *msix = &ioat_dma->msix_entries[i];
-			struct ioatdma_chan *ioat_chan;
-
-			ioat_chan = ioat_chan_by_index(ioat_dma, i);
-			devm_free_irq(&pdev->dev, msix->vector, ioat_chan);
-		}
-
-		pci_disable_msix(pdev);
-		break;
-	case IOAT_MSI:
-		pci_disable_msi(pdev);
-		/* fall through */
-	case IOAT_INTX:
-		devm_free_irq(&pdev->dev, irq, ioat_dma);
-		break;
-	default:
-		return 0;
-	}
-	ioat_dma->irq_mode = IOAT_NOIRQ;
-
-	return ioat_dma_setup_interrupts(ioat_dma);
-}
-
 int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
 {
 	/* throw away whatever the channel was doing and get it
@@ -979,9 +843,21 @@
 		}
 	}
 
+	if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+		ioat_dma->msixtba0 = readq(ioat_dma->reg_base + 0x1000);
+		ioat_dma->msixdata0 = readq(ioat_dma->reg_base + 0x1008);
+		ioat_dma->msixpba = readq(ioat_dma->reg_base + 0x1800);
+	}
+
+
 	err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200));
-	if (!err)
-		err = ioat_irq_reinit(ioat_dma);
+	if (!err) {
+		if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+			writeq(ioat_dma->msixtba0, ioat_dma->reg_base + 0x1000);
+			writeq(ioat_dma->msixdata0, ioat_dma->reg_base + 0x1008);
+			writeq(ioat_dma->msixpba, ioat_dma->reg_base + 0x1800);
+		}
+	}
 
 	if (err)
 		dev_err(&pdev->dev, "Failed to reset: %d\n", err);
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index b8f4807..a9bc1a1 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -62,7 +62,6 @@
  * struct ioatdma_device - internal representation of a IOAT device
  * @pdev: PCI-Express device
  * @reg_base: MMIO register space base address
- * @dma_pool: for allocating DMA descriptors
  * @completion_pool: DMA buffers for completion ops
  * @sed_hw_pool: DMA super descriptor pools
  * @dma_dev: embedded struct dma_device
@@ -76,8 +75,7 @@
 struct ioatdma_device {
 	struct pci_dev *pdev;
 	void __iomem *reg_base;
-	struct pci_pool *dma_pool;
-	struct pci_pool *completion_pool;
+	struct dma_pool *completion_pool;
 #define MAX_SED_POOLS	5
 	struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
 	struct dma_device dma_dev;
@@ -88,6 +86,16 @@
 	struct dca_provider *dca;
 	enum ioat_irq_mode irq_mode;
 	u32 cap;
+
+	/* shadow version for CB3.3 chan reset errata workaround */
+	u64 msixtba0;
+	u64 msixdata0;
+	u32 msixpba;
+};
+
+struct ioat_descs {
+	void *virt;
+	dma_addr_t hw;
 };
 
 struct ioatdma_chan {
@@ -100,7 +108,6 @@
 	#define IOAT_COMPLETION_ACK 1
 	#define IOAT_RESET_PENDING 2
 	#define IOAT_KOBJ_INIT_FAIL 3
-	#define IOAT_RESHAPE_PENDING 4
 	#define IOAT_RUN 5
 	#define IOAT_CHAN_ACTIVE 6
 	struct timer_list timer;
@@ -133,6 +140,8 @@
 	u16 produce;
 	struct ioat_ring_ent **ring;
 	spinlock_t prep_lock;
+	struct ioat_descs descs[2];
+	int desc_chunks;
 };
 
 struct ioat_sysfs_entry {
@@ -302,10 +311,8 @@
 }
 
 #define IOAT_MAX_ORDER 16
-#define ioat_get_alloc_order() \
-	(min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
-#define ioat_get_max_alloc_order() \
-	(min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
+#define IOAT_MAX_DESCS 65536
+#define IOAT_DESCS_PER_2M 32768
 
 static inline u32 ioat_ring_size(struct ioatdma_chan *ioat_chan)
 {
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
index 690e3b4..8e67895 100644
--- a/drivers/dma/ioat/hw.h
+++ b/drivers/dma/ioat/hw.h
@@ -73,6 +73,8 @@
 
 int system_has_dca_enabled(struct pci_dev *pdev);
 
+#define IOAT_DESC_SZ	64
+
 struct ioat_dma_descriptor {
 	uint32_t	size;
 	union {
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index 4ef0c5e..efdee1a 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -28,6 +28,7 @@
 #include <linux/prefetch.h>
 #include <linux/dca.h>
 #include <linux/aer.h>
+#include <linux/sizes.h>
 #include "dma.h"
 #include "registers.h"
 #include "hw.h"
@@ -136,14 +137,6 @@
 module_param(ioat_pending_level, int, 0644);
 MODULE_PARM_DESC(ioat_pending_level,
 		 "high-water mark for pushing ioat descriptors (default: 4)");
-int ioat_ring_alloc_order = 8;
-module_param(ioat_ring_alloc_order, int, 0644);
-MODULE_PARM_DESC(ioat_ring_alloc_order,
-		 "ioat+: allocate 2^n descriptors per channel (default: 8 max: 16)");
-int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
-module_param(ioat_ring_max_alloc_order, int, 0644);
-MODULE_PARM_DESC(ioat_ring_max_alloc_order,
-		 "ioat+: upper limit for ring size (default: 16)");
 static char ioat_interrupt_style[32] = "msix";
 module_param_string(ioat_interrupt_style, ioat_interrupt_style,
 		    sizeof(ioat_interrupt_style), 0644);
@@ -504,23 +497,14 @@
 	struct pci_dev *pdev = ioat_dma->pdev;
 	struct device *dev = &pdev->dev;
 
-	/* DMA coherent memory pool for DMA descriptor allocations */
-	ioat_dma->dma_pool = pci_pool_create("dma_desc_pool", pdev,
-					     sizeof(struct ioat_dma_descriptor),
-					     64, 0);
-	if (!ioat_dma->dma_pool) {
-		err = -ENOMEM;
-		goto err_dma_pool;
-	}
-
-	ioat_dma->completion_pool = pci_pool_create("completion_pool", pdev,
+	ioat_dma->completion_pool = dma_pool_create("completion_pool", dev,
 						    sizeof(u64),
 						    SMP_CACHE_BYTES,
 						    SMP_CACHE_BYTES);
 
 	if (!ioat_dma->completion_pool) {
 		err = -ENOMEM;
-		goto err_completion_pool;
+		goto err_out;
 	}
 
 	ioat_enumerate_channels(ioat_dma);
@@ -546,10 +530,8 @@
 err_self_test:
 	ioat_disable_interrupts(ioat_dma);
 err_setup_interrupts:
-	pci_pool_destroy(ioat_dma->completion_pool);
-err_completion_pool:
-	pci_pool_destroy(ioat_dma->dma_pool);
-err_dma_pool:
+	dma_pool_destroy(ioat_dma->completion_pool);
+err_out:
 	return err;
 }
 
@@ -559,8 +541,7 @@
 
 	if (err) {
 		ioat_disable_interrupts(ioat_dma);
-		pci_pool_destroy(ioat_dma->completion_pool);
-		pci_pool_destroy(ioat_dma->dma_pool);
+		dma_pool_destroy(ioat_dma->completion_pool);
 	}
 
 	return err;
@@ -576,8 +557,7 @@
 
 	dma_async_device_unregister(dma);
 
-	pci_pool_destroy(ioat_dma->dma_pool);
-	pci_pool_destroy(ioat_dma->completion_pool);
+	dma_pool_destroy(ioat_dma->completion_pool);
 
 	INIT_LIST_HEAD(&dma->channels);
 }
@@ -666,10 +646,19 @@
 		ioat_free_ring_ent(desc, c);
 	}
 
+	for (i = 0; i < ioat_chan->desc_chunks; i++) {
+		dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+				  ioat_chan->descs[i].virt,
+				  ioat_chan->descs[i].hw);
+		ioat_chan->descs[i].virt = NULL;
+		ioat_chan->descs[i].hw = 0;
+	}
+	ioat_chan->desc_chunks = 0;
+
 	kfree(ioat_chan->ring);
 	ioat_chan->ring = NULL;
 	ioat_chan->alloc_order = 0;
-	pci_pool_free(ioat_dma->completion_pool, ioat_chan->completion,
+	dma_pool_free(ioat_dma->completion_pool, ioat_chan->completion,
 		      ioat_chan->completion_dma);
 	spin_unlock_bh(&ioat_chan->prep_lock);
 	spin_unlock_bh(&ioat_chan->cleanup_lock);
@@ -701,7 +690,7 @@
 	/* allocate a completion writeback area */
 	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
 	ioat_chan->completion =
-		pci_pool_alloc(ioat_chan->ioat_dma->completion_pool,
+		dma_pool_alloc(ioat_chan->ioat_dma->completion_pool,
 			       GFP_KERNEL, &ioat_chan->completion_dma);
 	if (!ioat_chan->completion)
 		return -ENOMEM;
@@ -712,7 +701,7 @@
 	writel(((u64)ioat_chan->completion_dma) >> 32,
 	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
 
-	order = ioat_get_alloc_order();
+	order = IOAT_MAX_ORDER;
 	ring = ioat_alloc_ring(c, order, GFP_KERNEL);
 	if (!ring)
 		return -ENOMEM;
diff --git a/drivers/dma/ioat/prep.c b/drivers/dma/ioat/prep.c
index 6bb4a13..243421a 100644
--- a/drivers/dma/ioat/prep.c
+++ b/drivers/dma/ioat/prep.c
@@ -26,7 +26,7 @@
 #include "hw.h"
 #include "dma.h"
 
-#define MAX_SCF	1024
+#define MAX_SCF	256
 
 /* provide a lookup table for setting the source address in the base or
  * extended descriptor of an xor or pq descriptor
diff --git a/drivers/dma/mic_x100_dma.c b/drivers/dma/mic_x100_dma.c
index 068e920..1502b24 100644
--- a/drivers/dma/mic_x100_dma.c
+++ b/drivers/dma/mic_x100_dma.c
@@ -483,7 +483,7 @@
 			mic_dma_intr_handler, mic_dma_thread_fn,
 			"mic dma_channel", ch, ch->ch_num);
 	if (IS_ERR(ch->cookie))
-		return IS_ERR(ch->cookie);
+		return PTR_ERR(ch->cookie);
 	return 0;
 }
 
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index 9794b07..43bd5ae 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -1009,6 +1009,13 @@
 	return 0;
 }
 
+static void omap_dma_synchronize(struct dma_chan *chan)
+{
+	struct omap_chan *c = to_omap_dma_chan(chan);
+
+	vchan_synchronize(&c->vc);
+}
+
 static int omap_dma_pause(struct dma_chan *chan)
 {
 	struct omap_chan *c = to_omap_dma_chan(chan);
@@ -1112,6 +1119,7 @@
 	od->ddev.device_pause = omap_dma_pause;
 	od->ddev.device_resume = omap_dma_resume;
 	od->ddev.device_terminate_all = omap_dma_terminate_all;
+	od->ddev.device_synchronize = omap_dma_synchronize;
 	od->ddev.src_addr_widths = OMAP_DMA_BUSWIDTHS;
 	od->ddev.dst_addr_widths = OMAP_DMA_BUSWIDTHS;
 	od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
index 22ea241..e48350e 100644
--- a/drivers/dma/sirf-dma.c
+++ b/drivers/dma/sirf-dma.c
@@ -989,7 +989,7 @@
 	return 0;
 }
 
-static int sirfsoc_dma_runtime_suspend(struct device *dev)
+static int __maybe_unused sirfsoc_dma_runtime_suspend(struct device *dev)
 {
 	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
 
@@ -997,7 +997,7 @@
 	return 0;
 }
 
-static int sirfsoc_dma_runtime_resume(struct device *dev)
+static int __maybe_unused sirfsoc_dma_runtime_resume(struct device *dev)
 {
 	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
 	int ret;
@@ -1010,8 +1010,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int sirfsoc_dma_pm_suspend(struct device *dev)
+static int __maybe_unused sirfsoc_dma_pm_suspend(struct device *dev)
 {
 	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
 	struct sirfsoc_dma_regs *save = &sdma->regs_save;
@@ -1062,7 +1061,7 @@
 	return 0;
 }
 
-static int sirfsoc_dma_pm_resume(struct device *dev)
+static int __maybe_unused sirfsoc_dma_pm_resume(struct device *dev)
 {
 	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
 	struct sirfsoc_dma_regs *save = &sdma->regs_save;
@@ -1121,7 +1120,6 @@
 
 	return 0;
 }
-#endif
 
 static const struct dev_pm_ops sirfsoc_dma_pm_ops = {
 	SET_RUNTIME_PM_OPS(sirfsoc_dma_runtime_suspend, sirfsoc_dma_runtime_resume, NULL)
diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c
index 1661d518..e0df233 100644
--- a/drivers/dma/sun4i-dma.c
+++ b/drivers/dma/sun4i-dma.c
@@ -1271,6 +1271,7 @@
 	{ .compatible = "allwinner,sun4i-a10-dma" },
 	{ /* sentinel */ },
 };
+MODULE_DEVICE_TABLE(of, sun4i_dma_match);
 
 static struct platform_driver sun4i_dma_driver = {
 	.probe	= sun4i_dma_probe,
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 935da81..3871f29 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -1292,40 +1292,19 @@
 	.support_separate_wcount_reg = true,
 };
 
-
-static const struct of_device_id tegra_dma_of_match[] = {
-	{
-		.compatible = "nvidia,tegra148-apbdma",
-		.data = &tegra148_dma_chip_data,
-	}, {
-		.compatible = "nvidia,tegra114-apbdma",
-		.data = &tegra114_dma_chip_data,
-	}, {
-		.compatible = "nvidia,tegra30-apbdma",
-		.data = &tegra30_dma_chip_data,
-	}, {
-		.compatible = "nvidia,tegra20-apbdma",
-		.data = &tegra20_dma_chip_data,
-	}, {
-	},
-};
-MODULE_DEVICE_TABLE(of, tegra_dma_of_match);
-
 static int tegra_dma_probe(struct platform_device *pdev)
 {
 	struct resource	*res;
 	struct tegra_dma *tdma;
 	int ret;
 	int i;
-	const struct tegra_dma_chip_data *cdata = NULL;
-	const struct of_device_id *match;
+	const struct tegra_dma_chip_data *cdata;
 
-	match = of_match_device(tegra_dma_of_match, &pdev->dev);
-	if (!match) {
-		dev_err(&pdev->dev, "Error: No device match found\n");
+	cdata = of_device_get_match_data(&pdev->dev);
+	if (!cdata) {
+		dev_err(&pdev->dev, "Error: No device match data found\n");
 		return -ENODEV;
 	}
-	cdata = match->data;
 
 	tdma = devm_kzalloc(&pdev->dev, sizeof(*tdma) + cdata->nr_channels *
 			sizeof(struct tegra_dma_channel), GFP_KERNEL);
@@ -1612,6 +1591,24 @@
 	SET_SYSTEM_SLEEP_PM_OPS(tegra_dma_pm_suspend, tegra_dma_pm_resume)
 };
 
+static const struct of_device_id tegra_dma_of_match[] = {
+	{
+		.compatible = "nvidia,tegra148-apbdma",
+		.data = &tegra148_dma_chip_data,
+	}, {
+		.compatible = "nvidia,tegra114-apbdma",
+		.data = &tegra114_dma_chip_data,
+	}, {
+		.compatible = "nvidia,tegra30-apbdma",
+		.data = &tegra30_dma_chip_data,
+	}, {
+		.compatible = "nvidia,tegra20-apbdma",
+		.data = &tegra20_dma_chip_data,
+	}, {
+	},
+};
+MODULE_DEVICE_TABLE(of, tegra_dma_of_match);
+
 static struct platform_driver tegra_dmac_driver = {
 	.driver = {
 		.name	= "tegra-apbdma",
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 0a9a0ba..0174337 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -357,8 +357,8 @@
  */
 struct dma_slave_config {
 	enum dma_transfer_direction direction;
-	dma_addr_t src_addr;
-	dma_addr_t dst_addr;
+	phys_addr_t src_addr;
+	phys_addr_t dst_addr;
 	enum dma_slave_buswidth src_addr_width;
 	enum dma_slave_buswidth dst_addr_width;
 	u32 src_maxburst;