Merge branch 'topic/dw' into for-linus
diff --git a/Documentation/devicetree/bindings/dma/dma.txt b/Documentation/devicetree/bindings/dma/dma.txt
index 8f504e6..8210427 100644
--- a/Documentation/devicetree/bindings/dma/dma.txt
+++ b/Documentation/devicetree/bindings/dma/dma.txt
@@ -14,7 +14,7 @@
 
 Optional properties:
 - dma-channels: 	Number of DMA channels supported by the controller.
-- dma-requests: 	Number of DMA requests signals supported by the
+- dma-requests: 	Number of DMA request signals supported by the
 			controller.
 
 Example:
@@ -44,7 +44,7 @@
 			  #dma-cells property in the node referenced by phandle
 			  containing DMA controller specific information. This
 			  typically contains a DMA request line number or a
-			  channel number, but can contain any data that is used
+			  channel number, but can contain any data that is
 			  required for configuring a channel.
 - dma-names: 		Contains one identifier string for each DMA specifier in
 			the dmas property. The specific strings that can be used
diff --git a/Documentation/devicetree/bindings/dma/mmp-dma.txt b/Documentation/devicetree/bindings/dma/mmp-dma.txt
index a4fa4ef..7a802f6 100644
--- a/Documentation/devicetree/bindings/dma/mmp-dma.txt
+++ b/Documentation/devicetree/bindings/dma/mmp-dma.txt
@@ -1,17 +1,20 @@
 * MARVELL MMP DMA controller
 
 Marvell Peripheral DMA Controller
-Used platfroms: pxa688, pxa910, pxa3xx, etc
+Used platforms: pxa688, pxa910, pxa3xx, etc
 
 Required properties:
 - compatible: Should be "marvell,pdma-1.0"
 - reg: Should contain DMA registers location and length.
 - interrupts: Either contain all of the per-channel DMA interrupts
 		or one irq for pdma device
-- #dma-channels: Number of DMA channels supported by the controller.
+
+Optional properties:
+- #dma-channels: Number of DMA channels supported by the controller (defaults
+  to 32 when not specified)
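
Drivers are expected to apply the default themselves once the property is
optional. A rough probe-time sketch, assuming the generic
of_property_read_u32() helper (the function name below is made up for
illustration):

#include <linux/of.h>

/* Sketch only: honor the binding's default of 32 channels */
static u32 mmp_pdma_channel_count(struct device_node *np)
{
	u32 dma_channels;

	/* of_property_read_u32() leaves the output untouched on failure */
	if (of_property_read_u32(np, "#dma-channels", &dma_channels))
		dma_channels = 32;

	return dma_channels;
}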
 
 "marvell,pdma-1.0"
-Used platfroms: pxa25x, pxa27x, pxa3xx, pxa93x, pxa168, pxa910, pxa688.
+Used platforms: pxa25x, pxa27x, pxa3xx, pxa93x, pxa168, pxa910, pxa688.
 
 Examples:
 
@@ -45,7 +48,7 @@
 
 
 Marvell Two Channel DMA Controller used specifically for audio
-Used platfroms: pxa688, pxa910
+Used platforms: pxa688, pxa910
 
 Required properties:
 - compatible: Should be "marvell,adma-1.0" or "marvell,pxa910-squ"
diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 41bca32..25fa735 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -994,29 +994,23 @@
 EXPORT_SYMBOL(edma_set_dest);
 
 /**
- * edma_get_position - returns the current transfer points
+ * edma_get_position - returns the current transfer point
  * @slot: parameter RAM slot being examined
- * @src: pointer to source port position
- * @dst: pointer to destination port position
+ * @dst:  true selects the dest position, false the source
  *
- * Returns current source and destination addresses for a particular
- * parameter RAM slot.  Its channel should not be active when this is called.
+ * Returns the position of the currently active slot
  */
-void edma_get_position(unsigned slot, dma_addr_t *src, dma_addr_t *dst)
+dma_addr_t edma_get_position(unsigned slot, bool dst)
 {
-	struct edmacc_param temp;
-	unsigned ctlr;
+	u32 offs, ctlr = EDMA_CTLR(slot);
 
-	ctlr = EDMA_CTLR(slot);
 	slot = EDMA_CHAN_SLOT(slot);
 
-	edma_read_slot(EDMA_CTLR_CHAN(ctlr, slot), &temp);
-	if (src != NULL)
-		*src = temp.src;
-	if (dst != NULL)
-		*dst = temp.dst;
+	offs = PARM_OFFSET(slot);
+	offs += dst ? PARM_DST : PARM_SRC;
+
+	return edma_read(ctlr, offs);
 }
-EXPORT_SYMBOL(edma_get_position);
 
 /**
  * edma_set_src_index - configure DMA source address indexing
@@ -1574,6 +1568,7 @@
 		return ERR_PTR(ret);
 
 	dma_cap_set(DMA_SLAVE, edma_filter_info.dma_cap);
+	dma_cap_set(DMA_CYCLIC, edma_filter_info.dma_cap);
 	of_dma_controller_register(dev->of_node, of_dma_simple_xlate,
 				   &edma_filter_info);
 
@@ -1768,6 +1763,9 @@
 			map_queue_tc(j, queue_tc_mapping[i][0],
 					queue_tc_mapping[i][1]);
 
+		/* Save the number of TCs */
+		edma_cc[j]->num_tc = i;
+
 		/* Event queue priority mapping */
 		for (i = 0; queue_priority_mapping[i][0] != -1; i++)
 			assign_priority_to_queue(j,
diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts
index 651e4f5..57f86cd 100644
--- a/arch/powerpc/boot/dts/mpc8308_p1m.dts
+++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts
@@ -296,7 +296,7 @@
 		};
 
 		dma@2c000 {
-			compatible = "fsl,mpc8308-dma", "fsl,mpc5121-dma";
+			compatible = "fsl,mpc8308-dma";
 			reg = <0x2c000 0x1800>;
 			interrupts = <3 0x8
 					94 0x8>;
diff --git a/arch/powerpc/boot/dts/mpc8308rdb.dts b/arch/powerpc/boot/dts/mpc8308rdb.dts
index 9ce45f2..d0211f0 100644
--- a/arch/powerpc/boot/dts/mpc8308rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8308rdb.dts
@@ -265,7 +265,7 @@
 		};
 
 		dma@2c000 {
-			compatible = "fsl,mpc8308-dma", "fsl,mpc5121-dma";
+			compatible = "fsl,mpc8308-dma";
 			reg = <0x2c000 0x1800>;
 			interrupts = <3 0x8
 					94 0x8>;
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 5c58638..049059f 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -234,7 +234,7 @@
 
 config PCH_DMA
 	tristate "Intel EG20T PCH / LAPIS Semicon IOH(ML7213/ML7223/ML7831) DMA"
-	depends on PCI && X86
+	depends on PCI && (X86_32 || COMPILE_TEST)
 	select DMA_ENGINE
 	help
 	  Enable support for Intel EG20T PCH DMA engine.
@@ -269,7 +269,7 @@
 	select DMA_ENGINE
 	help
 	  Support the MXS DMA engine. This engine including APBH-DMA
-	  and APBX-DMA is integrated into Freescale i.MX23/28 chips.
+	  and APBX-DMA is integrated into Freescale i.MX23/28/MX6Q/MX6DL chips.
 
 config EP93XX_DMA
 	bool "Cirrus Logic EP93xx DMA support"
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index fec59f1..39e30c3 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c
@@ -93,19 +93,13 @@
 	return dw_dma_resume(chip);
 };
 
-#else /* !CONFIG_PM_SLEEP */
-
-#define dw_pci_suspend_late	NULL
-#define dw_pci_resume_early	NULL
-
-#endif /* !CONFIG_PM_SLEEP */
+#endif /* CONFIG_PM_SLEEP */
 
 static const struct dev_pm_ops dw_pci_dev_pm_ops = {
-	.suspend_late = dw_pci_suspend_late,
-	.resume_early = dw_pci_resume_early,
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_pci_suspend_late, dw_pci_resume_early)
 };
 
-static DEFINE_PCI_DEVICE_TABLE(dw_pci_id_table) = {
+static const struct pci_device_id dw_pci_id_table[] = {
 	/* Medfield */
 	{ PCI_VDEVICE(INTEL, 0x0827), (kernel_ulong_t)&dw_pci_pdata },
 	{ PCI_VDEVICE(INTEL, 0x0830), (kernel_ulong_t)&dw_pci_pdata },
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index 453822c..c5b339a 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -256,7 +256,7 @@
 
 #ifdef CONFIG_PM_SLEEP
 
-static int dw_suspend_noirq(struct device *dev)
+static int dw_suspend_late(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct dw_dma_chip *chip = platform_get_drvdata(pdev);
@@ -264,7 +264,7 @@
 	return dw_dma_suspend(chip);
 }
 
-static int dw_resume_noirq(struct device *dev)
+static int dw_resume_early(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct dw_dma_chip *chip = platform_get_drvdata(pdev);
@@ -272,20 +272,10 @@
 	return dw_dma_resume(chip);
 }
 
-#else /* !CONFIG_PM_SLEEP */
-
-#define dw_suspend_noirq	NULL
-#define dw_resume_noirq		NULL
-
-#endif /* !CONFIG_PM_SLEEP */
+#endif /* CONFIG_PM_SLEEP */
 
 static const struct dev_pm_ops dw_dev_pm_ops = {
-	.suspend_noirq = dw_suspend_noirq,
-	.resume_noirq = dw_resume_noirq,
-	.freeze_noirq = dw_suspend_noirq,
-	.thaw_noirq = dw_resume_noirq,
-	.restore_noirq = dw_resume_noirq,
-	.poweroff_noirq = dw_suspend_noirq,
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_suspend_late, dw_resume_early)
 };
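
The !CONFIG_PM_SLEEP stub #defines can go away because
SET_LATE_SYSTEM_SLEEP_PM_OPS() already degrades to nothing without
CONFIG_PM_SLEEP. Paraphrased from include/linux/pm.h, the helper roughly
expands to:

#ifdef CONFIG_PM_SLEEP
#define SET_LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
	.suspend_late = suspend_fn, \
	.resume_early = resume_fn, \
	.freeze_late = suspend_fn, \
	.thaw_early = resume_fn, \
	.poweroff_late = suspend_fn, \
	.restore_early = resume_fn,
#else
#define SET_LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
#endif

so one macro covers the suspend and hibernate paths that previously needed
six explicit assignments.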
 
 static struct platform_driver dw_driver = {
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 926360c..d08c4de 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -57,14 +57,48 @@
 #define EDMA_MAX_SLOTS		MAX_NR_SG
 #define EDMA_DESCRIPTORS	16
 
+struct edma_pset {
+	u32				len;
+	dma_addr_t			addr;
+	struct edmacc_param		param;
+};
+
 struct edma_desc {
 	struct virt_dma_desc		vdesc;
 	struct list_head		node;
+	enum dma_transfer_direction	direction;
 	int				cyclic;
 	int				absync;
 	int				pset_nr;
+	struct edma_chan		*echan;
 	int				processed;
-	struct edmacc_param		pset[0];
+
+	/*
+	 * The following 4 elements are used for residue accounting.
+	 *
+	 * - processed_stat: the number of SG elements we have traversed
+	 * so far to cover accounting. This is updated directly to processed
+	 * during edma_callback and is always <= processed, because processed
+	 * refers to the number of pending transfers (programmed to the EDMA
+	 * controller), whereas processed_stat tracks the number of transfers
+	 * accounted for so far.
+	 *
+	 * - residue: The number of bytes we have left to transfer for this desc
+	 *
+	 * - residue_stat: The residue in bytes of data we have covered
+	 * so far for accounting. This is updated directly to residue
+	 * during callbacks to keep it current.
+	 *
+	 * - sg_len: Tracks the length of the current intermediate transfer;
+	 * this is required to update the residue during the intermediate
+	 * transfer completion callback.
+	 */
+	int				processed_stat;
+	u32				sg_len;
+	u32				residue;
+	u32				residue_stat;
+
+	struct edma_pset		pset[0];
 };
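
A stand-alone model of this bookkeeping with made-up sizes (not driver
code): three SG elements of 4096, 8192 and 2048 bytes give an initial
residue of 14336; an intermediate completion covering the first two
elements subtracts sg_len and syncs residue_stat.

#include <stdio.h>

int main(void)
{
	unsigned int sg[] = { 4096, 8192, 2048 };
	unsigned int residue = 0, residue_stat, sg_len;
	int i;

	for (i = 0; i < 3; i++)
		residue += sg[i];	/* 14336 at prep time */
	residue_stat = residue;

	sg_len = sg[0] + sg[1];		/* length of the intermediate set */
	residue -= sg_len;		/* done in the completion callback */
	residue_stat = residue;

	printf("bytes left: %u\n", residue);	/* prints 2048 */
	return 0;
}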
 
 struct edma_cc;
@@ -136,12 +170,14 @@
 	/* Find out how many left */
 	left = edesc->pset_nr - edesc->processed;
 	nslots = min(MAX_NR_SG, left);
+	edesc->sg_len = 0;
 
 	/* Write descriptor PaRAM set(s) */
 	for (i = 0; i < nslots; i++) {
 		j = i + edesc->processed;
-		edma_write_slot(echan->slot[i], &edesc->pset[j]);
-		dev_dbg(echan->vchan.chan.device->dev,
+		edma_write_slot(echan->slot[i], &edesc->pset[j].param);
+		edesc->sg_len += edesc->pset[j].len;
+		dev_vdbg(echan->vchan.chan.device->dev,
 			"\n pset[%d]:\n"
 			"  chnum\t%d\n"
 			"  slot\t%d\n"
@@ -154,14 +190,14 @@
 			"  cidx\t%08x\n"
 			"  lkrld\t%08x\n",
 			j, echan->ch_num, echan->slot[i],
-			edesc->pset[j].opt,
-			edesc->pset[j].src,
-			edesc->pset[j].dst,
-			edesc->pset[j].a_b_cnt,
-			edesc->pset[j].ccnt,
-			edesc->pset[j].src_dst_bidx,
-			edesc->pset[j].src_dst_cidx,
-			edesc->pset[j].link_bcntrld);
+			edesc->pset[j].param.opt,
+			edesc->pset[j].param.src,
+			edesc->pset[j].param.dst,
+			edesc->pset[j].param.a_b_cnt,
+			edesc->pset[j].param.ccnt,
+			edesc->pset[j].param.src_dst_bidx,
+			edesc->pset[j].param.src_dst_cidx,
+			edesc->pset[j].param.link_bcntrld);
 		/* Link to the previous slot if not the last set */
 		if (i != (nslots - 1))
 			edma_link(echan->slot[i], echan->slot[i+1]);
@@ -183,7 +219,8 @@
 	}
 
 	if (edesc->processed <= MAX_NR_SG) {
-		dev_dbg(dev, "first transfer starting %d\n", echan->ch_num);
+		dev_dbg(dev, "first transfer starting on channel %d\n",
+			echan->ch_num);
 		edma_start(echan->ch_num);
 	} else {
 		dev_dbg(dev, "chan: %d: completed %d elements, resuming\n",
@@ -197,7 +234,7 @@
 	 * MAX_NR_SG
 	 */
 	if (echan->missed) {
-		dev_dbg(dev, "missed event in execute detected\n");
+		dev_dbg(dev, "missed event on channel %d\n", echan->ch_num);
 		edma_clean_channel(echan->ch_num);
 		edma_stop(echan->ch_num);
 		edma_start(echan->ch_num);
@@ -242,6 +279,26 @@
 	return 0;
 }
 
+static int edma_dma_pause(struct edma_chan *echan)
+{
+	/* Pause/Resume only allowed with cyclic mode */
+	if (!echan->edesc->cyclic)
+		return -EINVAL;
+
+	edma_pause(echan->ch_num);
+	return 0;
+}
+
+static int edma_dma_resume(struct edma_chan *echan)
+{
+	/* Pause/Resume only allowed with cyclic mode */
+	if (!echan->edesc->cyclic)
+		return -EINVAL;
+
+	edma_resume(echan->ch_num);
+	return 0;
+}
+
 static int edma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 			unsigned long arg)
 {
@@ -257,6 +314,14 @@
 		config = (struct dma_slave_config *)arg;
 		ret = edma_slave_config(echan, config);
 		break;
+	case DMA_PAUSE:
+		ret = edma_dma_pause(echan);
+		break;
+
+	case DMA_RESUME:
+		ret = edma_dma_resume(echan);
+		break;
+
 	default:
 		ret = -ENOSYS;
 	}
@@ -275,18 +340,23 @@
  * @dma_length: Total length of the DMA transfer
  * @direction: Direction of the transfer
  */
-static int edma_config_pset(struct dma_chan *chan, struct edmacc_param *pset,
+static int edma_config_pset(struct dma_chan *chan, struct edma_pset *epset,
 	dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst,
 	enum dma_slave_buswidth dev_width, unsigned int dma_length,
 	enum dma_transfer_direction direction)
 {
 	struct edma_chan *echan = to_edma_chan(chan);
 	struct device *dev = chan->device->dev;
+	struct edmacc_param *param = &epset->param;
 	int acnt, bcnt, ccnt, cidx;
 	int src_bidx, dst_bidx, src_cidx, dst_cidx;
 	int absync;
 
 	acnt = dev_width;
+
+	/* src/dst_maxburst == 0 is the same case as src/dst_maxburst == 1 */
+	if (!burst)
+		burst = 1;
 	/*
 	 * If the maxburst is equal to the fifo width, use
 	 * A-synced transfers. This allows for large contiguous
@@ -337,41 +407,50 @@
 		cidx = acnt * bcnt;
 	}
 
+	epset->len = dma_length;
+
 	if (direction == DMA_MEM_TO_DEV) {
 		src_bidx = acnt;
 		src_cidx = cidx;
 		dst_bidx = 0;
 		dst_cidx = 0;
+		epset->addr = src_addr;
 	} else if (direction == DMA_DEV_TO_MEM)  {
 		src_bidx = 0;
 		src_cidx = 0;
 		dst_bidx = acnt;
 		dst_cidx = cidx;
+		epset->addr = dst_addr;
+	} else if (direction == DMA_MEM_TO_MEM)  {
+		src_bidx = acnt;
+		src_cidx = cidx;
+		dst_bidx = acnt;
+		dst_cidx = cidx;
 	} else {
 		dev_err(dev, "%s: direction not implemented yet\n", __func__);
 		return -EINVAL;
 	}
 
-	pset->opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
+	param->opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
 	/* Configure A or AB synchronized transfers */
 	if (absync)
-		pset->opt |= SYNCDIM;
+		param->opt |= SYNCDIM;
 
-	pset->src = src_addr;
-	pset->dst = dst_addr;
+	param->src = src_addr;
+	param->dst = dst_addr;
 
-	pset->src_dst_bidx = (dst_bidx << 16) | src_bidx;
-	pset->src_dst_cidx = (dst_cidx << 16) | src_cidx;
+	param->src_dst_bidx = (dst_bidx << 16) | src_bidx;
+	param->src_dst_cidx = (dst_cidx << 16) | src_cidx;
 
-	pset->a_b_cnt = bcnt << 16 | acnt;
-	pset->ccnt = ccnt;
+	param->a_b_cnt = bcnt << 16 | acnt;
+	param->ccnt = ccnt;
 	/*
 	 * Only time when (bcntrld) auto reload is required is for
 	 * A-sync case, and in this case, a requirement of reload value
 	 * of SZ_64K-1 only is assured. 'link' is initially set to NULL
 	 * and then later will be populated by edma_execute.
 	 */
-	pset->link_bcntrld = 0xffffffff;
+	param->link_bcntrld = 0xffffffff;
 	return absync;
 }
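
To make the counter split concrete, here is a small model of the AB-synced
arithmetic with made-up numbers (the driver additionally handles A-synced
splits and lengths that do not divide evenly):

#include <stdio.h>

int main(void)
{
	unsigned int dev_width = 4;	/* 32-bit FIFO, so acnt = 4 */
	unsigned int burst = 16;	/* words moved per DMA event */
	unsigned int length = 65536;	/* total transfer in bytes */

	unsigned int acnt = dev_width;
	unsigned int bcnt = burst;
	unsigned int ccnt = length / (acnt * bcnt);	/* 1024 events */
	unsigned int cidx = acnt * bcnt;	/* per-event address stride */

	printf("acnt=%u bcnt=%u ccnt=%u cidx=%u\n", acnt, bcnt, ccnt, cidx);
	return 0;
}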
 
@@ -401,23 +480,26 @@
 		dev_width = echan->cfg.dst_addr_width;
 		burst = echan->cfg.dst_maxburst;
 	} else {
-		dev_err(dev, "%s: bad direction?\n", __func__);
+		dev_err(dev, "%s: bad direction: %d\n", __func__, direction);
 		return NULL;
 	}
 
 	if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
-		dev_err(dev, "Undefined slave buswidth\n");
+		dev_err(dev, "%s: Undefined slave buswidth\n", __func__);
 		return NULL;
 	}
 
 	edesc = kzalloc(sizeof(*edesc) + sg_len *
 		sizeof(edesc->pset[0]), GFP_ATOMIC);
 	if (!edesc) {
-		dev_dbg(dev, "Failed to allocate a descriptor\n");
+		dev_err(dev, "%s: Failed to allocate a descriptor\n", __func__);
 		return NULL;
 	}
 
 	edesc->pset_nr = sg_len;
+	edesc->residue = 0;
+	edesc->direction = direction;
+	edesc->echan = echan;
 
 	/* Allocate a PaRAM slot, if needed */
 	nslots = min_t(unsigned, MAX_NR_SG, sg_len);
@@ -429,7 +511,8 @@
 						EDMA_SLOT_ANY);
 			if (echan->slot[i] < 0) {
 				kfree(edesc);
-				dev_err(dev, "Failed to allocate slot\n");
+				dev_err(dev, "%s: Failed to allocate slot\n",
+					__func__);
 				return NULL;
 			}
 		}
@@ -452,16 +535,56 @@
 		}
 
 		edesc->absync = ret;
+		edesc->residue += sg_dma_len(sg);
 
 		/* If this is the last in a current SG set of transactions,
 		   enable interrupts so that next set is processed */
 		if (!((i+1) % MAX_NR_SG))
-			edesc->pset[i].opt |= TCINTEN;
+			edesc->pset[i].param.opt |= TCINTEN;
 
 		/* If this is the last set, enable completion interrupt flag */
 		if (i == sg_len - 1)
-			edesc->pset[i].opt |= TCINTEN;
+			edesc->pset[i].param.opt |= TCINTEN;
 	}
+	edesc->residue_stat = edesc->residue;
+
+	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
+
+struct dma_async_tx_descriptor *edma_prep_dma_memcpy(
+	struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+	size_t len, unsigned long tx_flags)
+{
+	int ret;
+	struct edma_desc *edesc;
+	struct device *dev = chan->device->dev;
+	struct edma_chan *echan = to_edma_chan(chan);
+
+	if (unlikely(!echan || !len))
+		return NULL;
+
+	edesc = kzalloc(sizeof(*edesc) + sizeof(edesc->pset[0]), GFP_ATOMIC);
+	if (!edesc) {
+		dev_dbg(dev, "Failed to allocate a descriptor\n");
+		return NULL;
+	}
+
+	edesc->pset_nr = 1;
+
+	ret = edma_config_pset(chan, &edesc->pset[0], src, dest, 1,
+			       DMA_SLAVE_BUSWIDTH_4_BYTES, len, DMA_MEM_TO_MEM);
+	if (ret < 0)
+		return NULL;
+
+	edesc->absync = ret;
+
+	/*
+	 * Enable intermediate transfer chaining to re-trigger channel
+	 * on completion of every TR, and enable transfer-completion
+	 * interrupt on completion of the whole transfer.
+	 */
+	edesc->pset[0].param.opt |= ITCCHEN;
+	edesc->pset[0].param.opt |= TCINTEN;
 
 	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
 }
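
A hypothetical consumer of the new memcpy capability through the generic
dmaengine API (error unwinding and channel release are trimmed for brevity,
and the 4-byte length alignment implied by copy_align is assumed to hold):

static int edma_memcpy_example(dma_addr_t dst, dma_addr_t src, size_t len)
{
	dma_cap_mask_t mask;
	struct dma_chan *chan;
	struct dma_async_tx_descriptor *tx;

	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMCPY, mask);

	/* any memcpy-capable channel will do for this sketch */
	chan = dma_request_channel(mask, NULL, NULL);
	if (!chan)
		return -ENODEV;

	tx = chan->device->device_prep_dma_memcpy(chan, dst, src, len,
						   DMA_PREP_INTERRUPT);
	if (!tx)
		return -ENOMEM;

	dmaengine_submit(tx);
	dma_async_issue_pending(chan);
	return 0;
}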
@@ -493,12 +616,12 @@
 		dev_width = echan->cfg.dst_addr_width;
 		burst = echan->cfg.dst_maxburst;
 	} else {
-		dev_err(dev, "%s: bad direction?\n", __func__);
+		dev_err(dev, "%s: bad direction: %d\n", __func__, direction);
 		return NULL;
 	}
 
 	if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
-		dev_err(dev, "Undefined slave buswidth\n");
+		dev_err(dev, "%s: Undefined slave buswidth\n", __func__);
 		return NULL;
 	}
 
@@ -523,16 +646,18 @@
 	edesc = kzalloc(sizeof(*edesc) + nslots *
 		sizeof(edesc->pset[0]), GFP_ATOMIC);
 	if (!edesc) {
-		dev_dbg(dev, "Failed to allocate a descriptor\n");
+		dev_err(dev, "%s: Failed to allocate a descriptor\n", __func__);
 		return NULL;
 	}
 
 	edesc->cyclic = 1;
 	edesc->pset_nr = nslots;
+	edesc->residue = edesc->residue_stat = buf_len;
+	edesc->direction = direction;
+	edesc->echan = echan;
 
-	dev_dbg(dev, "%s: nslots=%d\n", __func__, nslots);
-	dev_dbg(dev, "%s: period_len=%d\n", __func__, period_len);
-	dev_dbg(dev, "%s: buf_len=%d\n", __func__, buf_len);
+	dev_dbg(dev, "%s: channel=%d nslots=%d period_len=%zu buf_len=%zu\n",
+		__func__, echan->ch_num, nslots, period_len, buf_len);
 
 	for (i = 0; i < nslots; i++) {
 		/* Allocate a PaRAM slot, if needed */
@@ -542,7 +667,8 @@
 						EDMA_SLOT_ANY);
 			if (echan->slot[i] < 0) {
 				kfree(edesc);
-				dev_err(dev, "Failed to allocate slot\n");
+				dev_err(dev, "%s: Failed to allocate slot\n",
+					__func__);
 				return NULL;
 			}
 		}
@@ -566,8 +692,8 @@
 		else
 			src_addr += period_len;
 
-		dev_dbg(dev, "%s: Configure period %d of buf:\n", __func__, i);
-		dev_dbg(dev,
+		dev_vdbg(dev, "%s: Configure period %d of buf:\n", __func__, i);
+		dev_vdbg(dev,
 			"\n pset[%d]:\n"
 			"  chnum\t%d\n"
 			"  slot\t%d\n"
@@ -580,14 +706,14 @@
 			"  cidx\t%08x\n"
 			"  lkrld\t%08x\n",
 			i, echan->ch_num, echan->slot[i],
-			edesc->pset[i].opt,
-			edesc->pset[i].src,
-			edesc->pset[i].dst,
-			edesc->pset[i].a_b_cnt,
-			edesc->pset[i].ccnt,
-			edesc->pset[i].src_dst_bidx,
-			edesc->pset[i].src_dst_cidx,
-			edesc->pset[i].link_bcntrld);
+			edesc->pset[i].param.opt,
+			edesc->pset[i].param.src,
+			edesc->pset[i].param.dst,
+			edesc->pset[i].param.a_b_cnt,
+			edesc->pset[i].param.ccnt,
+			edesc->pset[i].param.src_dst_bidx,
+			edesc->pset[i].param.src_dst_cidx,
+			edesc->pset[i].param.link_bcntrld);
 
 		edesc->absync = ret;
 
@@ -595,7 +721,7 @@
 		 * Enable interrupts for every period because callback
 		 * has to be called for every period.
 		 */
-		edesc->pset[i].opt |= TCINTEN;
+		edesc->pset[i].param.opt |= TCINTEN;
 	}
 
 	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
@@ -606,7 +732,6 @@
 	struct edma_chan *echan = data;
 	struct device *dev = echan->vchan.chan.device->dev;
 	struct edma_desc *edesc;
-	unsigned long flags;
 	struct edmacc_param p;
 
 	edesc = echan->edesc;
@@ -617,27 +742,34 @@
 
 	switch (ch_status) {
 	case EDMA_DMA_COMPLETE:
-		spin_lock_irqsave(&echan->vchan.lock, flags);
+		spin_lock(&echan->vchan.lock);
 
 		if (edesc) {
 			if (edesc->cyclic) {
 				vchan_cyclic_callback(&edesc->vdesc);
 			} else if (edesc->processed == edesc->pset_nr) {
 				dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num);
+				edesc->residue = 0;
 				edma_stop(echan->ch_num);
 				vchan_cookie_complete(&edesc->vdesc);
 				edma_execute(echan);
 			} else {
 				dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num);
+
+				/* Update statistics for tx_status */
+				edesc->residue -= edesc->sg_len;
+				edesc->residue_stat = edesc->residue;
+				edesc->processed_stat = edesc->processed;
+
 				edma_execute(echan);
 			}
 		}
 
-		spin_unlock_irqrestore(&echan->vchan.lock, flags);
+		spin_unlock(&echan->vchan.lock);
 
 		break;
 	case EDMA_DMA_CC_ERROR:
-		spin_lock_irqsave(&echan->vchan.lock, flags);
+		spin_lock(&echan->vchan.lock);
 
 		edma_read_slot(EDMA_CHAN_SLOT(echan->slot[0]), &p);
 
@@ -668,7 +800,7 @@
 			edma_trigger_channel(echan->ch_num);
 		}
 
-		spin_unlock_irqrestore(&echan->vchan.lock, flags);
+		spin_unlock(&echan->vchan.lock);
 
 		break;
 	default:
@@ -704,7 +836,7 @@
 	echan->alloced = true;
 	echan->slot[0] = echan->ch_num;
 
-	dev_dbg(dev, "allocated channel for %u:%u\n",
+	dev_dbg(dev, "allocated channel %d for %u:%u\n", echan->ch_num,
 		EDMA_CTLR(echan->ch_num), EDMA_CHAN_SLOT(echan->ch_num));
 
 	return 0;
@@ -756,23 +888,52 @@
 	spin_unlock_irqrestore(&echan->vchan.lock, flags);
 }
 
-static size_t edma_desc_size(struct edma_desc *edesc)
+static u32 edma_residue(struct edma_desc *edesc)
 {
+	bool dst = edesc->direction == DMA_DEV_TO_MEM;
+	struct edma_pset *pset = edesc->pset;
+	dma_addr_t done, pos;
 	int i;
-	size_t size;
 
-	if (edesc->absync)
-		for (size = i = 0; i < edesc->pset_nr; i++)
-			size += (edesc->pset[i].a_b_cnt & 0xffff) *
-				(edesc->pset[i].a_b_cnt >> 16) *
-				 edesc->pset[i].ccnt;
-	else
-		size = (edesc->pset[0].a_b_cnt & 0xffff) *
-			(edesc->pset[0].a_b_cnt >> 16) +
-			(edesc->pset[0].a_b_cnt & 0xffff) *
-			(SZ_64K - 1) * edesc->pset[0].ccnt;
+	/*
+	 * We always read the dst/src position from the first PaRAM
+	 * pset. That's the one which is active now.
+	 */
+	pos = edma_get_position(edesc->echan->slot[0], dst);
 
-	return size;
+	/*
+	 * Cyclic is simple. Just subtract pset[0].addr from pos.
+	 *
+	 * We never update edesc->residue in the cyclic case, so we
+	 * can report the remaining room to the end of the circular
+	 * buffer.
+	 */
+	if (edesc->cyclic) {
+		done = pos - pset->addr;
+		edesc->residue_stat = edesc->residue - done;
+		return edesc->residue_stat;
+	}
+
+	/*
+	 * For SG operation we catch up with the last processed
+	 * status.
+	 */
+	pset += edesc->processed_stat;
+
+	for (i = edesc->processed_stat; i < edesc->processed; i++, pset++) {
+		/*
+		 * If we are inside this pset address range, we know
+		 * this is the active one. Get the current delta and
+		 * stop walking the psets.
+		 */
+		if (pos >= pset->addr && pos < pset->addr + pset->len)
+			return edesc->residue_stat - (pos - pset->addr);
+
+		/* Otherwise mark it done and update residue_stat. */
+		edesc->processed_stat++;
+		edesc->residue_stat -= pset->len;
+	}
+	return edesc->residue_stat;
 }
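
The walk is easier to follow with numbers. A stand-alone model with
made-up segments (not driver code):

#include <stdio.h>

struct seg { unsigned int addr, len; };

static unsigned int residue_walk(const struct seg *s, int nr,
				 unsigned int pos, unsigned int residue_stat)
{
	int i;

	for (i = 0; i < nr; i++, s++) {
		/* pos inside this segment: it is the active one */
		if (pos >= s->addr && pos < s->addr + s->len)
			return residue_stat - (pos - s->addr);
		residue_stat -= s->len;	/* segment fully retired */
	}
	return residue_stat;
}

int main(void)
{
	struct seg segs[] = { { 0x1000, 512 }, { 0x2000, 512 } };

	/* 128 bytes into the second segment: 1024 - 512 - 128 = 384 left */
	printf("%u\n", residue_walk(segs, 2, 0x2080, 1024));
	return 0;
}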
 
 /* Check request completion status */
@@ -790,13 +951,10 @@
 		return ret;
 
 	spin_lock_irqsave(&echan->vchan.lock, flags);
-	vdesc = vchan_find_desc(&echan->vchan, cookie);
-	if (vdesc) {
-		txstate->residue = edma_desc_size(to_edma_desc(&vdesc->tx));
-	} else if (echan->edesc && echan->edesc->vdesc.tx.cookie == cookie) {
-		struct edma_desc *edesc = echan->edesc;
-		txstate->residue = edma_desc_size(edesc);
-	}
+	if (echan->edesc && echan->edesc->vdesc.tx.cookie == cookie)
+		txstate->residue = edma_residue(echan->edesc);
+	else if ((vdesc = vchan_find_desc(&echan->vchan, cookie)))
+		txstate->residue = to_edma_desc(&vdesc->tx)->residue;
 	spin_unlock_irqrestore(&echan->vchan.lock, flags);
 
 	return ret;
@@ -822,18 +980,43 @@
 	}
 }
 
+#define EDMA_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+static int edma_dma_device_slave_caps(struct dma_chan *dchan,
+				      struct dma_slave_caps *caps)
+{
+	caps->src_addr_widths = EDMA_DMA_BUSWIDTHS;
+	caps->dstn_addr_widths = EDMA_DMA_BUSWIDTHS;
+	caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	caps->cmd_pause = true;
+	caps->cmd_terminate = true;
+	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+	return 0;
+}
+
 static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma,
 			  struct device *dev)
 {
 	dma->device_prep_slave_sg = edma_prep_slave_sg;
 	dma->device_prep_dma_cyclic = edma_prep_dma_cyclic;
+	dma->device_prep_dma_memcpy = edma_prep_dma_memcpy;
 	dma->device_alloc_chan_resources = edma_alloc_chan_resources;
 	dma->device_free_chan_resources = edma_free_chan_resources;
 	dma->device_issue_pending = edma_issue_pending;
 	dma->device_tx_status = edma_tx_status;
 	dma->device_control = edma_control;
+	dma->device_slave_caps = edma_dma_device_slave_caps;
 	dma->dev = dev;
 
+	/*
+	 * Code using DMA memcpy must make sure the transfer length is
+	 * aligned on a dma->copy_align boundary.
+	 */
+	dma->copy_align = DMA_SLAVE_BUSWIDTH_4_BYTES;
+
 	INIT_LIST_HEAD(&dma->channels);
 }
 
@@ -861,6 +1044,8 @@
 
 	dma_cap_zero(ecc->dma_slave.cap_mask);
 	dma_cap_set(DMA_SLAVE, ecc->dma_slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, ecc->dma_slave.cap_mask);
+	dma_cap_set(DMA_MEMCPY, ecc->dma_slave.cap_mask);
 
 	edma_dma_init(ecc, &ecc->dma_slave, &pdev->dev);
 
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index f157c6f..e0fec68 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -61,6 +61,16 @@
 	return DMA_IN(chan, &chan->regs->sr, 32);
 }
 
+static void set_mr(struct fsldma_chan *chan, u32 val)
+{
+	DMA_OUT(chan, &chan->regs->mr, val, 32);
+}
+
+static u32 get_mr(struct fsldma_chan *chan)
+{
+	return DMA_IN(chan, &chan->regs->mr, 32);
+}
+
 static void set_cdar(struct fsldma_chan *chan, dma_addr_t addr)
 {
 	DMA_OUT(chan, &chan->regs->cdar, addr | FSL_DMA_SNEN, 64);
@@ -71,6 +81,11 @@
 	return DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN;
 }
 
+static void set_bcr(struct fsldma_chan *chan, u32 val)
+{
+	DMA_OUT(chan, &chan->regs->bcr, val, 32);
+}
+
 static u32 get_bcr(struct fsldma_chan *chan)
 {
 	return DMA_IN(chan, &chan->regs->bcr, 32);
@@ -135,7 +150,7 @@
 static void dma_init(struct fsldma_chan *chan)
 {
 	/* Reset the channel */
-	DMA_OUT(chan, &chan->regs->mr, 0, 32);
+	set_mr(chan, 0);
 
 	switch (chan->feature & FSL_DMA_IP_MASK) {
 	case FSL_DMA_IP_85XX:
@@ -144,16 +159,15 @@
 		 * EOLNIE - End of links interrupt enable
 		 * BWC - Bandwidth sharing among channels
 		 */
-		DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_BWC
-				| FSL_DMA_MR_EIE | FSL_DMA_MR_EOLNIE, 32);
+		set_mr(chan, FSL_DMA_MR_BWC | FSL_DMA_MR_EIE
+			| FSL_DMA_MR_EOLNIE);
 		break;
 	case FSL_DMA_IP_83XX:
 		/* Set the channel to below modes:
 		 * EOTIE - End-of-transfer interrupt enable
 		 * PRC_RM - PCI read multiple
 		 */
-		DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_EOTIE
-				| FSL_DMA_MR_PRC_RM, 32);
+		set_mr(chan, FSL_DMA_MR_EOTIE | FSL_DMA_MR_PRC_RM);
 		break;
 	}
 }
@@ -175,10 +189,10 @@
 {
 	u32 mode;
 
-	mode = DMA_IN(chan, &chan->regs->mr, 32);
+	mode = get_mr(chan);
 
 	if (chan->feature & FSL_DMA_CHAN_PAUSE_EXT) {
-		DMA_OUT(chan, &chan->regs->bcr, 0, 32);
+		set_bcr(chan, 0);
 		mode |= FSL_DMA_MR_EMP_EN;
 	} else {
 		mode &= ~FSL_DMA_MR_EMP_EN;
@@ -191,7 +205,7 @@
 		mode |= FSL_DMA_MR_CS;
 	}
 
-	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+	set_mr(chan, mode);
 }
 
 static void dma_halt(struct fsldma_chan *chan)
@@ -200,7 +214,7 @@
 	int i;
 
 	/* read the mode register */
-	mode = DMA_IN(chan, &chan->regs->mr, 32);
+	mode = get_mr(chan);
 
 	/*
 	 * The 85xx controller supports channel abort, which will stop
@@ -209,14 +223,14 @@
 	 */
 	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
 		mode |= FSL_DMA_MR_CA;
-		DMA_OUT(chan, &chan->regs->mr, mode, 32);
+		set_mr(chan, mode);
 
 		mode &= ~FSL_DMA_MR_CA;
 	}
 
 	/* stop the DMA controller */
 	mode &= ~(FSL_DMA_MR_CS | FSL_DMA_MR_EMS_EN);
-	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+	set_mr(chan, mode);
 
 	/* wait for the DMA controller to become idle */
 	for (i = 0; i < 100; i++) {
@@ -245,7 +259,7 @@
 {
 	u32 mode;
 
-	mode = DMA_IN(chan, &chan->regs->mr, 32);
+	mode = get_mr(chan);
 
 	switch (size) {
 	case 0:
@@ -259,7 +273,7 @@
 		break;
 	}
 
-	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+	set_mr(chan, mode);
 }
 
 /**
@@ -277,7 +291,7 @@
 {
 	u32 mode;
 
-	mode = DMA_IN(chan, &chan->regs->mr, 32);
+	mode = get_mr(chan);
 
 	switch (size) {
 	case 0:
@@ -291,7 +305,7 @@
 		break;
 	}
 
-	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+	set_mr(chan, mode);
 }
 
 /**
@@ -312,10 +326,10 @@
 
 	BUG_ON(size > 1024);
 
-	mode = DMA_IN(chan, &chan->regs->mr, 32);
+	mode = get_mr(chan);
 	mode |= (__ilog2(size) << 24) & 0x0f000000;
 
-	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+	set_mr(chan, mode);
 }
 
 /**
@@ -404,6 +418,19 @@
 }
 
 /**
+ * fsl_dma_free_descriptor - Free descriptor from channel's DMA pool.
+ * @chan : Freescale DMA channel
+ * @desc: descriptor to be freed
+ */
+static void fsl_dma_free_descriptor(struct fsldma_chan *chan,
+		struct fsl_desc_sw *desc)
+{
+	list_del(&desc->node);
+	chan_dbg(chan, "LD %p free\n", desc);
+	dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+}
+
+/**
  * fsl_dma_alloc_descriptor - Allocate descriptor from channel's DMA pool.
  * @chan : Freescale DMA channel
  *
@@ -426,14 +453,107 @@
 	desc->async_tx.tx_submit = fsl_dma_tx_submit;
 	desc->async_tx.phys = pdesc;
 
-#ifdef FSL_DMA_LD_DEBUG
 	chan_dbg(chan, "LD %p allocated\n", desc);
-#endif
 
 	return desc;
 }
 
 /**
+ * fsl_chan_xfer_ld_queue - transfer any pending transactions
+ * @chan : Freescale DMA channel
+ *
+ * HARDWARE STATE: idle
+ * LOCKING: must hold chan->desc_lock
+ */
+static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc;
+
+	/*
+	 * If the list of pending descriptors is empty, then we
+	 * don't need to do any work at all
+	 */
+	if (list_empty(&chan->ld_pending)) {
+		chan_dbg(chan, "no pending LDs\n");
+		return;
+	}
+
+	/*
+	 * The DMA controller is not idle, which means that the interrupt
+	 * handler will start any queued transactions when it runs after
+	 * this transaction finishes
+	 */
+	if (!chan->idle) {
+		chan_dbg(chan, "DMA controller still busy\n");
+		return;
+	}
+
+	/*
+	 * If there are some link descriptors which have not been
+	 * transferred, we need to start the controller
+	 */
+
+	/*
+	 * Move all elements from the queue of pending transactions
+	 * onto the list of running transactions
+	 */
+	chan_dbg(chan, "idle, starting controller\n");
+	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
+	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
+
+	/*
+	 * The 85xx DMA controller doesn't clear the channel start bit
+	 * automatically at the end of a transfer. Therefore we must clear
+	 * it in software before starting the transfer.
+	 */
+	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+		u32 mode;
+
+		mode = get_mr(chan);
+		mode &= ~FSL_DMA_MR_CS;
+		set_mr(chan, mode);
+	}
+
+	/*
+	 * Program the descriptor's address into the DMA controller,
+	 * then start the DMA transaction
+	 */
+	set_cdar(chan, desc->async_tx.phys);
+	get_cdar(chan);
+
+	dma_start(chan);
+	chan->idle = false;
+}
+
+/**
+ * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
+ * @chan: Freescale DMA channel
+ * @desc: descriptor to cleanup and free
+ *
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, and then
+ * free the descriptor.
+ */
+static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
+				      struct fsl_desc_sw *desc)
+{
+	struct dma_async_tx_descriptor *txd = &desc->async_tx;
+
+	/* Run the link descriptor callback function */
+	if (txd->callback) {
+		chan_dbg(chan, "LD %p callback\n", desc);
+		txd->callback(txd->callback_param);
+	}
+
+	/* Run any dependencies */
+	dma_run_dependencies(txd);
+
+	dma_descriptor_unmap(txd);
+	chan_dbg(chan, "LD %p free\n", desc);
+	dma_pool_free(chan->desc_pool, desc, txd->phys);
+}
+
+/**
  * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel.
  * @chan : Freescale DMA channel
  *
@@ -477,13 +597,8 @@
 {
 	struct fsl_desc_sw *desc, *_desc;
 
-	list_for_each_entry_safe(desc, _desc, list, node) {
-		list_del(&desc->node);
-#ifdef FSL_DMA_LD_DEBUG
-		chan_dbg(chan, "LD %p free\n", desc);
-#endif
-		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
-	}
+	list_for_each_entry_safe(desc, _desc, list, node)
+		fsl_dma_free_descriptor(chan, desc);
 }
 
 static void fsldma_free_desc_list_reverse(struct fsldma_chan *chan,
@@ -491,13 +606,8 @@
 {
 	struct fsl_desc_sw *desc, *_desc;
 
-	list_for_each_entry_safe_reverse(desc, _desc, list, node) {
-		list_del(&desc->node);
-#ifdef FSL_DMA_LD_DEBUG
-		chan_dbg(chan, "LD %p free\n", desc);
-#endif
-		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
-	}
+	list_for_each_entry_safe_reverse(desc, _desc, list, node)
+		fsl_dma_free_descriptor(chan, desc);
 }
 
 /**
@@ -520,35 +630,6 @@
 }
 
 static struct dma_async_tx_descriptor *
-fsl_dma_prep_interrupt(struct dma_chan *dchan, unsigned long flags)
-{
-	struct fsldma_chan *chan;
-	struct fsl_desc_sw *new;
-
-	if (!dchan)
-		return NULL;
-
-	chan = to_fsl_chan(dchan);
-
-	new = fsl_dma_alloc_descriptor(chan);
-	if (!new) {
-		chan_err(chan, "%s\n", msg_ld_oom);
-		return NULL;
-	}
-
-	new->async_tx.cookie = -EBUSY;
-	new->async_tx.flags = flags;
-
-	/* Insert the link descriptor to the LD ring */
-	list_add_tail(&new->node, &new->tx_list);
-
-	/* Set End-of-link to the last link descriptor of new list */
-	set_ld_eol(chan, new);
-
-	return &new->async_tx;
-}
-
-static struct dma_async_tx_descriptor *
 fsl_dma_prep_memcpy(struct dma_chan *dchan,
 	dma_addr_t dma_dst, dma_addr_t dma_src,
 	size_t len, unsigned long flags)
@@ -817,105 +898,6 @@
 }
 
 /**
- * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
- * @chan: Freescale DMA channel
- * @desc: descriptor to cleanup and free
- *
- * This function is used on a descriptor which has been executed by the DMA
- * controller. It will run any callbacks, submit any dependencies, and then
- * free the descriptor.
- */
-static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
-				      struct fsl_desc_sw *desc)
-{
-	struct dma_async_tx_descriptor *txd = &desc->async_tx;
-
-	/* Run the link descriptor callback function */
-	if (txd->callback) {
-#ifdef FSL_DMA_LD_DEBUG
-		chan_dbg(chan, "LD %p callback\n", desc);
-#endif
-		txd->callback(txd->callback_param);
-	}
-
-	/* Run any dependencies */
-	dma_run_dependencies(txd);
-
-	dma_descriptor_unmap(txd);
-#ifdef FSL_DMA_LD_DEBUG
-	chan_dbg(chan, "LD %p free\n", desc);
-#endif
-	dma_pool_free(chan->desc_pool, desc, txd->phys);
-}
-
-/**
- * fsl_chan_xfer_ld_queue - transfer any pending transactions
- * @chan : Freescale DMA channel
- *
- * HARDWARE STATE: idle
- * LOCKING: must hold chan->desc_lock
- */
-static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
-{
-	struct fsl_desc_sw *desc;
-
-	/*
-	 * If the list of pending descriptors is empty, then we
-	 * don't need to do any work at all
-	 */
-	if (list_empty(&chan->ld_pending)) {
-		chan_dbg(chan, "no pending LDs\n");
-		return;
-	}
-
-	/*
-	 * The DMA controller is not idle, which means that the interrupt
-	 * handler will start any queued transactions when it runs after
-	 * this transaction finishes
-	 */
-	if (!chan->idle) {
-		chan_dbg(chan, "DMA controller still busy\n");
-		return;
-	}
-
-	/*
-	 * If there are some link descriptors which have not been
-	 * transferred, we need to start the controller
-	 */
-
-	/*
-	 * Move all elements from the queue of pending transactions
-	 * onto the list of running transactions
-	 */
-	chan_dbg(chan, "idle, starting controller\n");
-	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
-	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
-
-	/*
-	 * The 85xx DMA controller doesn't clear the channel start bit
-	 * automatically at the end of a transfer. Therefore we must clear
-	 * it in software before starting the transfer.
-	 */
-	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
-		u32 mode;
-
-		mode = DMA_IN(chan, &chan->regs->mr, 32);
-		mode &= ~FSL_DMA_MR_CS;
-		DMA_OUT(chan, &chan->regs->mr, mode, 32);
-	}
-
-	/*
-	 * Program the descriptor's address into the DMA controller,
-	 * then start the DMA transaction
-	 */
-	set_cdar(chan, desc->async_tx.phys);
-	get_cdar(chan);
-
-	dma_start(chan);
-	chan->idle = false;
-}
-
-/**
  * fsl_dma_memcpy_issue_pending - Issue the DMA start command
  * @chan : Freescale DMA channel
  */
@@ -1304,12 +1286,10 @@
 	fdev->irq = irq_of_parse_and_map(op->dev.of_node, 0);
 
 	dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
-	dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
 	dma_cap_set(DMA_SG, fdev->common.cap_mask);
 	dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
 	fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
 	fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
-	fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
 	fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
 	fdev->common.device_prep_dma_sg = fsl_dma_prep_sg;
 	fdev->common.device_tx_status = fsl_tx_status;
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 19041ce..1287146 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -607,8 +607,6 @@
 
 		if (bd->mode.status & BD_RROR)
 			sdmac->status = DMA_ERROR;
-		else
-			sdmac->status = DMA_IN_PROGRESS;
 
 		bd->mode.status |= BD_DONE;
 		sdmac->buf_tail++;
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
index bf02e7b..a7b186d 100644
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c
@@ -29,8 +29,8 @@
 #define DALGN		0x00a0
 #define DINT		0x00f0
 #define DDADR		0x0200
-#define DSADR		0x0204
-#define DTADR		0x0208
+#define DSADR(n)	(0x0204 + ((n) << 4))
+#define DTADR(n)	(0x0208 + ((n) << 4))
 #define DCMD		0x020c
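
A quick check of the new per-channel arithmetic (stand-alone, not driver
code): each channel's descriptor registers sit on a 16-byte stride, so
channel n's source/target address registers land at 0x204/0x208 + n * 16.

#include <stdio.h>

#define DSADR(n)	(0x0204 + ((n) << 4))
#define DTADR(n)	(0x0208 + ((n) << 4))

int main(void)
{
	/* channel 0: 0x204/0x208; channel 2: 0x224/0x228 */
	printf("DSADR(2)=0x%x DTADR(2)=0x%x\n", DSADR(2), DTADR(2));
	return 0;
}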
 
 #define DCSR_RUN	BIT(31)	/* Run Bit (read / write) */
@@ -277,7 +277,7 @@
 		return;
 
 	/* clear the channel mapping in DRCMR */
-	reg = DRCMR(pchan->phy->vchan->drcmr);
+	reg = DRCMR(pchan->drcmr);
 	writel(0, pchan->phy->base + reg);
 
 	spin_lock_irqsave(&pdev->phy_lock, flags);
@@ -748,11 +748,92 @@
 	return 0;
 }
 
+static unsigned int mmp_pdma_residue(struct mmp_pdma_chan *chan,
+				     dma_cookie_t cookie)
+{
+	struct mmp_pdma_desc_sw *sw;
+	u32 curr, residue = 0;
+	bool passed = false;
+	bool cyclic = chan->cyclic_first != NULL;
+
+	/*
+	 * If the channel does not have a phy pointer anymore, it has already
+	 * been completed. Therefore, its residue is 0.
+	 */
+	if (!chan->phy)
+		return 0;
+
+	if (chan->dir == DMA_DEV_TO_MEM)
+		curr = readl(chan->phy->base + DTADR(chan->phy->idx));
+	else
+		curr = readl(chan->phy->base + DSADR(chan->phy->idx));
+
+	list_for_each_entry(sw, &chan->chain_running, node) {
+		u32 start, end, len;
+
+		if (chan->dir == DMA_DEV_TO_MEM)
+			start = sw->desc.dtadr;
+		else
+			start = sw->desc.dsadr;
+
+		len = sw->desc.dcmd & DCMD_LENGTH;
+		end = start + len;
+
+		/*
+		 * 'passed' will be latched once we find the descriptor
+		 * whose boundaries contain the curr pointer. All
+		 * descriptors that occur in the list _after_ that
+		 * partially handled descriptor are still to be processed and
+		 * are hence added to the residual bytes counter.
+		 */
+
+		if (passed) {
+			residue += len;
+		} else if (curr >= start && curr <= end) {
+			residue += end - curr;
+			passed = true;
+		}
+
+		/*
+		 * Descriptors that have the ENDIRQEN bit set mark the end of a
+		 * transaction chain, and the cookie assigned with it has been
+		 * returned previously from mmp_pdma_tx_submit().
+		 *
+		 * In case we have multiple transactions in the running chain,
+		 * and the cookie does not match the one the user asked us
+		 * about, reset the state variables and start over.
+		 *
+		 * This logic does not apply to cyclic transactions, where all
+		 * descriptors have the ENDIRQEN bit set, and for which we
+		 * can't have multiple transactions on one channel anyway.
+		 */
+		if (cyclic || !(sw->desc.dcmd & DCMD_ENDIRQEN))
+			continue;
+
+		if (sw->async_tx.cookie == cookie) {
+			return residue;
+		} else {
+			residue = 0;
+			passed = false;
+		}
+	}
+
+	/* We should only get here in case of cyclic transactions */
+	return residue;
+}
+
 static enum dma_status mmp_pdma_tx_status(struct dma_chan *dchan,
 					  dma_cookie_t cookie,
 					  struct dma_tx_state *txstate)
 {
-	return dma_cookie_status(dchan, cookie, txstate);
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	if (likely(ret != DMA_ERROR))
+		dma_set_residue(txstate, mmp_pdma_residue(chan, cookie));
+
+	return ret;
 }
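
A hypothetical client querying the residue now reported here, via the
generic status helper (the function name is made up; on DMA_ERROR the
residue is not meaningful):

static unsigned int example_bytes_left(struct dma_chan *chan,
				       dma_cookie_t cookie)
{
	struct dma_tx_state state;

	if (dmaengine_tx_status(chan, cookie, &state) == DMA_ERROR)
		return 0;

	return state.residue;
}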
 
 /**
@@ -858,8 +939,7 @@
 	struct mmp_pdma_chan *chan;
 	int ret;
 
-	chan = devm_kzalloc(pdev->dev, sizeof(struct mmp_pdma_chan),
-			    GFP_KERNEL);
+	chan = devm_kzalloc(pdev->dev, sizeof(*chan), GFP_KERNEL);
 	if (chan == NULL)
 		return -ENOMEM;
 
@@ -946,8 +1026,7 @@
 			irq_num++;
 	}
 
-	pdev->phy = devm_kcalloc(pdev->dev,
-				 dma_channels, sizeof(struct mmp_pdma_chan),
+	pdev->phy = devm_kcalloc(pdev->dev, dma_channels, sizeof(*pdev->phy),
 				 GFP_KERNEL);
 	if (pdev->phy == NULL)
 		return -ENOMEM;
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 448750d..2ad4373 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -2,6 +2,7 @@
  * Copyright (C) Freescale Semicondutor, Inc. 2007, 2008.
  * Copyright (C) Semihalf 2009
  * Copyright (C) Ilya Yanok, Emcraft Systems 2010
+ * Copyright (C) Alexander Popov, Promcontroller 2014
  *
  * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description
  * (defines, structures and comments) was taken from MPC5121 DMA driver
@@ -29,8 +30,18 @@
  */
 
 /*
- * This is initial version of MPC5121 DMA driver. Only memory to memory
- * transfers are supported (tested using dmatest module).
+ * MPC512x and MPC8308 DMA driver. It supports
+ * memory to memory data transfers (tested using dmatest module) and
+ * data transfers between memory and peripheral I/O memory
+ * by means of slave scatter/gather with these limitations:
+ *  - chunked transfers (described by s/g lists with more than one item)
+ *     are refused as long as proper support for scatter/gather is missing;
+ *  - transfers on MPC8308 are always started by software as this SoC
+ *     appears not to have external request lines for peripheral flow control;
+ *  - only peripheral devices with 4-byte FIFO access register are supported;
+ *  - minimal memory <-> I/O memory transfer chunk is 4 bytes and consequently
+ *     source and destination addresses must be 4-byte aligned
+ *     and transfer size must be aligned on a (4 * maxburst) boundary;
  */
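
A configuration sketch that satisfies these constraints (FIFO_REG_ADDR is a
made-up, 4-byte-aligned peripheral register address; a client would pick
maxburst so the transfer size divides evenly):

static int example_configure(struct dma_chan *chan)
{
	struct dma_slave_config cfg = {
		.src_addr = FIFO_REG_ADDR,
		.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
		.src_maxburst = 16,	/* chunks of 4 * 16 = 64 bytes */
	};

	return dmaengine_slave_config(chan, &cfg);
}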
 
 #include <linux/module.h>
@@ -52,9 +63,17 @@
 #define MPC_DMA_DESCRIPTORS	64
 
 /* Macro definitions */
-#define MPC_DMA_CHANNELS	64
 #define MPC_DMA_TCD_OFFSET	0x1000
 
+/*
+ * Maximum channel counts for individual hardware variants
+ * and the maximum channel count over all supported controllers,
+ * used for data structure size
+ */
+#define MPC8308_DMACHAN_MAX	16
+#define MPC512x_DMACHAN_MAX	64
+#define MPC_DMA_CHANNELS	64
+
 /* Arbitration mode of group and channel */
 #define MPC_DMA_DMACR_EDCG	(1 << 31)
 #define MPC_DMA_DMACR_ERGA	(1 << 3)
@@ -181,6 +200,7 @@
 	dma_addr_t			tcd_paddr;
 	int				error;
 	struct list_head		node;
+	int				will_access_peripheral;
 };
 
 struct mpc_dma_chan {
@@ -193,6 +213,12 @@
 	struct mpc_dma_tcd		*tcd;
 	dma_addr_t			tcd_paddr;
 
+	/* Settings for access to peripheral FIFO */
+	dma_addr_t			src_per_paddr;
+	u32				src_tcd_nunits;
+	dma_addr_t			dst_per_paddr;
+	u32				dst_tcd_nunits;
+
 	/* Lock for this structure */
 	spinlock_t			lock;
 };
@@ -243,8 +269,23 @@
 	struct mpc_dma_desc *mdesc;
 	int cid = mchan->chan.chan_id;
 
-	/* Move all queued descriptors to active list */
-	list_splice_tail_init(&mchan->queued, &mchan->active);
+	while (!list_empty(&mchan->queued)) {
+		mdesc = list_first_entry(&mchan->queued,
+						struct mpc_dma_desc, node);
+		/*
+		 * Grab either several mem-to-mem transfer descriptors
+		 * or one peripheral transfer descriptor;
+		 * don't mix mem-to-mem and peripheral transfer descriptors
+		 * within the same 'active' list.
+		 */
+		if (mdesc->will_access_peripheral) {
+			if (list_empty(&mchan->active))
+				list_move_tail(&mdesc->node, &mchan->active);
+			break;
+		} else {
+			list_move_tail(&mdesc->node, &mchan->active);
+		}
+	}
 
 	/* Chain descriptors into one transaction */
 	list_for_each_entry(mdesc, &mchan->active, node) {
@@ -270,7 +311,17 @@
 
 	if (first != prev)
 		mdma->tcd[cid].e_sg = 1;
-	out_8(&mdma->regs->dmassrt, cid);
+
+	if (mdma->is_mpc8308) {
+		/* MPC8308, no request lines, software initiated start */
+		out_8(&mdma->regs->dmassrt, cid);
+	} else if (first->will_access_peripheral) {
+		/* Peripherals involved, start by external request signal */
+		out_8(&mdma->regs->dmaserq, cid);
+	} else {
+		/* Memory to memory transfer, software initiated start */
+		out_8(&mdma->regs->dmassrt, cid);
+	}
 }
 
 /* Handle interrupt on one half of DMA controller (32 channels) */
@@ -588,6 +639,7 @@
 	}
 
 	mdesc->error = 0;
+	mdesc->will_access_peripheral = 0;
 	tcd = mdesc->tcd;
 
 	/* Prepare Transfer Control Descriptor for this transaction */
@@ -635,6 +687,193 @@
 	return &mdesc->desc;
 }
 
+static struct dma_async_tx_descriptor *
+mpc_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma_desc *mdesc = NULL;
+	dma_addr_t per_paddr;
+	u32 tcd_nunits;
+	struct mpc_dma_tcd *tcd;
+	unsigned long iflags;
+	struct scatterlist *sg;
+	size_t len;
+	int iter, i;
+
+	/* Currently there is no proper support for scatter/gather */
+	if (sg_len != 1)
+		return NULL;
+
+	if (!is_slave_direction(direction))
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		spin_lock_irqsave(&mchan->lock, iflags);
+
+		mdesc = list_first_entry(&mchan->free,
+						struct mpc_dma_desc, node);
+		if (!mdesc) {
+			spin_unlock_irqrestore(&mchan->lock, iflags);
+			/* Try to free completed descriptors */
+			mpc_dma_process_completed(mdma);
+			return NULL;
+		}
+
+		list_del(&mdesc->node);
+
+		if (direction == DMA_DEV_TO_MEM) {
+			per_paddr = mchan->src_per_paddr;
+			tcd_nunits = mchan->src_tcd_nunits;
+		} else {
+			per_paddr = mchan->dst_per_paddr;
+			tcd_nunits = mchan->dst_tcd_nunits;
+		}
+
+		spin_unlock_irqrestore(&mchan->lock, iflags);
+
+		if (per_paddr == 0 || tcd_nunits == 0)
+			goto err_prep;
+
+		mdesc->error = 0;
+		mdesc->will_access_peripheral = 1;
+
+		/* Prepare Transfer Control Descriptor for this transaction */
+		tcd = mdesc->tcd;
+
+		memset(tcd, 0, sizeof(struct mpc_dma_tcd));
+
+		if (!IS_ALIGNED(sg_dma_address(sg), 4))
+			goto err_prep;
+
+		if (direction == DMA_DEV_TO_MEM) {
+			tcd->saddr = per_paddr;
+			tcd->daddr = sg_dma_address(sg);
+			tcd->soff = 0;
+			tcd->doff = 4;
+		} else {
+			tcd->saddr = sg_dma_address(sg);
+			tcd->daddr = per_paddr;
+			tcd->soff = 4;
+			tcd->doff = 0;
+		}
+
+		tcd->ssize = MPC_DMA_TSIZE_4;
+		tcd->dsize = MPC_DMA_TSIZE_4;
+
+		len = sg_dma_len(sg);
+		tcd->nbytes = tcd_nunits * 4;
+		if (!IS_ALIGNED(len, tcd->nbytes))
+			goto err_prep;
+
+		iter = len / tcd->nbytes;
+		if (iter >= 1 << 15) {
+			/* len is too big */
+			goto err_prep;
+		}
+		/* citer_linkch contains the high bits of iter */
+		tcd->biter = iter & 0x1ff;
+		tcd->biter_linkch = iter >> 9;
+		tcd->citer = tcd->biter;
+		tcd->citer_linkch = tcd->biter_linkch;
+
+		tcd->e_sg = 0;
+		tcd->d_req = 1;
+
+		/* Place descriptor in prepared list */
+		spin_lock_irqsave(&mchan->lock, iflags);
+		list_add_tail(&mdesc->node, &mchan->prepared);
+		spin_unlock_irqrestore(&mchan->lock, iflags);
+	}
+
+	return &mdesc->desc;
+
+err_prep:
+	/* Put the descriptor back */
+	spin_lock_irqsave(&mchan->lock, iflags);
+	list_add_tail(&mdesc->node, &mchan->free);
+	spin_unlock_irqrestore(&mchan->lock, iflags);
+
+	return NULL;
+}
+
+static int mpc_dma_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+							unsigned long arg)
+{
+	struct mpc_dma_chan *mchan;
+	struct mpc_dma *mdma;
+	struct dma_slave_config *cfg;
+	unsigned long flags;
+
+	mchan = dma_chan_to_mpc_dma_chan(chan);
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		/* Disable channel requests */
+		mdma = dma_chan_to_mpc_dma(chan);
+
+		spin_lock_irqsave(&mchan->lock, flags);
+
+		out_8(&mdma->regs->dmacerq, chan->chan_id);
+		list_splice_tail_init(&mchan->prepared, &mchan->free);
+		list_splice_tail_init(&mchan->queued, &mchan->free);
+		list_splice_tail_init(&mchan->active, &mchan->free);
+
+		spin_unlock_irqrestore(&mchan->lock, flags);
+
+		return 0;
+
+	case DMA_SLAVE_CONFIG:
+		/*
+		 * Software constraints:
+		 *  - only transfers between a peripheral device and
+		 *     memory are supported;
+		 *  - only peripheral devices with 4-byte FIFO access register
+		 *     are supported;
+		 *  - minimal transfer chunk is 4 bytes and consequently
+		 *     source and destination addresses must be 4-byte aligned
+		 *     and transfer size must be aligned on a (4 * maxburst)
+		 *     boundary;
+		 *  - during the transfer the RAM address is incremented by
+		 *     the size of the minimal transfer chunk;
+		 *  - peripheral port's address is constant during the transfer.
+		 */
+
+		cfg = (void *)arg;
+
+		if (cfg->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES ||
+		    cfg->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES ||
+		    !IS_ALIGNED(cfg->src_addr, 4) ||
+		    !IS_ALIGNED(cfg->dst_addr, 4)) {
+			return -EINVAL;
+		}
+
+		spin_lock_irqsave(&mchan->lock, flags);
+
+		mchan->src_per_paddr = cfg->src_addr;
+		mchan->src_tcd_nunits = cfg->src_maxburst;
+		mchan->dst_per_paddr = cfg->dst_addr;
+		mchan->dst_tcd_nunits = cfg->dst_maxburst;
+
+		/* Apply defaults */
+		if (mchan->src_tcd_nunits == 0)
+			mchan->src_tcd_nunits = 1;
+		if (mchan->dst_tcd_nunits == 0)
+			mchan->dst_tcd_nunits = 1;
+
+		spin_unlock_irqrestore(&mchan->lock, flags);
+
+		return 0;
+
+	default:
+		/* Unknown command */
+		break;
+	}
+
+	return -ENXIO;
+}
+
 static int mpc_dma_probe(struct platform_device *op)
 {
 	struct device_node *dn = op->dev.of_node;
@@ -649,13 +888,15 @@
 	mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL);
 	if (!mdma) {
 		dev_err(dev, "Memory exhausted!\n");
-		return -ENOMEM;
+		retval = -ENOMEM;
+		goto err;
 	}
 
 	mdma->irq = irq_of_parse_and_map(dn, 0);
 	if (mdma->irq == NO_IRQ) {
 		dev_err(dev, "Error mapping IRQ!\n");
-		return -EINVAL;
+		retval = -EINVAL;
+		goto err;
 	}
 
 	if (of_device_is_compatible(dn, "fsl,mpc8308-dma")) {
@@ -663,14 +904,15 @@
 		mdma->irq2 = irq_of_parse_and_map(dn, 1);
 		if (mdma->irq2 == NO_IRQ) {
 			dev_err(dev, "Error mapping IRQ!\n");
-			return -EINVAL;
+			retval = -EINVAL;
+			goto err_dispose1;
 		}
 	}
 
 	retval = of_address_to_resource(dn, 0, &res);
 	if (retval) {
 		dev_err(dev, "Error parsing memory region!\n");
-		return retval;
+		goto err_dispose2;
 	}
 
 	regs_start = res.start;
@@ -678,31 +920,34 @@
 
 	if (!devm_request_mem_region(dev, regs_start, regs_size, DRV_NAME)) {
 		dev_err(dev, "Error requesting memory region!\n");
-		return -EBUSY;
+		retval = -EBUSY;
+		goto err_dispose2;
 	}
 
 	mdma->regs = devm_ioremap(dev, regs_start, regs_size);
 	if (!mdma->regs) {
 		dev_err(dev, "Error mapping memory region!\n");
-		return -ENOMEM;
+		retval = -ENOMEM;
+		goto err_dispose2;
 	}
 
 	mdma->tcd = (struct mpc_dma_tcd *)((u8 *)(mdma->regs)
 							+ MPC_DMA_TCD_OFFSET);
 
-	retval = devm_request_irq(dev, mdma->irq, &mpc_dma_irq, 0, DRV_NAME,
-									mdma);
+	retval = request_irq(mdma->irq, &mpc_dma_irq, 0, DRV_NAME, mdma);
 	if (retval) {
 		dev_err(dev, "Error requesting IRQ!\n");
-		return -EINVAL;
+		retval = -EINVAL;
+		goto err_dispose2;
 	}
 
 	if (mdma->is_mpc8308) {
-		retval = devm_request_irq(dev, mdma->irq2, &mpc_dma_irq, 0,
-				DRV_NAME, mdma);
+		retval = request_irq(mdma->irq2, &mpc_dma_irq, 0,
+							DRV_NAME, mdma);
 		if (retval) {
 			dev_err(dev, "Error requesting IRQ2!\n");
-			return -EINVAL;
+			retval = -EINVAL;
+			goto err_free1;
 		}
 	}
 
@@ -710,18 +955,21 @@
 
 	dma = &mdma->dma;
 	dma->dev = dev;
-	if (!mdma->is_mpc8308)
-		dma->chancnt = MPC_DMA_CHANNELS;
+	if (mdma->is_mpc8308)
+		dma->chancnt = MPC8308_DMACHAN_MAX;
 	else
-		dma->chancnt = 16; /* MPC8308 DMA has only 16 channels */
+		dma->chancnt = MPC512x_DMACHAN_MAX;
 	dma->device_alloc_chan_resources = mpc_dma_alloc_chan_resources;
 	dma->device_free_chan_resources = mpc_dma_free_chan_resources;
 	dma->device_issue_pending = mpc_dma_issue_pending;
 	dma->device_tx_status = mpc_dma_tx_status;
 	dma->device_prep_dma_memcpy = mpc_dma_prep_memcpy;
+	dma->device_prep_slave_sg = mpc_dma_prep_slave_sg;
+	dma->device_control = mpc_dma_device_control;
 
 	INIT_LIST_HEAD(&dma->channels);
 	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+	dma_cap_set(DMA_SLAVE, dma->cap_mask);
 
 	for (i = 0; i < dma->chancnt; i++) {
 		mchan = &mdma->channels[i];
@@ -747,7 +995,19 @@
 	 * - Round-robin group arbitration,
 	 * - Round-robin channel arbitration.
 	 */
-	if (!mdma->is_mpc8308) {
+	if (mdma->is_mpc8308) {
+		/* MPC8308 has 16 channels and lacks some registers */
+		out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_ERCA);
+
+		/* enable snooping */
+		out_be32(&mdma->regs->dmagpor, MPC_DMA_DMAGPOR_SNOOP_ENABLE);
+		/* Disable error interrupts */
+		out_be32(&mdma->regs->dmaeeil, 0);
+
+		/* Clear interrupts status */
+		out_be32(&mdma->regs->dmaintl, 0xFFFF);
+		out_be32(&mdma->regs->dmaerrl, 0xFFFF);
+	} else {
 		out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG |
 					MPC_DMA_DMACR_ERGA | MPC_DMA_DMACR_ERCA);
 
@@ -768,29 +1028,28 @@
 		/* Route interrupts to IPIC */
 		out_be32(&mdma->regs->dmaihsa, 0);
 		out_be32(&mdma->regs->dmailsa, 0);
-	} else {
-		/* MPC8308 has 16 channels and lacks some registers */
-		out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_ERCA);
-
-		/* enable snooping */
-		out_be32(&mdma->regs->dmagpor, MPC_DMA_DMAGPOR_SNOOP_ENABLE);
-		/* Disable error interrupts */
-		out_be32(&mdma->regs->dmaeeil, 0);
-
-		/* Clear interrupts status */
-		out_be32(&mdma->regs->dmaintl, 0xFFFF);
-		out_be32(&mdma->regs->dmaerrl, 0xFFFF);
 	}
 
 	/* Register DMA engine */
 	dev_set_drvdata(dev, mdma);
 	retval = dma_async_device_register(dma);
-	if (retval) {
-		devm_free_irq(dev, mdma->irq, mdma);
-		irq_dispose_mapping(mdma->irq);
-	}
+	if (retval)
+		goto err_free2;
 
 	return retval;
+
+err_free2:
+	if (mdma->is_mpc8308)
+		free_irq(mdma->irq2, mdma);
+err_free1:
+	free_irq(mdma->irq, mdma);
+err_dispose2:
+	if (mdma->is_mpc8308)
+		irq_dispose_mapping(mdma->irq2);
+err_dispose1:
+	irq_dispose_mapping(mdma->irq);
+err:
+	return retval;
 }
 
 static int mpc_dma_remove(struct platform_device *op)
@@ -799,7 +1058,11 @@
 	struct mpc_dma *mdma = dev_get_drvdata(dev);
 
 	dma_async_device_unregister(&mdma->dma);
-	devm_free_irq(dev, mdma->irq, mdma);
+	if (mdma->is_mpc8308) {
+		free_irq(mdma->irq2, mdma);
+		irq_dispose_mapping(mdma->irq2);
+	}
+	free_irq(mdma->irq, mdma);
 	irq_dispose_mapping(mdma->irq);
 
 	return 0;
@@ -807,6 +1070,7 @@
 
 static struct of_device_id mpc_dma_match[] = {
 	{ .compatible = "fsl,mpc5121-dma", },
+	{ .compatible = "fsl,mpc8308-dma", },
 	{},
 };
 
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index 05fa548..9f9ca9f 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -21,6 +21,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/pch_dma.h>
@@ -996,7 +997,7 @@
 #define PCI_DEVICE_ID_ML7831_DMA1_8CH	0x8810
 #define PCI_DEVICE_ID_ML7831_DMA2_4CH	0x8815
 
-DEFINE_PCI_DEVICE_TABLE(pch_dma_id_table) = {
+const struct pci_device_id pch_dma_id_table[] = {
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_4CH), 4 },
 	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */
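DEFINE_PCI_DEVICE_TABLE is deprecated because it hides the table's type behind a macro; the open-coded form above is the preferred spelling and is normally paired with MODULE_DEVICE_TABLE() so module autoloading keeps working. A sketch of the convention, with a made-up driver name and device ID:

    static const struct pci_device_id foo_id_table[] = {
            { PCI_VDEVICE(INTEL, 0x1234), 0 },      /* hypothetical device */
            { }                                     /* terminating entry */
    };
    MODULE_DEVICE_TABLE(pci, foo_id_table);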
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
index b209a0f..012520c 100644
--- a/drivers/dma/s3c24xx-dma.c
+++ b/drivers/dma/s3c24xx-dma.c
@@ -164,6 +164,7 @@
  * @disrcc: value for source control register
  * @didstc: value for destination control register
  * @dcon: base value for dcon register
+ * @cyclic: indicates a cyclic transfer
  */
 struct s3c24xx_txd {
 	struct virt_dma_desc vd;
@@ -173,6 +174,7 @@
 	u32 disrcc;
 	u32 didstc;
 	u32 dcon;
+	bool cyclic;
 };
 
 struct s3c24xx_dma_chan;
@@ -669,8 +671,10 @@
 		/* when more sg's are in this txd, start the next one */
 		if (!list_is_last(txd->at, &txd->dsg_list)) {
 			txd->at = txd->at->next;
+			if (txd->cyclic)
+				vchan_cyclic_callback(&txd->vd);
 			s3c24xx_dma_start_next_sg(s3cchan, txd);
-		} else {
+		} else if (!txd->cyclic) {
 			s3cchan->at = NULL;
 			vchan_cookie_complete(&txd->vd);
 
@@ -682,6 +686,12 @@
 				s3c24xx_dma_start_next_txd(s3cchan);
 			else
 				s3c24xx_dma_phy_free(s3cchan);
+		} else {
+			vchan_cyclic_callback(&txd->vd);
+
+			/* Cyclic: wrap back to the first sg */
+			txd->at = txd->dsg_list.next;
+			s3c24xx_dma_start_next_sg(s3cchan, txd);
 		}
 	}
 	spin_unlock(&s3cchan->vc.lock);
@@ -877,6 +887,104 @@
 	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
 }
 
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period,
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	unsigned sg_len;
+	dma_addr_t slave_addr;
+	u32 hwcfg = 0;
+	int i;
+
+	dev_dbg(&s3cdma->pdev->dev,
+		"prepare cyclic transaction of %zu bytes with period %zu from %s\n",
+		size, period, s3cchan->name);
+
+	if (!is_slave_direction(direction)) {
+		dev_err(&s3cdma->pdev->dev,
+			"direction %d unsupported\n", direction);
+		return NULL;
+	}
+
+	txd = s3c24xx_dma_get_txd();
+	if (!txd)
+		return NULL;
+
+	txd->cyclic = 1;
+
+	if (cdata->handshake)
+		txd->dcon |= S3C24XX_DCON_HANDSHAKE;
+
+	switch (cdata->bus) {
+	case S3C24XX_DMA_APB:
+		txd->dcon |= S3C24XX_DCON_SYNC_PCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_APB;
+		break;
+	case S3C24XX_DMA_AHB:
+		txd->dcon |= S3C24XX_DCON_SYNC_HCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_AHB;
+		break;
+	}
+
+	/*
+	 * Always assume our peripheral destination is a fixed
+	 * address in memory.
+	 */
+	hwcfg |= S3C24XX_DISRCC_INC_FIXED;
+
+	/*
+	 * Individual dma operations are requested by the slave,
+	 * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE).
+	 */
+	txd->dcon |= S3C24XX_DCON_SERV_SINGLE;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		txd->disrcc = S3C24XX_DISRCC_LOC_AHB |
+			      S3C24XX_DISRCC_INC_INCREMENT;
+		txd->didstc = hwcfg;
+		slave_addr = s3cchan->cfg.dst_addr;
+		txd->width = s3cchan->cfg.dst_addr_width;
+	} else {
+		txd->disrcc = hwcfg;
+		txd->didstc = S3C24XX_DIDSTC_LOC_AHB |
+			      S3C24XX_DIDSTC_INC_INCREMENT;
+		slave_addr = s3cchan->cfg.src_addr;
+		txd->width = s3cchan->cfg.src_addr_width;
+	}
+
+	sg_len = size / period;
+
+	for (i = 0; i < sg_len; i++) {
+		dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+		if (!dsg) {
+			s3c24xx_dma_free_txd(txd);
+			return NULL;
+		}
+		list_add_tail(&dsg->node, &txd->dsg_list);
+
+		dsg->len = period;
+		/* The last period absorbs any remainder of size */
+		if (i == sg_len - 1)
+			dsg->len = size - period * i;
+		if (direction == DMA_MEM_TO_DEV) {
+			dsg->src_addr = addr + period * i;
+			dsg->dst_addr = slave_addr;
+		} else { /* DMA_DEV_TO_MEM */
+			dsg->src_addr = slave_addr;
+			dsg->dst_addr = addr + period * i;
+		}
+	}
+
+	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
 static struct dma_async_tx_descriptor *s3c24xx_dma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
@@ -961,7 +1069,6 @@
 			dsg->src_addr = slave_addr;
 			dsg->dst_addr = sg_dma_address(sg);
 		}
-		break;
 	}
 
 	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
@@ -1198,6 +1305,7 @@
 
 	/* Initialize slave engine for SoC internal dedicated peripherals */
 	dma_cap_set(DMA_SLAVE, s3cdma->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, s3cdma->slave.cap_mask);
 	dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask);
 	s3cdma->slave.dev = &pdev->dev;
 	s3cdma->slave.device_alloc_chan_resources =
@@ -1207,6 +1315,7 @@
 	s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status;
 	s3cdma->slave.device_issue_pending = s3c24xx_dma_issue_pending;
 	s3cdma->slave.device_prep_slave_sg = s3c24xx_dma_prep_slave_sg;
+	s3cdma->slave.device_prep_dma_cyclic = s3c24xx_dma_prep_dma_cyclic;
 	s3cdma->slave.device_control = s3c24xx_dma_control;
 
 	/* Register as many memcpy channels as there are physical channels */
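The new device_prep_dma_cyclic path lets clients (typically audio PCM drivers) keep a ring buffer running indefinitely: the interrupt handler above fires vchan_cyclic_callback() once per period and rewinds txd->at to the first sg instead of completing the descriptor. A client-side sketch, assuming an already-configured slave channel; the buffer and period sizes and the callback names are illustrative:

    /* 64 KiB ring, callback every 4 KiB period */
    struct dma_async_tx_descriptor *desc;

    desc = dmaengine_prep_dma_cyclic(chan, buf, 64 * 1024, 4 * 1024,
                                     DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
    if (!desc)
            return -EINVAL;

    desc->callback = period_elapsed;        /* hypothetical per-period hook */
    desc->callback_param = pcm;             /* hypothetical client context */
    dmaengine_submit(desc);
    dma_async_issue_pending(chan);          /* runs until terminated */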
diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig
index b4c8138..0f71981 100644
--- a/drivers/dma/sh/Kconfig
+++ b/drivers/dma/sh/Kconfig
@@ -4,7 +4,7 @@
 
 config SH_DMAE_BASE
 	bool "Renesas SuperH DMA Engine support"
-	depends on (SUPERH && SH_DMA) || (ARM && ARCH_SHMOBILE)
+	depends on (SUPERH && SH_DMA) || ARCH_SHMOBILE || COMPILE_TEST
 	depends on !SH_DMA_API
 	default y
 	select DMA_ENGINE
diff --git a/drivers/dma/sh/rcar-hpbdma.c b/drivers/dma/sh/rcar-hpbdma.c
index 3083d90..b212d94 100644
--- a/drivers/dma/sh/rcar-hpbdma.c
+++ b/drivers/dma/sh/rcar-hpbdma.c
@@ -18,6 +18,7 @@
 
 #include <linux/dmaengine.h>
 #include <linux/delay.h>
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
index 5239677..b35007e 100644
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -73,8 +73,7 @@
 static dma_cookie_t shdma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	struct shdma_desc *chunk, *c, *desc =
-		container_of(tx, struct shdma_desc, async_tx),
-		*last = desc;
+		container_of(tx, struct shdma_desc, async_tx);
 	struct shdma_chan *schan = to_shdma_chan(tx->chan);
 	dma_async_tx_callback callback = tx->callback;
 	dma_cookie_t cookie;
@@ -98,19 +97,20 @@
 				      &chunk->node == &schan->ld_free))
 			break;
 		chunk->mark = DESC_SUBMITTED;
-		/* Callback goes to the last chunk */
-		chunk->async_tx.callback = NULL;
+		if (chunk->chunks == 1) {
+			chunk->async_tx.callback = callback;
+			chunk->async_tx.callback_param = tx->callback_param;
+		} else {
+			/* Callback goes to the last chunk */
+			chunk->async_tx.callback = NULL;
+		}
 		chunk->cookie = cookie;
 		list_move_tail(&chunk->node, &schan->ld_queue);
-		last = chunk;
 
 		dev_dbg(schan->dev, "submit #%d@%p on %d\n",
-			tx->cookie, &last->async_tx, schan->id);
+			tx->cookie, &chunk->async_tx, schan->id);
 	}
 
-	last->async_tx.callback = callback;
-	last->async_tx.callback_param = tx->callback_param;
-
 	if (power_up) {
 		int ret;
 		schan->pm_state = SHDMA_PM_BUSY;
@@ -304,6 +304,7 @@
 	dma_async_tx_callback callback = NULL;
 	void *param = NULL;
 	unsigned long flags;
+	LIST_HEAD(cyclic_list);
 
 	spin_lock_irqsave(&schan->chan_lock, flags);
 	list_for_each_entry_safe(desc, _desc, &schan->ld_queue, node) {
@@ -369,10 +370,16 @@
 		if (((desc->mark == DESC_COMPLETED ||
 		      desc->mark == DESC_WAITING) &&
 		     async_tx_test_ack(&desc->async_tx)) || all) {
-			/* Remove from ld_queue list */
-			desc->mark = DESC_IDLE;
 
-			list_move(&desc->node, &schan->ld_free);
+			if (all || !desc->cyclic) {
+				/* Remove from ld_queue list */
+				desc->mark = DESC_IDLE;
+				list_move(&desc->node, &schan->ld_free);
+			} else {
+				/* reuse as cyclic */
+				desc->mark = DESC_SUBMITTED;
+				list_move_tail(&desc->node, &cyclic_list);
+			}
 
 			if (list_empty(&schan->ld_queue)) {
 				dev_dbg(schan->dev, "Bring down channel %d\n", schan->id);
@@ -389,6 +396,8 @@
 		 */
 		schan->dma_chan.completed_cookie = schan->dma_chan.cookie;
 
+	list_splice_tail(&cyclic_list, &schan->ld_queue);
+
 	spin_unlock_irqrestore(&schan->chan_lock, flags);
 
 	if (callback)
@@ -521,7 +530,7 @@
  */
 static struct dma_async_tx_descriptor *shdma_prep_sg(struct shdma_chan *schan,
 	struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr,
-	enum dma_transfer_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags, bool cyclic)
 {
 	struct scatterlist *sg;
 	struct shdma_desc *first = NULL, *new = NULL /* compiler... */;
@@ -569,7 +578,11 @@
 			if (!new)
 				goto err_get_desc;
 
-			new->chunks = chunks--;
+			new->cyclic = cyclic;
+			if (cyclic)
+				new->chunks = 1;
+			else
+				new->chunks = chunks--;
 			list_add_tail(&new->node, &tx_list);
 		} while (len);
 	}
@@ -612,7 +625,8 @@
 	sg_dma_address(&sg) = dma_src;
 	sg_dma_len(&sg) = len;
 
-	return shdma_prep_sg(schan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM, flags);
+	return shdma_prep_sg(schan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM,
+			     flags, false);
 }
 
 static struct dma_async_tx_descriptor *shdma_prep_slave_sg(
@@ -640,7 +654,58 @@
 	slave_addr = ops->slave_addr(schan);
 
 	return shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
-			      direction, flags);
+			     direction, flags, false);
+}
+
+#define SHDMA_MAX_SG_LEN 32
+
+static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags, void *context)
+{
+	struct shdma_chan *schan;
+	struct shdma_dev *sdev;
+	const struct shdma_ops *ops;
+	unsigned int sg_len;
+	int slave_id;
+	dma_addr_t slave_addr;
+	struct scatterlist sgl[SHDMA_MAX_SG_LEN];
+	int i;
+
+	/* Check the channel before deriving any pointers from it */
+	if (!chan)
+		return NULL;
+
+	schan = to_shdma_chan(chan);
+	sdev = to_shdma_dev(schan->dma_chan.device);
+	ops = sdev->ops;
+	sg_len = buf_len / period_len;
+	slave_id = schan->slave_id;
+
+	BUG_ON(!schan->desc_num);
+
+	if (sg_len > SHDMA_MAX_SG_LEN) {
+		dev_err(schan->dev, "sg length %d exceeds limit %d\n",
+				sg_len, SHDMA_MAX_SG_LEN);
+		return NULL;
+	}
+
+	/* Someone calling slave DMA on a generic channel? */
+	if (slave_id < 0 || (buf_len < period_len)) {
+		dev_warn(schan->dev,
+			"%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n",
+			__func__, buf_len, period_len, slave_id);
+		return NULL;
+	}
+
+	slave_addr = ops->slave_addr(schan);
+
+	sg_init_table(sgl, sg_len);
+	for (i = 0; i < sg_len; i++) {
+		dma_addr_t src = buf_addr + (period_len * i);
+
+		sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len,
+			    offset_in_page(src));
+		sg_dma_address(&sgl[i]) = src;
+		sg_dma_len(&sgl[i]) = period_len;
+	}
+
+	return shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
+			     direction, flags, true);
 }
 
 static int shdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
@@ -915,6 +980,7 @@
 
 	/* Compulsory for DMA_SLAVE fields */
 	dma_dev->device_prep_slave_sg = shdma_prep_slave_sg;
+	dma_dev->device_prep_dma_cyclic = shdma_prep_dma_cyclic;
 	dma_dev->device_control = shdma_control;
 
 	dma_dev->dev = dev;
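Three hunks cooperate to implement cyclic support here: shdma_prep_sg() marks cyclic chunks with chunks == 1, shdma_tx_submit() attaches the callback to every chunk with chunks == 1 so each period signals completion, and the cleanup path parks finished cyclic descriptors on cyclic_list and splices them back onto ld_queue instead of freeing them. A condensed view of one descriptor's lifecycle (a sketch, not literal driver code):

    /*
     * submit:   mark = DESC_SUBMITTED, chunks == 1, callback attached
     * complete: the tx callback fires once for this period
     * cleanup:  mark = DESC_SUBMITTED again, moved to cyclic_list,
     *           then spliced back onto ld_queue for the next cycle;
     *           only a terminate-all (all == true) releases it to ld_free
     */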
diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c
index dda7e75..146d5df 100644
--- a/drivers/dma/sh/shdmac.c
+++ b/drivers/dma/sh/shdmac.c
@@ -18,21 +18,22 @@
  *
  */
 
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kdebug.h>
 #include <linux/module.h>
+#include <linux/notifier.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/dmaengine.h>
-#include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
-#include <linux/sh_dma.h>
-#include <linux/notifier.h>
-#include <linux/kdebug.h>
-#include <linux/spinlock.h>
 #include <linux/rculist.h>
+#include <linux/sh_dma.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
 
 #include "../dmaengine.h"
 #include "shdma.h"
diff --git a/drivers/dma/sh/sudmac.c b/drivers/dma/sh/sudmac.c
index 4e7df43..3ce1039 100644
--- a/drivers/dma/sh/sudmac.c
+++ b/drivers/dma/sh/sudmac.c
@@ -14,12 +14,13 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
 #include <linux/dmaengine.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/slab.h>
 #include <linux/sudmac.h>
 
 struct sudmac_chan {
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index bf18c78..c798445 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -556,7 +556,6 @@
  * later
  * @reg_val_backup_chan: Backup data for standard channel parameter registers.
  * @gcc_pwr_off_mask: Mask to maintain the channels that can be turned off.
- * @initialized: true if the dma has been initialized
  * @gen_dmac: the struct for generic registers values to represent u8500/8540
  * DMA controller
  */
@@ -594,7 +593,6 @@
 	u32				  reg_val_backup_v4[BACKUP_REGS_SZ_MAX];
 	u32				 *reg_val_backup_chan;
 	u16				  gcc_pwr_off_mask;
-	bool				  initialized;
 	struct d40_gen_dmac		  gen_dmac;
 };
 
@@ -1056,62 +1054,6 @@
 	return len;
 }
 
-
-#ifdef CONFIG_PM
-static void dma40_backup(void __iomem *baseaddr, u32 *backup,
-			 u32 *regaddr, int num, bool save)
-{
-	int i;
-
-	for (i = 0; i < num; i++) {
-		void __iomem *addr = baseaddr + regaddr[i];
-
-		if (save)
-			backup[i] = readl_relaxed(addr);
-		else
-			writel_relaxed(backup[i], addr);
-	}
-}
-
-static void d40_save_restore_registers(struct d40_base *base, bool save)
-{
-	int i;
-
-	/* Save/Restore channel specific registers */
-	for (i = 0; i < base->num_phy_chans; i++) {
-		void __iomem *addr;
-		int idx;
-
-		if (base->phy_res[i].reserved)
-			continue;
-
-		addr = base->virtbase + D40_DREG_PCBASE + i * D40_DREG_PCDELTA;
-		idx = i * ARRAY_SIZE(d40_backup_regs_chan);
-
-		dma40_backup(addr, &base->reg_val_backup_chan[idx],
-			     d40_backup_regs_chan,
-			     ARRAY_SIZE(d40_backup_regs_chan),
-			     save);
-	}
-
-	/* Save/Restore global registers */
-	dma40_backup(base->virtbase, base->reg_val_backup,
-		     d40_backup_regs, ARRAY_SIZE(d40_backup_regs),
-		     save);
-
-	/* Save/Restore registers only existing on dma40 v3 and later */
-	if (base->gen_dmac.backup)
-		dma40_backup(base->virtbase, base->reg_val_backup_v4,
-			     base->gen_dmac.backup,
-			base->gen_dmac.backup_size,
-			save);
-}
-#else
-static void d40_save_restore_registers(struct d40_base *base, bool save)
-{
-}
-#endif
-
 static int __d40_execute_command_phy(struct d40_chan *d40c,
 				     enum d40_command command)
 {
@@ -1495,8 +1437,8 @@
 	if (!d40c->busy)
 		return 0;
 
-	pm_runtime_get_sync(d40c->base->dev);
 	spin_lock_irqsave(&d40c->lock, flags);
+	pm_runtime_get_sync(d40c->base->dev);
 
 	res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
 
@@ -2998,16 +2940,86 @@
 }
 
 /* Suspend resume functionality */
-#ifdef CONFIG_PM
-static int dma40_pm_suspend(struct device *dev)
+#ifdef CONFIG_PM_SLEEP
+static int dma40_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct d40_base *base = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = pm_runtime_force_suspend(dev);
+	if (ret)
+		return ret;
+
+	if (base->lcpa_regulator)
+		ret = regulator_disable(base->lcpa_regulator);
+	return ret;
+}
+
+static int dma40_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct d40_base *base = platform_get_drvdata(pdev);
 	int ret = 0;
 
-	if (base->lcpa_regulator)
-		ret = regulator_disable(base->lcpa_regulator);
-	return ret;
+	if (base->lcpa_regulator) {
+		ret = regulator_enable(base->lcpa_regulator);
+		if (ret)
+			return ret;
+	}
+
+	return pm_runtime_force_resume(dev);
+}
+#endif
+
+#ifdef CONFIG_PM
+static void dma40_backup(void __iomem *baseaddr, u32 *backup,
+			 u32 *regaddr, int num, bool save)
+{
+	int i;
+
+	for (i = 0; i < num; i++) {
+		void __iomem *addr = baseaddr + regaddr[i];
+
+		if (save)
+			backup[i] = readl_relaxed(addr);
+		else
+			writel_relaxed(backup[i], addr);
+	}
+}
+
+static void d40_save_restore_registers(struct d40_base *base, bool save)
+{
+	int i;
+
+	/* Save/Restore channel specific registers */
+	for (i = 0; i < base->num_phy_chans; i++) {
+		void __iomem *addr;
+		int idx;
+
+		if (base->phy_res[i].reserved)
+			continue;
+
+		addr = base->virtbase + D40_DREG_PCBASE + i * D40_DREG_PCDELTA;
+		idx = i * ARRAY_SIZE(d40_backup_regs_chan);
+
+		dma40_backup(addr, &base->reg_val_backup_chan[idx],
+			     d40_backup_regs_chan,
+			     ARRAY_SIZE(d40_backup_regs_chan),
+			     save);
+	}
+
+	/* Save/Restore global registers */
+	dma40_backup(base->virtbase, base->reg_val_backup,
+		     d40_backup_regs, ARRAY_SIZE(d40_backup_regs),
+		     save);
+
+	/* Save/Restore registers only existing on dma40 v3 and later */
+	if (base->gen_dmac.backup)
+		dma40_backup(base->virtbase, base->reg_val_backup_v4,
+			     base->gen_dmac.backup,
+			base->gen_dmac.backup_size,
+			save);
 }
 
 static int dma40_runtime_suspend(struct device *dev)
@@ -3030,36 +3042,20 @@
 	struct platform_device *pdev = to_platform_device(dev);
 	struct d40_base *base = platform_get_drvdata(pdev);
 
-	if (base->initialized)
-		d40_save_restore_registers(base, false);
+	d40_save_restore_registers(base, false);
 
 	writel_relaxed(D40_DREG_GCC_ENABLE_ALL,
 		       base->virtbase + D40_DREG_GCC);
 	return 0;
 }
-
-static int dma40_resume(struct device *dev)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct d40_base *base = platform_get_drvdata(pdev);
-	int ret = 0;
-
-	if (base->lcpa_regulator)
-		ret = regulator_enable(base->lcpa_regulator);
-
-	return ret;
-}
+#endif
 
 static const struct dev_pm_ops dma40_pm_ops = {
-	.suspend		= dma40_pm_suspend,
-	.runtime_suspend	= dma40_runtime_suspend,
-	.runtime_resume		= dma40_runtime_resume,
-	.resume			= dma40_resume,
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(dma40_suspend, dma40_resume)
+	SET_PM_RUNTIME_PM_OPS(dma40_runtime_suspend,
+				dma40_runtime_resume,
+				NULL)
 };
-#define DMA40_PM_OPS	(&dma40_pm_ops)
-#else
-#define DMA40_PM_OPS	NULL
-#endif
 
 /* Initialization functions. */
 
@@ -3645,12 +3641,6 @@
 		goto failure;
 	}
 
-	pm_runtime_irq_safe(base->dev);
-	pm_runtime_set_autosuspend_delay(base->dev, DMA40_AUTOSUSPEND_DELAY);
-	pm_runtime_use_autosuspend(base->dev);
-	pm_runtime_enable(base->dev);
-	pm_runtime_resume(base->dev);
-
 	if (base->plat_data->use_esram_lcla) {
 
 		base->lcpa_regulator = regulator_get(base->dev, "lcla_esram");
@@ -3671,7 +3661,15 @@
 		}
 	}
 
-	base->initialized = true;
+	writel_relaxed(D40_DREG_GCC_ENABLE_ALL, base->virtbase + D40_DREG_GCC);
+
+	pm_runtime_irq_safe(base->dev);
+	pm_runtime_set_autosuspend_delay(base->dev, DMA40_AUTOSUSPEND_DELAY);
+	pm_runtime_use_autosuspend(base->dev);
+	pm_runtime_mark_last_busy(base->dev);
+	pm_runtime_set_active(base->dev);
+	pm_runtime_enable(base->dev);
+
 	ret = d40_dmaengine_init(base, num_reserved_chans);
 	if (ret)
 		goto failure;
@@ -3754,7 +3752,7 @@
 	.driver = {
 		.owner = THIS_MODULE,
 		.name  = D40_NAME,
-		.pm = DMA40_PM_OPS,
+		.pm = &dma40_pm_ops,
 		.of_match_table = d40_match,
 	},
 };
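The reworked power management follows the pm_runtime_force_suspend()/pm_runtime_force_resume() pattern: system sleep funnels into the runtime PM callbacks, so register save/restore lives in one place and the old initialized flag becomes unnecessary. A minimal sketch of the pattern for a hypothetical driver (the foo_* names are placeholders):

    static int foo_runtime_suspend(struct device *dev)
    {
            /* save controller state, gate clocks */
            return 0;
    }

    static int foo_runtime_resume(struct device *dev)
    {
            /* ungate clocks, restore controller state */
            return 0;
    }

    static int foo_suspend(struct device *dev)
    {
            return pm_runtime_force_suspend(dev);   /* reuse the runtime path */
    }

    static int foo_resume(struct device *dev)
    {
            return pm_runtime_force_resume(dev);
    }

    static const struct dev_pm_ops foo_pm_ops = {
            SET_LATE_SYSTEM_SLEEP_PM_OPS(foo_suspend, foo_resume)
            SET_PM_RUNTIME_PM_OPS(foo_runtime_suspend, foo_runtime_resume, NULL)
    };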
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 8300fb8..cbb168e 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -292,7 +292,7 @@
 };
 
 /**
- * enum dma_slave_buswidth - defines bus with of the DMA slave
+ * enum dma_slave_buswidth - defines bus width of the DMA slave
  * device, source or target buses
  */
 enum dma_slave_buswidth {
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index f50821c..12f134b 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -43,15 +43,15 @@
 
 /* PaRAM slots are laid out like this */
 struct edmacc_param {
-	unsigned int opt;
-	unsigned int src;
-	unsigned int a_b_cnt;
-	unsigned int dst;
-	unsigned int src_dst_bidx;
-	unsigned int link_bcntrld;
-	unsigned int src_dst_cidx;
-	unsigned int ccnt;
-};
+	u32 opt;
+	u32 src;
+	u32 a_b_cnt;
+	u32 dst;
+	u32 src_dst_bidx;
+	u32 link_bcntrld;
+	u32 src_dst_cidx;
+	u32 ccnt;
+} __packed;
 
 /* fields in edmacc_param.opt */
 #define SAM		BIT(0)
@@ -130,7 +130,7 @@
 				enum address_mode mode, enum fifo_width);
 void edma_set_dest(unsigned slot, dma_addr_t dest_port,
 				 enum address_mode mode, enum fifo_width);
-void edma_get_position(unsigned slot, dma_addr_t *src, dma_addr_t *dst);
+dma_addr_t edma_get_position(unsigned slot, bool dst);
 void edma_set_src_index(unsigned slot, s16 src_bidx, s16 src_cidx);
 void edma_set_dest_index(unsigned slot, s16 dest_bidx, s16 dest_cidx);
 void edma_set_transfer_params(unsigned slot, u16 acnt, u16 bcnt, u16 ccnt,
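edma_get_position() now returns a single position selected by its dst flag instead of filling two out-parameters, which makes residue bookkeeping a one-liner for callers. A sketch, assuming a transfer whose source side was programmed to start at src_start (the helper and its names are illustrative, not part of this patch):

    /* Illustrative residue helper; src_start/total are tracked by the caller */
    static u32 foo_residue(unsigned slot, dma_addr_t src_start, u32 total)
    {
            dma_addr_t pos = edma_get_position(slot, false);  /* false = source */

            return total - (u32)(pos - src_start);
    }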
diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h
index f92c0a4..abdf1f2 100644
--- a/include/linux/shdma-base.h
+++ b/include/linux/shdma-base.h
@@ -54,6 +54,7 @@
 	dma_cookie_t cookie;
 	int chunks;
 	int mark;
+	bool cyclic;			/* part of a cyclic transfer */
 };
 
 struct shdma_chan {