Merge branch 'topic/of' into for-linus
diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
index 68cee4f5..4fa814d 100644
--- a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
+++ b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
@@ -1,7 +1,12 @@
 * Freescale Smart Direct Memory Access (SDMA) Controller for i.MX
 
 Required properties:
-- compatible : Should be "fsl,<chip>-sdma"
+- compatible : Should be "fsl,imx31-sdma", "fsl,imx31-to1-sdma",
+  "fsl,imx31-to2-sdma", "fsl,imx35-sdma", "fsl,imx35-to1-sdma",
+  "fsl,imx35-to2-sdma", "fsl,imx51-sdma", "fsl,imx53-sdma" or
+  "fsl,imx6q-sdma". The -to variants should be preferred since they
+  allow determining the correct ROM script addresses needed for
+  the driver to work without additional firmware.
 - reg : Should contain SDMA registers location and length
 - interrupts : Should contain SDMA interrupt
 - #dma-cells : Must be <3>.
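
For reference, a minimal sketch of how a driver can key the ROM script
addresses off these compatible strings via the of_device_id .data pointer
(the example_* names and the single script entry are illustrative, not the
real imx-sdma tables):

  #include <linux/of_device.h>
  #include <linux/platform_device.h>
  #include <linux/platform_data/dma-imx-sdma.h>

  /* Illustrative only: the real per-variant tables live in
   * drivers/dma/imx-sdma.c. */
  static const struct sdma_script_start_addrs example_imx51_scripts = {
          .ap_2_ap_addr = 642,            /* ROM script entry point */
  };

  static const struct of_device_id example_sdma_ids[] = {
          { .compatible = "fsl,imx51-sdma", .data = &example_imx51_scripts },
          { /* sentinel */ }
  };

  static int example_probe(struct platform_device *pdev)
  {
          const struct of_device_id *of_id =
                  of_match_device(example_sdma_ids, &pdev->dev);
          const struct sdma_script_start_addrs *scripts =
                  of_id ? of_id->data : NULL;

          /* scripts now holds the ROM addresses for this exact silicon,
           * so no external firmware is needed for the basic scripts. */
          return scripts ? 0 : -EINVAL;
  }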
diff --git a/Documentation/devicetree/bindings/dma/k3dma.txt b/Documentation/devicetree/bindings/dma/k3dma.txt
new file mode 100644
index 0000000..23f8d71
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/k3dma.txt
@@ -0,0 +1,46 @@
+* Hisilicon K3 DMA controller
+
+See dma.txt first
+
+Required properties:
+- compatible: Should be "hisilicon,k3-dma-1.0"
+- reg: Should contain DMA registers location and length.
+- interrupts: Should contain one interrupt shared by all channels
+- #dma-cells: see dma.txt, should be 1; the cell carries the request line number
+- dma-channels: physical channels supported
+- dma-requests: virtual channels supported, each virtual channel
+		has a specific request line
+- clocks: clock required
+
+Example:
+
+Controller:
+		dma0: dma@fcd02000 {
+			compatible = "hisilicon,k3-dma-1.0";
+			reg = <0xfcd02000 0x1000>;
+			#dma-cells = <1>;
+			dma-channels = <16>;
+			dma-requests = <27>;
+			interrupts = <0 12 4>;
+			clocks = <&pclk>;
+			status = "disabled";
+		};
+
+Client:
+Use the specific request line passed from dmax.
+For example, the i2c0 read channel request line is 18, while the write channel uses 19.
+
+		i2c0: i2c@fcb08000 {
+			compatible = "snps,designware-i2c";
+			dmas =	<&dma0 18          /* read channel */
+				 &dma0 19>;        /* write channel */
+			dma-names = "rx", "tx";
+		};
+
+		i2c1: i2c@fcb09000 {
+			compatible = "snps,designware-i2c";
+			dmas =	<&dma0 20          /* read channel */
+				 &dma0 21>;        /* write channel */
+			dma-names = "rx", "tx";
+		};
+
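For reference, a minimal sketch of the client side in C, assuming the i2c0
node above and the generic dmaengine slave API (the example_* name is
illustrative; error handling trimmed to the essentials):

  #include <linux/dmaengine.h>

  /* Illustrative client: grab the "rx" channel (request line 18 of dma0
   * in the example above) through the generic slave API. */
  static int example_request_rx(struct device *dev)
  {
          struct dma_chan *rx = dma_request_slave_channel(dev, "rx");

          if (!rx)
                  return -ENODEV;

          /* ... dmaengine_slave_config(), prep, submit, issue ... */

          dma_release_channel(rx);
          return 0;
  }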
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index b467145..2850110 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -293,3 +293,6 @@
 PHY
   devm_usb_get_phy()
   devm_usb_put_phy()
+
+SLAVE DMA ENGINE
+  devm_acpi_dma_controller_register()
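
A hedged usage sketch for the new devres helper, assuming a platform driver
probe and the generic xlate helper from drivers/dma/acpi-dma.c (the
filter-info setup is driver-specific and elided here):

  #include <linux/acpi_dma.h>
  #include <linux/platform_device.h>

  static int example_probe(struct platform_device *pdev)
  {
          /* Filter-info setup is driver-specific and elided here. */
          struct acpi_dma_filter_info *info = NULL;

          /* Managed registration: unregistered automatically on detach,
           * so remove() needs no matching call. */
          return devm_acpi_dma_controller_register(&pdev->dev,
                                                   acpi_dma_simple_xlate,
                                                   info);
  }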
diff --git a/MAINTAINERS b/MAINTAINERS
index defc053..6a6554a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7126,6 +7126,7 @@
 
 SYNOPSYS DESIGNWARE DMAC DRIVER
 M:	Viresh Kumar <viresh.linux@gmail.com>
+M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
 S:	Maintained
 F:	include/linux/dw_dmac.h
 F:	drivers/dma/dw/
diff --git a/arch/arm/mach-imx/mm-imx25.c b/arch/arm/mach-imx/mm-imx25.c
index e065c11..5211f62 100644
--- a/arch/arm/mach-imx/mm-imx25.c
+++ b/arch/arm/mach-imx/mm-imx25.c
@@ -61,25 +61,8 @@
 	mxc_init_irq(MX25_IO_ADDRESS(MX25_AVIC_BASE_ADDR));
 }
 
-static struct sdma_script_start_addrs imx25_sdma_script __initdata = {
-	.ap_2_ap_addr = 729,
-	.uart_2_mcu_addr = 904,
-	.per_2_app_addr = 1255,
-	.mcu_2_app_addr = 834,
-	.uartsh_2_mcu_addr = 1120,
-	.per_2_shp_addr = 1329,
-	.mcu_2_shp_addr = 1048,
-	.ata_2_mcu_addr = 1560,
-	.mcu_2_ata_addr = 1479,
-	.app_2_per_addr = 1189,
-	.app_2_mcu_addr = 770,
-	.shp_2_per_addr = 1407,
-	.shp_2_mcu_addr = 979,
-};
-
 static struct sdma_platform_data imx25_sdma_pdata __initdata = {
 	.fw_name = "sdma-imx25.bin",
-	.script_addrs = &imx25_sdma_script,
 };
 
 static const struct resource imx25_audmux_res[] __initconst = {
diff --git a/arch/arm/mach-imx/mm-imx5.c b/arch/arm/mach-imx/mm-imx5.c
index cf193d8..051add9 100644
--- a/arch/arm/mach-imx/mm-imx5.c
+++ b/arch/arm/mach-imx/mm-imx5.c
@@ -103,22 +103,8 @@
 	tzic_init_irq(MX53_IO_ADDRESS(MX53_TZIC_BASE_ADDR));
 }
 
-static struct sdma_script_start_addrs imx51_sdma_script __initdata = {
-	.ap_2_ap_addr = 642,
-	.uart_2_mcu_addr = 817,
-	.mcu_2_app_addr = 747,
-	.mcu_2_shp_addr = 961,
-	.ata_2_mcu_addr = 1473,
-	.mcu_2_ata_addr = 1392,
-	.app_2_per_addr = 1033,
-	.app_2_mcu_addr = 683,
-	.shp_2_per_addr = 1251,
-	.shp_2_mcu_addr = 892,
-};
-
 static struct sdma_platform_data imx51_sdma_pdata __initdata = {
 	.fw_name = "sdma-imx51.bin",
-	.script_addrs = &imx51_sdma_script,
 };
 
 static const struct resource imx51_audmux_res[] __initconst = {
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 6825957..2945ff0 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -300,6 +300,15 @@
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
 
+config K3_DMA
+	tristate "Hisilicon K3 DMA support"
+	depends on ARCH_HI3xxx
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support the DMA engine for Hisilicon K3 platform
+	  devices.
+
 config DMA_ENGINE
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 5e0f2ef..8c97941 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -39,3 +39,4 @@
 obj-$(CONFIG_DMA_OMAP) += omap-dma.o
 obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
 obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o
+obj-$(CONFIG_K3_DMA) += k3dma.o
diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c
index 5a18f82..e69b03c 100644
--- a/drivers/dma/acpi-dma.c
+++ b/drivers/dma/acpi-dma.c
@@ -43,7 +43,6 @@
 	struct list_head resource_list;
 	struct resource_list_entry *rentry;
 	resource_size_t mem = 0, irq = 0;
-	u32 vendor_id;
 	int ret;
 
 	if (grp->shared_info_length != sizeof(struct acpi_csrt_shared_info))
@@ -73,9 +72,8 @@
 	if (si->mmio_base_low != mem || si->gsi_interrupt != irq)
 		return 0;
 
-	vendor_id = le32_to_cpu(grp->vendor_id);
 	dev_dbg(&adev->dev, "matches with %.4s%04X (rev %u)\n",
-		(char *)&vendor_id, grp->device_id, grp->revision);
+		(char *)&grp->vendor_id, grp->device_id, grp->revision);
 
 	/* Check if the request line range is available */
 	if (si->base_request_line == 0 && si->num_handshake_signals == 0)
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 06fe45c..cd29434 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -24,6 +24,7 @@
  *
  * Documentation: ARM DDI 0196G == PL080
  * Documentation: ARM DDI 0218E == PL081
+ * Documentation: S3C6410 User's Manual == PL080S
  *
  * PL080 & PL081 both have 16 sets of DMA signals that can be routed to any
  * channel.
@@ -36,6 +37,14 @@
  *
  * The PL080 has a dual bus master, PL081 has a single master.
  *
+ * PL080S is a version modified by Samsung and used in S3C64xx SoCs.
+ * It differs in the following aspects:
+ * - CH_CONFIG register at different offset,
+ * - separate CH_CONTROL2 register for transfer size,
+ * - bigger maximum transfer size,
+ * - 8-word aligned LLI, instead of 4-word, due to extra CCTL2 word,
+ * - no support for peripheral flow control.
+ *
  * Memory to peripheral transfer may be visualized as
  *	Get data from memory to DMAC
  *	Until no data left
@@ -64,10 +73,7 @@
  *  - Peripheral flow control: the transfer size is ignored (and should be
  *    zero).  The data is transferred from the current LLI entry, until
  *    after the final transfer signalled by LBREQ or LSREQ.  The DMAC
- *    will then move to the next LLI entry.
- *
- * Global TODO:
- * - Break out common code from arch/arm/mach-s3c64xx and share
+ *    will then move to the next LLI entry. Unsupported by PL080S.
  */
 #include <linux/amba/bus.h>
 #include <linux/amba/pl08x.h>
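
A sketch of the two LLI entry formats implied by the comment above
(illustrative structs only; the driver below addresses entries as raw u32
arrays through the PL080_LLI_* word indices it adds):

  #include <linux/types.h>

  /* One PL080/PL081 LLI entry: four 32-bit words; the next-LLI pointer
   * uses bit[0] as the AHB master select. */
  struct example_pl080_lli {
          u32 src;
          u32 dst;
          u32 lli;
          u32 cctl;       /* includes the transfer-size field */
  };

  /* One PL080S entry: the size moves into a fifth CCTL2 word and each
   * entry is padded out to an 8-word alignment. */
  struct example_pl080s_lli {
          u32 src;
          u32 dst;
          u32 lli;
          u32 cctl;
          u32 cctl2;      /* transfer size, allowing larger transfers */
          u32 unused[3];
  };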
@@ -100,24 +106,16 @@
  * @nomadik: whether the channels have Nomadik security extension bits
  *	that need to be checked for permission before use and some registers are
  *	missing
+ * @pl080s: whether this version is a PL080S, which has a separate register
+ *	and LLI word for the transfer size.
  */
 struct vendor_data {
+	u8 config_offset;
 	u8 channels;
 	bool dualmaster;
 	bool nomadik;
-};
-
-/*
- * PL08X private data structures
- * An LLI struct - see PL08x TRM.  Note that next uses bit[0] as a bus bit,
- * start & end do not - their bus bit info is in cctl.  Also note that these
- * are fixed 32-bit quantities.
- */
-struct pl08x_lli {
-	u32 src;
-	u32 dst;
-	u32 lli;
-	u32 cctl;
+	bool pl080s;
+	u32 max_transfer_size;
 };
 
 /**
@@ -145,6 +143,7 @@
 struct pl08x_phy_chan {
 	unsigned int id;
 	void __iomem *base;
+	void __iomem *reg_config;
 	spinlock_t lock;
 	struct pl08x_dma_chan *serving;
 	bool locked;
@@ -174,12 +173,13 @@
  * @ccfg: config reg values for current txd
  * @done: this marks completed descriptors, which should not have their
  *   mux released.
+ * @cyclic: indicate cyclic transfers
  */
 struct pl08x_txd {
 	struct virt_dma_desc vd;
 	struct list_head dsg_list;
 	dma_addr_t llis_bus;
-	struct pl08x_lli *llis_va;
+	u32 *llis_va;
 	/* Default cctl value for LLIs */
 	u32 cctl;
 	/*
@@ -188,6 +188,7 @@
 	 */
 	u32 ccfg;
 	bool done;
+	bool cyclic;
 };
 
 /**
@@ -263,17 +264,29 @@
 	struct dma_pool *pool;
 	u8 lli_buses;
 	u8 mem_buses;
+	u8 lli_words;
 };
 
 /*
  * PL08X specific defines
  */
 
-/* Size (bytes) of each LLI buffer allocated for one transfer */
-# define PL08X_LLI_TSFR_SIZE	0x2000
+/* The order of words in an LLI. */
+#define PL080_LLI_SRC		0
+#define PL080_LLI_DST		1
+#define PL080_LLI_LLI		2
+#define PL080_LLI_CCTL		3
+#define PL080S_LLI_CCTL2	4
 
-/* Maximum times we call dma_pool_alloc on this pool without freeing */
-#define MAX_NUM_TSFR_LLIS	(PL08X_LLI_TSFR_SIZE/sizeof(struct pl08x_lli))
+/* Total words in an LLI. */
+#define PL080_LLI_WORDS		4
+#define PL080S_LLI_WORDS	8
+
+/*
+ * Number of LLIs in each LLI buffer allocated for one transfer
+ * (maximum times we call dma_pool_alloc on this pool without freeing)
+ */
+#define MAX_NUM_TSFR_LLIS	512
 #define PL08X_ALIGN		8
 
 static inline struct pl08x_dma_chan *to_pl08x_chan(struct dma_chan *chan)
@@ -334,10 +347,39 @@
 {
 	unsigned int val;
 
-	val = readl(ch->base + PL080_CH_CONFIG);
+	val = readl(ch->reg_config);
 	return val & PL080_CONFIG_ACTIVE;
 }
 
+static void pl08x_write_lli(struct pl08x_driver_data *pl08x,
+		struct pl08x_phy_chan *phychan, const u32 *lli, u32 ccfg)
+{
+	if (pl08x->vd->pl080s)
+		dev_vdbg(&pl08x->adev->dev,
+			"WRITE channel %d: csrc=0x%08x, cdst=0x%08x, "
+			"clli=0x%08x, cctl=0x%08x, cctl2=0x%08x, ccfg=0x%08x\n",
+			phychan->id, lli[PL080_LLI_SRC], lli[PL080_LLI_DST],
+			lli[PL080_LLI_LLI], lli[PL080_LLI_CCTL],
+			lli[PL080S_LLI_CCTL2], ccfg);
+	else
+		dev_vdbg(&pl08x->adev->dev,
+			"WRITE channel %d: csrc=0x%08x, cdst=0x%08x, "
+			"clli=0x%08x, cctl=0x%08x, ccfg=0x%08x\n",
+			phychan->id, lli[PL080_LLI_SRC], lli[PL080_LLI_DST],
+			lli[PL080_LLI_LLI], lli[PL080_LLI_CCTL], ccfg);
+
+	writel_relaxed(lli[PL080_LLI_SRC], phychan->base + PL080_CH_SRC_ADDR);
+	writel_relaxed(lli[PL080_LLI_DST], phychan->base + PL080_CH_DST_ADDR);
+	writel_relaxed(lli[PL080_LLI_LLI], phychan->base + PL080_CH_LLI);
+	writel_relaxed(lli[PL080_LLI_CCTL], phychan->base + PL080_CH_CONTROL);
+
+	if (pl08x->vd->pl080s)
+		writel_relaxed(lli[PL080S_LLI_CCTL2],
+				phychan->base + PL080S_CH_CONTROL2);
+
+	writel(ccfg, phychan->reg_config);
+}
+
 /*
  * Set the initial DMA register values i.e. those for the first LLI
  * The next LLI pointer and the configuration interrupt bit have
@@ -350,7 +392,6 @@
 	struct pl08x_phy_chan *phychan = plchan->phychan;
 	struct virt_dma_desc *vd = vchan_next_desc(&plchan->vc);
 	struct pl08x_txd *txd = to_pl08x_txd(&vd->tx);
-	struct pl08x_lli *lli;
 	u32 val;
 
 	list_del(&txd->vd.node);
@@ -361,19 +402,7 @@
 	while (pl08x_phy_channel_busy(phychan))
 		cpu_relax();
 
-	lli = &txd->llis_va[0];
-
-	dev_vdbg(&pl08x->adev->dev,
-		"WRITE channel %d: csrc=0x%08x, cdst=0x%08x, "
-		"clli=0x%08x, cctl=0x%08x, ccfg=0x%08x\n",
-		phychan->id, lli->src, lli->dst, lli->lli, lli->cctl,
-		txd->ccfg);
-
-	writel(lli->src, phychan->base + PL080_CH_SRC_ADDR);
-	writel(lli->dst, phychan->base + PL080_CH_DST_ADDR);
-	writel(lli->lli, phychan->base + PL080_CH_LLI);
-	writel(lli->cctl, phychan->base + PL080_CH_CONTROL);
-	writel(txd->ccfg, phychan->base + PL080_CH_CONFIG);
+	pl08x_write_lli(pl08x, phychan, &txd->llis_va[0], txd->ccfg);
 
 	/* Enable the DMA channel */
 	/* Do not access config register until channel shows as disabled */
@@ -381,11 +410,11 @@
 		cpu_relax();
 
 	/* Do not access config register until channel shows as inactive */
-	val = readl(phychan->base + PL080_CH_CONFIG);
+	val = readl(phychan->reg_config);
 	while ((val & PL080_CONFIG_ACTIVE) || (val & PL080_CONFIG_ENABLE))
-		val = readl(phychan->base + PL080_CH_CONFIG);
+		val = readl(phychan->reg_config);
 
-	writel(val | PL080_CONFIG_ENABLE, phychan->base + PL080_CH_CONFIG);
+	writel(val | PL080_CONFIG_ENABLE, phychan->reg_config);
 }
 
 /*
@@ -404,9 +433,9 @@
 	int timeout;
 
 	/* Set the HALT bit and wait for the FIFO to drain */
-	val = readl(ch->base + PL080_CH_CONFIG);
+	val = readl(ch->reg_config);
 	val |= PL080_CONFIG_HALT;
-	writel(val, ch->base + PL080_CH_CONFIG);
+	writel(val, ch->reg_config);
 
 	/* Wait for channel inactive */
 	for (timeout = 1000; timeout; timeout--) {
@@ -423,9 +452,9 @@
 	u32 val;
 
 	/* Clear the HALT bit */
-	val = readl(ch->base + PL080_CH_CONFIG);
+	val = readl(ch->reg_config);
 	val &= ~PL080_CONFIG_HALT;
-	writel(val, ch->base + PL080_CH_CONFIG);
+	writel(val, ch->reg_config);
 }
 
 /*
@@ -437,12 +466,12 @@
 static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x,
 	struct pl08x_phy_chan *ch)
 {
-	u32 val = readl(ch->base + PL080_CH_CONFIG);
+	u32 val = readl(ch->reg_config);
 
 	val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK |
 	         PL080_CONFIG_TC_IRQ_MASK);
 
-	writel(val, ch->base + PL080_CH_CONFIG);
+	writel(val, ch->reg_config);
 
 	writel(1 << ch->id, pl08x->base + PL080_ERR_CLEAR);
 	writel(1 << ch->id, pl08x->base + PL080_TC_CLEAR);
@@ -453,6 +482,28 @@
 	/* The source width defines the number of bytes */
 	u32 bytes = cctl & PL080_CONTROL_TRANSFER_SIZE_MASK;
 
+	cctl &= PL080_CONTROL_SWIDTH_MASK;
+
+	switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) {
+	case PL080_WIDTH_8BIT:
+		break;
+	case PL080_WIDTH_16BIT:
+		bytes *= 2;
+		break;
+	case PL080_WIDTH_32BIT:
+		bytes *= 4;
+		break;
+	}
+	return bytes;
+}
+
+static inline u32 get_bytes_in_cctl_pl080s(u32 cctl, u32 cctl1)
+{
+	/* The source width defines the number of bytes */
+	u32 bytes = cctl1 & PL080S_CONTROL_TRANSFER_SIZE_MASK;
+
+	cctl &= PL080_CONTROL_SWIDTH_MASK;
+
 	switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) {
 	case PL080_WIDTH_8BIT:
 		break;
@@ -469,47 +520,66 @@
 /* The channel should be paused when calling this */
 static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
 {
+	struct pl08x_driver_data *pl08x = plchan->host;
+	const u32 *llis_va, *llis_va_limit;
 	struct pl08x_phy_chan *ch;
+	dma_addr_t llis_bus;
 	struct pl08x_txd *txd;
-	size_t bytes = 0;
+	u32 llis_max_words;
+	size_t bytes;
+	u32 clli;
 
 	ch = plchan->phychan;
 	txd = plchan->at;
 
+	if (!ch || !txd)
+		return 0;
+
 	/*
 	 * Follow the LLIs to get the number of remaining
 	 * bytes in the currently active transaction.
 	 */
-	if (ch && txd) {
-		u32 clli = readl(ch->base + PL080_CH_LLI) & ~PL080_LLI_LM_AHB2;
+	clli = readl(ch->base + PL080_CH_LLI) & ~PL080_LLI_LM_AHB2;
 
-		/* First get the remaining bytes in the active transfer */
+	/* First get the remaining bytes in the active transfer */
+	if (pl08x->vd->pl080s)
+		bytes = get_bytes_in_cctl_pl080s(
+				readl(ch->base + PL080_CH_CONTROL),
+				readl(ch->base + PL080S_CH_CONTROL2));
+	else
 		bytes = get_bytes_in_cctl(readl(ch->base + PL080_CH_CONTROL));
 
-		if (clli) {
-			struct pl08x_lli *llis_va = txd->llis_va;
-			dma_addr_t llis_bus = txd->llis_bus;
-			int index;
+	if (!clli)
+		return bytes;
 
-			BUG_ON(clli < llis_bus || clli >= llis_bus +
-				sizeof(struct pl08x_lli) * MAX_NUM_TSFR_LLIS);
+	llis_va = txd->llis_va;
+	llis_bus = txd->llis_bus;
 
-			/*
-			 * Locate the next LLI - as this is an array,
-			 * it's simple maths to find.
-			 */
-			index = (clli - llis_bus) / sizeof(struct pl08x_lli);
+	llis_max_words = pl08x->lli_words * MAX_NUM_TSFR_LLIS;
+	BUG_ON(clli < llis_bus || clli >= llis_bus +
+						sizeof(u32) * llis_max_words);
 
-			for (; index < MAX_NUM_TSFR_LLIS; index++) {
-				bytes += get_bytes_in_cctl(llis_va[index].cctl);
+	/*
+	 * Locate the next LLI - as this is an array,
+	 * it's simple maths to find.
+	 */
+	llis_va += (clli - llis_bus) / sizeof(u32);
 
-				/*
-				 * A LLI pointer of 0 terminates the LLI list
-				 */
-				if (!llis_va[index].lli)
-					break;
-			}
-		}
+	llis_va_limit = llis_va + llis_max_words;
+
+	for (; llis_va < llis_va_limit; llis_va += pl08x->lli_words) {
+		if (pl08x->vd->pl080s)
+			bytes += get_bytes_in_cctl_pl080s(
+						llis_va[PL080_LLI_CCTL],
+						llis_va[PL080S_LLI_CCTL2]);
+		else
+			bytes += get_bytes_in_cctl(llis_va[PL080_LLI_CCTL]);
+
+		/*
+	 * An LLI pointer going backward terminates the LLI list
+		 */
+		if (llis_va[PL080_LLI_LLI] <= clli)
+			break;
 	}
 
 	return bytes;
@@ -720,6 +790,7 @@
 		break;
 	}
 
+	tsize &= PL080_CONTROL_TRANSFER_SIZE_MASK;
 	retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT;
 	return retbits;
 }
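
The new masking matters because on PL080S the full transfer size is
programmed through CCTL2, so the tsize passed here may be wider than the
PL080 CCTL field; a sketch of the failure the mask prevents:

  #include <linux/amba/pl080.h>

  /* Sketch: tsize must fit the CCTL transfer-size field. On PL080S the
   * full size is carried in CCTL2 instead, so callers may pass a value
   * wider than this field; without the mask the excess bits would spill
   * into the adjacent burst-size fields of the control word. */
  static u32 example_pack_tsize(u32 retbits, u32 tsize)
  {
          tsize &= PL080_CONTROL_TRANSFER_SIZE_MASK;
          return retbits | (tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT);
  }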
@@ -764,20 +835,26 @@
 /*
  * Fills in one LLI for a certain transfer descriptor and advance the counter
  */
-static void pl08x_fill_lli_for_desc(struct pl08x_lli_build_data *bd,
-	int num_llis, int len, u32 cctl)
+static void pl08x_fill_lli_for_desc(struct pl08x_driver_data *pl08x,
+				    struct pl08x_lli_build_data *bd,
+				    int num_llis, int len, u32 cctl, u32 cctl2)
 {
-	struct pl08x_lli *llis_va = bd->txd->llis_va;
+	u32 offset = num_llis * pl08x->lli_words;
+	u32 *llis_va = bd->txd->llis_va + offset;
 	dma_addr_t llis_bus = bd->txd->llis_bus;
 
 	BUG_ON(num_llis >= MAX_NUM_TSFR_LLIS);
 
-	llis_va[num_llis].cctl = cctl;
-	llis_va[num_llis].src = bd->srcbus.addr;
-	llis_va[num_llis].dst = bd->dstbus.addr;
-	llis_va[num_llis].lli = llis_bus + (num_llis + 1) *
-		sizeof(struct pl08x_lli);
-	llis_va[num_llis].lli |= bd->lli_bus;
+	/* Advance the offset to next LLI. */
+	offset += pl08x->lli_words;
+
+	llis_va[PL080_LLI_SRC] = bd->srcbus.addr;
+	llis_va[PL080_LLI_DST] = bd->dstbus.addr;
+	llis_va[PL080_LLI_LLI] = (llis_bus + sizeof(u32) * offset);
+	llis_va[PL080_LLI_LLI] |= bd->lli_bus;
+	llis_va[PL080_LLI_CCTL] = cctl;
+	if (pl08x->vd->pl080s)
+		llis_va[PL080S_LLI_CCTL2] = cctl2;
 
 	if (cctl & PL080_CONTROL_SRC_INCR)
 		bd->srcbus.addr += len;
@@ -789,14 +866,53 @@
 	bd->remainder -= len;
 }
 
-static inline void prep_byte_width_lli(struct pl08x_lli_build_data *bd,
-		u32 *cctl, u32 len, int num_llis, size_t *total_bytes)
+static inline void prep_byte_width_lli(struct pl08x_driver_data *pl08x,
+			struct pl08x_lli_build_data *bd, u32 *cctl, u32 len,
+			int num_llis, size_t *total_bytes)
 {
 	*cctl = pl08x_cctl_bits(*cctl, 1, 1, len);
-	pl08x_fill_lli_for_desc(bd, num_llis, len, *cctl);
+	pl08x_fill_lli_for_desc(pl08x, bd, num_llis, len, *cctl, len);
 	(*total_bytes) += len;
 }
 
+#ifdef VERBOSE_DEBUG
+static void pl08x_dump_lli(struct pl08x_driver_data *pl08x,
+			   const u32 *llis_va, int num_llis)
+{
+	int i;
+
+	if (pl08x->vd->pl080s) {
+		dev_vdbg(&pl08x->adev->dev,
+			"%-3s %-9s  %-10s %-10s %-10s %-10s %s\n",
+			"lli", "", "csrc", "cdst", "clli", "cctl", "cctl2");
+		for (i = 0; i < num_llis; i++) {
+			dev_vdbg(&pl08x->adev->dev,
+				"%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
+				i, llis_va, llis_va[PL080_LLI_SRC],
+				llis_va[PL080_LLI_DST], llis_va[PL080_LLI_LLI],
+				llis_va[PL080_LLI_CCTL],
+				llis_va[PL080S_LLI_CCTL2]);
+			llis_va += pl08x->lli_words;
+		}
+	} else {
+		dev_vdbg(&pl08x->adev->dev,
+			"%-3s %-9s  %-10s %-10s %-10s %s\n",
+			"lli", "", "csrc", "cdst", "clli", "cctl");
+		for (i = 0; i < num_llis; i++) {
+			dev_vdbg(&pl08x->adev->dev,
+				"%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+				i, llis_va, llis_va[PL080_LLI_SRC],
+				llis_va[PL080_LLI_DST], llis_va[PL080_LLI_LLI],
+				llis_va[PL080_LLI_CCTL]);
+			llis_va += pl08x->lli_words;
+		}
+	}
+}
+#else
+static inline void pl08x_dump_lli(struct pl08x_driver_data *pl08x,
+				  const u32 *llis_va, int num_llis) {}
+#endif
+
 /*
  * This fills in the table of LLIs for the transfer descriptor
  * Note that we assume we never have to change the burst sizes
@@ -810,7 +926,7 @@
 	int num_llis = 0;
 	u32 cctl, early_bytes = 0;
 	size_t max_bytes_per_lli, total_bytes;
-	struct pl08x_lli *llis_va;
+	u32 *llis_va, *last_lli;
 	struct pl08x_sg *dsg;
 
 	txd->llis_va = dma_pool_alloc(pl08x->pool, GFP_NOWAIT, &txd->llis_bus);
@@ -897,7 +1013,8 @@
 
 			cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth,
 					bd.dstbus.buswidth, 0);
-			pl08x_fill_lli_for_desc(&bd, num_llis++, 0, cctl);
+			pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++,
+					0, cctl, 0);
 			break;
 		}
 
@@ -919,8 +1036,8 @@
 			dev_vdbg(&pl08x->adev->dev,
 				"%s byte width LLIs (remain 0x%08x)\n",
 				__func__, bd.remainder);
-			prep_byte_width_lli(&bd, &cctl, early_bytes, num_llis++,
-				&total_bytes);
+			prep_byte_width_lli(pl08x, &bd, &cctl, early_bytes,
+				num_llis++, &total_bytes);
 		}
 
 		if (bd.remainder) {
@@ -941,7 +1058,7 @@
 			 * MIN(buswidths)
 			 */
 			max_bytes_per_lli = bd.srcbus.buswidth *
-				PL080_CONTROL_TRANSFER_SIZE_MASK;
+						pl08x->vd->max_transfer_size;
 			dev_vdbg(&pl08x->adev->dev,
 				"%s max bytes per lli = %zu\n",
 				__func__, max_bytes_per_lli);
@@ -976,8 +1093,8 @@
 
 				cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth,
 					bd.dstbus.buswidth, tsize);
-				pl08x_fill_lli_for_desc(&bd, num_llis++,
-						lli_len, cctl);
+				pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++,
+						lli_len, cctl, tsize);
 				total_bytes += lli_len;
 			}
 
@@ -988,8 +1105,8 @@
 				dev_vdbg(&pl08x->adev->dev,
 					"%s align with boundary, send odd bytes (remain %zu)\n",
 					__func__, bd.remainder);
-				prep_byte_width_lli(&bd, &cctl, bd.remainder,
-						num_llis++, &total_bytes);
+				prep_byte_width_lli(pl08x, &bd, &cctl,
+					bd.remainder, num_llis++, &total_bytes);
 			}
 		}
 
@@ -1003,33 +1120,25 @@
 		if (num_llis >= MAX_NUM_TSFR_LLIS) {
 			dev_err(&pl08x->adev->dev,
 				"%s need to increase MAX_NUM_TSFR_LLIS from 0x%08x\n",
-				__func__, (u32) MAX_NUM_TSFR_LLIS);
+				__func__, MAX_NUM_TSFR_LLIS);
 			return 0;
 		}
 	}
 
 	llis_va = txd->llis_va;
-	/* The final LLI terminates the LLI. */
-	llis_va[num_llis - 1].lli = 0;
-	/* The final LLI element shall also fire an interrupt. */
-	llis_va[num_llis - 1].cctl |= PL080_CONTROL_TC_IRQ_EN;
+	last_lli = llis_va + (num_llis - 1) * pl08x->lli_words;
 
-#ifdef VERBOSE_DEBUG
-	{
-		int i;
-
-		dev_vdbg(&pl08x->adev->dev,
-			 "%-3s %-9s  %-10s %-10s %-10s %s\n",
-			 "lli", "", "csrc", "cdst", "clli", "cctl");
-		for (i = 0; i < num_llis; i++) {
-			dev_vdbg(&pl08x->adev->dev,
-				 "%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n",
-				 i, &llis_va[i], llis_va[i].src,
-				 llis_va[i].dst, llis_va[i].lli, llis_va[i].cctl
-				);
-		}
+	if (txd->cyclic) {
+		/* Link back to the first LLI. */
+		last_lli[PL080_LLI_LLI] = txd->llis_bus | bd.lli_bus;
+	} else {
+		/* The final LLI terminates the LLI. */
+		last_lli[PL080_LLI_LLI] = 0;
+		/* The final LLI element shall also fire an interrupt. */
+		last_lli[PL080_LLI_CCTL] |= PL080_CONTROL_TC_IRQ_EN;
 	}
-#endif
+
+	pl08x_dump_lli(pl08x, llis_va, num_llis);
 
 	return num_llis;
 }
@@ -1305,6 +1414,7 @@
 				  struct dma_slave_config *config)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
 
 	if (!plchan->slave)
 		return -EINVAL;
@@ -1314,6 +1424,13 @@
 	    config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
 		return -EINVAL;
 
+	if (config->device_fc && pl08x->vd->pl080s) {
+		dev_err(&pl08x->adev->dev,
+			"%s: PL080S does not support peripheral flow control\n",
+			__func__);
+		return -EINVAL;
+	}
+
 	plchan->cfg = *config;
 
 	return 0;
@@ -1404,25 +1521,19 @@
 	return vchan_tx_prep(&plchan->vc, &txd->vd, flags);
 }
 
-static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
-		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+static struct pl08x_txd *pl08x_init_txd(
+		struct dma_chan *chan,
+		enum dma_transfer_direction direction,
+		dma_addr_t *slave_addr)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
 	struct pl08x_driver_data *pl08x = plchan->host;
 	struct pl08x_txd *txd;
-	struct pl08x_sg *dsg;
-	struct scatterlist *sg;
 	enum dma_slave_buswidth addr_width;
-	dma_addr_t slave_addr;
 	int ret, tmp;
 	u8 src_buses, dst_buses;
 	u32 maxburst, cctl;
 
-	dev_dbg(&pl08x->adev->dev, "%s prepare transaction of %d bytes from %s\n",
-			__func__, sg_dma_len(sgl), plchan->name);
-
 	txd = pl08x_get_txd(plchan);
 	if (!txd) {
 		dev_err(&pl08x->adev->dev, "%s no txd\n", __func__);
@@ -1436,14 +1547,14 @@
 	 */
 	if (direction == DMA_MEM_TO_DEV) {
 		cctl = PL080_CONTROL_SRC_INCR;
-		slave_addr = plchan->cfg.dst_addr;
+		*slave_addr = plchan->cfg.dst_addr;
 		addr_width = plchan->cfg.dst_addr_width;
 		maxburst = plchan->cfg.dst_maxburst;
 		src_buses = pl08x->mem_buses;
 		dst_buses = plchan->cd->periph_buses;
 	} else if (direction == DMA_DEV_TO_MEM) {
 		cctl = PL080_CONTROL_DST_INCR;
-		slave_addr = plchan->cfg.src_addr;
+		*slave_addr = plchan->cfg.src_addr;
 		addr_width = plchan->cfg.src_addr_width;
 		maxburst = plchan->cfg.src_maxburst;
 		src_buses = plchan->cd->periph_buses;
@@ -1492,24 +1603,107 @@
 	else
 		txd->ccfg |= plchan->signal << PL080_CONFIG_SRC_SEL_SHIFT;
 
+	return txd;
+}
+
+static int pl08x_tx_add_sg(struct pl08x_txd *txd,
+			   enum dma_transfer_direction direction,
+			   dma_addr_t slave_addr,
+			   dma_addr_t buf_addr,
+			   unsigned int len)
+{
+	struct pl08x_sg *dsg;
+
+	dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT);
+	if (!dsg)
+		return -ENOMEM;
+
+	list_add_tail(&dsg->node, &txd->dsg_list);
+
+	dsg->len = len;
+	if (direction == DMA_MEM_TO_DEV) {
+		dsg->src_addr = buf_addr;
+		dsg->dst_addr = slave_addr;
+	} else {
+		dsg->src_addr = slave_addr;
+		dsg->dst_addr = buf_addr;
+	}
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	struct scatterlist *sg;
+	int ret, tmp;
+	dma_addr_t slave_addr;
+
+	dev_dbg(&pl08x->adev->dev, "%s prepare transaction of %d bytes from %s\n",
+			__func__, sg_dma_len(sgl), plchan->name);
+
+	txd = pl08x_init_txd(chan, direction, &slave_addr);
+	if (!txd)
+		return NULL;
+
 	for_each_sg(sgl, sg, sg_len, tmp) {
-		dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT);
-		if (!dsg) {
+		ret = pl08x_tx_add_sg(txd, direction, slave_addr,
+				      sg_dma_address(sg),
+				      sg_dma_len(sg));
+		if (ret) {
 			pl08x_release_mux(plchan);
 			pl08x_free_txd(pl08x, txd);
 			dev_err(&pl08x->adev->dev, "%s no mem for pl080 sg\n",
 					__func__);
 			return NULL;
 		}
-		list_add_tail(&dsg->node, &txd->dsg_list);
+	}
 
-		dsg->len = sg_dma_len(sg);
-		if (direction == DMA_MEM_TO_DEV) {
-			dsg->src_addr = sg_dma_address(sg);
-			dsg->dst_addr = slave_addr;
-		} else {
-			dsg->src_addr = slave_addr;
-			dsg->dst_addr = sg_dma_address(sg);
+	ret = pl08x_fill_llis_for_desc(plchan->host, txd);
+	if (!ret) {
+		pl08x_release_mux(plchan);
+		pl08x_free_txd(pl08x, txd);
+		return NULL;
+	}
+
+	return vchan_tx_prep(&plchan->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *pl08x_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	int ret, tmp;
+	dma_addr_t slave_addr;
+
+	dev_dbg(&pl08x->adev->dev,
+		"%s prepare cyclic transaction of %d/%d bytes %s %s\n",
+		__func__, period_len, buf_len,
+		direction == DMA_MEM_TO_DEV ? "to" : "from",
+		plchan->name);
+
+	txd = pl08x_init_txd(chan, direction, &slave_addr);
+	if (!txd)
+		return NULL;
+
+	txd->cyclic = true;
+	txd->cctl |= PL080_CONTROL_TC_IRQ_EN;
+	for (tmp = 0; tmp < buf_len; tmp += period_len) {
+		ret = pl08x_tx_add_sg(txd, direction, slave_addr,
+				      buf_addr + tmp, period_len);
+		if (ret) {
+			pl08x_release_mux(plchan);
+			pl08x_free_txd(pl08x, txd);
+			return NULL;
 		}
 	}
 
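With DMA_CYCLIC now advertised (see the probe changes below), a client can
drive this through the generic dmaengine wrappers; a minimal sketch,
assuming an already configured slave channel:

  #include <linux/dmaengine.h>

  /* Sketch: starting a cyclic ring from a client driver. Assumes @chan
   * was obtained and configured with dmaengine_slave_config() already. */
  static int example_start_ring(struct dma_chan *chan, dma_addr_t buf,
                                size_t buf_len, size_t period_len)
  {
          struct dma_async_tx_descriptor *desc;

          /* The last LLI links back to the first (see the fill_llis
           * change above), so the transfer loops until terminated. */
          desc = dmaengine_prep_dma_cyclic(chan, buf, buf_len, period_len,
                                           DMA_MEM_TO_DEV,
                                           DMA_PREP_INTERRUPT);
          if (!desc)
                  return -EINVAL;

          dmaengine_submit(desc);
          dma_async_issue_pending(chan);
          return 0;
  }
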
@@ -1652,7 +1846,9 @@
 
 			spin_lock(&plchan->vc.lock);
 			tx = plchan->at;
-			if (tx) {
+			if (tx && tx->cyclic) {
+				vchan_cyclic_callback(&tx->vd);
+			} else if (tx) {
 				plchan->at = NULL;
 				/*
 				 * This descriptor is done, release its mux
@@ -1846,6 +2042,7 @@
 {
 	struct pl08x_driver_data *pl08x;
 	const struct vendor_data *vd = id->data;
+	u32 tsfr_size;
 	int ret = 0;
 	int i;
 
@@ -1873,6 +2070,7 @@
 
 	/* Initialize slave engine */
 	dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, pl08x->slave.cap_mask);
 	pl08x->slave.dev = &adev->dev;
 	pl08x->slave.device_alloc_chan_resources = pl08x_alloc_chan_resources;
 	pl08x->slave.device_free_chan_resources = pl08x_free_chan_resources;
@@ -1880,6 +2078,7 @@
 	pl08x->slave.device_tx_status = pl08x_dma_tx_status;
 	pl08x->slave.device_issue_pending = pl08x_issue_pending;
 	pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg;
+	pl08x->slave.device_prep_dma_cyclic = pl08x_prep_dma_cyclic;
 	pl08x->slave.device_control = pl08x_control;
 
 	/* Get the platform data */
@@ -1902,9 +2101,15 @@
 		pl08x->mem_buses = pl08x->pd->mem_buses;
 	}
 
+	if (vd->pl080s)
+		pl08x->lli_words = PL080S_LLI_WORDS;
+	else
+		pl08x->lli_words = PL080_LLI_WORDS;
+	tsfr_size = MAX_NUM_TSFR_LLIS * pl08x->lli_words * sizeof(u32);
+
 	/* A DMA memory pool for LLIs, align on 1-byte boundary */
 	pl08x->pool = dma_pool_create(DRIVER_NAME, &pl08x->adev->dev,
-			PL08X_LLI_TSFR_SIZE, PL08X_ALIGN, 0);
+						tsfr_size, PL08X_ALIGN, 0);
 	if (!pl08x->pool) {
 		ret = -ENOMEM;
 		goto out_no_lli_pool;
@@ -1947,6 +2152,7 @@
 
 		ch->id = i;
 		ch->base = pl08x->base + PL080_Cx_BASE(i);
+		ch->reg_config = ch->base + vd->config_offset;
 		spin_lock_init(&ch->lock);
 
 		/*
@@ -1957,7 +2163,7 @@
 		if (vd->nomadik) {
 			u32 val;
 
-			val = readl(ch->base + PL080_CH_CONFIG);
+			val = readl(ch->reg_config);
 			if (val & (PL080N_CONFIG_ITPROT | PL080N_CONFIG_SECPROT)) {
 				dev_info(&adev->dev, "physical channel %d reserved for secure access only\n", i);
 				ch->locked = true;
@@ -2008,8 +2214,8 @@
 
 	amba_set_drvdata(adev, pl08x);
 	init_pl08x_debugfs(pl08x);
-	dev_info(&pl08x->adev->dev, "DMA: PL%03x rev%u at 0x%08llx irq %d\n",
-		 amba_part(adev), amba_rev(adev),
+	dev_info(&pl08x->adev->dev, "DMA: PL%03x%s rev%u at 0x%08llx irq %d\n",
+		 amba_part(adev), pl08x->vd->pl080s ? "s" : "", amba_rev(adev),
 		 (unsigned long long)adev->res.start, adev->irq[0]);
 
 	return 0;
@@ -2038,22 +2244,41 @@
 
 /* PL080 has 8 channels and the PL081 has just 2 */
 static struct vendor_data vendor_pl080 = {
+	.config_offset = PL080_CH_CONFIG,
 	.channels = 8,
 	.dualmaster = true,
+	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
 };
 
 static struct vendor_data vendor_nomadik = {
+	.config_offset = PL080_CH_CONFIG,
 	.channels = 8,
 	.dualmaster = true,
 	.nomadik = true,
+	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
+};
+
+static struct vendor_data vendor_pl080s = {
+	.config_offset = PL080S_CH_CONFIG,
+	.channels = 8,
+	.pl080s = true,
+	.max_transfer_size = PL080S_CONTROL_TRANSFER_SIZE_MASK,
 };
 
 static struct vendor_data vendor_pl081 = {
+	.config_offset = PL080_CH_CONFIG,
 	.channels = 2,
 	.dualmaster = false,
+	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
 };
 
 static struct amba_id pl08x_ids[] = {
+	/* Samsung PL080S variant */
+	{
+		.id	= 0x0a141080,
+		.mask	= 0xffffffff,
+		.data	= &vendor_pl080s,
+	},
 	/* PL080 */
 	{
 		.id	= 0x00041080,
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 755ba2f..d7d94d2 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -504,7 +504,7 @@
 }
 
 /**
- * dma_request_channel - try to get specific channel exclusively
+ * dma_get_slave_channel - try to get specific channel exclusively
  * @chan: target channel
  */
 struct dma_chan *dma_get_slave_channel(struct dma_chan *chan)
@@ -530,7 +530,7 @@
 EXPORT_SYMBOL_GPL(dma_get_slave_channel);
 
 /**
- * dma_request_channel - try to allocate an exclusive channel
+ * __dma_request_channel - try to allocate an exclusive channel
  * @mask: capabilities that the channel must satisfy
  * @fn: optional callback to disposition available channels
  * @fn_param: opaque parameter to pass to dma_filter_fn
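
For reference, __dma_request_channel() is normally reached through the
dma_request_channel() macro; a minimal sketch with a trivial filter (the
example_* names are illustrative):

  #include <linux/dmaengine.h>

  static bool example_filter(struct dma_chan *chan, void *param)
  {
          /* A real filter matches controller-specific data; accept all. */
          return true;
  }

  static struct dma_chan *example_get_chan(void)
  {
          dma_cap_mask_t mask;

          dma_cap_zero(mask);
          dma_cap_set(DMA_SLAVE, mask);

          /* Expands to __dma_request_channel(&mask, example_filter, NULL) */
          return dma_request_channel(mask, example_filter, NULL);
  }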
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index eea479c..89eb89f 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -37,16 +37,22 @@
  * which does not support descriptor writeback.
  */
 
+static inline bool is_request_line_unset(struct dw_dma_chan *dwc)
+{
+	return dwc->request_line == (typeof(dwc->request_line))~0;
+}
+
 static inline void dwc_set_masters(struct dw_dma_chan *dwc)
 {
 	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
 	struct dw_dma_slave *dws = dwc->chan.private;
 	unsigned char mmax = dw->nr_masters - 1;
 
-	if (dwc->request_line == ~0) {
-		dwc->src_master = min_t(unsigned char, mmax, dwc_get_sms(dws));
-		dwc->dst_master = min_t(unsigned char, mmax, dwc_get_dms(dws));
-	}
+	if (!is_request_line_unset(dwc))
+		return;
+
+	dwc->src_master = min_t(unsigned char, mmax, dwc_get_sms(dws));
+	dwc->dst_master = min_t(unsigned char, mmax, dwc_get_dms(dws));
 }
 
 #define DWC_DEFAULT_CTLLO(_chan) ({				\
@@ -644,10 +650,13 @@
 static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
 {
 	struct dw_dma *dw = dev_id;
-	u32 status;
+	u32 status = dma_readl(dw, STATUS_INT);
 
-	dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__,
-			dma_readl(dw, STATUS_INT));
+	dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__, status);
+
+	/* Check if we have any interrupt from the DMAC */
+	if (!status)
+		return IRQ_NONE;
 
 	/*
 	 * Just disable the interrupts. We'll turn them back on in the
@@ -984,7 +993,7 @@
 	dwc->direction = sconfig->direction;
 
 	/* Take the request line from slave_id member */
-	if (dwc->request_line == ~0)
+	if (is_request_line_unset(dwc))
 		dwc->request_line = sconfig->slave_id;
 
 	convert_burst(&dwc->dma_sconfig.src_maxburst);
@@ -1089,16 +1098,16 @@
 	enum dma_status		ret;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret != DMA_SUCCESS) {
-		dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+	if (ret == DMA_SUCCESS)
+		return ret;
 
-		ret = dma_cookie_status(chan, cookie, txstate);
-	}
+	dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
 
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret != DMA_SUCCESS)
 		dma_set_residue(txstate, dwc_get_residue(dwc));
 
-	if (dwc->paused)
+	if (dwc->paused && ret == DMA_IN_PROGRESS)
 		return DMA_PAUSED;
 
 	return ret;
@@ -1560,8 +1569,8 @@
 	/* Disable BLOCK interrupts as well */
 	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 
-	err = devm_request_irq(chip->dev, chip->irq, dw_dma_interrupt, 0,
-			       "dw_dmac", dw);
+	err = devm_request_irq(chip->dev, chip->irq, dw_dma_interrupt,
+			       IRQF_SHARED, "dw_dmac", dw);
 	if (err)
 		return err;
 
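Requesting the IRQ with IRQF_SHARED is only safe because the handler above
now returns IRQ_NONE when STATUS_INT is clear; a minimal sketch of that
contract, using a hypothetical device with one status register:

  #include <linux/interrupt.h>
  #include <linux/io.h>

  struct example_dev {
          void __iomem *regs;     /* hypothetical: one status register */
  };

  static irqreturn_t example_shared_handler(int irq, void *dev_id)
  {
          struct example_dev *ed = dev_id;
          u32 status = readl(ed->regs);   /* hypothetical status read */

          if (!status)
                  return IRQ_NONE;        /* not ours; other sharers run */

          writel(status, ed->regs);       /* hypothetical W1C acknowledge */
          return IRQ_HANDLED;
  }
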
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index 6c9449c..e35d975 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -253,6 +253,7 @@
 	{ "INTL9C60", 0 },
 	{ }
 };
+MODULE_DEVICE_TABLE(acpi, dw_dma_acpi_id_table);
 #endif
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 5f3e532..4f6d87b 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -502,8 +502,6 @@
 	} else if (echan->edesc && echan->edesc->vdesc.tx.cookie == cookie) {
 		struct edma_desc *edesc = echan->edesc;
 		txstate->residue = edma_desc_size(edesc);
-	} else {
-		txstate->residue = 0;
 	}
 	spin_unlock_irqrestore(&echan->vchan.lock, flags);
 
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index f2bf8c0..591cd8c 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -1313,15 +1313,7 @@
 					    dma_cookie_t cookie,
 					    struct dma_tx_state *state)
 {
-	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
-	enum dma_status ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&edmac->lock, flags);
-	ret = dma_cookie_status(chan, cookie, state);
-	spin_unlock_irqrestore(&edmac->lock, flags);
-
-	return ret;
+	return dma_cookie_status(chan, cookie, state);
 }
 
 /**
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 49e8fbd..b3f3e90 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -979,15 +979,7 @@
 					dma_cookie_t cookie,
 					struct dma_tx_state *txstate)
 {
-	struct fsldma_chan *chan = to_fsl_chan(dchan);
-	enum dma_status ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&chan->desc_lock, flags);
-	ret = dma_cookie_status(dchan, cookie, txstate);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
-
-	return ret;
+	return dma_cookie_status(dchan, cookie, txstate);
 }
 
 /*----------------------------------------------------------------------------*/
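
Several conversions in this series (ep93xx, fsldma here, and ipu below)
reduce to the same pattern; a sketch of the resulting minimal callback:

  #include <linux/dmaengine.h>
  #include "dmaengine.h"  /* dma_cookie_status() lives in drivers/dma */

  static enum dma_status example_tx_status(struct dma_chan *chan,
                                           dma_cookie_t cookie,
                                           struct dma_tx_state *state)
  {
          /* dma_cookie_status() reads cookie/completed_cookie without the
           * driver's own lock, which is what makes removing the
           * per-channel locking in these conversions safe. */
          return dma_cookie_status(chan, cookie, state);
  }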
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index ff2aab9..78f8ca5 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -805,10 +805,8 @@
 	}
 	INIT_LIST_HEAD(&imxdmac->ld_free);
 
-	if (imxdmac->sg_list) {
-		kfree(imxdmac->sg_list);
-		imxdmac->sg_list = NULL;
-	}
+	kfree(imxdmac->sg_list);
+	imxdmac->sg_list = NULL;
 }
 
 static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 1e44b8c..fc43603 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -243,7 +243,6 @@
  * @event_id1		for channels that use 2 events
  * @word_size		peripheral access size
  * @buf_tail		ID of the buffer that was processed
- * @done		channel completion
  * @num_bd		max NUM_BD. number of descriptors currently handling
  */
 struct sdma_channel {
@@ -255,7 +254,6 @@
 	unsigned int			event_id1;
 	enum dma_slave_buswidth		word_size;
 	unsigned int			buf_tail;
-	struct completion		done;
 	unsigned int			num_bd;
 	struct sdma_buffer_descriptor	*bd;
 	dma_addr_t			bd_phys;
@@ -307,9 +305,10 @@
 	u32	ram_code_size;
 };
 
-enum sdma_devtype {
-	IMX31_SDMA,	/* runs on i.mx31 */
-	IMX35_SDMA,	/* runs on i.mx35 and later */
+struct sdma_driver_data {
+	int chnenbl0;
+	int num_events;
+	struct sdma_script_start_addrs	*script_addrs;
 };
 
 struct sdma_engine {
@@ -318,8 +317,6 @@
 	struct sdma_channel		channel[MAX_DMA_CHANNELS];
 	struct sdma_channel_control	*channel_control;
 	void __iomem			*regs;
-	enum sdma_devtype		devtype;
-	unsigned int			num_events;
 	struct sdma_context_data	*context;
 	dma_addr_t			context_phys;
 	struct dma_device		dma_device;
@@ -327,15 +324,118 @@
 	struct clk			*clk_ahb;
 	spinlock_t			channel_0_lock;
 	struct sdma_script_start_addrs	*script_addrs;
+	const struct sdma_driver_data	*drvdata;
+};
+
+static struct sdma_driver_data sdma_imx31 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX31,
+	.num_events = 32,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx25 = {
+	.ap_2_ap_addr = 729,
+	.uart_2_mcu_addr = 904,
+	.per_2_app_addr = 1255,
+	.mcu_2_app_addr = 834,
+	.uartsh_2_mcu_addr = 1120,
+	.per_2_shp_addr = 1329,
+	.mcu_2_shp_addr = 1048,
+	.ata_2_mcu_addr = 1560,
+	.mcu_2_ata_addr = 1479,
+	.app_2_per_addr = 1189,
+	.app_2_mcu_addr = 770,
+	.shp_2_per_addr = 1407,
+	.shp_2_mcu_addr = 979,
+};
+
+static struct sdma_driver_data sdma_imx25 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+	.script_addrs = &sdma_script_imx25,
+};
+
+static struct sdma_driver_data sdma_imx35 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx51 = {
+	.ap_2_ap_addr = 642,
+	.uart_2_mcu_addr = 817,
+	.mcu_2_app_addr = 747,
+	.mcu_2_shp_addr = 961,
+	.ata_2_mcu_addr = 1473,
+	.mcu_2_ata_addr = 1392,
+	.app_2_per_addr = 1033,
+	.app_2_mcu_addr = 683,
+	.shp_2_per_addr = 1251,
+	.shp_2_mcu_addr = 892,
+};
+
+static struct sdma_driver_data sdma_imx51 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+	.script_addrs = &sdma_script_imx51,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx53 = {
+	.ap_2_ap_addr = 642,
+	.app_2_mcu_addr = 683,
+	.mcu_2_app_addr = 747,
+	.uart_2_mcu_addr = 817,
+	.shp_2_mcu_addr = 891,
+	.mcu_2_shp_addr = 960,
+	.uartsh_2_mcu_addr = 1032,
+	.spdif_2_mcu_addr = 1100,
+	.mcu_2_spdif_addr = 1134,
+	.firi_2_mcu_addr = 1193,
+	.mcu_2_firi_addr = 1290,
+};
+
+static struct sdma_driver_data sdma_imx53 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+	.script_addrs = &sdma_script_imx53,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx6q = {
+	.ap_2_ap_addr = 642,
+	.uart_2_mcu_addr = 817,
+	.mcu_2_app_addr = 747,
+	.per_2_per_addr = 6331,
+	.uartsh_2_mcu_addr = 1032,
+	.mcu_2_shp_addr = 960,
+	.app_2_mcu_addr = 683,
+	.shp_2_mcu_addr = 891,
+	.spdif_2_mcu_addr = 1100,
+	.mcu_2_spdif_addr = 1134,
+};
+
+static struct sdma_driver_data sdma_imx6q = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+	.script_addrs = &sdma_script_imx6q,
 };
 
 static struct platform_device_id sdma_devtypes[] = {
 	{
+		.name = "imx25-sdma",
+		.driver_data = (unsigned long)&sdma_imx25,
+	}, {
 		.name = "imx31-sdma",
-		.driver_data = IMX31_SDMA,
+		.driver_data = (unsigned long)&sdma_imx31,
 	}, {
 		.name = "imx35-sdma",
-		.driver_data = IMX35_SDMA,
+		.driver_data = (unsigned long)&sdma_imx35,
+	}, {
+		.name = "imx51-sdma",
+		.driver_data = (unsigned long)&sdma_imx51,
+	}, {
+		.name = "imx53-sdma",
+		.driver_data = (unsigned long)&sdma_imx53,
+	}, {
+		.name = "imx6q-sdma",
+		.driver_data = (unsigned long)&sdma_imx6q,
 	}, {
 		/* sentinel */
 	}
@@ -343,8 +443,11 @@
 MODULE_DEVICE_TABLE(platform, sdma_devtypes);
 
 static const struct of_device_id sdma_dt_ids[] = {
-	{ .compatible = "fsl,imx31-sdma", .data = &sdma_devtypes[IMX31_SDMA], },
-	{ .compatible = "fsl,imx35-sdma", .data = &sdma_devtypes[IMX35_SDMA], },
+	{ .compatible = "fsl,imx6q-sdma", .data = &sdma_imx6q, },
+	{ .compatible = "fsl,imx53-sdma", .data = &sdma_imx53, },
+	{ .compatible = "fsl,imx51-sdma", .data = &sdma_imx51, },
+	{ .compatible = "fsl,imx35-sdma", .data = &sdma_imx35, },
+	{ .compatible = "fsl,imx31-sdma", .data = &sdma_imx31, },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, sdma_dt_ids);
@@ -356,8 +459,7 @@
 
 static inline u32 chnenbl_ofs(struct sdma_engine *sdma, unsigned int event)
 {
-	u32 chnenbl0 = (sdma->devtype == IMX31_SDMA ? SDMA_CHNENBL0_IMX31 :
-						      SDMA_CHNENBL0_IMX35);
+	u32 chnenbl0 = sdma->drvdata->chnenbl0;
 	return chnenbl0 + event * 4;
 }
 
@@ -547,8 +649,6 @@
 {
 	struct sdma_channel *sdmac = (struct sdma_channel *) data;
 
-	complete(&sdmac->done);
-
 	if (sdmac->flags & IMX_DMA_SG_LOOP)
 		sdma_handle_channel_loop(sdmac);
 	else
@@ -733,7 +833,7 @@
 	sdmac->per_addr = 0;
 
 	if (sdmac->event_id0) {
-		if (sdmac->event_id0 >= sdmac->sdma->num_events)
+		if (sdmac->event_id0 >= sdmac->sdma->drvdata->num_events)
 			return -EINVAL;
 		sdma_event_enable(sdmac, sdmac->event_id0);
 	}
@@ -812,9 +912,6 @@
 	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
 
 	sdma_set_channel_priority(sdmac, MXC_SDMA_DEFAULT_PRIORITY);
-
-	init_completion(&sdmac->done);
-
 	return 0;
 out:
 
@@ -1120,15 +1217,12 @@
 }
 
 static enum dma_status sdma_tx_status(struct dma_chan *chan,
-					    dma_cookie_t cookie,
-					    struct dma_tx_state *txstate)
+				      dma_cookie_t cookie,
+				      struct dma_tx_state *txstate)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
-	dma_cookie_t last_used;
 
-	last_used = chan->cookie;
-
-	dma_set_tx_state(txstate, chan->completed_cookie, last_used,
+	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
 			sdmac->chn_count - sdmac->chn_real_count);
 
 	return sdmac->status;
@@ -1218,19 +1312,6 @@
 	int i, ret;
 	dma_addr_t ccb_phys;
 
-	switch (sdma->devtype) {
-	case IMX31_SDMA:
-		sdma->num_events = 32;
-		break;
-	case IMX35_SDMA:
-		sdma->num_events = 48;
-		break;
-	default:
-		dev_err(sdma->dev, "Unknown sdma type %d. aborting\n",
-			sdma->devtype);
-		return -ENODEV;
-	}
-
 	clk_enable(sdma->clk_ipg);
 	clk_enable(sdma->clk_ahb);
 
@@ -1257,7 +1338,7 @@
 			MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control));
 
 	/* disable all channels */
-	for (i = 0; i < sdma->num_events; i++)
+	for (i = 0; i < sdma->drvdata->num_events; i++)
 		writel_relaxed(0, sdma->regs + chnenbl_ofs(sdma, i));
 
 	/* All channels have priority 0 */
@@ -1335,10 +1416,21 @@
 	int ret;
 	int irq;
 	struct resource *iores;
-	struct sdma_platform_data *pdata = pdev->dev.platform_data;
+	struct sdma_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	int i;
 	struct sdma_engine *sdma;
 	s32 *saddr_arr;
+	const struct sdma_driver_data *drvdata = NULL;
+
+	if (of_id)
+		drvdata = of_id->data;
+	else if (pdev->id_entry)
+		drvdata = (void *)pdev->id_entry->driver_data;
+
+	if (!drvdata) {
+		dev_err(&pdev->dev, "unable to find driver data\n");
+		return -EINVAL;
+	}
 
 	sdma = kzalloc(sizeof(*sdma), GFP_KERNEL);
 	if (!sdma)
@@ -1347,6 +1439,7 @@
 	spin_lock_init(&sdma->channel_0_lock);
 
 	sdma->dev = &pdev->dev;
+	sdma->drvdata = drvdata;
 
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq = platform_get_irq(pdev, 0);
@@ -1396,10 +1489,6 @@
 	for (i = 0; i < SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1; i++)
 		saddr_arr[i] = -EINVAL;
 
-	if (of_id)
-		pdev->id_entry = of_id->data;
-	sdma->devtype = pdev->id_entry->driver_data;
-
 	dma_cap_set(DMA_SLAVE, sdma->dma_device.cap_mask);
 	dma_cap_set(DMA_CYCLIC, sdma->dma_device.cap_mask);
 
@@ -1431,6 +1520,8 @@
 	if (ret)
 		goto err_init;
 
+	if (sdma->drvdata->script_addrs)
+		sdma_add_scripts(sdma, sdma->drvdata->script_addrs);
 	if (pdata && pdata->script_addrs)
 		sdma_add_scripts(sdma, pdata->script_addrs);
 
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index cc727ec..dd8b44a 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -518,7 +518,7 @@
 	struct iop_adma_desc_slot *slot = NULL;
 	int init = iop_chan->slots_allocated ? 0 : 1;
 	struct iop_adma_platform_data *plat_data =
-		iop_chan->device->pdev->dev.platform_data;
+		dev_get_platdata(&iop_chan->device->pdev->dev);
 	int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE;
 
 	/* Allocate descriptor slots */
@@ -1351,7 +1351,7 @@
 	struct iop_adma_device *device = platform_get_drvdata(dev);
 	struct dma_chan *chan, *_chan;
 	struct iop_adma_chan *iop_chan;
-	struct iop_adma_platform_data *plat_data = dev->dev.platform_data;
+	struct iop_adma_platform_data *plat_data = dev_get_platdata(&dev->dev);
 
 	dma_async_device_unregister(&device->common);
 
@@ -1376,7 +1376,7 @@
 	struct iop_adma_device *adev;
 	struct iop_adma_chan *iop_chan;
 	struct dma_device *dma_dev;
-	struct iop_adma_platform_data *plat_data = pdev->dev.platform_data;
+	struct iop_adma_platform_data *plat_data = dev_get_platdata(&pdev->dev);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res)
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index d39c2cd..cb9c0bc 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -1593,10 +1593,7 @@
 static enum dma_status idmac_tx_status(struct dma_chan *chan,
 		       dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
-	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, 0);
-	if (cookie != chan->cookie)
-		return DMA_ERROR;
-	return DMA_SUCCESS;
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 static int __init ipu_idmac_init(struct ipu *ipu)
@@ -1767,7 +1764,6 @@
 	iounmap(ipu->reg_ic);
 	iounmap(ipu->reg_ipu);
 	tasklet_kill(&ipu->tasklet);
-	platform_set_drvdata(pdev, NULL);
 
 	return 0;
 }
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
new file mode 100644
index 0000000..a2c330f
--- /dev/null
+++ b/drivers/dma/k3dma.c
@@ -0,0 +1,837 @@
+/*
+ * Copyright (c) 2013 Linaro Ltd.
+ * Copyright (c) 2013 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/of_dma.h>
+
+#include "virt-dma.h"
+
+#define DRIVER_NAME		"k3-dma"
+#define DMA_ALIGN		3
+#define DMA_MAX_SIZE		0x1ffc
+
+#define INT_STAT		0x00
+#define INT_TC1			0x04
+#define INT_ERR1		0x0c
+#define INT_ERR2		0x10
+#define INT_TC1_MASK		0x18
+#define INT_ERR1_MASK		0x20
+#define INT_ERR2_MASK		0x24
+#define INT_TC1_RAW		0x600
+#define INT_ERR1_RAW		0x608
+#define INT_ERR2_RAW		0x610
+#define CH_PRI			0x688
+#define CH_STAT			0x690
+#define CX_CUR_CNT		0x704
+#define CX_LLI			0x800
+#define CX_CNT			0x810
+#define CX_SRC			0x814
+#define CX_DST			0x818
+#define CX_CFG			0x81c
+#define AXI_CFG			0x820
+#define AXI_CFG_DEFAULT		0x201201
+
+#define CX_LLI_CHAIN_EN		0x2
+#define CX_CFG_EN		0x1
+#define CX_CFG_MEM2PER		(0x1 << 2)
+#define CX_CFG_PER2MEM		(0x2 << 2)
+#define CX_CFG_SRCINCR		(0x1 << 31)
+#define CX_CFG_DSTINCR		(0x1 << 30)
+
+struct k3_desc_hw {
+	u32 lli;
+	u32 reserved[3];
+	u32 count;
+	u32 saddr;
+	u32 daddr;
+	u32 config;
+} __aligned(32);
+
+struct k3_dma_desc_sw {
+	struct virt_dma_desc	vd;
+	dma_addr_t		desc_hw_lli;
+	size_t			desc_num;
+	size_t			size;
+	struct k3_desc_hw	desc_hw[0];
+};
+
+struct k3_dma_phy;
+
+struct k3_dma_chan {
+	u32			ccfg;
+	struct virt_dma_chan	vc;
+	struct k3_dma_phy	*phy;
+	struct list_head	node;
+	enum dma_transfer_direction dir;
+	dma_addr_t		dev_addr;
+	enum dma_status		status;
+};
+
+struct k3_dma_phy {
+	u32			idx;
+	void __iomem		*base;
+	struct k3_dma_chan	*vchan;
+	struct k3_dma_desc_sw	*ds_run;
+	struct k3_dma_desc_sw	*ds_done;
+};
+
+struct k3_dma_dev {
+	struct dma_device	slave;
+	void __iomem		*base;
+	struct tasklet_struct	task;
+	spinlock_t		lock;
+	struct list_head	chan_pending;
+	struct k3_dma_phy	*phy;
+	struct k3_dma_chan	*chans;
+	struct clk		*clk;
+	u32			dma_channels;
+	u32			dma_requests;
+};
+
+#define to_k3_dma(dmadev) container_of(dmadev, struct k3_dma_dev, slave)
+
+static struct k3_dma_chan *to_k3_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct k3_dma_chan, vc.chan);
+}
+
+static void k3_dma_pause_dma(struct k3_dma_phy *phy, bool on)
+{
+	u32 val = 0;
+
+	if (on) {
+		val = readl_relaxed(phy->base + CX_CFG);
+		val |= CX_CFG_EN;
+		writel_relaxed(val, phy->base + CX_CFG);
+	} else {
+		val = readl_relaxed(phy->base + CX_CFG);
+		val &= ~CX_CFG_EN;
+		writel_relaxed(val, phy->base + CX_CFG);
+	}
+}
+
+static void k3_dma_terminate_chan(struct k3_dma_phy *phy, struct k3_dma_dev *d)
+{
+	u32 val = 0;
+
+	k3_dma_pause_dma(phy, false);
+
+	val = 0x1 << phy->idx;
+	writel_relaxed(val, d->base + INT_TC1_RAW);
+	writel_relaxed(val, d->base + INT_ERR1_RAW);
+	writel_relaxed(val, d->base + INT_ERR2_RAW);
+}
+
+static void k3_dma_set_desc(struct k3_dma_phy *phy, struct k3_desc_hw *hw)
+{
+	writel_relaxed(hw->lli, phy->base + CX_LLI);
+	writel_relaxed(hw->count, phy->base + CX_CNT);
+	writel_relaxed(hw->saddr, phy->base + CX_SRC);
+	writel_relaxed(hw->daddr, phy->base + CX_DST);
+	writel_relaxed(AXI_CFG_DEFAULT, phy->base + AXI_CFG);
+	writel_relaxed(hw->config, phy->base + CX_CFG);
+}
+
+static u32 k3_dma_get_curr_cnt(struct k3_dma_dev *d, struct k3_dma_phy *phy)
+{
+	u32 cnt = 0;
+
+	cnt = readl_relaxed(d->base + CX_CUR_CNT + phy->idx * 0x10);
+	cnt &= 0xffff;
+	return cnt;
+}
+
+static u32 k3_dma_get_curr_lli(struct k3_dma_phy *phy)
+{
+	return readl_relaxed(phy->base + CX_LLI);
+}
+
+static u32 k3_dma_get_chan_stat(struct k3_dma_dev *d)
+{
+	return readl_relaxed(d->base + CH_STAT);
+}
+
+static void k3_dma_enable_dma(struct k3_dma_dev *d, bool on)
+{
+	if (on) {
+		/* set same priority */
+		writel_relaxed(0x0, d->base + CH_PRI);
+
+		/* unmask irq */
+		writel_relaxed(0xffff, d->base + INT_TC1_MASK);
+		writel_relaxed(0xffff, d->base + INT_ERR1_MASK);
+		writel_relaxed(0xffff, d->base + INT_ERR2_MASK);
+	} else {
+		/* mask irq */
+		writel_relaxed(0x0, d->base + INT_TC1_MASK);
+		writel_relaxed(0x0, d->base + INT_ERR1_MASK);
+		writel_relaxed(0x0, d->base + INT_ERR2_MASK);
+	}
+}
+
+static irqreturn_t k3_dma_int_handler(int irq, void *dev_id)
+{
+	struct k3_dma_dev *d = (struct k3_dma_dev *)dev_id;
+	struct k3_dma_phy *p;
+	struct k3_dma_chan *c;
+	u32 stat = readl_relaxed(d->base + INT_STAT);
+	u32 tc1  = readl_relaxed(d->base + INT_TC1);
+	u32 err1 = readl_relaxed(d->base + INT_ERR1);
+	u32 err2 = readl_relaxed(d->base + INT_ERR2);
+	u32 i, irq_chan = 0;
+
+	while (stat) {
+		i = __ffs(stat);
+		stat &= (stat - 1);
+		if (likely(tc1 & BIT(i))) {
+			p = &d->phy[i];
+			c = p->vchan;
+			if (c) {
+				unsigned long flags;
+
+				spin_lock_irqsave(&c->vc.lock, flags);
+				vchan_cookie_complete(&p->ds_run->vd);
+				p->ds_done = p->ds_run;
+				spin_unlock_irqrestore(&c->vc.lock, flags);
+			}
+			irq_chan |= BIT(i);
+		}
+		if (unlikely((err1 & BIT(i)) || (err2 & BIT(i))))
+			dev_warn(d->slave.dev, "DMA ERR\n");
+	}
+
+	writel_relaxed(irq_chan, d->base + INT_TC1_RAW);
+	writel_relaxed(err1, d->base + INT_ERR1_RAW);
+	writel_relaxed(err2, d->base + INT_ERR2_RAW);
+
+	if (irq_chan) {
+		tasklet_schedule(&d->task);
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
+static int k3_dma_start_txd(struct k3_dma_chan *c)
+{
+	struct k3_dma_dev *d = to_k3_dma(c->vc.chan.device);
+	struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
+
+	if (!c->phy)
+		return -EAGAIN;
+
+	if (BIT(c->phy->idx) & k3_dma_get_chan_stat(d))
+		return -EAGAIN;
+
+	if (vd) {
+		struct k3_dma_desc_sw *ds =
+			container_of(vd, struct k3_dma_desc_sw, vd);
+		/*
+		 * fetch and remove request from vc->desc_issued
+		 * so vc->desc_issued only contains desc pending
+		 */
+		list_del(&ds->vd.node);
+		c->phy->ds_run = ds;
+		c->phy->ds_done = NULL;
+		/* start dma */
+		k3_dma_set_desc(c->phy, &ds->desc_hw[0]);
+		return 0;
+	}
+	c->phy->ds_done = NULL;
+	c->phy->ds_run = NULL;
+	return -EAGAIN;
+}
+
+static void k3_dma_tasklet(unsigned long arg)
+{
+	struct k3_dma_dev *d = (struct k3_dma_dev *)arg;
+	struct k3_dma_phy *p;
+	struct k3_dma_chan *c, *cn;
+	unsigned pch, pch_alloc = 0;
+
+	/* check new dma request of running channel in vc->desc_issued */
+	list_for_each_entry_safe(c, cn, &d->slave.channels, vc.chan.device_node) {
+		spin_lock_irq(&c->vc.lock);
+		p = c->phy;
+		if (p && p->ds_done) {
+			if (k3_dma_start_txd(c)) {
+				/* No current txd associated with this channel */
+				dev_dbg(d->slave.dev, "pchan %u: free\n", p->idx);
+				/* Mark this channel free */
+				c->phy = NULL;
+				p->vchan = NULL;
+			}
+		}
+		spin_unlock_irq(&c->vc.lock);
+	}
+
+	/* check new channel request in d->chan_pending */
+	spin_lock_irq(&d->lock);
+	for (pch = 0; pch < d->dma_channels; pch++) {
+		p = &d->phy[pch];
+
+		if (p->vchan == NULL && !list_empty(&d->chan_pending)) {
+			c = list_first_entry(&d->chan_pending,
+				struct k3_dma_chan, node);
+			/* remove from d->chan_pending */
+			list_del_init(&c->node);
+			pch_alloc |= 1 << pch;
+			/* Mark this channel allocated */
+			p->vchan = c;
+			c->phy = p;
+			dev_dbg(d->slave.dev, "pchan %u: alloc vchan %p\n", pch, &c->vc);
+		}
+	}
+	spin_unlock_irq(&d->lock);
+
+	for (pch = 0; pch < d->dma_channels; pch++) {
+		if (pch_alloc & (1 << pch)) {
+			p = &d->phy[pch];
+			c = p->vchan;
+			if (c) {
+				spin_lock_irq(&c->vc.lock);
+				k3_dma_start_txd(c);
+				spin_unlock_irq(&c->vc.lock);
+			}
+		}
+	}
+}
+
+static int k3_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	return 0;
+}
+
+static void k3_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	unsigned long flags;
+
+	spin_lock_irqsave(&d->lock, flags);
+	list_del_init(&c->node);
+	spin_unlock_irqrestore(&d->lock, flags);
+
+	vchan_free_chan_resources(&c->vc);
+	c->ccfg = 0;
+}
+
+static enum dma_status k3_dma_tx_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *state)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	struct k3_dma_phy *p;
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	enum dma_status ret;
+	size_t bytes = 0;
+
+	ret = dma_cookie_status(&c->vc.chan, cookie, state);
+	if (ret == DMA_SUCCESS)
+		return ret;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	p = c->phy;
+	ret = c->status;
+
+	/*
+	 * If the cookie is on our issue queue, then the residue is
+	 * its total size.
+	 */
+	vd = vchan_find_desc(&c->vc, cookie);
+	if (vd) {
+		bytes = container_of(vd, struct k3_dma_desc_sw, vd)->size;
+	} else if ((!p) || (!p->ds_run)) {
+		bytes = 0;
+	} else {
+		struct k3_dma_desc_sw *ds = p->ds_run;
+		u32 clli = 0, index = 0;
+
+		bytes = k3_dma_get_curr_cnt(d, p);
+		clli = k3_dma_get_curr_lli(p);
+		index = (clli - ds->desc_hw_lli) / sizeof(struct k3_desc_hw);
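+		/* add the counts of the LLIs still pending after the current one */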
+		for (; index < ds->desc_num; index++) {
+			bytes += ds->desc_hw[index].count;
+			/* end of lli */
+			if (!ds->desc_hw[index].lli)
+				break;
+		}
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+	dma_set_residue(state, bytes);
+	return ret;
+}
+
+static void k3_dma_issue_pending(struct dma_chan *chan)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	unsigned long flags;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	/* add request to vc->desc_issued */
+	if (vchan_issue_pending(&c->vc)) {
+		spin_lock(&d->lock);
+		if (!c->phy) {
+			if (list_empty(&c->node)) {
+				/* if new channel, add chan_pending */
+				list_add_tail(&c->node, &d->chan_pending);
+				/* check in tasklet */
+				tasklet_schedule(&d->task);
+				dev_dbg(d->slave.dev, "vchan %p: issued\n", &c->vc);
+			}
+		}
+		spin_unlock(&d->lock);
+	} else {
+		dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", &c->vc);
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+}
+
+static void k3_dma_fill_desc(struct k3_dma_desc_sw *ds, dma_addr_t dst,
+			dma_addr_t src, size_t len, u32 num, u32 ccfg)
+{
+	if ((num + 1) < ds->desc_num)
+		ds->desc_hw[num].lli = ds->desc_hw_lli + (num + 1) *
+			sizeof(struct k3_desc_hw);
+	ds->desc_hw[num].lli |= CX_LLI_CHAIN_EN;
+	ds->desc_hw[num].count = len;
+	ds->desc_hw[num].saddr = src;
+	ds->desc_hw[num].daddr = dst;
+	ds->desc_hw[num].config = ccfg;
+}
+
+static struct dma_async_tx_descriptor *k3_dma_prep_memcpy(
+	struct dma_chan *chan,	dma_addr_t dst, dma_addr_t src,
+	size_t len, unsigned long flags)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_desc_sw *ds;
+	size_t copy = 0;
+	int num = 0;
+
+	if (!len)
+		return NULL;
+
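+	/* one inline hw descriptor per DMA_MAX_SIZE chunk of the copy */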
+	num = DIV_ROUND_UP(len, DMA_MAX_SIZE);
+	ds = kzalloc(sizeof(*ds) + num * sizeof(ds->desc_hw[0]), GFP_ATOMIC);
+	if (!ds) {
+		dev_dbg(chan->device->dev, "vchan %p: kzalloc fail\n", &c->vc);
+		return NULL;
+	}
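+	/* physical address of the inline LLI array, handed to the hardware */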
+	ds->desc_hw_lli = __virt_to_phys((unsigned long)&ds->desc_hw[0]);
+	ds->size = len;
+	ds->desc_num = num;
+	num = 0;
+
+	if (!c->ccfg) {
+		/* default is memtomem, without calling device_control */
+		c->ccfg = CX_CFG_SRCINCR | CX_CFG_DSTINCR | CX_CFG_EN;
+		c->ccfg |= (0xf << 20) | (0xf << 24);	/* burst = 16 */
+		c->ccfg |= (0x3 << 12) | (0x3 << 16);	/* width = 64 bit */
+	}
+
+	do {
+		copy = min_t(size_t, len, DMA_MAX_SIZE);
+		k3_dma_fill_desc(ds, dst, src, copy, num++, c->ccfg);
+
+		if (c->dir == DMA_MEM_TO_DEV) {
+			src += copy;
+		} else if (c->dir == DMA_DEV_TO_MEM) {
+			dst += copy;
+		} else {
+			src += copy;
+			dst += copy;
+		}
+		len -= copy;
+	} while (len);
+
+	ds->desc_hw[num-1].lli = 0;	/* end of link */
+	return vchan_tx_prep(&c->vc, &ds->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *k3_dma_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sglen,
+	enum dma_transfer_direction dir, unsigned long flags, void *context)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_desc_sw *ds;
+	size_t len, avail, total = 0;
+	struct scatterlist *sg;
+	dma_addr_t addr, src = 0, dst = 0;
+	int num = sglen, i;
+
+	if (!sgl)
+		return NULL;
+
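+	/* count the extra descriptors needed for entries longer than DMA_MAX_SIZE */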
+	for_each_sg(sgl, sg, sglen, i) {
+		avail = sg_dma_len(sg);
+		if (avail > DMA_MAX_SIZE)
+			num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1;
+	}
+
+	ds = kzalloc(sizeof(*ds) + num * sizeof(ds->desc_hw[0]), GFP_ATOMIC);
+	if (!ds) {
+		dev_dbg(chan->device->dev, "vchan %p: kzalloc fail\n", &c->vc);
+		return NULL;
+	}
+	ds->desc_hw_lli = __virt_to_phys((unsigned long)&ds->desc_hw[0]);
+	ds->desc_num = num;
+	num = 0;
+
+	for_each_sg(sgl, sg, sglen, i) {
+		addr = sg_dma_address(sg);
+		avail = sg_dma_len(sg);
+		total += avail;
+
+		do {
+			len = min_t(size_t, avail, DMA_MAX_SIZE);
+
+			if (dir == DMA_MEM_TO_DEV) {
+				src = addr;
+				dst = c->dev_addr;
+			} else if (dir == DMA_DEV_TO_MEM) {
+				src = c->dev_addr;
+				dst = addr;
+			}
+
+			k3_dma_fill_desc(ds, dst, src, len, num++, c->ccfg);
+
+			addr += len;
+			avail -= len;
+		} while (avail);
+	}
+
+	ds->desc_hw[num-1].lli = 0;	/* end of link */
+	ds->size = total;
+	return vchan_tx_prep(&c->vc, &ds->vd, flags);
+}
+
+static int k3_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+	unsigned long arg)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	struct dma_slave_config *cfg = (void *)arg;
+	struct k3_dma_phy *p = c->phy;
+	unsigned long flags;
+	u32 maxburst = 0, val = 0;
+	enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+	LIST_HEAD(head);
+
+	switch (cmd) {
+	case DMA_SLAVE_CONFIG:
+		if (cfg == NULL)
+			return -EINVAL;
+		c->dir = cfg->direction;
+		if (c->dir == DMA_DEV_TO_MEM) {
+			c->ccfg = CX_CFG_DSTINCR;
+			c->dev_addr = cfg->src_addr;
+			maxburst = cfg->src_maxburst;
+			width = cfg->src_addr_width;
+		} else if (c->dir == DMA_MEM_TO_DEV) {
+			c->ccfg = CX_CFG_SRCINCR;
+			c->dev_addr = cfg->dst_addr;
+			maxburst = cfg->dst_maxburst;
+			width = cfg->dst_addr_width;
+		}
+		switch (width) {
+		case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		case DMA_SLAVE_BUSWIDTH_8_BYTES:
+			val = __ffs(width);
+			break;
+		default:
+			val = 3;
+			break;
+		}
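+		/* program source and destination bus width */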
+		c->ccfg |= (val << 12) | (val << 16);
+
+		if ((maxburst == 0) || (maxburst > 16))
+			val = 16;
+		else
+			val = maxburst - 1;
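+		/* program source and destination burst length */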
+		c->ccfg |= (val << 20) | (val << 24);
+		c->ccfg |= CX_CFG_MEM2PER | CX_CFG_EN;
+
+		/* specific request line */
+		c->ccfg |= c->vc.chan.chan_id << 4;
+		break;
+
+	case DMA_TERMINATE_ALL:
+		dev_dbg(d->slave.dev, "vchan %p: terminate all\n", &c->vc);
+
+		/* Prevent this channel being scheduled */
+		spin_lock(&d->lock);
+		list_del_init(&c->node);
+		spin_unlock(&d->lock);
+
+		/* Clear the tx descriptor lists */
+		spin_lock_irqsave(&c->vc.lock, flags);
+		vchan_get_all_descriptors(&c->vc, &head);
+		if (p) {
+			/* vchan is assigned to a pchan - stop the channel */
+			k3_dma_terminate_chan(p, d);
+			c->phy = NULL;
+			p->vchan = NULL;
+			p->ds_run = p->ds_done = NULL;
+		}
+		spin_unlock_irqrestore(&c->vc.lock, flags);
+		vchan_dma_desc_free_list(&c->vc, &head);
+		break;
+
+	case DMA_PAUSE:
+		dev_dbg(d->slave.dev, "vchan %p: pause\n", &c->vc);
+		if (c->status == DMA_IN_PROGRESS) {
+			c->status = DMA_PAUSED;
+			if (p) {
+				k3_dma_pause_dma(p, false);
+			} else {
+				spin_lock(&d->lock);
+				list_del_init(&c->node);
+				spin_unlock(&d->lock);
+			}
+		}
+		break;
+
+	case DMA_RESUME:
+		dev_dbg(d->slave.dev, "vchan %p: resume\n", &c->vc);
+		spin_lock_irqsave(&c->vc.lock, flags);
+		if (c->status == DMA_PAUSED) {
+			c->status = DMA_IN_PROGRESS;
+			if (p) {
+				k3_dma_pause_dma(p, true);
+			} else if (!list_empty(&c->vc.desc_issued)) {
+				spin_lock(&d->lock);
+				list_add_tail(&c->node, &d->chan_pending);
+				spin_unlock(&d->lock);
+			}
+		}
+		spin_unlock_irqrestore(&c->vc.lock, flags);
+		break;
+	default:
+		return -ENXIO;
+	}
+	return 0;
+}
+
+static void k3_dma_free_desc(struct virt_dma_desc *vd)
+{
+	struct k3_dma_desc_sw *ds =
+		container_of(vd, struct k3_dma_desc_sw, vd);
+
+	kfree(ds);
+}
+
+static const struct of_device_id k3_pdma_dt_ids[] = {
+	{ .compatible = "hisilicon,k3-dma-1.0", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, k3_pdma_dt_ids);
+
+static struct dma_chan *k3_of_dma_simple_xlate(struct of_phandle_args *dma_spec,
+						struct of_dma *ofdma)
+{
+	struct k3_dma_dev *d = ofdma->of_dma_data;
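+	/* the single dma-cell carries the request line, used as the channel index */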
+	unsigned int request = dma_spec->args[0];
+
+	if (request >= d->dma_requests)
+		return NULL;
+
+	return dma_get_slave_channel(&(d->chans[request].vc.chan));
+}
+
+static int k3_dma_probe(struct platform_device *op)
+{
+	struct k3_dma_dev *d;
+	const struct of_device_id *of_id;
+	struct resource *iores;
+	int i, ret, irq = 0;
+
+	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
+	if (!iores)
+		return -EINVAL;
+
+	d = devm_kzalloc(&op->dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->base = devm_ioremap_resource(&op->dev, iores);
+	if (IS_ERR(d->base))
+		return PTR_ERR(d->base);
+
+	of_id = of_match_device(k3_pdma_dt_ids, &op->dev);
+	if (of_id) {
+		of_property_read_u32(op->dev.of_node,
+				"dma-channels", &d->dma_channels);
+		of_property_read_u32(op->dev.of_node,
+				"dma-requests", &d->dma_requests);
+	}
+
+	d->clk = devm_clk_get(&op->dev, NULL);
+	if (IS_ERR(d->clk)) {
+		dev_err(&op->dev, "no dma clk\n");
+		return PTR_ERR(d->clk);
+	}
+
+	irq = platform_get_irq(op, 0);
+	ret = devm_request_irq(&op->dev, irq,
+			k3_dma_int_handler, IRQF_DISABLED, DRIVER_NAME, d);
+	if (ret)
+		return ret;
+
+	/* init phy channel */
+	d->phy = devm_kzalloc(&op->dev,
+		d->dma_channels * sizeof(struct k3_dma_phy), GFP_KERNEL);
+	if (d->phy == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < d->dma_channels; i++) {
+		struct k3_dma_phy *p = &d->phy[i];
+
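+		/* each physical channel owns a 0x40-byte register window */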
+		p->idx = i;
+		p->base = d->base + i * 0x40;
+	}
+
+	INIT_LIST_HEAD(&d->slave.channels);
+	dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
+	dma_cap_set(DMA_MEMCPY, d->slave.cap_mask);
+	d->slave.dev = &op->dev;
+	d->slave.device_alloc_chan_resources = k3_dma_alloc_chan_resources;
+	d->slave.device_free_chan_resources = k3_dma_free_chan_resources;
+	d->slave.device_tx_status = k3_dma_tx_status;
+	d->slave.device_prep_dma_memcpy = k3_dma_prep_memcpy;
+	d->slave.device_prep_slave_sg = k3_dma_prep_slave_sg;
+	d->slave.device_issue_pending = k3_dma_issue_pending;
+	d->slave.device_control = k3_dma_control;
+	d->slave.copy_align = DMA_ALIGN;
+	d->slave.chancnt = d->dma_requests;
+
+	/* init virtual channel */
+	d->chans = devm_kzalloc(&op->dev,
+		d->dma_requests * sizeof(struct k3_dma_chan), GFP_KERNEL);
+	if (d->chans == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < d->dma_requests; i++) {
+		struct k3_dma_chan *c = &d->chans[i];
+
+		c->status = DMA_IN_PROGRESS;
+		INIT_LIST_HEAD(&c->node);
+		c->vc.desc_free = k3_dma_free_desc;
+		vchan_init(&c->vc, &d->slave);
+	}
+
+	/* Enable clock before accessing registers */
+	ret = clk_prepare_enable(d->clk);
+	if (ret < 0) {
+		dev_err(&op->dev, "clk_prepare_enable failed: %d\n", ret);
+		return ret;
+	}
+
+	k3_dma_enable_dma(d, true);
+
+	ret = dma_async_device_register(&d->slave);
+	if (ret)
+		return ret;
+
+	ret = of_dma_controller_register(op->dev.of_node,
+					k3_of_dma_simple_xlate, d);
+	if (ret)
+		goto of_dma_register_fail;
+
+	spin_lock_init(&d->lock);
+	INIT_LIST_HEAD(&d->chan_pending);
+	tasklet_init(&d->task, k3_dma_tasklet, (unsigned long)d);
+	platform_set_drvdata(op, d);
+	dev_info(&op->dev, "initialized\n");
+
+	return 0;
+
+of_dma_register_fail:
+	dma_async_device_unregister(&d->slave);
+	return ret;
+}
+
+static int k3_dma_remove(struct platform_device *op)
+{
+	struct k3_dma_chan *c, *cn;
+	struct k3_dma_dev *d = platform_get_drvdata(op);
+
+	dma_async_device_unregister(&d->slave);
+	of_dma_controller_free(op->dev.of_node);
+
+	list_for_each_entry_safe(c, cn, &d->slave.channels, vc.chan.device_node) {
+		list_del(&c->vc.chan.device_node);
+		tasklet_kill(&c->vc.task);
+	}
+	tasklet_kill(&d->task);
+	clk_disable_unprepare(d->clk);
+	return 0;
+}
+
+static int k3_dma_suspend(struct device *dev)
+{
+	struct k3_dma_dev *d = dev_get_drvdata(dev);
+	u32 stat = 0;
+
+	stat = k3_dma_get_chan_stat(d);
+	if (stat) {
+		dev_warn(d->slave.dev,
+			"channel(s) still running (stat = 0x%x), cannot suspend\n",
+			stat);
+		return -EBUSY;
+	}
+	k3_dma_enable_dma(d, false);
+	clk_disable_unprepare(d->clk);
+	return 0;
+}
+
+static int k3_dma_resume(struct device *dev)
+{
+	struct k3_dma_dev *d = dev_get_drvdata(dev);
+	int ret = 0;
+
+	ret = clk_prepare_enable(d->clk);
+	if (ret < 0) {
+		dev_err(d->slave.dev, "clk_prepare_enable failed: %d\n", ret);
+		return ret;
+	}
+	k3_dma_enable_dma(d, true);
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(k3_dma_pmops, k3_dma_suspend, k3_dma_resume);
+
+static struct platform_driver k3_pdma_driver = {
+	.driver		= {
+		.name	= DRIVER_NAME,
+		.owner  = THIS_MODULE,
+		.pm	= &k3_dma_pmops,
+		.of_match_table = k3_pdma_dt_ids,
+	},
+	.probe		= k3_dma_probe,
+	.remove		= k3_dma_remove,
+};
+
+module_platform_driver(k3_pdma_driver);
+
+MODULE_DESCRIPTION("Hisilicon k3 DMA Driver");
+MODULE_ALIAS("platform:k3dma");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
index c26699f..ff8d7827 100644
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c
@@ -18,7 +18,9 @@
 #include <linux/platform_data/mmp_dma.h>
 #include <linux/dmapool.h>
 #include <linux/of_device.h>
+#include <linux/of_dma.h>
 #include <linux/of.h>
+#include <linux/dma/mmp-pdma.h>
 
 #include "dmaengine.h"
 
@@ -47,6 +49,8 @@
 #define DCSR_CMPST	(1 << 10)       /* The Descriptor Compare Status */
 #define DCSR_EORINTR	(1 << 9)        /* The end of Receive */
 
+#define DRCMR(n)	((((n) < 64) ? 0x0100 : 0x1100) + \
+				 (((n) & 0x3f) << 2))
 #define DRCMR_MAPVLD	(1 << 7)	/* Map Valid (read / write) */
 #define DRCMR_CHLNUM	0x1f		/* mask for Channel Number (read / write) */
 
@@ -69,7 +73,7 @@
 #define DCMD_LENGTH	0x01fff		/* length mask (max = 8K - 1) */
 
 #define PDMA_ALIGNMENT		3
-#define PDMA_MAX_DESC_BYTES	0x1000
+#define PDMA_MAX_DESC_BYTES	DCMD_LENGTH
 
 struct mmp_pdma_desc_hw {
 	u32 ddadr;	/* Points to the next descriptor + flags */
@@ -94,6 +98,9 @@
 	struct mmp_pdma_phy *phy;
 	enum dma_transfer_direction dir;
 
+	struct mmp_pdma_desc_sw *cyclic_first;	/* first desc_sw if channel
+						 * is in cyclic mode */
+
 	/* channel's basic info */
 	struct tasklet_struct tasklet;
 	u32 dcmd;
@@ -105,6 +112,7 @@
 	struct list_head chain_pending;	/* Link descriptors queue for pending */
 	struct list_head chain_running;	/* Link descriptors queue for running */
 	bool idle;			/* channel statue machine */
+	bool byte_align;
 
 	struct dma_pool *desc_pool;	/* Descriptors pool */
 };
@@ -121,6 +129,7 @@
 	struct device			*dev;
 	struct dma_device		device;
 	struct mmp_pdma_phy		*phy;
+	spinlock_t phy_lock; /* protect alloc/free phy channels */
 };
 
 #define tx_to_mmp_pdma_desc(tx) container_of(tx, struct mmp_pdma_desc_sw, async_tx)
@@ -137,15 +146,21 @@
 
 static void enable_chan(struct mmp_pdma_phy *phy)
 {
-	u32 reg;
+	u32 reg, dalgn;
 
 	if (!phy->vchan)
 		return;
 
-	reg = phy->vchan->drcmr;
-	reg = (((reg) < 64) ? 0x0100 : 0x1100) + (((reg) & 0x3f) << 2);
+	reg = DRCMR(phy->vchan->drcmr);
 	writel(DRCMR_MAPVLD | phy->idx, phy->base + reg);
 
+	dalgn = readl(phy->base + DALGN);
+	if (phy->vchan->byte_align)
+		dalgn |= 1 << phy->idx;
+	else
+		dalgn &= ~(1 << phy->idx);
+	writel(dalgn, phy->base + DALGN);
+
 	reg = (phy->idx << 2) + DCSR;
 	writel(readl(phy->base + reg) | DCSR_RUN,
 					phy->base + reg);
@@ -218,7 +233,8 @@
 {
 	int prio, i;
 	struct mmp_pdma_device *pdev = to_mmp_pdma_dev(pchan->chan.device);
-	struct mmp_pdma_phy *phy;
+	struct mmp_pdma_phy *phy, *found = NULL;
+	unsigned long flags;
 
 	/*
 	 * dma channel priorities
@@ -227,6 +243,8 @@
 	 * ch 8 - 11, 24 - 27  <--> (2)
 	 * ch 12 - 15, 28 - 31  <--> (3)
 	 */
+
+	spin_lock_irqsave(&pdev->phy_lock, flags);
 	for (prio = 0; prio <= (((pdev->dma_channels - 1) & 0xf) >> 2); prio++) {
 		for (i = 0; i < pdev->dma_channels; i++) {
 			if (prio != ((i & 0xf) >> 2))
@@ -234,31 +252,34 @@
 			phy = &pdev->phy[i];
 			if (!phy->vchan) {
 				phy->vchan = pchan;
-				return phy;
+				found = phy;
+				goto out_unlock;
 			}
 		}
 	}
 
-	return NULL;
+out_unlock:
+	spin_unlock_irqrestore(&pdev->phy_lock, flags);
+	return found;
 }
 
-/* desc->tx_list ==> pending list */
-static void append_pending_queue(struct mmp_pdma_chan *chan,
-					struct mmp_pdma_desc_sw *desc)
+static void mmp_pdma_free_phy(struct mmp_pdma_chan *pchan)
 {
-	struct mmp_pdma_desc_sw *tail =
-				to_mmp_pdma_desc(chan->chain_pending.prev);
+	struct mmp_pdma_device *pdev = to_mmp_pdma_dev(pchan->chan.device);
+	unsigned long flags;
+	u32 reg;
 
-	if (list_empty(&chan->chain_pending))
-		goto out_splice;
+	if (!pchan->phy)
+		return;
 
-	/* one irq per queue, even appended */
-	tail->desc.ddadr = desc->async_tx.phys;
-	tail->desc.dcmd &= ~DCMD_ENDIRQEN;
+	/* clear the channel mapping in DRCMR */
+	reg = DRCMR(pchan->phy->vchan->drcmr);
+	writel(0, pchan->phy->base + reg);
 
-	/* softly link to pending list */
-out_splice:
-	list_splice_tail_init(&desc->tx_list, &chan->chain_pending);
+	spin_lock_irqsave(&pdev->phy_lock, flags);
+	pchan->phy->vchan = NULL;
+	pchan->phy = NULL;
+	spin_unlock_irqrestore(&pdev->phy_lock, flags);
 }
 
 /**
@@ -277,10 +298,7 @@
 
 	if (list_empty(&chan->chain_pending)) {
 		/* chance to re-fetch phy channel with higher prio */
-		if (chan->phy) {
-			chan->phy->vchan = NULL;
-			chan->phy = NULL;
-		}
+		mmp_pdma_free_phy(chan);
 		dev_dbg(chan->dev, "no pending list\n");
 		return;
 	}
@@ -326,14 +344,16 @@
 		cookie = dma_cookie_assign(&child->async_tx);
 	}
 
-	append_pending_queue(chan, desc);
+	/* softly link to pending list - desc->tx_list ==> pending list */
+	list_splice_tail_init(&desc->tx_list, &chan->chain_pending);
 
 	spin_unlock_irqrestore(&chan->desc_lock, flags);
 
 	return cookie;
 }
 
-struct mmp_pdma_desc_sw *mmp_pdma_alloc_descriptor(struct mmp_pdma_chan *chan)
+static struct mmp_pdma_desc_sw *
+mmp_pdma_alloc_descriptor(struct mmp_pdma_chan *chan)
 {
 	struct mmp_pdma_desc_sw *desc;
 	dma_addr_t pdesc;
@@ -377,10 +397,7 @@
 		dev_err(chan->dev, "unable to allocate descriptor pool\n");
 		return -ENOMEM;
 	}
-	if (chan->phy) {
-		chan->phy->vchan = NULL;
-		chan->phy = NULL;
-	}
+	mmp_pdma_free_phy(chan);
 	chan->idle = true;
 	chan->dev_addr = 0;
 	return 1;
@@ -411,10 +428,7 @@
 	chan->desc_pool = NULL;
 	chan->idle = true;
 	chan->dev_addr = 0;
-	if (chan->phy) {
-		chan->phy->vchan = NULL;
-		chan->phy = NULL;
-	}
+	mmp_pdma_free_phy(chan);
 	return;
 }
 
@@ -434,6 +448,7 @@
 		return NULL;
 
 	chan = to_mmp_pdma_chan(dchan);
+	chan->byte_align = false;
 
 	if (!chan->dir) {
 		chan->dir = DMA_MEM_TO_MEM;
@@ -450,6 +465,8 @@
 		}
 
 		copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES);
+		if (dma_src & 0x7 || dma_dst & 0x7)
+			chan->byte_align = true;
 
 		new->desc.dcmd = chan->dcmd | (DCMD_LENGTH & copy);
 		new->desc.dsadr = dma_src;
@@ -486,6 +503,8 @@
 	new->desc.ddadr = DDADR_STOP;
 	new->desc.dcmd |= DCMD_ENDIRQEN;
 
+	chan->cyclic_first = NULL;
+
 	return &first->async_tx;
 
 fail:
@@ -509,12 +528,16 @@
 	if ((sgl == NULL) || (sg_len == 0))
 		return NULL;
 
+	chan->byte_align = false;
+
 	for_each_sg(sgl, sg, sg_len, i) {
 		addr = sg_dma_address(sg);
 		avail = sg_dma_len(sg);
 
 		do {
 			len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES);
+			if (addr & 0x7)
+				chan->byte_align = true;
 
 			/* allocate and populate the descriptor */
 			new = mmp_pdma_alloc_descriptor(chan);
@@ -557,6 +580,94 @@
 	new->desc.ddadr = DDADR_STOP;
 	new->desc.dcmd |= DCMD_ENDIRQEN;
 
+	chan->dir = dir;
+	chan->cyclic_first = NULL;
+
+	return &first->async_tx;
+
+fail:
+	if (first)
+		mmp_pdma_free_desc_list(chan, &first->tx_list);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *mmp_pdma_prep_dma_cyclic(
+	struct dma_chan *dchan, dma_addr_t buf_addr, size_t len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags, void *context)
+{
+	struct mmp_pdma_chan *chan;
+	struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new;
+	dma_addr_t dma_src, dma_dst;
+
+	if (!dchan || !len || !period_len)
+		return NULL;
+
+	/* the buffer length must be a multiple of period_len */
+	if (len % period_len != 0)
+		return NULL;
+
+	if (period_len > PDMA_MAX_DESC_BYTES)
+		return NULL;
+
+	chan = to_mmp_pdma_chan(dchan);
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		dma_src = buf_addr;
+		dma_dst = chan->dev_addr;
+		break;
+	case DMA_DEV_TO_MEM:
+		dma_dst = buf_addr;
+		dma_src = chan->dev_addr;
+		break;
+	default:
+		dev_err(chan->dev, "Unsupported direction for cyclic DMA\n");
+		return NULL;
+	}
+
+	chan->dir = direction;
+
+	do {
+		/* Allocate the link descriptor from DMA pool */
+		new = mmp_pdma_alloc_descriptor(chan);
+		if (!new) {
+			dev_err(chan->dev, "no memory for desc\n");
+			goto fail;
+		}
+
+		new->desc.dcmd = chan->dcmd | DCMD_ENDIRQEN |
+					(DCMD_LENGTH & period_len);
+		new->desc.dsadr = dma_src;
+		new->desc.dtadr = dma_dst;
+
+		if (!first)
+			first = new;
+		else
+			prev->desc.ddadr = new->async_tx.phys;
+
+		new->async_tx.cookie = 0;
+		async_tx_ack(&new->async_tx);
+
+		prev = new;
+		len -= period_len;
+
+		if (chan->dir == DMA_MEM_TO_DEV)
+			dma_src += period_len;
+		else
+			dma_dst += period_len;
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->tx_list);
+	} while (len);
+
+	first->async_tx.flags = flags; /* client is in control of this ack */
+	first->async_tx.cookie = -EBUSY;
+
+	/* make the cyclic link */
+	new->desc.ddadr = first->async_tx.phys;
+	chan->cyclic_first = first;
+
 	return &first->async_tx;
 
 fail:
@@ -581,10 +692,7 @@
 	switch (cmd) {
 	case DMA_TERMINATE_ALL:
 		disable_chan(chan->phy);
-		if (chan->phy) {
-			chan->phy->vchan = NULL;
-			chan->phy = NULL;
-		}
+		mmp_pdma_free_phy(chan);
 		spin_lock_irqsave(&chan->desc_lock, flags);
 		mmp_pdma_free_desc_list(chan, &chan->chain_pending);
 		mmp_pdma_free_desc_list(chan, &chan->chain_running);
@@ -619,8 +727,13 @@
 			chan->dcmd |= DCMD_BURST32;
 
 		chan->dir = cfg->direction;
-		chan->drcmr = cfg->slave_id;
 		chan->dev_addr = addr;
+		/* FIXME: drivers should be ported over to use the filter
+		 * function. Once that's done, the following two lines can
+		 * be removed.
+		 */
+		if (cfg->slave_id)
+			chan->drcmr = cfg->slave_id;
 		break;
 	default:
 		return -ENOSYS;
@@ -632,15 +745,7 @@
 static enum dma_status mmp_pdma_tx_status(struct dma_chan *dchan,
 			dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
-	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
-	enum dma_status ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&chan->desc_lock, flags);
-	ret = dma_cookie_status(dchan, cookie, txstate);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
-
-	return ret;
+	return dma_cookie_status(dchan, cookie, txstate);
 }
 
 /**
@@ -669,29 +774,51 @@
 	LIST_HEAD(chain_cleanup);
 	unsigned long flags;
 
-	/* submit pending list; callback for each desc; free desc */
+	if (chan->cyclic_first) {
+		dma_async_tx_callback cb = NULL;
+		void *cb_data = NULL;
 
+		spin_lock_irqsave(&chan->desc_lock, flags);
+		desc = chan->cyclic_first;
+		cb = desc->async_tx.callback;
+		cb_data = desc->async_tx.callback_param;
+		spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+		if (cb)
+			cb(cb_data);
+
+		return;
+	}
+
+	/* submit pending list; callback for each desc; free desc */
 	spin_lock_irqsave(&chan->desc_lock, flags);
 
-	/* update the cookie if we have some descriptors to cleanup */
-	if (!list_empty(&chan->chain_running)) {
-		dma_cookie_t cookie;
+	list_for_each_entry_safe(desc, _desc, &chan->chain_running, node) {
+		/*
+		 * move the descriptors to a temporary list so we can drop
+		 * the lock during the entire cleanup operation
+		 */
+		list_del(&desc->node);
+		list_add(&desc->node, &chain_cleanup);
 
-		desc = to_mmp_pdma_desc(chan->chain_running.prev);
-		cookie = desc->async_tx.cookie;
-		dma_cookie_complete(&desc->async_tx);
-
-		dev_dbg(chan->dev, "completed_cookie=%d\n", cookie);
+		/*
+		 * Look for the first list entry which has the ENDIRQEN flag
+		 * set. That is the descriptor we got an interrupt for, so
+		 * complete that transaction and its cookie.
+		 */
+		if (desc->desc.dcmd & DCMD_ENDIRQEN) {
+			dma_cookie_t cookie = desc->async_tx.cookie;
+			dma_cookie_complete(&desc->async_tx);
+			dev_dbg(chan->dev, "completed_cookie=%d\n", cookie);
+			break;
+		}
 	}
 
 	/*
-	 * move the descriptors to a temporary list so we can drop the lock
-	 * during the entire cleanup operation
+	 * The hardware is idle and ready for more when the
+	 * chain_running list is empty.
 	 */
-	list_splice_tail_init(&chan->chain_running, &chain_cleanup);
-
-	/* the hardware is now idle and ready for more */
-	chan->idle = true;
+	chan->idle = list_empty(&chan->chain_running);
 
 	/* Start any pending transactions automatically */
 	start_pending_queue(chan);
@@ -763,6 +890,39 @@
 };
 MODULE_DEVICE_TABLE(of, mmp_pdma_dt_ids);
 
+static struct dma_chan *mmp_pdma_dma_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct mmp_pdma_device *d = ofdma->of_dma_data;
+	struct dma_chan *chan, *candidate;
+
+retry:
+	candidate = NULL;
+
+	/* walk the list of channels registered with the current instance and
+	 * find one that is currently unused */
+	list_for_each_entry(chan, &d->device.channels, device_node)
+		if (chan->client_count == 0) {
+			candidate = chan;
+			break;
+		}
+
+	if (!candidate)
+		return NULL;
+
+	/* dma_get_slave_channel will return NULL if we lost a race between
+	 * the lookup and the reservation */
+	chan = dma_get_slave_channel(candidate);
+
+	if (chan) {
+		struct mmp_pdma_chan *c = to_mmp_pdma_chan(chan);
+		c->drcmr = dma_spec->args[0];
+		return chan;
+	}
+
+	goto retry;
+}
+
 static int mmp_pdma_probe(struct platform_device *op)
 {
 	struct mmp_pdma_device *pdev;
@@ -777,10 +937,9 @@
 		return -ENOMEM;
 	pdev->dev = &op->dev;
 
-	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
-	if (!iores)
-		return -EINVAL;
+	spin_lock_init(&pdev->phy_lock);
 
+	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
 	pdev->base = devm_ioremap_resource(pdev->dev, iores);
 	if (IS_ERR(pdev->base))
 		return PTR_ERR(pdev->base);
@@ -825,13 +984,15 @@
 
 	dma_cap_set(DMA_SLAVE, pdev->device.cap_mask);
 	dma_cap_set(DMA_MEMCPY, pdev->device.cap_mask);
-	dma_cap_set(DMA_SLAVE, pdev->device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, pdev->device.cap_mask);
+	dma_cap_set(DMA_PRIVATE, pdev->device.cap_mask);
 	pdev->device.dev = &op->dev;
 	pdev->device.device_alloc_chan_resources = mmp_pdma_alloc_chan_resources;
 	pdev->device.device_free_chan_resources = mmp_pdma_free_chan_resources;
 	pdev->device.device_tx_status = mmp_pdma_tx_status;
 	pdev->device.device_prep_dma_memcpy = mmp_pdma_prep_memcpy;
 	pdev->device.device_prep_slave_sg = mmp_pdma_prep_slave_sg;
+	pdev->device.device_prep_dma_cyclic = mmp_pdma_prep_dma_cyclic;
 	pdev->device.device_issue_pending = mmp_pdma_issue_pending;
 	pdev->device.device_control = mmp_pdma_control;
 	pdev->device.copy_align = PDMA_ALIGNMENT;
@@ -847,7 +1008,17 @@
 		return ret;
 	}
 
-	dev_info(pdev->device.dev, "initialized\n");
+	if (op->dev.of_node) {
+		/* Device-tree DMA controller registration */
+		ret = of_dma_controller_register(op->dev.of_node,
+						 mmp_pdma_dma_xlate, pdev);
+		if (ret < 0) {
+			dev_err(&op->dev, "of_dma_controller_register failed\n");
+			return ret;
+		}
+	}
+
+	dev_info(pdev->device.dev, "initialized %d channels\n", dma_channels);
 	return 0;
 }
 
@@ -867,6 +1038,19 @@
 	.remove		= mmp_pdma_remove,
 };
 
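+/*
+ * Minimal usage sketch, with a made-up requestor line number (18): client
+ * drivers pass this filter to dma_request_channel() together with a pointer
+ * to the DRCMR requestor line they want bound to the channel.
+ *
+ *	dma_cap_mask_t mask;
+ *	unsigned int drcmr = 18;
+ *
+ *	dma_cap_zero(mask);
+ *	dma_cap_set(DMA_SLAVE, mask);
+ *	chan = dma_request_channel(mask, mmp_pdma_filter_fn, &drcmr);
+ */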
+bool mmp_pdma_filter_fn(struct dma_chan *chan, void *param)
+{
+	struct mmp_pdma_chan *c = to_mmp_pdma_chan(chan);
+
+	if (chan->device->dev->driver != &mmp_pdma_driver.driver)
+		return false;
+
+	c->drcmr = *(unsigned int *) param;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(mmp_pdma_filter_fn);
+
 module_platform_driver(mmp_pdma_driver);
 
 MODULE_DESCRIPTION("MARVELL MMP Periphera DMA Driver");
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
index 9b93665..38cb517 100644
--- a/drivers/dma/mmp_tdma.c
+++ b/drivers/dma/mmp_tdma.c
@@ -460,7 +460,8 @@
 {
 	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
 
-	dma_set_residue(txstate, tdmac->buf_len - tdmac->pos);
+	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
+			 tdmac->buf_len - tdmac->pos);
 
 	return tdmac->status;
 }
@@ -549,9 +550,6 @@
 	}
 
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!iores)
-		return -EINVAL;
-
 	tdev->base = devm_ioremap_resource(&pdev->dev, iores);
 	if (IS_ERR(tdev->base))
 		return PTR_ERR(tdev->base);
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 2d95673..2fe4353 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -556,15 +556,7 @@
 mpc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	       struct dma_tx_state *txstate)
 {
-	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
-	enum dma_status ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&mchan->lock, flags);
-	ret = dma_cookie_status(chan, cookie, txstate);
-	spin_unlock_irqrestore(&mchan->lock, flags);
-
-	return ret;
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 /* Prepare descriptor for memory to memory copy */
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 200f1a3..d9a2677 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -647,7 +647,7 @@
 
 	dev_dbg(mv_chan_to_devp(mv_chan),
 		"%s sw_desc %p async_tx %p\n",
-		__func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);
+		__func__, sw_desc, sw_desc ? &sw_desc->async_tx : NULL);
 
 	return sw_desc ? &sw_desc->async_tx : NULL;
 }
@@ -1166,7 +1166,7 @@
 {
 	const struct mbus_dram_target_info *dram;
 	struct mv_xor_device *xordev;
-	struct mv_xor_platform_data *pdata = pdev->dev.platform_data;
+	struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct resource *res;
 	int i, ret;
 
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index 7195930..ccd13df 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -23,7 +23,6 @@
 #include <linux/dmaengine.h>
 #include <linux/delay.h>
 #include <linux/module.h>
-#include <linux/fsl/mxs-dma.h>
 #include <linux/stmp_device.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -197,24 +196,6 @@
 	return container_of(chan, struct mxs_dma_chan, chan);
 }
 
-int mxs_dma_is_apbh(struct dma_chan *chan)
-{
-	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
-	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
-
-	return dma_is_apbh(mxs_dma);
-}
-EXPORT_SYMBOL_GPL(mxs_dma_is_apbh);
-
-int mxs_dma_is_apbx(struct dma_chan *chan)
-{
-	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
-	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
-
-	return !dma_is_apbh(mxs_dma);
-}
-EXPORT_SYMBOL_GPL(mxs_dma_is_apbx);
-
 static void mxs_dma_reset_chan(struct mxs_dma_chan *mxs_chan)
 {
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
@@ -349,13 +330,9 @@
 static int mxs_dma_alloc_chan_resources(struct dma_chan *chan)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
-	struct mxs_dma_data *data = chan->private;
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
 	int ret;
 
-	if (data)
-		mxs_chan->chan_irq = data->chan_irq;
-
 	mxs_chan->ccw = dma_alloc_coherent(mxs_dma->dma_device.dev,
 				CCW_BLOCK_SIZE, &mxs_chan->ccw_phys,
 				GFP_KERNEL);
@@ -622,10 +599,8 @@
 			dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
-	dma_cookie_t last_used;
 
-	last_used = chan->cookie;
-	dma_set_tx_state(txstate, chan->completed_cookie, last_used, 0);
+	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, 0);
 
 	return mxs_chan->status;
 }
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index 0bbdea50..61fdc54 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -564,14 +564,7 @@
 static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 				    struct dma_tx_state *txstate)
 {
-	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
-	enum dma_status ret;
-
-	spin_lock_irq(&pd_chan->lock);
-	ret = dma_cookie_status(chan, cookie, txstate);
-	spin_unlock_irq(&pd_chan->lock);
-
-	return ret;
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 static void pd_issue_pending(struct dma_chan *chan)
@@ -1036,3 +1029,4 @@
 		   "DMA controller driver");
 MODULE_AUTHOR("Yong Wang <yong.y.wang@intel.com>");
 MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(pci, pch_dma_id_table);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index fa645d8..36ed301 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -545,6 +545,8 @@
 
 	/* List of to be xfered descriptors */
 	struct list_head work_list;
+	/* List of completed descriptors */
+	struct list_head completed_list;
 
 	/* Pointer to the DMAC that manages this channel,
 	 * NULL if the channel is available to be acquired.
@@ -2198,66 +2200,6 @@
 	return container_of(tx, struct dma_pl330_desc, txd);
 }
 
-static inline void free_desc_list(struct list_head *list)
-{
-	struct dma_pl330_dmac *pdmac;
-	struct dma_pl330_desc *desc;
-	struct dma_pl330_chan *pch = NULL;
-	unsigned long flags;
-
-	/* Finish off the work list */
-	list_for_each_entry(desc, list, node) {
-		dma_async_tx_callback callback;
-		void *param;
-
-		/* All desc in a list belong to same channel */
-		pch = desc->pchan;
-		callback = desc->txd.callback;
-		param = desc->txd.callback_param;
-
-		if (callback)
-			callback(param);
-
-		desc->pchan = NULL;
-	}
-
-	/* pch will be unset if list was empty */
-	if (!pch)
-		return;
-
-	pdmac = pch->dmac;
-
-	spin_lock_irqsave(&pdmac->pool_lock, flags);
-	list_splice_tail_init(list, &pdmac->desc_pool);
-	spin_unlock_irqrestore(&pdmac->pool_lock, flags);
-}
-
-static inline void handle_cyclic_desc_list(struct list_head *list)
-{
-	struct dma_pl330_desc *desc;
-	struct dma_pl330_chan *pch = NULL;
-	unsigned long flags;
-
-	list_for_each_entry(desc, list, node) {
-		dma_async_tx_callback callback;
-
-		/* Change status to reload it */
-		desc->status = PREP;
-		pch = desc->pchan;
-		callback = desc->txd.callback;
-		if (callback)
-			callback(desc->txd.callback_param);
-	}
-
-	/* pch will be unset if list was empty */
-	if (!pch)
-		return;
-
-	spin_lock_irqsave(&pch->lock, flags);
-	list_splice_tail_init(list, &pch->work_list);
-	spin_unlock_irqrestore(&pch->lock, flags);
-}
-
 static inline void fill_queue(struct dma_pl330_chan *pch)
 {
 	struct dma_pl330_desc *desc;
@@ -2291,7 +2233,6 @@
 	struct dma_pl330_chan *pch = (struct dma_pl330_chan *)data;
 	struct dma_pl330_desc *desc, *_dt;
 	unsigned long flags;
-	LIST_HEAD(list);
 
 	spin_lock_irqsave(&pch->lock, flags);
 
@@ -2300,7 +2241,7 @@
 		if (desc->status == DONE) {
 			if (!pch->cyclic)
 				dma_cookie_complete(&desc->txd);
-			list_move_tail(&desc->node, &list);
+			list_move_tail(&desc->node, &pch->completed_list);
 		}
 
 	/* Try to submit a req imm. next to the last completed cookie */
@@ -2309,12 +2250,31 @@
 	/* Make sure the PL330 Channel thread is active */
 	pl330_chan_ctrl(pch->pl330_chid, PL330_OP_START);
 
-	spin_unlock_irqrestore(&pch->lock, flags);
+	while (!list_empty(&pch->completed_list)) {
+		dma_async_tx_callback callback;
+		void *callback_param;
 
-	if (pch->cyclic)
-		handle_cyclic_desc_list(&list);
-	else
-		free_desc_list(&list);
+		desc = list_first_entry(&pch->completed_list,
+					struct dma_pl330_desc, node);
+
+		callback = desc->txd.callback;
+		callback_param = desc->txd.callback_param;
+
+		if (pch->cyclic) {
+			desc->status = PREP;
+			list_move_tail(&desc->node, &pch->work_list);
+		} else {
+			desc->status = FREE;
+			list_move_tail(&desc->node, &pch->dmac->desc_pool);
+		}
+
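+		/* drop the channel lock across the client callback */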
+		if (callback) {
+			spin_unlock_irqrestore(&pch->lock, flags);
+			callback(callback_param);
+			spin_lock_irqsave(&pch->lock, flags);
+		}
+	}
+	spin_unlock_irqrestore(&pch->lock, flags);
 }
 
 static void dma_pl330_rqcb(void *token, enum pl330_op_err err)
@@ -2409,7 +2369,7 @@
 static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned long arg)
 {
 	struct dma_pl330_chan *pch = to_pchan(chan);
-	struct dma_pl330_desc *desc, *_dt;
+	struct dma_pl330_desc *desc;
 	unsigned long flags;
 	struct dma_pl330_dmac *pdmac = pch->dmac;
 	struct dma_slave_config *slave_config;
@@ -2423,12 +2383,18 @@
 		pl330_chan_ctrl(pch->pl330_chid, PL330_OP_FLUSH);
 
 		/* Mark all desc done */
-		list_for_each_entry_safe(desc, _dt, &pch->work_list , node) {
-			desc->status = DONE;
-			list_move_tail(&desc->node, &list);
+		list_for_each_entry(desc, &pch->work_list , node) {
+			desc->status = FREE;
+			dma_cookie_complete(&desc->txd);
 		}
 
-		list_splice_tail_init(&list, &pdmac->desc_pool);
+		list_for_each_entry(desc, &pch->completed_list , node) {
+			desc->status = FREE;
+			dma_cookie_complete(&desc->txd);
+		}
+
+		list_splice_tail_init(&pch->work_list, &pdmac->desc_pool);
+		list_splice_tail_init(&pch->completed_list, &pdmac->desc_pool);
 		spin_unlock_irqrestore(&pch->lock, flags);
 		break;
 	case DMA_SLAVE_CONFIG:
@@ -2814,6 +2780,28 @@
 	return &desc->txd;
 }
 
+static void __pl330_giveback_desc(struct dma_pl330_dmac *pdmac,
+				  struct dma_pl330_desc *first)
+{
+	unsigned long flags;
+	struct dma_pl330_desc *desc;
+
+	if (!first)
+		return;
+
+	spin_lock_irqsave(&pdmac->pool_lock, flags);
+
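+	/* return the chained descriptors, then the first one, to the pool */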
+	while (!list_empty(&first->node)) {
+		desc = list_entry(first->node.next,
+				struct dma_pl330_desc, node);
+		list_move_tail(&desc->node, &pdmac->desc_pool);
+	}
+
+	list_move_tail(&first->node, &pdmac->desc_pool);
+
+	spin_unlock_irqrestore(&pdmac->pool_lock, flags);
+}
+
 static struct dma_async_tx_descriptor *
 pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
@@ -2822,7 +2810,6 @@
 	struct dma_pl330_desc *first, *desc = NULL;
 	struct dma_pl330_chan *pch = to_pchan(chan);
 	struct scatterlist *sg;
-	unsigned long flags;
 	int i;
 	dma_addr_t addr;
 
@@ -2842,20 +2829,7 @@
 			dev_err(pch->dmac->pif.dev,
 				"%s:%d Unable to fetch desc\n",
 				__func__, __LINE__);
-			if (!first)
-				return NULL;
-
-			spin_lock_irqsave(&pdmac->pool_lock, flags);
-
-			while (!list_empty(&first->node)) {
-				desc = list_entry(first->node.next,
-						struct dma_pl330_desc, node);
-				list_move_tail(&desc->node, &pdmac->desc_pool);
-			}
-
-			list_move_tail(&first->node, &pdmac->desc_pool);
-
-			spin_unlock_irqrestore(&pdmac->pool_lock, flags);
+			__pl330_giveback_desc(pdmac, first);
 
 			return NULL;
 		}
@@ -2896,6 +2870,32 @@
 		return IRQ_NONE;
 }
 
+#define PL330_DMA_BUSWIDTHS \
+	BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+	BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+	BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)
+
+static int pl330_dma_device_slave_caps(struct dma_chan *dchan,
+	struct dma_slave_caps *caps)
+{
+	caps->src_addr_widths = PL330_DMA_BUSWIDTHS;
+	caps->dstn_addr_widths = PL330_DMA_BUSWIDTHS;
+	caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	caps->cmd_pause = false;
+	caps->cmd_terminate = true;
+
+	/*
+	 * This is the limit for transfers with a buswidth of 1, larger
+	 * buswidths will have larger limits.
+	 */
+	caps->max_sg_len = 1900800;
+	caps->max_sg_nr = 0;
+
+	return 0;
+}
+
 static int
 pl330_probe(struct amba_device *adev, const struct amba_id *id)
 {
@@ -2908,7 +2908,7 @@
 	int i, ret, irq;
 	int num_chan;
 
-	pdat = adev->dev.platform_data;
+	pdat = dev_get_platdata(&adev->dev);
 
 	/* Allocate a new DMAC and its Channels */
 	pdmac = devm_kzalloc(&adev->dev, sizeof(*pdmac), GFP_KERNEL);
@@ -2971,6 +2971,7 @@
 			pch->chan.private = adev->dev.of_node;
 
 		INIT_LIST_HEAD(&pch->work_list);
+		INIT_LIST_HEAD(&pch->completed_list);
 		spin_lock_init(&pch->lock);
 		pch->pl330_chid = NULL;
 		pch->chan.device = pd;
@@ -3000,6 +3001,7 @@
 	pd->device_prep_slave_sg = pl330_prep_slave_sg;
 	pd->device_control = pl330_control;
 	pd->device_issue_pending = pl330_issue_pending;
+	pd->device_slave_caps = pl330_dma_device_slave_caps;
 
 	ret = dma_async_device_register(pd);
 	if (ret) {
diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig
index 5c1dee2..e2b94d1 100644
--- a/drivers/dma/sh/Kconfig
+++ b/drivers/dma/sh/Kconfig
@@ -22,3 +22,9 @@
 	depends on SH_DMAE_BASE
 	help
 	  Enable support for the Renesas SUDMAC controllers.
+
+config RCAR_HPB_DMAE
+	tristate "Renesas R-Car HPB DMAC support"
+	depends on SH_DMAE_BASE
+	help
+	  Enable support for the Renesas R-Car series DMA controllers.
diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile
index c962138..ccf17cb5 100644
--- a/drivers/dma/sh/Makefile
+++ b/drivers/dma/sh/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_SH_DMAE_BASE) += shdma-base.o shdma-of.o
 obj-$(CONFIG_SH_DMAE) += shdma.o
 obj-$(CONFIG_SUDMAC) += sudmac.o
+obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o
diff --git a/drivers/dma/sh/rcar-hpbdma.c b/drivers/dma/sh/rcar-hpbdma.c
new file mode 100644
index 0000000..45a5202
--- /dev/null
+++ b/drivers/dma/sh/rcar-hpbdma.c
@@ -0,0 +1,655 @@
+/*
+ * Copyright (C) 2011-2013 Renesas Electronics Corporation
+ * Copyright (C) 2013 Cogent Embedded, Inc.
+ *
+ * This file is based on drivers/dma/sh/shdma.c.
+ *
+ * Renesas SuperH DMA Engine support
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * - The SuperH DMAC has no hardware DMA chain mode.
+ * - The maximum DMA transfer size is 16 MB.
+ *
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_data/dma-rcar-hpbdma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/shdma-base.h>
+#include <linux/slab.h>
+
+/* DMA channel registers */
+#define HPB_DMAE_DSAR0	0x00
+#define HPB_DMAE_DDAR0	0x04
+#define HPB_DMAE_DTCR0	0x08
+#define HPB_DMAE_DSAR1	0x0C
+#define HPB_DMAE_DDAR1	0x10
+#define HPB_DMAE_DTCR1	0x14
+#define HPB_DMAE_DSASR	0x18
+#define HPB_DMAE_DDASR	0x1C
+#define HPB_DMAE_DTCSR	0x20
+#define HPB_DMAE_DPTR	0x24
+#define HPB_DMAE_DCR	0x28
+#define HPB_DMAE_DCMDR	0x2C
+#define HPB_DMAE_DSTPR	0x30
+#define HPB_DMAE_DSTSR	0x34
+#define HPB_DMAE_DDBGR	0x38
+#define HPB_DMAE_DDBGR2	0x3C
+#define HPB_DMAE_CHAN(n)	(0x40 * (n))
+
+/* DMA command register (DCMDR) bits */
+#define HPB_DMAE_DCMDR_BDOUT	BIT(7)
+#define HPB_DMAE_DCMDR_DQSPD	BIT(6)
+#define HPB_DMAE_DCMDR_DQSPC	BIT(5)
+#define HPB_DMAE_DCMDR_DMSPD	BIT(4)
+#define HPB_DMAE_DCMDR_DMSPC	BIT(3)
+#define HPB_DMAE_DCMDR_DQEND	BIT(2)
+#define HPB_DMAE_DCMDR_DNXT	BIT(1)
+#define HPB_DMAE_DCMDR_DMEN	BIT(0)
+
+/* DMA forced stop register (DSTPR) bits */
+#define HPB_DMAE_DSTPR_DMSTP	BIT(0)
+
+/* DMA status register (DSTSR) bits */
+#define HPB_DMAE_DSTSR_DMSTS	BIT(0)
+
+/* DMA common registers */
+#define HPB_DMAE_DTIMR		0x00
+#define HPB_DMAE_DINTSR0		0x0C
+#define HPB_DMAE_DINTSR1		0x10
+#define HPB_DMAE_DINTCR0		0x14
+#define HPB_DMAE_DINTCR1		0x18
+#define HPB_DMAE_DINTMR0		0x1C
+#define HPB_DMAE_DINTMR1		0x20
+#define HPB_DMAE_DACTSR0		0x24
+#define HPB_DMAE_DACTSR1		0x28
+#define HPB_DMAE_HSRSTR(n)	(0x40 + (n) * 4)
+#define HPB_DMAE_HPB_DMASPR(n)	(0x140 + (n) * 4)
+#define HPB_DMAE_HPB_DMLVLR0	0x160
+#define HPB_DMAE_HPB_DMLVLR1	0x164
+#define HPB_DMAE_HPB_DMSHPT0	0x168
+#define HPB_DMAE_HPB_DMSHPT1	0x16C
+
+#define HPB_DMA_SLAVE_NUMBER 256
+#define HPB_DMA_TCR_MAX 0x01000000	/* 16 MiB */
+
+struct hpb_dmae_chan {
+	struct shdma_chan shdma_chan;
+	int xfer_mode;			/* DMA transfer mode */
+#define XFER_SINGLE	1
+#define XFER_DOUBLE	2
+	unsigned plane_idx;		/* current DMA information set */
+	bool first_desc;		/* first/next transfer */
+	int xmit_shift;			/* log_2(bytes_per_xfer) */
+	void __iomem *base;
+	const struct hpb_dmae_slave_config *cfg;
+	char dev_id[16];		/* unique name per DMAC of channel */
+};
+
+struct hpb_dmae_device {
+	struct shdma_dev shdma_dev;
+	spinlock_t reg_lock;		/* comm_reg operation lock */
+	struct hpb_dmae_pdata *pdata;
+	void __iomem *chan_reg;
+	void __iomem *comm_reg;
+	void __iomem *reset_reg;
+	void __iomem *mode_reg;
+};
+
+struct hpb_dmae_regs {
+	u32 sar; /* SAR / source address */
+	u32 dar; /* DAR / destination address */
+	u32 tcr; /* TCR / transfer count */
+};
+
+struct hpb_desc {
+	struct shdma_desc shdma_desc;
+	struct hpb_dmae_regs hw;
+	unsigned plane_idx;
+};
+
+#define to_chan(schan) container_of(schan, struct hpb_dmae_chan, shdma_chan)
+#define to_desc(sdesc) container_of(sdesc, struct hpb_desc, shdma_desc)
+#define to_dev(sc) container_of(sc->shdma_chan.dma_chan.device, \
+				struct hpb_dmae_device, shdma_dev.dma_dev)
+
+static void ch_reg_write(struct hpb_dmae_chan *hpb_dc, u32 data, u32 reg)
+{
+	iowrite32(data, hpb_dc->base + reg);
+}
+
+static u32 ch_reg_read(struct hpb_dmae_chan *hpb_dc, u32 reg)
+{
+	return ioread32(hpb_dc->base + reg);
+}
+
+static void dcmdr_write(struct hpb_dmae_device *hpbdev, u32 data)
+{
+	iowrite32(data, hpbdev->chan_reg + HPB_DMAE_DCMDR);
+}
+
+static void hsrstr_write(struct hpb_dmae_device *hpbdev, u32 ch)
+{
+	iowrite32(0x1, hpbdev->comm_reg + HPB_DMAE_HSRSTR(ch));
+}
+
+static u32 dintsr_read(struct hpb_dmae_device *hpbdev, u32 ch)
+{
+	u32 v;
+
+	if (ch < 32)
+		v = ioread32(hpbdev->comm_reg + HPB_DMAE_DINTSR0) >> ch;
+	else
+		v = ioread32(hpbdev->comm_reg + HPB_DMAE_DINTSR1) >> (ch - 32);
+	return v & 0x1;
+}
+
+static void dintcr_write(struct hpb_dmae_device *hpbdev, u32 ch)
+{
+	if (ch < 32)
+		iowrite32((0x1 << ch), hpbdev->comm_reg + HPB_DMAE_DINTCR0);
+	else
+		iowrite32((0x1 << (ch - 32)),
+			  hpbdev->comm_reg + HPB_DMAE_DINTCR1);
+}
+
+static void asyncmdr_write(struct hpb_dmae_device *hpbdev, u32 data)
+{
+	iowrite32(data, hpbdev->mode_reg);
+}
+
+static u32 asyncmdr_read(struct hpb_dmae_device *hpbdev)
+{
+	return ioread32(hpbdev->mode_reg);
+}
+
+static void hpb_dmae_enable_int(struct hpb_dmae_device *hpbdev, u32 ch)
+{
+	u32 intreg;
+
+	spin_lock_irq(&hpbdev->reg_lock);
+	if (ch < 32) {
+		intreg = ioread32(hpbdev->comm_reg + HPB_DMAE_DINTMR0);
+		iowrite32(BIT(ch) | intreg,
+			  hpbdev->comm_reg + HPB_DMAE_DINTMR0);
+	} else {
+		intreg = ioread32(hpbdev->comm_reg + HPB_DMAE_DINTMR1);
+		iowrite32(BIT(ch - 32) | intreg,
+			  hpbdev->comm_reg + HPB_DMAE_DINTMR1);
+	}
+	spin_unlock_irq(&hpbdev->reg_lock);
+}
+
+static void hpb_dmae_async_reset(struct hpb_dmae_device *hpbdev, u32 data)
+{
+	u32 rstr;
+	int timeout = 10000;	/* 100 ms */
+
+	spin_lock(&hpbdev->reg_lock);
+	rstr = ioread32(hpbdev->reset_reg);
+	rstr |= data;
+	iowrite32(rstr, hpbdev->reset_reg);
+	do {
+		rstr = ioread32(hpbdev->reset_reg);
+		if ((rstr & data) == data)
+			break;
+		udelay(10);
+	} while (timeout--);
+
+	if (timeout < 0)
+		dev_err(hpbdev->shdma_dev.dma_dev.dev,
+			"%s timeout\n", __func__);
+
+	rstr &= ~data;
+	iowrite32(rstr, hpbdev->reset_reg);
+	spin_unlock(&hpbdev->reg_lock);
+}
+
+static void hpb_dmae_set_async_mode(struct hpb_dmae_device *hpbdev,
+				    u32 mask, u32 data)
+{
+	u32 mode;
+
+	spin_lock_irq(&hpbdev->reg_lock);
+	mode = asyncmdr_read(hpbdev);
+	mode &= ~mask;
+	mode |= data;
+	asyncmdr_write(hpbdev, mode);
+	spin_unlock_irq(&hpbdev->reg_lock);
+}
+
+static void hpb_dmae_ctl_stop(struct hpb_dmae_device *hpbdev)
+{
+	dcmdr_write(hpbdev, HPB_DMAE_DCMDR_DQSPD);
+}
+
+static void hpb_dmae_reset(struct hpb_dmae_device *hpbdev)
+{
+	u32 ch;
+
+	for (ch = 0; ch < hpbdev->pdata->num_hw_channels; ch++)
+		hsrstr_write(hpbdev, ch);
+}
+
+static unsigned int calc_xmit_shift(struct hpb_dmae_chan *hpb_chan)
+{
+	struct hpb_dmae_device *hpbdev = to_dev(hpb_chan);
+	struct hpb_dmae_pdata *pdata = hpbdev->pdata;
+	int width = ch_reg_read(hpb_chan, HPB_DMAE_DCR);
+	int i;
+
+	switch (width & (HPB_DMAE_DCR_SPDS_MASK | HPB_DMAE_DCR_DPDS_MASK)) {
+	case HPB_DMAE_DCR_SPDS_8BIT | HPB_DMAE_DCR_DPDS_8BIT:
+	default:
+		i = XMIT_SZ_8BIT;
+		break;
+	case HPB_DMAE_DCR_SPDS_16BIT | HPB_DMAE_DCR_DPDS_16BIT:
+		i = XMIT_SZ_16BIT;
+		break;
+	case HPB_DMAE_DCR_SPDS_32BIT | HPB_DMAE_DCR_DPDS_32BIT:
+		i = XMIT_SZ_32BIT;
+		break;
+	}
+	return pdata->ts_shift[i];
+}
+
+static void hpb_dmae_set_reg(struct hpb_dmae_chan *hpb_chan,
+			     struct hpb_dmae_regs *hw, unsigned plane)
+{
+	ch_reg_write(hpb_chan, hw->sar,
+		     plane ? HPB_DMAE_DSAR1 : HPB_DMAE_DSAR0);
+	ch_reg_write(hpb_chan, hw->dar,
+		     plane ? HPB_DMAE_DDAR1 : HPB_DMAE_DDAR0);
+	ch_reg_write(hpb_chan, hw->tcr >> hpb_chan->xmit_shift,
+		     plane ? HPB_DMAE_DTCR1 : HPB_DMAE_DTCR0);
+}
+
+static void hpb_dmae_start(struct hpb_dmae_chan *hpb_chan, bool next)
+{
+	ch_reg_write(hpb_chan, (next ? HPB_DMAE_DCMDR_DNXT : 0) |
+		     HPB_DMAE_DCMDR_DMEN, HPB_DMAE_DCMDR);
+}
+
+static void hpb_dmae_halt(struct shdma_chan *schan)
+{
+	struct hpb_dmae_chan *chan = to_chan(schan);
+
+	ch_reg_write(chan, HPB_DMAE_DCMDR_DQEND, HPB_DMAE_DCMDR);
+	ch_reg_write(chan, HPB_DMAE_DSTPR_DMSTP, HPB_DMAE_DSTPR);
+}
+
+static const struct hpb_dmae_slave_config *
+hpb_dmae_find_slave(struct hpb_dmae_chan *hpb_chan, int slave_id)
+{
+	struct hpb_dmae_device *hpbdev = to_dev(hpb_chan);
+	struct hpb_dmae_pdata *pdata = hpbdev->pdata;
+	int i;
+
+	if (slave_id >= HPB_DMA_SLAVE_NUMBER)
+		return NULL;
+
+	for (i = 0; i < pdata->num_slaves; i++)
+		if (pdata->slaves[i].id == slave_id)
+			return pdata->slaves + i;
+
+	return NULL;
+}
+
+static void hpb_dmae_start_xfer(struct shdma_chan *schan,
+				struct shdma_desc *sdesc)
+{
+	struct hpb_dmae_chan *chan = to_chan(schan);
+	struct hpb_dmae_device *hpbdev = to_dev(chan);
+	struct hpb_desc *desc = to_desc(sdesc);
+
+	if (chan->cfg->flags & HPB_DMAE_SET_ASYNC_RESET)
+		hpb_dmae_async_reset(hpbdev, chan->cfg->rstr);
+
+	desc->plane_idx = chan->plane_idx;
+	hpb_dmae_set_reg(chan, &desc->hw, chan->plane_idx);
+	hpb_dmae_start(chan, !chan->first_desc);
+
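+	/* in double-plane mode, alternate the DMA information set per transfer */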
+	if (chan->xfer_mode == XFER_DOUBLE) {
+		chan->plane_idx ^= 1;
+		chan->first_desc = false;
+	}
+}
+
+static bool hpb_dmae_desc_completed(struct shdma_chan *schan,
+				    struct shdma_desc *sdesc)
+{
+	/*
+	 * This is correct since we always have at most single
+	 * outstanding DMA transfer per channel, and by the time
+	 * we get completion interrupt the transfer is completed.
+	 * This will change if we ever use alternating DMA
+	 * information sets and submit two descriptors at once.
+	 */
+	return true;
+}
+
+static bool hpb_dmae_chan_irq(struct shdma_chan *schan, int irq)
+{
+	struct hpb_dmae_chan *chan = to_chan(schan);
+	struct hpb_dmae_device *hpbdev = to_dev(chan);
+	int ch = chan->cfg->dma_ch;
+
+	/* Check Complete DMA Transfer */
+	if (dintsr_read(hpbdev, ch)) {
+		/* Clear Interrupt status */
+		dintcr_write(hpbdev, ch);
+		return true;
+	}
+	return false;
+}
+
+static int hpb_dmae_desc_setup(struct shdma_chan *schan,
+			       struct shdma_desc *sdesc,
+			       dma_addr_t src, dma_addr_t dst, size_t *len)
+{
+	struct hpb_desc *desc = to_desc(sdesc);
+
+	if (*len > (size_t)HPB_DMA_TCR_MAX)
+		*len = (size_t)HPB_DMA_TCR_MAX;
+
+	desc->hw.sar = src;
+	desc->hw.dar = dst;
+	desc->hw.tcr = *len;
+
+	return 0;
+}
+
+static size_t hpb_dmae_get_partial(struct shdma_chan *schan,
+				   struct shdma_desc *sdesc)
+{
+	struct hpb_desc *desc = to_desc(sdesc);
+	struct hpb_dmae_chan *chan = to_chan(schan);
+	u32 tcr = ch_reg_read(chan, desc->plane_idx ?
+			      HPB_DMAE_DTCR1 : HPB_DMAE_DTCR0);
+
+	return (desc->hw.tcr - tcr) << chan->xmit_shift;
+}
+
+static bool hpb_dmae_channel_busy(struct shdma_chan *schan)
+{
+	struct hpb_dmae_chan *chan = to_chan(schan);
+	u32 dstsr = ch_reg_read(chan, HPB_DMAE_DSTSR);
+
+	return (dstsr & HPB_DMAE_DSTSR_DMSTS) == HPB_DMAE_DSTSR_DMSTS;
+}
+
+static int
+hpb_dmae_alloc_chan_resources(struct hpb_dmae_chan *hpb_chan,
+			      const struct hpb_dmae_slave_config *cfg)
+{
+	struct hpb_dmae_device *hpbdev = to_dev(hpb_chan);
+	struct hpb_dmae_pdata *pdata = hpbdev->pdata;
+	const struct hpb_dmae_channel *channel = pdata->channels;
+	int slave_id = cfg->id;
+	int i, err;
+
+	for (i = 0; i < pdata->num_channels; i++, channel++) {
+		if (channel->s_id == slave_id) {
+			struct device *dev = hpb_chan->shdma_chan.dev;
+
+			hpb_chan->base = hpbdev->chan_reg +
+				HPB_DMAE_CHAN(cfg->dma_ch);
+
+			dev_dbg(dev, "Detected Slave device\n");
+			dev_dbg(dev, " -- slave_id       : 0x%x\n", slave_id);
+			dev_dbg(dev, " -- cfg->dma_ch    : %d\n", cfg->dma_ch);
+			dev_dbg(dev, " -- channel->ch_irq: %d\n",
+				channel->ch_irq);
+			break;
+		}
+	}
+
+	err = shdma_request_irq(&hpb_chan->shdma_chan, channel->ch_irq,
+				IRQF_SHARED, hpb_chan->dev_id);
+	if (err) {
+		dev_err(hpb_chan->shdma_chan.dev,
+			"DMA channel request_irq %d failed with error %d\n",
+			channel->ch_irq, err);
+		return err;
+	}
+
+	hpb_chan->plane_idx = 0;
+	hpb_chan->first_desc = true;
+
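+	/* DCR CT/DIP: both clear selects single-plane mode, both set selects double */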
+	if ((cfg->dcr & (HPB_DMAE_DCR_CT | HPB_DMAE_DCR_DIP)) == 0) {
+		hpb_chan->xfer_mode = XFER_SINGLE;
+	} else if ((cfg->dcr & (HPB_DMAE_DCR_CT | HPB_DMAE_DCR_DIP)) ==
+		   (HPB_DMAE_DCR_CT | HPB_DMAE_DCR_DIP)) {
+		hpb_chan->xfer_mode = XFER_DOUBLE;
+	} else {
+		dev_err(hpb_chan->shdma_chan.dev, "DCR setting error");
+		shdma_free_irq(&hpb_chan->shdma_chan);
+		return -EINVAL;
+	}
+
+	if (cfg->flags & HPB_DMAE_SET_ASYNC_MODE)
+		hpb_dmae_set_async_mode(hpbdev, cfg->mdm, cfg->mdr);
+	ch_reg_write(hpb_chan, cfg->dcr, HPB_DMAE_DCR);
+	ch_reg_write(hpb_chan, cfg->port, HPB_DMAE_DPTR);
+	hpb_chan->xmit_shift = calc_xmit_shift(hpb_chan);
+	hpb_dmae_enable_int(hpbdev, cfg->dma_ch);
+
+	return 0;
+}
+
+static int hpb_dmae_set_slave(struct shdma_chan *schan, int slave_id, bool try)
+{
+	struct hpb_dmae_chan *chan = to_chan(schan);
+	const struct hpb_dmae_slave_config *sc =
+		hpb_dmae_find_slave(chan, slave_id);
+
+	if (!sc)
+		return -ENODEV;
+	if (try)
+		return 0;
+	chan->cfg = sc;
+	return hpb_dmae_alloc_chan_resources(chan, sc);
+}
+
+static void hpb_dmae_setup_xfer(struct shdma_chan *schan, int slave_id)
+{
+}
+
+static dma_addr_t hpb_dmae_slave_addr(struct shdma_chan *schan)
+{
+	struct hpb_dmae_chan *chan = to_chan(schan);
+
+	return chan->cfg->addr;
+}
+
+static struct shdma_desc *hpb_dmae_embedded_desc(void *buf, int i)
+{
+	return &((struct hpb_desc *)buf)[i].shdma_desc;
+}
+
+static const struct shdma_ops hpb_dmae_ops = {
+	.desc_completed = hpb_dmae_desc_completed,
+	.halt_channel = hpb_dmae_halt,
+	.channel_busy = hpb_dmae_channel_busy,
+	.slave_addr = hpb_dmae_slave_addr,
+	.desc_setup = hpb_dmae_desc_setup,
+	.set_slave = hpb_dmae_set_slave,
+	.setup_xfer = hpb_dmae_setup_xfer,
+	.start_xfer = hpb_dmae_start_xfer,
+	.embedded_desc = hpb_dmae_embedded_desc,
+	.chan_irq = hpb_dmae_chan_irq,
+	.get_partial = hpb_dmae_get_partial,
+};
+
+static int hpb_dmae_chan_probe(struct hpb_dmae_device *hpbdev, int id)
+{
+	struct shdma_dev *sdev = &hpbdev->shdma_dev;
+	struct platform_device *pdev =
+		to_platform_device(hpbdev->shdma_dev.dma_dev.dev);
+	struct hpb_dmae_chan *new_hpb_chan;
+	struct shdma_chan *schan;
+
+	/* Alloc channel */
+	new_hpb_chan = devm_kzalloc(&pdev->dev,
+				    sizeof(struct hpb_dmae_chan), GFP_KERNEL);
+	if (!new_hpb_chan) {
+		dev_err(hpbdev->shdma_dev.dma_dev.dev,
+			"No free memory for allocating DMA channels!\n");
+		return -ENOMEM;
+	}
+
+	schan = &new_hpb_chan->shdma_chan;
+	shdma_chan_probe(sdev, schan, id);
+
+	if (pdev->id >= 0)
+		snprintf(new_hpb_chan->dev_id, sizeof(new_hpb_chan->dev_id),
+			 "hpb-dmae%d.%d", pdev->id, id);
+	else
+		snprintf(new_hpb_chan->dev_id, sizeof(new_hpb_chan->dev_id),
+			 "hpb-dma.%d", id);
+
+	return 0;
+}
+
+static int hpb_dmae_probe(struct platform_device *pdev)
+{
+	struct hpb_dmae_pdata *pdata = pdev->dev.platform_data;
+	struct hpb_dmae_device *hpbdev;
+	struct dma_device *dma_dev;
+	struct resource *chan, *comm, *rest, *mode, *irq_res;
+	int err, i;
+
+	/* Get platform data */
+	if (!pdata || !pdata->num_channels)
+		return -ENODEV;
+
+	chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	comm = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	rest = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	mode = platform_get_resource(pdev, IORESOURCE_MEM, 3);
+
+	irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!irq_res)
+		return -ENODEV;
+
+	hpbdev = devm_kzalloc(&pdev->dev, sizeof(struct hpb_dmae_device),
+			      GFP_KERNEL);
+	if (!hpbdev) {
+		dev_err(&pdev->dev, "Not enough memory\n");
+		return -ENOMEM;
+	}
+
+	hpbdev->chan_reg = devm_ioremap_resource(&pdev->dev, chan);
+	if (IS_ERR(hpbdev->chan_reg))
+		return PTR_ERR(hpbdev->chan_reg);
+
+	hpbdev->comm_reg = devm_ioremap_resource(&pdev->dev, comm);
+	if (IS_ERR(hpbdev->comm_reg))
+		return PTR_ERR(hpbdev->comm_reg);
+
+	hpbdev->reset_reg = devm_ioremap_resource(&pdev->dev, rest);
+	if (IS_ERR(hpbdev->reset_reg))
+		return PTR_ERR(hpbdev->reset_reg);
+
+	hpbdev->mode_reg = devm_ioremap_resource(&pdev->dev, mode);
+	if (IS_ERR(hpbdev->mode_reg))
+		return PTR_ERR(hpbdev->mode_reg);
+
+	dma_dev = &hpbdev->shdma_dev.dma_dev;
+
+	spin_lock_init(&hpbdev->reg_lock);
+
+	/* Platform data */
+	hpbdev->pdata = pdata;
+
+	pm_runtime_enable(&pdev->dev);
+	err = pm_runtime_get_sync(&pdev->dev);
+	if (err < 0)
+		dev_err(&pdev->dev, "%s(): GET = %d\n", __func__, err);
+
+	/* Reset DMA controller */
+	hpb_dmae_reset(hpbdev);
+
+	pm_runtime_put(&pdev->dev);
+
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+
+	hpbdev->shdma_dev.ops = &hpb_dmae_ops;
+	hpbdev->shdma_dev.desc_size = sizeof(struct hpb_desc);
+	err = shdma_init(&pdev->dev, &hpbdev->shdma_dev, pdata->num_channels);
+	if (err < 0)
+		goto error;
+
+	/* Create DMA channels; bail out if a channel fails to initialize */
+	for (i = 0; i < pdata->num_channels; i++) {
+		err = hpb_dmae_chan_probe(hpbdev, i);
+		if (err)
+			goto cleanup;
+	}
+
+	platform_set_drvdata(pdev, hpbdev);
+	err = dma_async_device_register(dma_dev);
+	if (!err)
+		return 0;
+
+cleanup:
+	shdma_cleanup(&hpbdev->shdma_dev);
+error:
+	pm_runtime_disable(&pdev->dev);
+	return err;
+}
+
+static void hpb_dmae_chan_remove(struct hpb_dmae_device *hpbdev)
+{
+	struct dma_device *dma_dev = &hpbdev->shdma_dev.dma_dev;
+	struct shdma_chan *schan;
+	int i;
+
+	shdma_for_each_chan(schan, &hpbdev->shdma_dev, i) {
+		BUG_ON(!schan);
+
+		shdma_free_irq(schan);
+		shdma_chan_remove(schan);
+	}
+	dma_dev->chancnt = 0;
+}
+
+static int hpb_dmae_remove(struct platform_device *pdev)
+{
+	struct hpb_dmae_device *hpbdev = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&hpbdev->shdma_dev.dma_dev);
+
+	pm_runtime_disable(&pdev->dev);
+
+	hpb_dmae_chan_remove(hpbdev);
+
+	return 0;
+}
+
+static void hpb_dmae_shutdown(struct platform_device *pdev)
+{
+	struct hpb_dmae_device *hpbdev = platform_get_drvdata(pdev);
+
+	hpb_dmae_ctl_stop(hpbdev);
+}
+
+static struct platform_driver hpb_dmae_driver = {
+	.probe		= hpb_dmae_probe,
+	.remove		= hpb_dmae_remove,
+	.shutdown	= hpb_dmae_shutdown,
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "hpb-dma-engine",
+	},
+};
+module_platform_driver(hpb_dmae_driver);
+
+MODULE_AUTHOR("Max Filippov <max.filippov@cogentembedded.com>");
+MODULE_DESCRIPTION("Renesas HPB DMA Engine driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/sh/shdma-of.c b/drivers/dma/sh/shdma-of.c
index 11bcb05..966aaab 100644
--- a/drivers/dma/sh/shdma-of.c
+++ b/drivers/dma/sh/shdma-of.c
@@ -42,7 +42,7 @@
 
 static int shdma_of_probe(struct platform_device *pdev)
 {
-	const struct of_dev_auxdata *lookup = pdev->dev.platform_data;
+	const struct of_dev_auxdata *lookup = dev_get_platdata(&pdev->dev);
 	int ret;
 
 	if (!lookup)
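
The dev_get_platdata() conversions repeated through the rest of this series are
mechanical; the helper lives in <linux/device.h> and is essentially:

	static inline void *dev_get_platdata(const struct device *dev)
	{
		return dev->platform_data;
	}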
diff --git a/drivers/dma/sh/shdma.c b/drivers/dma/sh/shdma.c
index b67f45f..b388b12 100644
--- a/drivers/dma/sh/shdma.c
+++ b/drivers/dma/sh/shdma.c
@@ -660,7 +660,7 @@
 
 static int sh_dmae_probe(struct platform_device *pdev)
 {
-	struct sh_dmae_pdata *pdata = pdev->dev.platform_data;
+	struct sh_dmae_pdata *pdata = dev_get_platdata(&pdev->dev);
 	unsigned long irqflags = IRQF_DISABLED,
 		chan_flag[SH_DMAE_MAX_CHANNELS] = {};
 	int errirq, chan_irq[SH_DMAE_MAX_CHANNELS];
diff --git a/drivers/dma/sh/sudmac.c b/drivers/dma/sh/sudmac.c
index e7c94bb..8369c6f0 100644
--- a/drivers/dma/sh/sudmac.c
+++ b/drivers/dma/sh/sudmac.c
@@ -335,7 +335,7 @@
 
 static int sudmac_probe(struct platform_device *pdev)
 {
-	struct sudmac_pdata *pdata = pdev->dev.platform_data;
+	struct sudmac_pdata *pdata = dev_get_platdata(&pdev->dev);
 	int err, i;
 	struct sudmac_device *su_dev;
 	struct dma_device *dma_dev;
@@ -345,9 +345,8 @@
 	if (!pdata)
 		return -ENODEV;
 
-	chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!chan || !irq_res)
+	if (!irq_res)
 		return -ENODEV;
 
 	err = -ENOMEM;
@@ -360,9 +359,10 @@
 
 	dma_dev = &su_dev->shdma_dev.dma_dev;
 
-	su_dev->chan_reg = devm_request_and_ioremap(&pdev->dev, chan);
-	if (!su_dev->chan_reg)
-		return err;
+	chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	su_dev->chan_reg = devm_ioremap_resource(&pdev->dev, chan);
+	if (IS_ERR(su_dev->chan_reg))
+		return PTR_ERR(su_dev->chan_reg);
 
 	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
 
@@ -373,7 +373,7 @@
 		return err;
 
 	/* platform data */
-	su_dev->pdata = pdev->dev.platform_data;
+	su_dev->pdata = dev_get_platdata(&pdev->dev);
 
 	platform_set_drvdata(pdev, su_dev);
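
Switching to devm_ioremap_resource() also changes the error convention: it
returns ERR_PTR() codes rather than NULL and tolerates a NULL resource, so the
resource lookup needs no separate check. The canonical probe idiom, as now used
above:

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(base))
		return PTR_ERR(base);	/* -EINVAL if res was NULL */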
 
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
index 716b23e..6aec3ad 100644
--- a/drivers/dma/sirf-dma.c
+++ b/drivers/dma/sirf-dma.c
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
+#include <linux/pm_runtime.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/slab.h>
@@ -73,6 +74,11 @@
 	int				mode;
 };
 
+struct sirfsoc_dma_regs {
+	u32				ctrl[SIRFSOC_DMA_CHANNELS];
+	u32				interrupt_en;
+};
+
 struct sirfsoc_dma {
 	struct dma_device		dma;
 	struct tasklet_struct		tasklet;
@@ -81,10 +87,13 @@
 	int				irq;
 	struct clk			*clk;
 	bool				is_marco;
+	struct sirfsoc_dma_regs		regs_save;
 };
 
 #define DRV_NAME	"sirfsoc_dma"
 
+static int sirfsoc_dma_runtime_suspend(struct device *dev);
+
 /* Convert struct dma_chan to struct sirfsoc_dma_chan */
 static inline
 struct sirfsoc_dma_chan *dma_chan_to_sirfsoc_dma_chan(struct dma_chan *c)
@@ -393,6 +402,8 @@
 	LIST_HEAD(descs);
 	int i;
 
+	pm_runtime_get_sync(sdma->dma.dev);
+
 	/* Alloc descriptors for this channel */
 	for (i = 0; i < SIRFSOC_DMA_DESCRIPTORS; i++) {
 		sdesc = kzalloc(sizeof(*sdesc), GFP_KERNEL);
@@ -425,6 +436,7 @@
 static void sirfsoc_dma_free_chan_resources(struct dma_chan *chan)
 {
 	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
 	struct sirfsoc_dma_desc *sdesc, *tmp;
 	unsigned long flags;
 	LIST_HEAD(descs);
@@ -445,6 +457,8 @@
 	/* Free descriptors */
 	list_for_each_entry_safe(sdesc, tmp, &descs, node)
 		kfree(sdesc);
+
+	pm_runtime_put(sdma->dma.dev);
 }
 
 /* Send pending descriptor to hardware */
@@ -595,7 +609,7 @@
 	spin_unlock_irqrestore(&schan->lock, iflags);
 
 	if (!sdesc)
-		return 0;
+		return NULL;
 
 	/* Place descriptor in prepared list */
 	spin_lock_irqsave(&schan->lock, iflags);
@@ -723,14 +737,14 @@
 
 	tasklet_init(&sdma->tasklet, sirfsoc_dma_tasklet, (unsigned long)sdma);
 
-	clk_prepare_enable(sdma->clk);
-
 	/* Register DMA engine */
 	dev_set_drvdata(dev, sdma);
+
 	ret = dma_async_device_register(dma);
 	if (ret)
 		goto free_irq;
 
+	pm_runtime_enable(&op->dev);
 	dev_info(dev, "initialized SIRFSOC DMAC driver\n");
 
 	return 0;
@@ -747,13 +761,124 @@
 	struct device *dev = &op->dev;
 	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
 
-	clk_disable_unprepare(sdma->clk);
 	dma_async_device_unregister(&sdma->dma);
 	free_irq(sdma->irq, sdma);
 	irq_dispose_mapping(sdma->irq);
+	pm_runtime_disable(&op->dev);
+	if (!pm_runtime_status_suspended(&op->dev))
+		sirfsoc_dma_runtime_suspend(&op->dev);
+
 	return 0;
 }
 
+static int sirfsoc_dma_runtime_suspend(struct device *dev)
+{
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(sdma->clk);
+	return 0;
+}
+
+static int sirfsoc_dma_runtime_resume(struct device *dev)
+{
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(sdma->clk);
+	if (ret < 0) {
+		dev_err(dev, "clk_enable failed: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+static int sirfsoc_dma_pm_suspend(struct device *dev)
+{
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+	struct sirfsoc_dma_regs *save = &sdma->regs_save;
+	struct sirfsoc_dma_chan *schan;
+	int ch;
+	int ret;
+
+	/*
+	 * If we were runtime-suspended, resume first to enable the clock
+	 * before accessing registers
+	 */
+	if (pm_runtime_status_suspended(dev)) {
+		ret = sirfsoc_dma_runtime_resume(dev);
+		if (ret < 0)
+			return ret;
+	}
+
+	/*
+	 * The DMA controller loses its register contents while suspended,
+	 * so save the registers of all active channels
+	 */
+	for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) {
+		schan = &sdma->channels[ch];
+		if (list_empty(&schan->active))
+			continue;
+		save->ctrl[ch] = readl_relaxed(sdma->base +
+			ch * 0x10 + SIRFSOC_DMA_CH_CTRL);
+	}
+	save->interrupt_en = readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN);
+
+	/* Disable clock */
+	sirfsoc_dma_runtime_suspend(dev);
+
+	return 0;
+}
+
+static int sirfsoc_dma_pm_resume(struct device *dev)
+{
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+	struct sirfsoc_dma_regs *save = &sdma->regs_save;
+	struct sirfsoc_dma_desc *sdesc;
+	struct sirfsoc_dma_chan *schan;
+	int ch;
+	int ret;
+
+	/* Enable clock before accessing register */
+	ret = sirfsoc_dma_runtime_resume(dev);
+	if (ret < 0)
+		return ret;
+
+	writel_relaxed(save->interrupt_en, sdma->base + SIRFSOC_DMA_INT_EN);
+	for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) {
+		schan = &sdma->channels[ch];
+		if (list_empty(&schan->active))
+			continue;
+		sdesc = list_first_entry(&schan->active,
+			struct sirfsoc_dma_desc,
+			node);
+		writel_relaxed(sdesc->width,
+			sdma->base + SIRFSOC_DMA_WIDTH_0 + ch * 4);
+		writel_relaxed(sdesc->xlen,
+			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_XLEN);
+		writel_relaxed(sdesc->ylen,
+			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_YLEN);
+		writel_relaxed(save->ctrl[ch],
+			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL);
+		writel_relaxed(sdesc->addr >> 2,
+			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR);
+	}
+
+	/* if we were runtime-suspended before, suspend again */
+	if (pm_runtime_status_suspended(dev))
+		sirfsoc_dma_runtime_suspend(dev);
+
+	return 0;
+}
+
+static const struct dev_pm_ops sirfsoc_dma_pm_ops = {
+	SET_RUNTIME_PM_OPS(sirfsoc_dma_runtime_suspend,
+			   sirfsoc_dma_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(sirfsoc_dma_pm_suspend, sirfsoc_dma_pm_resume)
+};
+
 static struct of_device_id sirfsoc_dma_match[] = {
 	{ .compatible = "sirf,prima2-dmac", },
 	{ .compatible = "sirf,marco-dmac", },
@@ -766,6 +891,7 @@
 	.driver = {
 		.name = DRV_NAME,
 		.owner = THIS_MODULE,
+		.pm = &sirfsoc_dma_pm_ops,
 		.of_match_table	= sirfsoc_dma_match,
 	},
 };
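
The system-sleep handlers above must cope with the device already being
runtime-suspended (clock gated) when system suspend starts; the same bracketing
applies to any driver that saves registers across sleep. A condensed sketch,
where foo_save_regs() is a hypothetical helper and
foo_runtime_suspend()/foo_runtime_resume() are clk-gating runtime PM callbacks
like the ones in this patch:

	static int foo_pm_suspend(struct device *dev)
	{
		int ret;

		/* re-enable the clock around register access if it was gated */
		if (pm_runtime_status_suspended(dev)) {
			ret = foo_runtime_resume(dev);
			if (ret < 0)
				return ret;
		}

		foo_save_regs(dev);		/* hypothetical register save */

		foo_runtime_suspend(dev);	/* gate the clock again */
		return 0;
	}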
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 5ab5880..82d2b97 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -2591,6 +2591,9 @@
 	int i;
 
 	sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_NOWAIT);
+	if (!sg)
+		return NULL;
+
 	for (i = 0; i < periods; i++) {
 		sg_dma_address(&sg[i]) = dma_addr;
 		sg_dma_len(&sg[i]) = period_len;
@@ -3139,7 +3142,7 @@
 
 static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 {
-	struct stedma40_platform_data *plat_data = pdev->dev.platform_data;
+	struct stedma40_platform_data *plat_data = dev_get_platdata(&pdev->dev);
 	struct clk *clk = NULL;
 	void __iomem *virtbase = NULL;
 	struct resource *res = NULL;
@@ -3226,8 +3229,8 @@
 	num_log_chans = num_phy_chans * D40_MAX_LOG_CHAN_PER_PHY;
 
 	dev_info(&pdev->dev,
-		 "hardware rev: %d @ 0x%x with %d physical and %d logical channels\n",
-		 rev, res->start, num_phy_chans, num_log_chans);
+		 "hardware rev: %d @ %pa with %d physical and %d logical channels\n",
+		 rev, &res->start, num_phy_chans, num_log_chans);
 
 	base = kzalloc(ALIGN(sizeof(struct d40_base), 4) +
 		       (num_phy_chans + num_log_chans + num_memcpy_chans) *
@@ -3485,7 +3488,7 @@
 {
 	struct stedma40_platform_data *pdata;
 	int num_phy = 0, num_memcpy = 0, num_disabled = 0;
-	const const __be32 *list;
+	const __be32 *list;
 
 	pdata = devm_kzalloc(&pdev->dev,
 			     sizeof(struct stedma40_platform_data),
@@ -3516,7 +3519,7 @@
 	list = of_get_property(np, "disabled-channels", &num_disabled);
 	num_disabled /= sizeof(*list);
 
-	if (num_disabled > STEDMA40_MAX_PHYS || num_disabled < 0) {
+	if (num_disabled >= STEDMA40_MAX_PHYS || num_disabled < 0) {
 		d40_err(&pdev->dev,
 			"Invalid number of disabled channels specified (%d)\n",
 			num_disabled);
@@ -3535,7 +3538,7 @@
 
 static int __init d40_probe(struct platform_device *pdev)
 {
-	struct stedma40_platform_data *plat_data = pdev->dev.platform_data;
+	struct stedma40_platform_data *plat_data = dev_get_platdata(&pdev->dev);
 	struct device_node *np = pdev->dev.of_node;
 	int ret = -ENOENT;
 	struct d40_base *base = NULL;
@@ -3579,9 +3582,7 @@
 	if (request_mem_region(res->start, resource_size(res),
 			       D40_NAME " I/O lcpa") == NULL) {
 		ret = -EBUSY;
-		d40_err(&pdev->dev,
-			"Failed to request LCPA region 0x%x-0x%x\n",
-			res->start, res->end);
+		d40_err(&pdev->dev, "Failed to request LCPA region %pR\n", res);
 		goto failure;
 	}
 
@@ -3589,8 +3590,8 @@
 	val = readl(base->virtbase + D40_DREG_LCPA);
 	if (res->start != val && val != 0) {
 		dev_warn(&pdev->dev,
-			 "[%s] Mismatch LCPA dma 0x%x, def 0x%x\n",
-			 __func__, val, res->start);
+			 "[%s] Mismatch LCPA dma 0x%x, def %pa\n",
+			 __func__, val, &res->start);
 	} else
 		writel(res->start, base->virtbase + D40_DREG_LCPA);
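
The %pa and %pR conversions here avoid truncated output when resource_size_t is
64-bit (e.g. LPAE); note that %pa dereferences its argument, so the address of
the field is passed, not its value:

	/* pass &res->start, not res->start; %pR formats the whole resource */
	dev_info(dev, "LCPA region %pR, base %pa\n", res, &res->start);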
 
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index f137914..5d4986e 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -767,13 +767,11 @@
 	unsigned long flags;
 	unsigned int residual;
 
-	spin_lock_irqsave(&tdc->lock, flags);
-
 	ret = dma_cookie_status(dc, cookie, txstate);
-	if (ret == DMA_SUCCESS) {
-		spin_unlock_irqrestore(&tdc->lock, flags);
+	if (ret == DMA_SUCCESS)
 		return ret;
-	}
+
+	spin_lock_irqsave(&tdc->lock, flags);
 
 	/* Check on wait_ack desc status */
 	list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) {
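
Querying dma_cookie_status() before taking the channel lock is the usual
tx_status fast path: a cookie already marked complete needs no residue
calculation, and the lock is only required for the descriptor walk. The shape
this hunk (and the txx9dmac change below) converges on:

	ret = dma_cookie_status(dc, cookie, txstate);
	if (ret == DMA_SUCCESS)
		return ret;		/* complete: nothing left to compute */

	spin_lock_irqsave(&tdc->lock, flags);
	/* ... walk the descriptor lists and fill in the residue ... */
	spin_unlock_irqrestore(&tdc->lock, flags);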
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index 0ef43c1..28af214 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -669,7 +669,7 @@
 
 static int td_probe(struct platform_device *pdev)
 {
-	struct timb_dma_platform_data *pdata = pdev->dev.platform_data;
+	struct timb_dma_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct timb_dma *td;
 	struct resource *iomem;
 	int irq;
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index a59fb48..71e8e77 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -962,15 +962,14 @@
 	enum dma_status ret;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret != DMA_SUCCESS) {
-		spin_lock_bh(&dc->lock);
-		txx9dmac_scan_descriptors(dc);
-		spin_unlock_bh(&dc->lock);
+	if (ret == DMA_SUCCESS)
+		return DMA_SUCCESS;
 
-		ret = dma_cookie_status(chan, cookie, txstate);
-	}
+	spin_lock_bh(&dc->lock);
+	txx9dmac_scan_descriptors(dc);
+	spin_unlock_bh(&dc->lock);
 
-	return ret;
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 static void txx9dmac_chain_dynamic(struct txx9dmac_chan *dc,
@@ -1118,9 +1117,10 @@
 
 static int __init txx9dmac_chan_probe(struct platform_device *pdev)
 {
-	struct txx9dmac_chan_platform_data *cpdata = pdev->dev.platform_data;
+	struct txx9dmac_chan_platform_data *cpdata =
+			dev_get_platdata(&pdev->dev);
 	struct platform_device *dmac_dev = cpdata->dmac_dev;
-	struct txx9dmac_platform_data *pdata = dmac_dev->dev.platform_data;
+	struct txx9dmac_platform_data *pdata = dev_get_platdata(&dmac_dev->dev);
 	struct txx9dmac_chan *dc;
 	int err;
 	int ch = pdev->id % TXX9_DMA_MAX_NR_CHANNELS;
@@ -1203,7 +1203,7 @@
 
 static int __init txx9dmac_probe(struct platform_device *pdev)
 {
-	struct txx9dmac_platform_data *pdata = pdev->dev.platform_data;
+	struct txx9dmac_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct resource *io;
 	struct txx9dmac_dev *ddev;
 	u32 mcr;
@@ -1282,7 +1282,7 @@
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
-	struct txx9dmac_platform_data *pdata = pdev->dev.platform_data;
+	struct txx9dmac_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	u32 mcr;
 
 	mcr = TXX9_DMA_MCR_MSTEN | MCR_LE;
diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h
index 3e7b62f..91b84a7 100644
--- a/include/linux/amba/pl080.h
+++ b/include/linux/amba/pl080.h
@@ -87,6 +87,7 @@
 #define PL080_CONTROL_SB_SIZE_MASK		(0x7 << 12)
 #define PL080_CONTROL_SB_SIZE_SHIFT		(12)
 #define PL080_CONTROL_TRANSFER_SIZE_MASK	(0xfff << 0)
+#define PL080S_CONTROL_TRANSFER_SIZE_MASK	(0x1ffffff << 0)
 #define PL080_CONTROL_TRANSFER_SIZE_SHIFT	(0)
 
 #define PL080_BSIZE_1				(0x0)
diff --git a/include/linux/dma/mmp-pdma.h b/include/linux/dma/mmp-pdma.h
new file mode 100644
index 0000000..2dc9b2b
--- /dev/null
+++ b/include/linux/dma/mmp-pdma.h
@@ -0,0 +1,15 @@
+#ifndef _MMP_PDMA_H_
+#define _MMP_PDMA_H_
+
+struct dma_chan;
+
+#ifdef CONFIG_MMP_PDMA
+bool mmp_pdma_filter_fn(struct dma_chan *chan, void *param);
+#else
+static inline bool mmp_pdma_filter_fn(struct dma_chan *chan, void *param)
+{
+	return false;
+}
+#endif
+
+#endif /* _MMP_PDMA_H_ */
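
Non-DT clients pair this filter with dma_request_channel(). A minimal sketch,
assuming the opaque parameter carries the peripheral's DRCMR request line (the
value 18 is illustrative, and how the driver interprets the parameter is an
assumption here):

	#include <linux/dmaengine.h>
	#include <linux/dma/mmp-pdma.h>

	static struct dma_chan *foo_request_dma(void)
	{
		dma_cap_mask_t mask;

		dma_cap_zero(mask);
		dma_cap_set(DMA_SLAVE, mask);

		/* assumption: the parameter encodes the DRCMR request line */
		return dma_request_channel(mask, mmp_pdma_filter_fn,
					   (void *)(unsigned long)18);
	}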
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c271608..13ac4f5 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -370,6 +370,33 @@
 	unsigned int slave_id;
 };
 
+/**
+ * struct dma_slave_caps - expose capabilities of a slave channel only
+ *
+ * @src_addr_widths: bit mask of src addr widths the channel supports
+ * @dstn_addr_widths: bit mask of dstn addr widths the channel supports
+ * @directions: bit mask of slave directions the channel supports.
+ * 	Since enum dma_transfer_direction is not defined as a bit mask,
+ * 	the DMA controller should set (1 << <TYPE>) for each supported
+ * 	direction, and clients should test the mask the same way
+ * @cmd_pause: true, if pause (and thereby resume) is supported
+ * @cmd_terminate: true, if the terminate command is supported
+ * @max_sg_nr: maximum number of SG segments supported, 0 for no maximum
+ * @max_sg_len: maximum length of an SG segment supported, 0 for no maximum
+ */
+struct dma_slave_caps {
+	u32 src_addr_widths;
+	u32 dstn_addr_widths;
+	u32 directions;
+	bool cmd_pause;
+	bool cmd_terminate;
+
+	u32 max_sg_nr;
+	u32 max_sg_len;
+};
+
 static inline const char *dma_chan_name(struct dma_chan *chan)
 {
 	return dev_name(&chan->dev->device);
@@ -532,6 +559,7 @@
  *	struct with auxiliary transfer status information, otherwise the call
  *	will just return a simple status code
  * @device_issue_pending: push pending transactions to hardware
+ * @device_slave_caps: return the slave channel capabilities
  */
 struct dma_device {
 
@@ -597,6 +625,7 @@
 					    dma_cookie_t cookie,
 					    struct dma_tx_state *txstate);
 	void (*device_issue_pending)(struct dma_chan *chan);
+	int (*device_slave_caps)(struct dma_chan *chan,
+				 struct dma_slave_caps *caps);
 };
 
 static inline int dmaengine_device_control(struct dma_chan *chan,
@@ -670,6 +699,21 @@
 	return chan->device->device_prep_interleaved_dma(chan, xt, flags);
 }
 
+static inline int dma_get_slave_caps(struct dma_chan *chan,
+				     struct dma_slave_caps *caps)
+{
+	if (!chan || !caps)
+		return -EINVAL;
+
+	/* check if the channel supports slave transactions */
+	if (!test_bit(DMA_SLAVE, chan->device->cap_mask.bits))
+		return -ENXIO;
+
+	if (chan->device->device_slave_caps)
+		return chan->device->device_slave_caps(chan, caps);
+
+	return -ENXIO;
+}
+
 static inline int dmaengine_terminate_all(struct dma_chan *chan)
 {
 	return dmaengine_device_control(chan, DMA_TERMINATE_ALL, 0);
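
On the client side, the new query lets a driver verify a channel's fitness
before configuring it. A minimal sketch for a client that requires pause
support and device-to-memory transfers (foo_check_chan() is an illustrative
wrapper, not part of this patch):

	static int foo_check_chan(struct dma_chan *chan)
	{
		struct dma_slave_caps caps;
		int ret;

		ret = dma_get_slave_caps(chan, &caps);
		if (ret)
			return ret;	/* not a slave channel, or no caps exported */

		/* directions is filled as (1 << <TYPE>), per the kernel-doc above */
		if (!caps.cmd_pause || !(caps.directions & (1 << DMA_DEV_TO_MEM)))
			return -EINVAL;

		return 0;
	}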
diff --git a/include/linux/fsl/mxs-dma.h b/include/linux/fsl/mxs-dma.h
deleted file mode 100644
index 55d8702..0000000
--- a/include/linux/fsl/mxs-dma.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright 2011 Freescale Semiconductor, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __MACH_MXS_DMA_H__
-#define __MACH_MXS_DMA_H__
-
-#include <linux/dmaengine.h>
-
-struct mxs_dma_data {
-	int chan_irq;
-};
-
-extern int mxs_dma_is_apbh(struct dma_chan *chan);
-extern int mxs_dma_is_apbx(struct dma_chan *chan);
-#endif /* __MACH_MXS_DMA_H__ */
diff --git a/include/linux/platform_data/dma-rcar-hpbdma.h b/include/linux/platform_data/dma-rcar-hpbdma.h
new file mode 100644
index 0000000..648b8ea
--- /dev/null
+++ b/include/linux/platform_data/dma-rcar-hpbdma.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2011-2013 Renesas Electronics Corporation
+ * Copyright (C) 2013 Cogent Embedded, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef __DMA_RCAR_HPBDMA_H
+#define __DMA_RCAR_HPBDMA_H
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+
+/* Transmit sizes and respective register values */
+enum {
+	XMIT_SZ_8BIT	= 0,
+	XMIT_SZ_16BIT	= 1,
+	XMIT_SZ_32BIT	= 2,
+	XMIT_SZ_MAX
+};
+
+/* DMA control register (DCR) bits */
+#define HPB_DMAE_DCR_DTAMD		(1u << 26)
+#define HPB_DMAE_DCR_DTAC		(1u << 25)
+#define HPB_DMAE_DCR_DTAU		(1u << 24)
+#define HPB_DMAE_DCR_DTAU1		(1u << 23)
+#define HPB_DMAE_DCR_SWMD		(1u << 22)
+#define HPB_DMAE_DCR_BTMD		(1u << 21)
+#define HPB_DMAE_DCR_PKMD		(1u << 20)
+#define HPB_DMAE_DCR_CT			(1u << 18)
+#define HPB_DMAE_DCR_ACMD		(1u << 17)
+#define HPB_DMAE_DCR_DIP		(1u << 16)
+#define HPB_DMAE_DCR_SMDL		(1u << 13)
+#define HPB_DMAE_DCR_SPDAM		(1u << 12)
+#define HPB_DMAE_DCR_SDRMD_MASK		(3u << 10)
+#define HPB_DMAE_DCR_SDRMD_MOD		(0u << 10)
+#define HPB_DMAE_DCR_SDRMD_AUTO		(1u << 10)
+#define HPB_DMAE_DCR_SDRMD_TIMER	(2u << 10)
+#define HPB_DMAE_DCR_SPDS_MASK		(3u << 8)
+#define HPB_DMAE_DCR_SPDS_8BIT		(0u << 8)
+#define HPB_DMAE_DCR_SPDS_16BIT		(1u << 8)
+#define HPB_DMAE_DCR_SPDS_32BIT		(2u << 8)
+#define HPB_DMAE_DCR_DMDL		(1u << 5)
+#define HPB_DMAE_DCR_DPDAM		(1u << 4)
+#define HPB_DMAE_DCR_DDRMD_MASK		(3u << 2)
+#define HPB_DMAE_DCR_DDRMD_MOD		(0u << 2)
+#define HPB_DMAE_DCR_DDRMD_AUTO		(1u << 2)
+#define HPB_DMAE_DCR_DDRMD_TIMER	(2u << 2)
+#define HPB_DMAE_DCR_DPDS_MASK		(3u << 0)
+#define HPB_DMAE_DCR_DPDS_8BIT		(0u << 0)
+#define HPB_DMAE_DCR_DPDS_16BIT		(1u << 0)
+#define HPB_DMAE_DCR_DPDS_32BIT		(2u << 0)
+
+/* Asynchronous reset register (ASYNCRSTR) bits */
+#define HPB_DMAE_ASYNCRSTR_ASRST41	BIT(10)
+#define HPB_DMAE_ASYNCRSTR_ASRST40	BIT(9)
+#define HPB_DMAE_ASYNCRSTR_ASRST39	BIT(8)
+#define HPB_DMAE_ASYNCRSTR_ASRST27	BIT(7)
+#define HPB_DMAE_ASYNCRSTR_ASRST26	BIT(6)
+#define HPB_DMAE_ASYNCRSTR_ASRST25	BIT(5)
+#define HPB_DMAE_ASYNCRSTR_ASRST24	BIT(4)
+#define HPB_DMAE_ASYNCRSTR_ASRST23	BIT(3)
+#define HPB_DMAE_ASYNCRSTR_ASRST22	BIT(2)
+#define HPB_DMAE_ASYNCRSTR_ASRST21	BIT(1)
+#define HPB_DMAE_ASYNCRSTR_ASRST20	BIT(0)
+
+struct hpb_dmae_slave_config {
+	unsigned int	id;
+	dma_addr_t	addr;
+	u32		dcr;
+	u32		port;
+	u32		rstr;
+	u32		mdr;
+	u32		mdm;
+	u32		flags;
+#define	HPB_DMAE_SET_ASYNC_RESET	BIT(0)
+#define	HPB_DMAE_SET_ASYNC_MODE		BIT(1)
+	u32		dma_ch;
+};
+
+#define HPB_DMAE_CHANNEL(_irq, _s_id)	\
+{					\
+	.ch_irq		= _irq,		\
+	.s_id		= _s_id,	\
+}
+
+struct hpb_dmae_channel {
+	unsigned int	ch_irq;
+	unsigned int	s_id;
+};
+
+struct hpb_dmae_pdata {
+	const struct hpb_dmae_slave_config *slaves;
+	int num_slaves;
+	const struct hpb_dmae_channel *channels;
+	int num_channels;
+	const unsigned int ts_shift[XMIT_SZ_MAX];
+	int num_hw_channels;
+};
+
+#endif	/* __DMA_RCAR_HPBDMA_H */
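
A board file would feed this platform data to the "hpb-dma-engine" device. A
hypothetical single-slave example; every ID, address, IRQ, and register value
below is a placeholder rather than data for a real R-Car board:

	#include <linux/kernel.h>
	#include <linux/platform_data/dma-rcar-hpbdma.h>

	static const struct hpb_dmae_slave_config board_slaves[] = {
		{
			.id	= 0x20,		/* placeholder slave ID */
			.addr	= 0xffd60034,	/* placeholder FIFO address */
			.dcr	= HPB_DMAE_DCR_SPDS_16BIT |
				  HPB_DMAE_DCR_DPDS_16BIT,
			.dma_ch	= 21,		/* placeholder HW channel */
		},
	};

	static const struct hpb_dmae_channel board_channels[] = {
		HPB_DMAE_CHANNEL(93, 0x20),	/* placeholder IRQ, slave ID */
	};

	static struct hpb_dmae_pdata board_dma_pdata = {
		.slaves		= board_slaves,
		.num_slaves	= ARRAY_SIZE(board_slaves),
		.channels	= board_channels,
		.num_channels	= ARRAY_SIZE(board_channels),
		/* shift per transmit size: 8-bit => 0, 16-bit => 1, 32-bit => 2 */
		.ts_shift	= {
			[XMIT_SZ_8BIT]	= 0,
			[XMIT_SZ_16BIT]	= 1,
			[XMIT_SZ_32BIT]	= 2,
		},
		.num_hw_channels = 44,		/* placeholder */
	};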