Merge branches 'dma40', 'pl08x', 'fsldma', 'imx' and 'intel-mid' into dmaengine
diff --git a/arch/arm/mach-imx/include/mach/dma-v1.h b/arch/arm/mach-imx/include/mach/dma-v1.h
index 287431c..ac6fd71 100644
--- a/arch/arm/mach-imx/include/mach/dma-v1.h
+++ b/arch/arm/mach-imx/include/mach/dma-v1.h
@@ -27,6 +27,8 @@
 
 #define imx_has_dma_v1()	(cpu_is_mx1() || cpu_is_mx21() || cpu_is_mx27())
 
+#include <mach/dma.h>
+
 #define IMX_DMA_CHANNELS  16
 
 #define DMA_MODE_READ		0
@@ -96,12 +98,6 @@
 
 void imx_dma_free(int channel);
 
-enum imx_dma_prio {
-	DMA_PRIO_HIGH = 0,
-	DMA_PRIO_MEDIUM = 1,
-	DMA_PRIO_LOW = 2
-};
-
 int imx_dma_request_by_prio(const char *name, enum imx_dma_prio prio);
 
 #endif	/* __MACH_DMA_V1_H__ */
diff --git a/arch/arm/mach-ux500/devices-db8500.c b/arch/arm/mach-ux500/devices-db8500.c
index 9280d25..58b3e72 100644
--- a/arch/arm/mach-ux500/devices-db8500.c
+++ b/arch/arm/mach-ux500/devices-db8500.c
@@ -193,7 +193,6 @@
 	.memcpy_len = ARRAY_SIZE(dma40_memcpy_event),
 	.memcpy_conf_phy = &dma40_memcpy_conf_phy,
 	.memcpy_conf_log = &dma40_memcpy_conf_log,
-	.llis_per_log = 8,
 	.disabled_channels = {-1},
 };
 
diff --git a/arch/arm/plat-mxc/include/mach/dma.h b/arch/arm/plat-mxc/include/mach/dma.h
new file mode 100644
index 0000000..ef77515
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/dma.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2004-2009 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARCH_MXC_DMA_H__
+#define __ASM_ARCH_MXC_DMA_H__
+
+#include <linux/scatterlist.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+
+/*
+ * This enumerates peripheral types. Used for SDMA.
+ */
+enum sdma_peripheral_type {
+	IMX_DMATYPE_SSI,	/* MCU domain SSI */
+	IMX_DMATYPE_SSI_SP,	/* Shared SSI */
+	IMX_DMATYPE_MMC,	/* MMC */
+	IMX_DMATYPE_SDHC,	/* SDHC */
+	IMX_DMATYPE_UART,	/* MCU domain UART */
+	IMX_DMATYPE_UART_SP,	/* Shared UART */
+	IMX_DMATYPE_FIRI,	/* FIRI */
+	IMX_DMATYPE_CSPI,	/* MCU domain CSPI */
+	IMX_DMATYPE_CSPI_SP,	/* Shared CSPI */
+	IMX_DMATYPE_SIM,	/* SIM */
+	IMX_DMATYPE_ATA,	/* ATA */
+	IMX_DMATYPE_CCM,	/* CCM */
+	IMX_DMATYPE_EXT,	/* External peripheral */
+	IMX_DMATYPE_MSHC,	/* Memory Stick Host Controller */
+	IMX_DMATYPE_MSHC_SP,	/* Shared Memory Stick Host Controller */
+	IMX_DMATYPE_DSP,	/* DSP */
+	IMX_DMATYPE_MEMORY,	/* Memory */
+	IMX_DMATYPE_FIFO_MEMORY,/* FIFO type Memory */
+	IMX_DMATYPE_SPDIF,	/* SPDIF */
+	IMX_DMATYPE_IPU_MEMORY,	/* IPU Memory */
+	IMX_DMATYPE_ASRC,	/* ASRC */
+	IMX_DMATYPE_ESAI,	/* ESAI */
+};
+
+enum imx_dma_prio {
+	DMA_PRIO_HIGH = 0,
+	DMA_PRIO_MEDIUM = 1,
+	DMA_PRIO_LOW = 2
+};
+
+struct imx_dma_data {
+	int dma_request; /* DMA request line */
+	enum sdma_peripheral_type peripheral_type;
+	int priority;
+};
+
+static inline int imx_dma_is_ipu(struct dma_chan *chan)
+{
+	return !strcmp(dev_name(chan->device->dev), "ipu-core");
+}
+
+static inline int imx_dma_is_general_purpose(struct dma_chan *chan)
+{
+	return !strcmp(dev_name(chan->device->dev), "imx-sdma") ||
+		!strcmp(dev_name(chan->device->dev), "imx-dma");
+}
+
+#endif
diff --git a/arch/arm/plat-mxc/include/mach/sdma.h b/arch/arm/plat-mxc/include/mach/sdma.h
new file mode 100644
index 0000000..9be1122
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/sdma.h
@@ -0,0 +1,17 @@
+#ifndef __MACH_MXC_SDMA_H__
+#define __MACH_MXC_SDMA_H__
+
+/**
+ * struct sdma_platform_data - platform specific data for SDMA engine
+ *
+ * @sdma_version:	The version of this SDMA engine
+ * @cpu_name:		used to generate the firmware name
+ * @to_version:		CPU Tape out version
+ */
+struct sdma_platform_data {
+	int sdma_version;
+	char *cpu_name;
+	int to_version;
+};
+
+#endif /* __MACH_MXC_SDMA_H__ */
diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h
index 5fbde4b..3dd4255 100644
--- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h
+++ b/arch/arm/plat-nomadik/include/plat/ste_dma40.h
@@ -1,10 +1,8 @@
 /*
- * arch/arm/plat-nomadik/include/plat/ste_dma40.h
- *
- * Copyright (C) ST-Ericsson 2007-2010
+ * Copyright (C) ST-Ericsson SA 2007-2010
+ * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
  * License terms: GNU General Public License (GPL) version 2
- * Author: Per Friden <per.friden@stericsson.com>
- * Author: Jonas Aaberg <jonas.aberg@stericsson.com>
  */
 
 
@@ -73,6 +71,9 @@
 #define STEDMA40_PSIZE_LOG_8  STEDMA40_PSIZE_PHY_8
 #define STEDMA40_PSIZE_LOG_16 STEDMA40_PSIZE_PHY_16
 
+/* Maximum number of possible physical channels */
+#define STEDMA40_MAX_PHYS 32
+
 enum stedma40_flow_ctrl {
 	STEDMA40_NO_FLOW_CTRL,
 	STEDMA40_FLOW_CTRL,
@@ -90,6 +91,22 @@
 	STEDMA40_DOUBLEWORD_WIDTH = STEDMA40_ESIZE_64_BIT
 };
 
+enum stedma40_xfer_dir {
+	STEDMA40_MEM_TO_MEM = 1,
+	STEDMA40_MEM_TO_PERIPH,
+	STEDMA40_PERIPH_TO_MEM,
+	STEDMA40_PERIPH_TO_PERIPH
+};
+
+
+/**
+ * struct stedma40_half_channel_info - dst/src channel configuration
+ *
+ * @endianess: Endianess of the src/dst hardware
+ * @data_width: Data width of the src/dst hardware
+ * @p_size: Burst size
+ * @flow_ctrl: Flow control on/off.
+ */
 struct stedma40_half_channel_info {
 	enum stedma40_endianess endianess;
 	enum stedma40_periph_data_width data_width;
@@ -97,14 +114,6 @@
 	enum stedma40_flow_ctrl flow_ctrl;
 };
 
-enum stedma40_xfer_dir {
-	STEDMA40_MEM_TO_MEM,
-	STEDMA40_MEM_TO_PERIPH,
-	STEDMA40_PERIPH_TO_MEM,
-	STEDMA40_PERIPH_TO_PERIPH
-};
-
-
 /**
  * struct stedma40_chan_cfg - Structure to be filled by client drivers.
  *
@@ -114,10 +123,6 @@
  * @dst_dev_type: Dst device type
  * @src_info: Parameters for dst half channel
  * @dst_info: Parameters for dst half channel
- * @pre_transfer_data: Data to be passed on to the pre_transfer() function.
- * @pre_transfer: Callback used if needed before preparation of transfer.
- * Only called if device is set. size of bytes to transfer
- * (in case of multiple element transfer size is size of the first element).
  *
  *
  * This structure has to be filled by the client drivers.
@@ -131,10 +136,6 @@
 	int					 dst_dev_type;
 	struct stedma40_half_channel_info	 src_info;
 	struct stedma40_half_channel_info	 dst_info;
-	void					*pre_transfer_data;
-	int (*pre_transfer)			(struct dma_chan *chan,
-						 void *data,
-						 int size);
 };
 
 /**
@@ -147,7 +148,6 @@
  * @memcpy_len: length of memcpy
  * @memcpy_conf_phy: default configuration of physical channel memcpy
  * @memcpy_conf_log: default configuration of logical channel memcpy
- * @llis_per_log: number of max linked list items per logical channel
  * @disabled_channels: A vector, ending with -1, that marks physical channels
  * that are for different reasons not available for the driver.
  */
@@ -159,23 +159,10 @@
 	u32				 memcpy_len;
 	struct stedma40_chan_cfg	*memcpy_conf_phy;
 	struct stedma40_chan_cfg	*memcpy_conf_log;
-	unsigned int			 llis_per_log;
-	int				 disabled_channels[8];
+	int				 disabled_channels[STEDMA40_MAX_PHYS];
 };
 
-/**
- * setdma40_set_psize() - Used for changing the package size of an
- * already configured dma channel.
- *
- * @chan: dmaengine handle
- * @src_psize: new package side for src. (STEDMA40_PSIZE*)
- * @src_psize: new package side for dst. (STEDMA40_PSIZE*)
- *
- * returns 0 on ok, otherwise negative error number.
- */
-int stedma40_set_psize(struct dma_chan *chan,
-		       int src_psize,
-		       int dst_psize);
+#ifdef CONFIG_STE_DMA40
 
 /**
  * stedma40_filter() - Provides stedma40_chan_cfg to the
@@ -238,4 +225,21 @@
 						  direction, flags);
 }
 
+#else
+static inline bool stedma40_filter(struct dma_chan *chan, void *data)
+{
+	return false;
+}
+
+static inline struct
+dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan,
+					    dma_addr_t addr,
+					    unsigned int size,
+					    enum dma_data_direction direction,
+					    unsigned long flags)
+{
+	return NULL;
+}
+#endif
+
 #endif
diff --git a/arch/powerpc/include/asm/fsldma.h b/arch/powerpc/include/asm/fsldma.h
deleted file mode 100644
index debc5ed..0000000
--- a/arch/powerpc/include/asm/fsldma.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Freescale MPC83XX / MPC85XX DMA Controller
- *
- * Copyright (c) 2009 Ira W. Snyder <iws@ovro.caltech.edu>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef __ARCH_POWERPC_ASM_FSLDMA_H__
-#define __ARCH_POWERPC_ASM_FSLDMA_H__
-
-#include <linux/slab.h>
-#include <linux/dmaengine.h>
-
-/*
- * Definitions for the Freescale DMA controller's DMA_SLAVE implemention
- *
- * The Freescale DMA_SLAVE implementation was designed to handle many-to-many
- * transfers. An example usage would be an accelerated copy between two
- * scatterlists. Another example use would be an accelerated copy from
- * multiple non-contiguous device buffers into a single scatterlist.
- *
- * A DMA_SLAVE transaction is defined by a struct fsl_dma_slave. This
- * structure contains a list of hardware addresses that should be copied
- * to/from the scatterlist passed into device_prep_slave_sg(). The structure
- * also has some fields to enable hardware-specific features.
- */
-
-/**
- * struct fsl_dma_hw_addr
- * @entry: linked list entry
- * @address: the hardware address
- * @length: length to transfer
- *
- * Holds a single physical hardware address / length pair for use
- * with the DMAEngine DMA_SLAVE API.
- */
-struct fsl_dma_hw_addr {
-	struct list_head entry;
-
-	dma_addr_t address;
-	size_t length;
-};
-
-/**
- * struct fsl_dma_slave
- * @addresses: a linked list of struct fsl_dma_hw_addr structures
- * @request_count: value for DMA request count
- * @src_loop_size: setup and enable constant source-address DMA transfers
- * @dst_loop_size: setup and enable constant destination address DMA transfers
- * @external_start: enable externally started DMA transfers
- * @external_pause: enable externally paused DMA transfers
- *
- * Holds a list of address / length pairs for use with the DMAEngine
- * DMA_SLAVE API implementation for the Freescale DMA controller.
- */
-struct fsl_dma_slave {
-
-	/* List of hardware address/length pairs */
-	struct list_head addresses;
-
-	/* Support for extra controller features */
-	unsigned int request_count;
-	unsigned int src_loop_size;
-	unsigned int dst_loop_size;
-	bool external_start;
-	bool external_pause;
-};
-
-/**
- * fsl_dma_slave_append - add an address/length pair to a struct fsl_dma_slave
- * @slave: the &struct fsl_dma_slave to add to
- * @address: the hardware address to add
- * @length: the length of bytes to transfer from @address
- *
- * Add a hardware address/length pair to a struct fsl_dma_slave. Returns 0 on
- * success, -ERRNO otherwise.
- */
-static inline int fsl_dma_slave_append(struct fsl_dma_slave *slave,
-				       dma_addr_t address, size_t length)
-{
-	struct fsl_dma_hw_addr *addr;
-
-	addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
-	if (!addr)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&addr->entry);
-	addr->address = address;
-	addr->length = length;
-
-	list_add_tail(&addr->entry, &slave->addresses);
-	return 0;
-}
-
-/**
- * fsl_dma_slave_free - free a struct fsl_dma_slave
- * @slave: the struct fsl_dma_slave to free
- *
- * Free a struct fsl_dma_slave and all associated address/length pairs
- */
-static inline void fsl_dma_slave_free(struct fsl_dma_slave *slave)
-{
-	struct fsl_dma_hw_addr *addr, *tmp;
-
-	if (slave) {
-		list_for_each_entry_safe(addr, tmp, &slave->addresses, entry) {
-			list_del(&addr->entry);
-			kfree(addr);
-		}
-
-		kfree(slave);
-	}
-}
-
-/**
- * fsl_dma_slave_alloc - allocate a struct fsl_dma_slave
- * @gfp: the flags to pass to kmalloc when allocating this structure
- *
- * Allocate a struct fsl_dma_slave for use by the DMA_SLAVE API. Returns a new
- * struct fsl_dma_slave on success, or NULL on failure.
- */
-static inline struct fsl_dma_slave *fsl_dma_slave_alloc(gfp_t gfp)
-{
-	struct fsl_dma_slave *slave;
-
-	slave = kzalloc(sizeof(*slave), gfp);
-	if (!slave)
-		return NULL;
-
-	INIT_LIST_HEAD(&slave->addresses);
-	return slave;
-}
-
-#endif /* __ARCH_POWERPC_ASM_FSLDMA_H__ */
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 9520cf0..ab28f60 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -49,6 +49,14 @@
 config ASYNC_TX_DISABLE_CHANNEL_SWITCH
 	bool
 
+config AMBA_PL08X
+	bool "ARM PrimeCell PL080 or PL081 support"
+	depends on ARM_AMBA && EXPERIMENTAL
+	select DMA_ENGINE
+	help
+	  Platform has a PL08x DMAC device
+	  which can provide DMA engine support
+
 config INTEL_IOATDMA
 	tristate "Intel I/OAT DMA support"
 	depends on PCI && X86
@@ -195,6 +203,22 @@
 	help
 	  Enable support for the Topcliff PCH DMA engine.
 
+config IMX_SDMA
+	tristate "i.MX SDMA support"
+	depends on ARCH_MX25 || ARCH_MX3 || ARCH_MX5
+	select DMA_ENGINE
+	help
+	  Support the i.MX SDMA engine. This engine is integrated into
+	  Freescale i.MX25/31/35/51 chips.
+
+config IMX_DMA
+	tristate "i.MX DMA support"
+	depends on ARCH_MX1 || ARCH_MX21 || MACH_MX27
+	select DMA_ENGINE
+	help
+	  Support the i.MX DMA engine. This engine is integrated into
+	  Freescale i.MX1/21/27 chips.
+
 config DMA_ENGINE
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 72bd703..a8a84f4 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -21,7 +21,10 @@
 obj-$(CONFIG_SH_DMAE) += shdma.o
 obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o
 obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
+obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
+obj-$(CONFIG_IMX_DMA) += imx-dma.o
 obj-$(CONFIG_TIMB_DMA) += timb_dma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
 obj-$(CONFIG_PCH_DMA) += pch_dma.o
+obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
new file mode 100644
index 0000000..b605cc9
--- /dev/null
+++ b/drivers/dma/amba-pl08x.c
@@ -0,0 +1,2167 @@
+/*
+ * Copyright (c) 2006 ARM Ltd.
+ * Copyright (c) 2010 ST-Ericsson SA
+ *
+ * Author: Peter Pearse <peter.pearse@arm.com>
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called COPYING.
+ *
+ * Documentation: ARM DDI 0196G == PL080
+ * Documentation: ARM DDI 0218E	== PL081
+ *
+ * PL080 & PL081 both have 16 sets of DMA signals that can be routed to
+ * any channel.
+ *
+ * The PL080 has 8 channels available for simultaneous use, and the PL081
+ * has only two channels. So on these DMA controllers the number of channels
+ * and the number of incoming DMA signals are two totally different things.
+ * It is usually not possible to theoretically handle all physical signals,
+ * so a multiplexing scheme with possible denial of use is necessary.
+ *
+ * The PL080 has a dual bus master, PL081 has a single master.
+ *
+ * Memory to peripheral transfer may be visualized as
+ *	Get data from memory to DMAC
+ *	Until no data left
+ *		On burst request from peripheral
+ *			Destination burst from DMAC to peripheral
+ *			Clear burst request
+ *	Raise terminal count interrupt
+ *
+ * For peripherals with a FIFO:
+ * Source      burst size == half the depth of the peripheral FIFO
+ * Destination burst size == the depth of the peripheral FIFO
+ *
+ * (Bursts are irrelevant for mem to mem transfers - there are no burst
+ * signals, the DMA controller will simply facilitate its AHB master.)
+ *
+ * ASSUMES default (little) endianness for DMA transfers
+ *
+ * Only DMAC flow control is implemented
+ *
+ * Global TODO:
+ * - Break out common code from arch/arm/mach-s3c64xx and share
+ */
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/dmapool.h>
+#include <linux/amba/bus.h>
+#include <linux/dmaengine.h>
+#include <linux/amba/pl08x.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <asm/hardware/pl080.h>
+#include <asm/dma.h>
+#include <asm/mach/dma.h>
+#include <asm/atomic.h>
+#include <asm/processor.h>
+#include <asm/cacheflush.h>
+
+#define DRIVER_NAME	"pl08xdmac"
+
+/**
+ * struct vendor_data - vendor-specific config parameters
+ * for PL08x derivatives
+ * @name: the name of this specific variant
+ * @channels: the number of channels available in this variant
+ * @dualmaster: whether this version supports dual AHB masters
+ * or not.
+ */
+struct vendor_data {
+	char *name;
+	u8 channels;
+	bool dualmaster;
+};
+
+/*
+ * PL08X private data structures
+ * An LLI struct - see pl08x TRM
+ * Note that next uses bit[0] as a bus bit;
+ * src & dst do not - their bus bit info
+ * is in cctl
+ */
+struct lli {
+	dma_addr_t src;
+	dma_addr_t dst;
+	dma_addr_t next;
+	u32 cctl;
+};
+
+/**
+ * struct pl08x_driver_data - the local state holder for the PL08x
+ * @slave: slave engine for this instance
+ * @memcpy: memcpy engine for this instance
+ * @base: virtual memory base (remapped) for the PL08x
+ * @adev: the corresponding AMBA (PrimeCell) bus entry
+ * @vd: vendor data for this PL08x variant
+ * @pd: platform data passed in from the platform/machine
+ * @phy_chans: array of data for the physical channels
+ * @pool: a pool for the LLI descriptors
+ * @pool_ctr: counter of LLIs in the pool
+ * @lock: a spinlock for this struct
+ */
+struct pl08x_driver_data {
+	struct dma_device slave;
+	struct dma_device memcpy;
+	void __iomem *base;
+	struct amba_device *adev;
+	struct vendor_data *vd;
+	struct pl08x_platform_data *pd;
+	struct pl08x_phy_chan *phy_chans;
+	struct dma_pool *pool;
+	int pool_ctr;
+	spinlock_t lock;
+};
+
+/*
+ * PL08X specific defines
+ */
+
+/*
+ * Memory boundaries: the manual for PL08x says that the controller
+ * cannot read past a 1KiB boundary, so these defines are used to
+ * create transfer LLIs that do not cross such boundaries.
+ */
+#define PL08X_BOUNDARY_SHIFT		(10)	/* 1KB 0x400 */
+#define PL08X_BOUNDARY_SIZE		(1 << PL08X_BOUNDARY_SHIFT)
+
+/* Minimum period between work queue runs */
+#define PL08X_WQ_PERIODMIN	20
+
+/* Size (bytes) of each LLI buffer allocated for one transfer */
+# define PL08X_LLI_TSFR_SIZE	0x2000
+
+/* Maximum number of times we call dma_pool_alloc on this pool without freeing */
+#define PL08X_MAX_ALLOCS	0x40
+#define MAX_NUM_TSFR_LLIS	(PL08X_LLI_TSFR_SIZE/sizeof(struct lli))
+#define PL08X_ALIGN		8
+
+static inline struct pl08x_dma_chan *to_pl08x_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct pl08x_dma_chan, chan);
+}
+
+/*
+ * Physical channel handling
+ */
+
+/* Whether a certain channel is busy or not */
+static int pl08x_phy_channel_busy(struct pl08x_phy_chan *ch)
+{
+	unsigned int val;
+
+	val = readl(ch->base + PL080_CH_CONFIG);
+	return val & PL080_CONFIG_ACTIVE;
+}
+
+/*
+ * Set the initial DMA register values, i.e. those for the first LLI.
+ * The next LLI pointer and the configuration interrupt bit have
+ * been set when the LLIs were constructed.
+ */
+static void pl08x_set_cregs(struct pl08x_driver_data *pl08x,
+			    struct pl08x_phy_chan *ch)
+{
+	/* Wait for channel inactive */
+	while (pl08x_phy_channel_busy(ch))
+		;
+
+	dev_vdbg(&pl08x->adev->dev,
+		"WRITE channel %d: csrc=%08x, cdst=%08x, "
+		 "cctl=%08x, clli=%08x, ccfg=%08x\n",
+		ch->id,
+		ch->csrc,
+		ch->cdst,
+		ch->cctl,
+		ch->clli,
+		ch->ccfg);
+
+	writel(ch->csrc, ch->base + PL080_CH_SRC_ADDR);
+	writel(ch->cdst, ch->base + PL080_CH_DST_ADDR);
+	writel(ch->clli, ch->base + PL080_CH_LLI);
+	writel(ch->cctl, ch->base + PL080_CH_CONTROL);
+	writel(ch->ccfg, ch->base + PL080_CH_CONFIG);
+}
+
+static inline void pl08x_config_phychan_for_txd(struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_channel_data *cd = plchan->cd;
+	struct pl08x_phy_chan *phychan = plchan->phychan;
+	struct pl08x_txd *txd = plchan->at;
+
+	/* Copy the basic control register calculated at transfer config */
+	phychan->csrc = txd->csrc;
+	phychan->cdst = txd->cdst;
+	phychan->clli = txd->clli;
+	phychan->cctl = txd->cctl;
+
+	/* Assign the signal to the proper control registers */
+	phychan->ccfg = cd->ccfg;
+	phychan->ccfg &= ~PL080_CONFIG_SRC_SEL_MASK;
+	phychan->ccfg &= ~PL080_CONFIG_DST_SEL_MASK;
+	/* If it wasn't set from AMBA, ignore it */
+	if (txd->direction == DMA_TO_DEVICE)
+		/* Select signal as destination */
+		phychan->ccfg |=
+			(phychan->signal << PL080_CONFIG_DST_SEL_SHIFT);
+	else if (txd->direction == DMA_FROM_DEVICE)
+		/* Select signal as source */
+		phychan->ccfg |=
+			(phychan->signal << PL080_CONFIG_SRC_SEL_SHIFT);
+	/* Always enable error interrupts */
+	phychan->ccfg |= PL080_CONFIG_ERR_IRQ_MASK;
+	/* Always enable terminal interrupts */
+	phychan->ccfg |= PL080_CONFIG_TC_IRQ_MASK;
+}
+
+/*
+ * Enable the DMA channel
+ * Assumes all other configuration bits have been set
+ * as desired before this code is called
+ */
+static void pl08x_enable_phy_chan(struct pl08x_driver_data *pl08x,
+				  struct pl08x_phy_chan *ch)
+{
+	u32 val;
+
+	/*
+	 * Do not access config register until channel shows as disabled
+	 */
+	while (readl(pl08x->base + PL080_EN_CHAN) & (1 << ch->id))
+		;
+
+	/*
+	 * Do not access config register until channel shows as inactive
+	 */
+	val = readl(ch->base + PL080_CH_CONFIG);
+	while ((val & PL080_CONFIG_ACTIVE) || (val & PL080_CONFIG_ENABLE))
+		val = readl(ch->base + PL080_CH_CONFIG);
+
+	writel(val | PL080_CONFIG_ENABLE, ch->base + PL080_CH_CONFIG);
+}
+
+/*
+ * Overall DMAC remains enabled always.
+ *
+ * Disabling individual channels could lose data.
+ *
+ * Disable the peripheral DMA after disabling the DMAC
+ * in order to allow the DMAC FIFO to drain, and
+ * hence allow the channel to show inactive
+ *
+ */
+static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch)
+{
+	u32 val;
+
+	/* Set the HALT bit and wait for the FIFO to drain */
+	val = readl(ch->base + PL080_CH_CONFIG);
+	val |= PL080_CONFIG_HALT;
+	writel(val, ch->base + PL080_CH_CONFIG);
+
+	/* Wait for channel inactive */
+	while (pl08x_phy_channel_busy(ch))
+		;
+}
+
+static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch)
+{
+	u32 val;
+
+	/* Clear the HALT bit */
+	val = readl(ch->base + PL080_CH_CONFIG);
+	val &= ~PL080_CONFIG_HALT;
+	writel(val, ch->base + PL080_CH_CONFIG);
+}
+
+
+/* Stops the channel */
+static void pl08x_stop_phy_chan(struct pl08x_phy_chan *ch)
+{
+	u32 val;
+
+	pl08x_pause_phy_chan(ch);
+
+	/* Disable channel */
+	val = readl(ch->base + PL080_CH_CONFIG);
+	val &= ~PL080_CONFIG_ENABLE;
+	val &= ~PL080_CONFIG_ERR_IRQ_MASK;
+	val &= ~PL080_CONFIG_TC_IRQ_MASK;
+	writel(val, ch->base + PL080_CH_CONFIG);
+}
+
+static inline u32 get_bytes_in_cctl(u32 cctl)
+{
+	/* Size is in source-width units; isolate SWIDTH to scale it */
+	u32 bytes = cctl & PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+	switch ((cctl & PL080_CONTROL_SWIDTH_MASK) >> PL080_CONTROL_SWIDTH_SHIFT) {
+	case PL080_WIDTH_8BIT:
+		break;
+	case PL080_WIDTH_16BIT:
+		bytes *= 2;
+		break;
+	case PL080_WIDTH_32BIT:
+		bytes *= 4;
+		break;
+	}
+	return bytes;
+}
+
+/* The channel should be paused when calling this */
+static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_phy_chan *ch;
+	struct pl08x_txd *txdi = NULL;
+	struct pl08x_txd *txd;
+	unsigned long flags;
+	u32 bytes = 0;
+
+	spin_lock_irqsave(&plchan->lock, flags);
+
+	ch = plchan->phychan;
+	txd = plchan->at;
+
+	/*
+	 * Next follow the LLIs to get the number of pending bytes in the
+	 * currently active transaction.
+	 */
+	if (ch && txd) {
+		struct lli *llis_va = txd->llis_va;
+		struct lli *llis_bus = (struct lli *) txd->llis_bus;
+		u32 clli = readl(ch->base + PL080_CH_LLI);
+
+		/* First get the bytes in the current active LLI */
+		bytes = get_bytes_in_cctl(readl(ch->base + PL080_CH_CONTROL));
+
+		if (clli) {
+			int i = 0;
+
+			/* Forward to the LLI pointed to by clli */
+			while ((clli != (u32) &(llis_bus[i])) &&
+			       (i < MAX_NUM_TSFR_LLIS))
+				i++;
+
+			while (clli) {
+				bytes += get_bytes_in_cctl(llis_va[i].cctl);
+				/*
+				 * A clli of 0x00000000 will terminate the
+				 * LLI list
+				 */
+				clli = llis_va[i].next;
+				i++;
+			}
+		}
+	}
+
+	/* Sum up all queued transactions */
+	if (!list_empty(&plchan->desc_list)) {
+		list_for_each_entry(txdi, &plchan->desc_list, node) {
+			bytes += txdi->len;
+		}
+
+	}
+
+	spin_unlock_irqrestore(&plchan->lock, flags);
+
+	return bytes;
+}
+
+/*
+ * Allocate a physical channel for a virtual channel
+ */
+static struct pl08x_phy_chan *
+pl08x_get_phy_channel(struct pl08x_driver_data *pl08x,
+		      struct pl08x_dma_chan *virt_chan)
+{
+	struct pl08x_phy_chan *ch = NULL;
+	unsigned long flags;
+	int i;
+
+	/*
+	 * Try to locate a physical channel to be used for
+	 * this transfer. If all are taken return NULL and
+	 * the requester will have to cope by using some fallback
+	 * PIO mode or retrying later.
+	 */
+	for (i = 0; i < pl08x->vd->channels; i++) {
+		ch = &pl08x->phy_chans[i];
+
+		spin_lock_irqsave(&ch->lock, flags);
+
+		if (!ch->serving) {
+			ch->serving = virt_chan;
+			ch->signal = -1;
+			spin_unlock_irqrestore(&ch->lock, flags);
+			break;
+		}
+
+		spin_unlock_irqrestore(&ch->lock, flags);
+	}
+
+	if (i == pl08x->vd->channels) {
+		/* No physical channel available, cope with it */
+		return NULL;
+	}
+
+	return ch;
+}
+
+static inline void pl08x_put_phy_channel(struct pl08x_driver_data *pl08x,
+					 struct pl08x_phy_chan *ch)
+{
+	unsigned long flags;
+
+	/* Stop the channel and clear its interrupts */
+	pl08x_stop_phy_chan(ch);
+	writel((1 << ch->id), pl08x->base + PL080_ERR_CLEAR);
+	writel((1 << ch->id), pl08x->base + PL080_TC_CLEAR);
+
+	/* Mark it as free */
+	spin_lock_irqsave(&ch->lock, flags);
+	ch->serving = NULL;
+	spin_unlock_irqrestore(&ch->lock, flags);
+}
+
+/*
+ * LLI handling
+ */
+
+static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded)
+{
+	switch (coded) {
+	case PL080_WIDTH_8BIT:
+		return 1;
+	case PL080_WIDTH_16BIT:
+		return 2;
+	case PL080_WIDTH_32BIT:
+		return 4;
+	default:
+		break;
+	}
+	BUG();
+	return 0;
+}
+
+static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth,
+				  u32 tsize)
+{
+	u32 retbits = cctl;
+
+	/* Remove all src, dst and transfersize bits */
+	retbits &= ~PL080_CONTROL_DWIDTH_MASK;
+	retbits &= ~PL080_CONTROL_SWIDTH_MASK;
+	retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+	/* Then set the bits according to the parameters */
+	switch (srcwidth) {
+	case 1:
+		retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT;
+		break;
+	case 2:
+		retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT;
+		break;
+	case 4:
+		retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	switch (dstwidth) {
+	case 1:
+		retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	case 2:
+		retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	case 4:
+		retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT;
+	return retbits;
+}
+
+/*
+ * Autoselect a master bus to use for the transfer.
+ * This prefers the destination bus if both are available;
+ * if one bus has a fixed address, the other will be chosen.
+ */
+void pl08x_choose_master_bus(struct pl08x_bus_data *src_bus,
+	struct pl08x_bus_data *dst_bus, struct pl08x_bus_data **mbus,
+	struct pl08x_bus_data **sbus, u32 cctl)
+{
+	if (!(cctl & PL080_CONTROL_DST_INCR)) {
+		*mbus = src_bus;
+		*sbus = dst_bus;
+	} else if (!(cctl & PL080_CONTROL_SRC_INCR)) {
+		*mbus = dst_bus;
+		*sbus = src_bus;
+	} else {
+		if (dst_bus->buswidth == 4) {
+			*mbus = dst_bus;
+			*sbus = src_bus;
+		} else if (src_bus->buswidth == 4) {
+			*mbus = src_bus;
+			*sbus = dst_bus;
+		} else if (dst_bus->buswidth == 2) {
+			*mbus = dst_bus;
+			*sbus = src_bus;
+		} else if (src_bus->buswidth == 2) {
+			*mbus = src_bus;
+			*sbus = dst_bus;
+		} else {
+			/* src_bus->buswidth == 1 */
+			*mbus = dst_bus;
+			*sbus = src_bus;
+		}
+	}
+}
+
+/*
+ * Fills in one LLI for a certain transfer descriptor
+ * and advance the counter
+ */
+int pl08x_fill_lli_for_desc(struct pl08x_driver_data *pl08x,
+			    struct pl08x_txd *txd, int num_llis, int len,
+			    u32 cctl, u32 *remainder)
+{
+	struct lli *llis_va = txd->llis_va;
+	struct lli *llis_bus = (struct lli *) txd->llis_bus;
+
+	BUG_ON(num_llis >= MAX_NUM_TSFR_LLIS);
+
+	llis_va[num_llis].cctl		= cctl;
+	llis_va[num_llis].src		= txd->srcbus.addr;
+	llis_va[num_llis].dst		= txd->dstbus.addr;
+
+	/*
+	 * On versions with dual masters, you can optionally OR
+	 * PL080_LLI_LM_AHB2 into the LLI to tell the hardware to read
+	 * in new LLIs with that controller, but we always try to
+	 * choose AHB1 to point into memory. The idea is to have AHB2
+	 * fixed on the peripheral and AHB1 messing around in the
+	 * memory. So we don't manipulate this bit currently.
+	 */
+
+	llis_va[num_llis].next =
+		(dma_addr_t)((u32) &(llis_bus[num_llis + 1]));
+
+	if (cctl & PL080_CONTROL_SRC_INCR)
+		txd->srcbus.addr += len;
+	if (cctl & PL080_CONTROL_DST_INCR)
+		txd->dstbus.addr += len;
+
+	*remainder -= len;
+
+	return num_llis + 1;
+}
+
+/*
+ * Return number of bytes to fill to boundary, or len
+ */
+static inline u32 pl08x_pre_boundary(u32 addr, u32 len)
+{
+	u32 boundary;
+
+	boundary = ((addr >> PL08X_BOUNDARY_SHIFT) + 1)
+		<< PL08X_BOUNDARY_SHIFT;
+
+	if (boundary < addr + len)
+		return boundary - addr;
+	else
+		return len;
+}
+
+/*
+ * This fills in the table of LLIs for the transfer descriptor
+ * Note that we assume we never have to change the burst sizes
+ *
+ * The txd->len bytes of the transfer are cut into LLIs so that the
+ * master bus picked by pl08x_choose_master_bus() stays aligned to its
+ * bus width: single byte LLIs are used to reach alignment, to creep
+ * past a memory boundary and for any trailing bytes, everything in
+ * between is sent in LLIs that are as large as possible.
+ *
+ * On success the number of LLIs is returned and the initial channel
+ * register values (csrc, cdst, clli, cctl) are stored in the txd.
+ * Return 0 for error; an LLI table allocated here is released again
+ * before returning 0 and txd->llis_va is reset to NULL.
+ */
+static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
+			      struct pl08x_txd *txd)
+{
+	struct pl08x_channel_data *cd;
+	struct pl08x_bus_data *mbus, *sbus;
+	u32 remainder;
+	int num_llis = 0;
+	u32 cctl;
+	int max_bytes_per_lli;
+	int total_bytes = 0;
+	struct lli *llis_va;
+	struct lli *llis_bus;
+
+	if (!txd) {
+		dev_err(&pl08x->adev->dev, "%s no descriptor\n", __func__);
+		return 0;
+	}
+
+	/*
+	 * Only look at the channel data once txd is known to be valid,
+	 * and reject a missing one before anything is allocated.
+	 */
+	cd = txd->cd;
+	if (!cd) {
+		dev_err(&pl08x->adev->dev, "%s no channel data\n", __func__);
+		return 0;
+	}
+
+	txd->llis_va = dma_pool_alloc(pl08x->pool, GFP_NOWAIT,
+				      &txd->llis_bus);
+	if (!txd->llis_va) {
+		dev_err(&pl08x->adev->dev, "%s no memory for llis\n", __func__);
+		return 0;
+	}
+
+	pl08x->pool_ctr++;
+
+	/*
+	 * Initialize bus values for this transfer
+	 * from the passed optimal values
+	 */
+
+	/* Get the default CCTL from the platform data */
+	cctl = cd->cctl;
+
+	/*
+	 * On the PL080 we have two bus masters and we
+	 * should select one for source and one for
+	 * destination. We try to use AHB2 for the
+	 * bus which does not increment (typically the
+	 * peripheral) else we just choose something.
+	 */
+	cctl &= ~(PL080_CONTROL_DST_AHB2 | PL080_CONTROL_SRC_AHB2);
+	if (pl08x->vd->dualmaster) {
+		if (cctl & PL080_CONTROL_SRC_INCR)
+			/* Source increments, use AHB2 for destination */
+			cctl |= PL080_CONTROL_DST_AHB2;
+		else if (cctl & PL080_CONTROL_DST_INCR)
+			/* Destination increments, use AHB2 for source */
+			cctl |= PL080_CONTROL_SRC_AHB2;
+		else
+			/* Just pick something, source AHB1 dest AHB2 */
+			cctl |= PL080_CONTROL_DST_AHB2;
+	}
+
+	/* Find maximum width of the source bus */
+	txd->srcbus.maxwidth =
+		pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_SWIDTH_MASK) >>
+				       PL080_CONTROL_SWIDTH_SHIFT);
+
+	/* Find maximum width of the destination bus */
+	txd->dstbus.maxwidth =
+		pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_DWIDTH_MASK) >>
+				       PL080_CONTROL_DWIDTH_SHIFT);
+
+	/* Set up the bus widths to the maximum */
+	txd->srcbus.buswidth = txd->srcbus.maxwidth;
+	txd->dstbus.buswidth = txd->dstbus.maxwidth;
+	dev_vdbg(&pl08x->adev->dev,
+		 "%s source bus is %d bytes wide, dest bus is %d bytes wide\n",
+		 __func__, txd->srcbus.buswidth, txd->dstbus.buswidth);
+
+
+	/*
+	 * Bytes transferred == tsize * MIN(buswidths), not max(buswidths)
+	 */
+	max_bytes_per_lli = min(txd->srcbus.buswidth, txd->dstbus.buswidth) *
+		PL080_CONTROL_TRANSFER_SIZE_MASK;
+	dev_vdbg(&pl08x->adev->dev,
+		 "%s max bytes per lli = %d\n",
+		 __func__, max_bytes_per_lli);
+
+	/* We need to count this down to zero */
+	remainder = txd->len;
+	dev_vdbg(&pl08x->adev->dev,
+		 "%s remainder = %d\n",
+		 __func__, remainder);
+
+	/*
+	 * Choose bus to align to
+	 * - prefers destination bus if both available
+	 * - if fixed address on one bus chooses other
+	 * - modifies cctl to choose an appropriate master
+	 */
+	pl08x_choose_master_bus(&txd->srcbus, &txd->dstbus,
+				&mbus, &sbus, cctl);
+
+
+	/*
+	 * The lowest bit of the LLI register
+	 * is also used to indicate which master to
+	 * use for reading the LLIs.
+	 */
+
+	if (txd->len < mbus->buswidth) {
+		/*
+		 * Less than a bus width available
+		 * - send as single bytes
+		 */
+		while (remainder) {
+			dev_vdbg(&pl08x->adev->dev,
+				 "%s single byte LLIs for a transfer of "
+				 "less than a bus width (remain %08x)\n",
+				 __func__, remainder);
+			cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
+			num_llis =
+				pl08x_fill_lli_for_desc(pl08x, txd, num_llis, 1,
+					cctl, &remainder);
+			total_bytes++;
+		}
+	} else {
+		/*
+		 *  Make one byte LLIs until master bus is aligned
+		 *  - slave will then be aligned also
+		 */
+		while ((mbus->addr) % (mbus->buswidth)) {
+			dev_vdbg(&pl08x->adev->dev,
+				"%s adjustment lli for less than bus width "
+				 "(remain %08x)\n",
+				 __func__, remainder);
+			cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
+			num_llis = pl08x_fill_lli_for_desc
+				(pl08x, txd, num_llis, 1, cctl, &remainder);
+			total_bytes++;
+		}
+
+		/*
+		 *  Master now aligned
+		 * - if slave is not then we must set its width down
+		 */
+		if (sbus->addr % sbus->buswidth) {
+			dev_dbg(&pl08x->adev->dev,
+				"%s set down bus width to one byte\n",
+				 __func__);
+
+			sbus->buswidth = 1;
+		}
+
+		/*
+		 * Make largest possible LLIs until less than one bus
+		 * width left
+		 */
+		while (remainder > (mbus->buswidth - 1)) {
+			int lli_len, target_len;
+			int tsize;
+			int odd_bytes;
+
+			/*
+			 * If enough left try to send max possible,
+			 * otherwise try to send the remainder
+			 */
+			target_len = remainder;
+			if (remainder > max_bytes_per_lli)
+				target_len = max_bytes_per_lli;
+
+			/*
+			 * Set bus lengths for incrementing busses
+			 * to number of bytes which fill to next memory
+			 * boundary
+			 */
+			if (cctl & PL080_CONTROL_SRC_INCR)
+				txd->srcbus.fill_bytes =
+					pl08x_pre_boundary(
+						txd->srcbus.addr,
+						remainder);
+			else
+				txd->srcbus.fill_bytes =
+					max_bytes_per_lli;
+
+			if (cctl & PL080_CONTROL_DST_INCR)
+				txd->dstbus.fill_bytes =
+					pl08x_pre_boundary(
+						txd->dstbus.addr,
+						remainder);
+			else
+				txd->dstbus.fill_bytes =
+						max_bytes_per_lli;
+
+			/*
+			 *  Find the nearest
+			 */
+			lli_len	= min(txd->srcbus.fill_bytes,
+				txd->dstbus.fill_bytes);
+
+			BUG_ON(lli_len > remainder);
+
+			if (lli_len <= 0) {
+				dev_err(&pl08x->adev->dev,
+					"%s lli_len is %d, <= 0\n",
+						__func__, lli_len);
+				goto err_free_llis;
+			}
+
+			if (lli_len == target_len) {
+				/*
+				 * Can send what we wanted
+				 */
+				/*
+				 *  Maintain alignment
+				 */
+				lli_len	= (lli_len/mbus->buswidth) *
+							mbus->buswidth;
+				odd_bytes = 0;
+			} else {
+				/*
+				 * So now we know how many bytes to transfer
+				 * to get to the nearest boundary
+				 * The next lli will go past the boundary
+				 * - however we may be working to a boundary
+				 *   on the slave bus
+				 *   We need to ensure the master stays aligned
+				 */
+				odd_bytes = lli_len % mbus->buswidth;
+				/*
+				 * - and that we are working in multiples
+				 *   of the bus widths
+				 */
+				lli_len -= odd_bytes;
+
+			}
+
+			if (lli_len) {
+				/*
+				 * Check against minimum bus alignment:
+				 * Calculate actual transfer size in relation
+				 * to bus width and get a maximum remainder of
+				 * the smallest bus width - 1
+				 */
+				/* FIXME: use round_down()? */
+				tsize = lli_len / min(mbus->buswidth,
+						      sbus->buswidth);
+				lli_len	= tsize * min(mbus->buswidth,
+						      sbus->buswidth);
+
+				if (target_len != lli_len) {
+					dev_vdbg(&pl08x->adev->dev,
+					"%s can't send what we want. Desired %08x, lli of %08x bytes in txd of %08x\n",
+					__func__, target_len, lli_len, txd->len);
+				}
+
+				cctl = pl08x_cctl_bits(cctl,
+						       txd->srcbus.buswidth,
+						       txd->dstbus.buswidth,
+						       tsize);
+
+				dev_vdbg(&pl08x->adev->dev,
+					"%s fill lli with single lli chunk of size %08x (remainder %08x)\n",
+					__func__, lli_len, remainder);
+				num_llis = pl08x_fill_lli_for_desc(pl08x, txd,
+						num_llis, lli_len, cctl,
+						&remainder);
+				total_bytes += lli_len;
+			}
+
+
+			if (odd_bytes) {
+				/*
+				 * Creep past the boundary,
+				 * maintaining master alignment
+				 */
+				int j;
+				for (j = 0; (j < mbus->buswidth)
+						&& (remainder); j++) {
+					cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
+					dev_vdbg(&pl08x->adev->dev,
+						"%s align with boundardy, single byte (remain %08x)\n",
+						__func__, remainder);
+					num_llis =
+						pl08x_fill_lli_for_desc(pl08x,
+							txd, num_llis, 1,
+							cctl, &remainder);
+					total_bytes++;
+				}
+			}
+		}
+
+		/*
+		 * Send any odd bytes
+		 * (remainder is unsigned, so it cannot have gone negative;
+		 * a miscount is caught by the total_bytes check below)
+		 */
+		while (remainder) {
+			cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
+			dev_vdbg(&pl08x->adev->dev,
+				"%s align with boundardy, single odd byte (remain %d)\n",
+				__func__, remainder);
+			num_llis = pl08x_fill_lli_for_desc(pl08x, txd, num_llis,
+					1, cctl, &remainder);
+			total_bytes++;
+		}
+	}
+	if (total_bytes != txd->len) {
+		dev_err(&pl08x->adev->dev,
+			"%s size of encoded lli:s don't match total txd, transferred 0x%08x from size 0x%08x\n",
+			__func__, total_bytes, txd->len);
+		goto err_free_llis;
+	}
+
+	if (num_llis >= MAX_NUM_TSFR_LLIS) {
+		dev_err(&pl08x->adev->dev,
+			"%s need to increase MAX_NUM_TSFR_LLIS from 0x%08x\n",
+			__func__, (u32) MAX_NUM_TSFR_LLIS);
+		goto err_free_llis;
+	}
+	/*
+	 * Decide whether this is a loop or a terminated transfer
+	 */
+	llis_va = txd->llis_va;
+	llis_bus = (struct lli *) txd->llis_bus;
+
+	if (cd->circular_buffer) {
+		/*
+		 * Loop the circular buffer so that the next element
+		 * points back to the beginning of the LLI.
+		 */
+		llis_va[num_llis - 1].next =
+			(dma_addr_t)((unsigned int)&(llis_bus[0]));
+	} else {
+		/*
+		 * On non-circular buffers, the final LLI terminates
+		 * the LLI.
+		 */
+		llis_va[num_llis - 1].next = 0;
+		/*
+		 * The final LLI element shall also fire an interrupt
+		 */
+		llis_va[num_llis - 1].cctl |= PL080_CONTROL_TC_IRQ_EN;
+	}
+
+	/* Now store the channel register values */
+	txd->csrc = llis_va[0].src;
+	txd->cdst = llis_va[0].dst;
+	if (num_llis > 1)
+		txd->clli = llis_va[0].next;
+	else
+		txd->clli = 0;
+
+	txd->cctl = llis_va[0].cctl;
+	/* ccfg will be set at physical channel allocation time */
+
+#ifdef VERBOSE_DEBUG
+	{
+		int i;
+
+		for (i = 0; i < num_llis; i++) {
+			dev_vdbg(&pl08x->adev->dev,
+				 "lli %d @%p: csrc=%08x, cdst=%08x, cctl=%08x, clli=%08x\n",
+				 i,
+				 &llis_va[i],
+				 llis_va[i].src,
+				 llis_va[i].dst,
+				 llis_va[i].cctl,
+				 llis_va[i].next
+				);
+		}
+	}
+#endif
+
+	return num_llis;
+
+err_free_llis:
+	/* Hand the LLI table back so that a failed prepare does not leak it */
+	dma_pool_free(pl08x->pool, txd->llis_va, txd->llis_bus);
+	txd->llis_va = NULL;
+	pl08x->pool_ctr--;
+	return 0;
+}
+
+/*
+ * Release a transfer descriptor: its LLI table goes back to the DMA
+ * pool, the pool counter is decremented and the descriptor is freed.
+ * A NULL @txd is reported and otherwise ignored.
+ * You should call this with the struct pl08x lock held
+ */
+static void pl08x_free_txd(struct pl08x_driver_data *pl08x,
+			   struct pl08x_txd *txd)
+{
+	if (!txd) {
+		dev_err(&pl08x->adev->dev,
+			"%s no descriptor to free\n",
+			__func__);
+		/* Nothing to release; carrying on would dereference NULL */
+		return;
+	}
+
+	/* Free the LLI */
+	dma_pool_free(pl08x->pool, txd->llis_va,
+		      txd->llis_bus);
+
+	pl08x->pool_ctr--;
+
+	kfree(txd);
+}
+
+/*
+ * Unlink every descriptor that is still queued on the channel's
+ * desc_list and release it through pl08x_free_txd().
+ */
+static void pl08x_free_txd_list(struct pl08x_driver_data *pl08x,
+				struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_txd *cur, *tmp;
+
+	/* Walking an empty list does nothing, so no emptiness test is needed */
+	list_for_each_entry_safe(cur, tmp, &plchan->desc_list, node) {
+		list_del(&cur->node);
+		pl08x_free_txd(pl08x, cur);
+	}
+}
+
+/*
+ * The DMA ENGINE API
+ */
+/* No per-channel resources are set up here, so this always returns 0 */
+static int pl08x_alloc_chan_resources(struct dma_chan *chan)
+{
+	return 0;
+}
+
+/* Empty counterpart of pl08x_alloc_chan_resources(): nothing is released */
+static void pl08x_free_chan_resources(struct dma_chan *chan)
+{
+}
+
+/*
+ * Make sure the virtual channel has a physical channel attached and,
+ * for slave channels on platforms providing get_signal(), a signal.
+ * Returns 0 once a physical channel is attached, -EBUSY if either no
+ * physical channel or no signal could be obtained. @txd is not used.
+ * This should be called with the channel plchan->lock held
+ */
+static int prep_phy_channel(struct pl08x_dma_chan *plchan,
+			    struct pl08x_txd *txd)
+{
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_phy_chan *phy;
+
+	/* Nothing to do when a physical channel is attached already */
+	if (plchan->phychan)
+		return 0;
+
+	phy = pl08x_get_phy_channel(pl08x, plchan);
+	if (!phy) {
+		/* Every physical channel is taken, let the caller cope */
+		dev_dbg(&pl08x->adev->dev, "no physical channel available for xfer on %s\n", plchan->name);
+		return -EBUSY;
+	}
+
+	/*
+	 * A physical channel is all memcpy() needs. Slave transfers may
+	 * have muxed physical signals, so ask the platform whether this
+	 * channel may be used when no signal has been assigned yet.
+	 */
+	if (plchan->slave && phy->signal < 0 && pl08x->pd->get_signal) {
+		int sig = pl08x->pd->get_signal(plchan);
+
+		if (sig < 0) {
+			dev_dbg(&pl08x->adev->dev,
+				"unable to use physical channel %d for transfer on %s due to platform restrictions\n",
+				phy->id, plchan->name);
+			/* Give the physical channel back before bailing out */
+			pl08x_put_phy_channel(pl08x, phy);
+			return -EBUSY;
+		}
+		phy->signal = sig;
+	}
+
+	dev_dbg(&pl08x->adev->dev, "allocated physical channel %d and signal %d for xfer on %s\n",
+		 phy->id,
+		 phy->signal,
+		 plchan->name);
+
+	plchan->phychan = phy;
+
+	return 0;
+}
+
+/*
+ * Assign the next cookie of the channel to the descriptor and drop the
+ * channel lock, which is expected to be held on entry: it was taken by
+ * the prep() function and saved its flags in plchan->lockflags.
+ * Returns the cookie assigned to @tx.
+ */
+static dma_cookie_t pl08x_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(tx->chan);
+	dma_cookie_t cookie;
+
+	atomic_inc(&plchan->last_issued);
+	cookie = atomic_read(&plchan->last_issued);
+	tx->cookie = cookie;
+
+	/* Pairs with the spin_lock_irqsave() done while preparing */
+	spin_unlock_irqrestore(&plchan->lock, plchan->lockflags);
+
+	return cookie;
+}
+
+/* Interrupt descriptors are not provided: this always returns NULL */
+static struct dma_async_tx_descriptor *pl08x_prep_dma_interrupt(
+		struct dma_chan *chan, unsigned long flags)
+{
+	return NULL;
+}
+
+/*
+ * Report the state of the transaction identified by @cookie and fill
+ * in @txstate with the last completed/issued cookies and the bytes
+ * still outstanding on the channel (0 once the cookie has completed).
+ *
+ * Code accessing dma_async_is_complete() in a tight loop
+ * may give problems - could schedule where indicated.
+ * If slaves are relying on interrupts to signal completion this
+ * function must not be called with interrupts disabled
+ */
+static enum dma_status
+pl08x_dma_tx_status(struct dma_chan *chan,
+		    dma_cookie_t cookie,
+		    struct dma_tx_state *txstate)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	dma_cookie_t issued = atomic_read(&plchan->last_issued);
+	dma_cookie_t done = plchan->lc;
+	enum dma_status status;
+	u32 bytesleft;
+
+	status = dma_async_is_complete(cookie, done, issued);
+	if (status == DMA_SUCCESS) {
+		dma_set_tx_state(txstate, done, issued, 0);
+		return status;
+	}
+
+	/*
+	 * schedule(); could be inserted here
+	 */
+
+	/* Cookie still outstanding: take a fresh snapshot of both counters */
+	issued = atomic_read(&plchan->last_issued);
+	done = plchan->lc;
+
+	/* Bytes left in the active transactions and in the queue */
+	bytesleft = pl08x_getbytes_chan(plchan);
+	dma_set_tx_state(txstate, done, issued, bytesleft);
+
+	/* Anything that is not paused, waiting or running, is in progress */
+	return (plchan->state == PL08X_CHAN_PAUSED) ?
+		DMA_PAUSED : DMA_IN_PROGRESS;
+}
+
+/* PrimeCell DMA extension */
+/* Maps a burst size onto the control register bits that select it */
+struct burst_table {
+	int burstwords;	/* burst size this entry stands for */
+	u32 reg;	/* matching source and destination burst size bits */
+};
+
+/*
+ * Burst sizes in descending order, each with the same PL080_BSIZE_*
+ * value placed in both the SB_SIZE and DB_SIZE fields.
+ * dma_set_runtime_config() scans this from the top and takes the first
+ * entry whose burstwords does not exceed the requested maxburst.
+ */
+static const struct burst_table burst_sizes[] = {
+	{
+		.burstwords = 256,
+		.reg = (PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+	{
+		.burstwords = 128,
+		.reg = (PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+	{
+		.burstwords = 64,
+		.reg = (PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+	{
+		.burstwords = 32,
+		.reg = (PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+	{
+		.burstwords = 16,
+		.reg = (PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+	{
+		.burstwords = 8,
+		.reg = (PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+	{
+		.burstwords = 4,
+		.reg = (PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+	{
+		.burstwords = 1,
+		.reg = (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+};
+
+/*
+ * Apply a slave configuration to the channel.
+ *
+ * Derives new CCTL/CCFG defaults from @config (direction, peripheral
+ * address, address width, maximum burst) and stores them in the
+ * channel data of @chan, together with the runtime direction and
+ * address of the virtual channel.
+ *
+ * An unsupported direction or address width is logged and the
+ * function returns early; the runtime direction (and, for a bad
+ * width, the runtime address) has already been stored at that point
+ * while cd->cctl and cd->ccfg are left untouched.
+ */
+static void dma_set_runtime_config(struct dma_chan *chan,
+			       struct dma_slave_config *config)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_channel_data *cd = plchan->cd;
+	enum dma_slave_buswidth addr_width;
+	u32 maxburst;
+	u32 cctl = 0;
+	/* Mask out all except src and dst channel */
+	u32 ccfg = cd->ccfg & 0x000003DEU;
+	int i;
+
+	/* Transfer direction */
+	plchan->runtime_direction = config->direction;
+	if (config->direction == DMA_TO_DEVICE) {
+		plchan->runtime_addr = config->dst_addr;
+		cctl |= PL080_CONTROL_SRC_INCR;
+		ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+		addr_width = config->dst_addr_width;
+		maxburst = config->dst_maxburst;
+	} else if (config->direction == DMA_FROM_DEVICE) {
+		plchan->runtime_addr = config->src_addr;
+		cctl |= PL080_CONTROL_DST_INCR;
+		ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+		addr_width = config->src_addr_width;
+		maxburst = config->src_maxburst;
+	} else {
+		dev_err(&pl08x->adev->dev,
+			"bad runtime_config: alien transfer direction\n");
+		return;
+	}
+
+	/* The same width is programmed for source and destination */
+	switch (addr_width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		cctl |= (PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT) |
+			(PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT);
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		cctl |= (PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT) |
+			(PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT);
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		cctl |= (PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT) |
+			(PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT);
+		break;
+	default:
+		dev_err(&pl08x->adev->dev,
+			"bad runtime_config: alien address width\n");
+		return;
+	}
+
+	/*
+	 * Now decide on a maxburst:
+	 * If this channel will only request single transfers, set
+	 * this down to ONE element.
+	 */
+	if (plchan->cd->single) {
+		cctl |= (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT);
+	} else {
+		/*
+		 * Take the largest burst that does not exceed maxburst.
+		 * The scan stops at the last (smallest) entry, so that a
+		 * maxburst of 0 falls back to it instead of indexing
+		 * past the end of the table.
+		 */
+		for (i = 0; i < ARRAY_SIZE(burst_sizes) - 1; i++)
+			if (burst_sizes[i].burstwords <= maxburst)
+				break;
+		cctl |= burst_sizes[i].reg;
+	}
+
+	/* Access the cell in privileged mode, non-bufferable, non-cacheable */
+	cctl &= ~PL080_CONTROL_PROT_MASK;
+	cctl |= PL080_CONTROL_PROT_SYS;
+
+	/* Modify the default channel data to fit PrimeCell request */
+	cd->cctl = cctl;
+	cd->ccfg = ccfg;
+
+	dev_dbg(&pl08x->adev->dev,
+		"configured channel %s (%s) for %s, data width %d, "
+		"maxburst %d words, LE, CCTL=%08x, CCFG=%08x\n",
+		dma_chan_name(chan), plchan->name,
+		(config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
+		addr_width,
+		maxburst,
+		cctl, ccfg);
+}
+
+/*
+ * Start the first queued descriptor of the channel, unless one is
+ * already active or the channel is still waiting for a physical
+ * channel. The channel lock is taken and released on every path.
+ *
+ * Slave transactions callback to the slave device to allow
+ * synchronization of slave DMA signals with the DMAC enable
+ */
+static void pl08x_issue_pending(struct dma_chan *chan)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	unsigned long flags;
+
+	spin_lock_irqsave(&plchan->lock, flags);
+
+	/* Something is already active */
+	if (plchan->at)
+		goto out_unlock;
+
+	/*
+	 * Didn't get a physical channel so waiting for it ...
+	 * The lock must be dropped on this path as well.
+	 */
+	if (plchan->state == PL08X_CHAN_WAITING)
+		goto out_unlock;
+
+	/* Take the first element in the queue and execute it */
+	if (!list_empty(&plchan->desc_list)) {
+		struct pl08x_txd *next;
+
+		next = list_first_entry(&plchan->desc_list,
+					struct pl08x_txd,
+					node);
+		list_del(&next->node);
+		plchan->at = next;
+		plchan->state = PL08X_CHAN_RUNNING;
+
+		/* Configure the physical channel for the active txd */
+		pl08x_config_phychan_for_txd(plchan);
+		pl08x_set_cregs(pl08x, plchan->phychan);
+		pl08x_enable_phy_chan(pl08x, plchan->phychan);
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&plchan->lock, flags);
+}
+
+/*
+ * Common tail of the prep() functions: build the LLIs for @txd, queue
+ * it on the channel and try to attach a physical channel.
+ *
+ * Returns:
+ *   0       - descriptor prepared; plchan->lock is LEFT HELD (flags in
+ *             plchan->lockflags) and is released by tx_submit()
+ *   -EINVAL - the LLIs could not be built; the lock was never taken and
+ *             @txd has not been queued
+ *   -EBUSY  - slave channel without a usable physical channel; the
+ *             queue has been freed and the lock released again
+ */
+static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan,
+					struct pl08x_txd *txd)
+{
+	int num_llis;
+	struct pl08x_driver_data *pl08x = plchan->host;
+	int ret;
+
+	num_llis = pl08x_fill_llis_for_desc(pl08x, txd);
+
+	/* 0 is the error return of pl08x_fill_llis_for_desc() */
+	if (!num_llis)
+		return -EINVAL;
+
+	spin_lock_irqsave(&plchan->lock, plchan->lockflags);
+
+	/*
+	 * If this device is not using a circular buffer then
+	 * queue this new descriptor for transfer.
+	 * The descriptor for a circular buffer continues
+	 * to be used until the channel is freed.
+	 */
+	/* NOTE(review): a circular-buffer txd is only logged, not queued */
+	if (txd->cd->circular_buffer)
+		dev_err(&pl08x->adev->dev,
+			"%s attempting to queue a circular buffer\n",
+			__func__);
+	else
+		list_add_tail(&txd->node,
+			      &plchan->desc_list);
+
+	/*
+	 * See if we already have a physical channel allocated,
+	 * else this is the time to try to get one.
+	 */
+	ret = prep_phy_channel(plchan, txd);
+	if (ret) {
+		/*
+		 * No physical channel available, we will
+		 * stack up the memcpy channels until there is a channel
+		 * available to handle it whereas slave transfers may
+		 * have been denied due to platform channel muxing restrictions
+		 * and since there is no guarantee that this will ever be
+		 * resolved, and since the signal must be acquired AFTER
+		 * acquiring the physical channel, we will let them be NACK:ed
+		 * with -EBUSY here. The drivers can always retry the prep()
+		 * call if they are eager on doing this using DMA.
+		 */
+		if (plchan->slave) {
+			/*
+			 * This releases every descriptor on desc_list,
+			 * which includes @txd if it was queued above.
+			 */
+			pl08x_free_txd_list(pl08x, plchan);
+			spin_unlock_irqrestore(&plchan->lock, plchan->lockflags);
+			return -EBUSY;
+		}
+		/* Do this memcpy whenever there is a channel ready */
+		plchan->state = PL08X_CHAN_WAITING;
+		plchan->waiting = txd;
+	} else
+		/*
+		 * Else we're all set, paused and ready to roll,
+		 * status will switch to PL08X_CHAN_RUNNING when
+		 * we call issue_pending(). If there is something
+		 * running on the channel already we don't change
+		 * its state.
+		 */
+		if (plchan->state == PL08X_CHAN_IDLE)
+			plchan->state = PL08X_CHAN_PAUSED;
+
+	/*
+	 * Notice that we leave plchan->lock locked on purpose:
+	 * it will be unlocked in the subsequent tx_submit()
+	 * call. This is a consequence of the current API.
+	 */
+
+	return 0;
+}
+
+/*
+ * Initialize a descriptor to be used by memcpy submit
+ *
+ * Allocates a descriptor for copying @len bytes from @src to @dest,
+ * using the platform's memcpy channel data, and prepares the channel
+ * for it. Returns the embedded async descriptor, or NULL when the
+ * descriptor could not be allocated or prepared.
+ */
+static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	int ret;
+
+	txd = kzalloc(sizeof(struct pl08x_txd), GFP_NOWAIT);
+	if (!txd) {
+		dev_err(&pl08x->adev->dev,
+			"%s no memory for descriptor\n", __func__);
+		return NULL;
+	}
+
+	dma_async_tx_descriptor_init(&txd->tx, chan);
+	txd->direction = DMA_NONE;
+	txd->srcbus.addr = src;
+	txd->dstbus.addr = dest;
+
+	/* Set platform data for m2m */
+	txd->cd = &pl08x->pd->memcpy_channel;
+	/* Both to be incremented or the code will break */
+	txd->cd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
+	txd->tx.tx_submit = pl08x_tx_submit;
+	txd->tx.callback = NULL;
+	txd->tx.callback_param = NULL;
+	txd->len = len;
+
+	INIT_LIST_HEAD(&txd->node);
+	ret = pl08x_prep_channel_resources(plchan, txd);
+	if (ret) {
+		/*
+		 * -EINVAL is returned before the descriptor is queued or
+		 * the channel lock taken, so the descriptor still belongs
+		 * to us and has to be released here. An LLI table that is
+		 * still attached goes back to the pool along with it.
+		 * Any other error has already disposed of the queue.
+		 */
+		if (ret == -EINVAL) {
+			if (txd->llis_va)
+				pl08x_free_txd(pl08x, txd);
+			else
+				kfree(txd);
+		}
+		return NULL;
+	}
+	/*
+	 * NB: the channel lock is held at this point so tx_submit()
+	 * must be called in direct succession.
+	 */
+
+	return &txd->tx;
+}
+
+/*
+ * Prepare a slave transfer for a scatterlist of exactly one entry.
+ *
+ * The memory side of the transfer is taken from @sgl, the device side
+ * from the runtime address configured on the channel or, if that is
+ * not set, from the channel's platform data. Returns the embedded
+ * async descriptor, or NULL when the descriptor could not be
+ * allocated or prepared or when @direction is neither DMA_TO_DEVICE
+ * nor DMA_FROM_DEVICE. A scatterlist with sg_len != 1 hits BUG().
+ */
+struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned long flags)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	int ret;
+
+	/*
+	 * Current implementation ASSUMES only one sg
+	 */
+	if (sg_len != 1) {
+		dev_err(&pl08x->adev->dev, "%s prepared too long sglist\n",
+			__func__);
+		BUG();
+	}
+
+	dev_dbg(&pl08x->adev->dev, "%s prepare transaction of %d bytes from %s\n",
+		__func__, sgl->length, plchan->name);
+
+	txd = kzalloc(sizeof(struct pl08x_txd), GFP_NOWAIT);
+	if (!txd) {
+		dev_err(&pl08x->adev->dev, "%s no txd\n", __func__);
+		return NULL;
+	}
+
+	dma_async_tx_descriptor_init(&txd->tx, chan);
+
+	/* A mismatch with the configured direction is only reported */
+	if (direction != plchan->runtime_direction)
+		dev_err(&pl08x->adev->dev, "%s DMA setup does not match "
+			"the direction configured for the PrimeCell\n",
+			__func__);
+
+	/*
+	 * Set up addresses, the PrimeCell configured address
+	 * will take precedence since this may configure the
+	 * channel target address dynamically at runtime.
+	 */
+	txd->direction = direction;
+	if (direction == DMA_TO_DEVICE) {
+		txd->srcbus.addr = sgl->dma_address;
+		if (plchan->runtime_addr)
+			txd->dstbus.addr = plchan->runtime_addr;
+		else
+			txd->dstbus.addr = plchan->cd->addr;
+	} else if (direction == DMA_FROM_DEVICE) {
+		if (plchan->runtime_addr)
+			txd->srcbus.addr = plchan->runtime_addr;
+		else
+			txd->srcbus.addr = plchan->cd->addr;
+		txd->dstbus.addr = sgl->dma_address;
+	} else {
+		dev_err(&pl08x->adev->dev,
+			"%s direction unsupported\n", __func__);
+		/* Nothing but the bare descriptor exists yet */
+		kfree(txd);
+		return NULL;
+	}
+	txd->cd = plchan->cd;
+	txd->tx.tx_submit = pl08x_tx_submit;
+	txd->tx.callback = NULL;
+	txd->tx.callback_param = NULL;
+	txd->len = sgl->length;
+	INIT_LIST_HEAD(&txd->node);
+
+	ret = pl08x_prep_channel_resources(plchan, txd);
+	if (ret) {
+		/*
+		 * -EINVAL is returned before the descriptor is queued or
+		 * the channel lock taken, so the descriptor still belongs
+		 * to us and has to be released here. An LLI table that is
+		 * still attached goes back to the pool along with it.
+		 * On -EBUSY the queue has already been freed.
+		 */
+		if (ret == -EINVAL) {
+			if (txd->llis_va)
+				pl08x_free_txd(pl08x, txd);
+			else
+				kfree(txd);
+		}
+		return NULL;
+	}
+	/*
+	 * NB: the channel lock is held at this point so tx_submit()
+	 * must be called in direct succession.
+	 */
+
+	return &txd->tx;
+}
+
+/*
+ * Channel control entry point.
+ *
+ * DMA_SLAVE_CONFIG is handed to dma_set_runtime_config() (with @arg
+ * cast to a struct dma_slave_config pointer) and always returns 0.
+ * Every other command returns 0 straight away when the channel has
+ * neither a physical channel nor an active descriptor. Otherwise
+ * DMA_TERMINATE_ALL, DMA_PAUSE and DMA_RESUME are carried out under
+ * plchan->lock; unknown commands return -ENXIO.
+ */
+static int pl08x_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			 unsigned long arg)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	unsigned long flags;
+	int ret = 0;
+
+	/* Controls applicable to inactive channels */
+	if (cmd == DMA_SLAVE_CONFIG) {
+		dma_set_runtime_config(chan,
+				       (struct dma_slave_config *)
+				       arg);
+		return 0;
+	}
+
+	/*
+	 * Anything succeeds on channels with no physical allocation and
+	 * no queued transfers.
+	 */
+	spin_lock_irqsave(&plchan->lock, flags);
+	if (!plchan->phychan && !plchan->at) {
+		spin_unlock_irqrestore(&plchan->lock, flags);
+		return 0;
+	}
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		plchan->state = PL08X_CHAN_IDLE;
+
+		if (plchan->phychan) {
+			pl08x_stop_phy_chan(plchan->phychan);
+
+			/*
+			 * Mark physical channel as free and free any slave
+			 * signal
+			 */
+			if ((plchan->phychan->signal >= 0) &&
+			    pl08x->pd->put_signal) {
+				pl08x->pd->put_signal(plchan);
+				plchan->phychan->signal = -1;
+			}
+			pl08x_put_phy_channel(pl08x, plchan->phychan);
+			plchan->phychan = NULL;
+		}
+		/* Stop any pending tasklet */
+		/*
+		 * NOTE(review): tasklet_disable() is called with
+		 * plchan->lock held and no tasklet_enable() follows in
+		 * this function - confirm this is intended.
+		 */
+		tasklet_disable(&plchan->tasklet);
+		/* Dequeue jobs and free LLIs */
+		if (plchan->at) {
+			pl08x_free_txd(pl08x, plchan->at);
+			plchan->at = NULL;
+		}
+		/* Dequeue jobs not yet fired as well */
+		pl08x_free_txd_list(pl08x, plchan);
+		break;
+	case DMA_PAUSE:
+		/*
+		 * NOTE(review): the guard above only ensures that phychan
+		 * or at is set, so phychan may be NULL here - verify that
+		 * pl08x_pause_phy_chan()/pl08x_resume_phy_chan() cope.
+		 */
+		pl08x_pause_phy_chan(plchan->phychan);
+		plchan->state = PL08X_CHAN_PAUSED;
+		break;
+	case DMA_RESUME:
+		pl08x_resume_phy_chan(plchan->phychan);
+		plchan->state = PL08X_CHAN_RUNNING;
+		break;
+	default:
+		/* Unknown command */
+		ret = -ENXIO;
+		break;
+	}
+
+	spin_unlock_irqrestore(&plchan->lock, flags);
+
+	return ret;
+}
+
+/*
+ * Channel filter: @chan_id is the name of the wanted channel as a
+ * string. Returns true when it equals the name of @chan.
+ */
+bool pl08x_filter_id(struct dma_chan *chan, void *chan_id)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	char *wanted = chan_id;
+
+	/* strcmp() yields 0 on an exact match */
+	return strcmp(plchan->name, wanted) == 0;
+}
+
+/*
+ * Just check that the device is there and active
+ * TODO: turn this bit on/off depending on the number of
+ * physical channels actually used, if it is zero... well
+ * shut it off. That will save some power. Cut the clock
+ * at the same time.
+ */
+static void pl08x_ensure_on(struct pl08x_driver_data *pl08x)
+{
+	u32 cfg = readl(pl08x->base + PL080_CONFIG);
+
+	/* Dropping the M1/M2 big-endian bits leaves little-endian mode */
+	cfg &= ~(PL080_CONFIG_M2_BE | PL080_CONFIG_M1_BE | PL080_CONFIG_ENABLE);
+	/* ... and the controller itself gets switched on */
+	cfg |= PL080_CONFIG_ENABLE;
+
+	writel(cfg, pl08x->base + PL080_CONFIG);
+}
+
+/*
+ * Per-channel tasklet, scheduled from the interrupt handler.
+ * @data is the virtual channel (struct pl08x_dma_chan *).
+ *
+ * Completes the active descriptor (records its cookie as last
+ * completed, runs the client callback and frees it unless the channel
+ * uses a circular buffer), then either starts the next queued
+ * descriptor or releases the physical channel and hands it to a
+ * memcpy channel that has been waiting for one.
+ */
+static void pl08x_tasklet(unsigned long data)
+{
+	struct pl08x_dma_chan *plchan = (struct pl08x_dma_chan *) data;
+	struct pl08x_phy_chan *phychan;
+	struct pl08x_driver_data *pl08x;
+
+	/* Validate the channel before touching any of its members */
+	BUG_ON(!plchan);
+
+	phychan = plchan->phychan;
+	pl08x = plchan->host;
+
+	spin_lock(&plchan->lock);
+
+	if (plchan->at) {
+		dma_async_tx_callback callback =
+			plchan->at->tx.callback;
+		void *callback_param =
+			plchan->at->tx.callback_param;
+
+		/*
+		 * Update last completed
+		 */
+		plchan->lc =
+			(plchan->at->tx.cookie);
+
+		/*
+		 * Callback to signal completion
+		 */
+		if (callback)
+			callback(callback_param);
+
+		/*
+		 * Device callbacks should NOT clear
+		 * the current transaction on the channel
+		 * Linus: sometimes they should?
+		 */
+		if (!plchan->at)
+			BUG();
+
+		/*
+		 * Free the descriptor if it's not for a device
+		 * using a circular buffer
+		 */
+		if (!plchan->at->cd->circular_buffer) {
+			pl08x_free_txd(pl08x, plchan->at);
+			plchan->at = NULL;
+		}
+		/*
+		 * else descriptor for circular
+		 * buffers only freed when
+		 * client has disabled dma
+		 */
+	}
+	/*
+	 * If a new descriptor is queued, set it up
+	 * plchan->at is NULL here
+	 */
+	if (!list_empty(&plchan->desc_list)) {
+		struct pl08x_txd *next;
+
+		next = list_first_entry(&plchan->desc_list,
+					struct pl08x_txd,
+					node);
+		list_del(&next->node);
+		plchan->at = next;
+		/* Configure the physical channel for the next txd */
+		pl08x_config_phychan_for_txd(plchan);
+		pl08x_set_cregs(pl08x, plchan->phychan);
+		pl08x_enable_phy_chan(pl08x, plchan->phychan);
+	} else {
+		struct pl08x_dma_chan *waiting = NULL;
+
+		/*
+		 * No more jobs, so free up the physical channel
+		 * Free any allocated signal on slave transfers too
+		 */
+		if ((phychan->signal >= 0) && pl08x->pd->put_signal) {
+			pl08x->pd->put_signal(plchan);
+			phychan->signal = -1;
+		}
+		pl08x_put_phy_channel(pl08x, phychan);
+		plchan->phychan = NULL;
+		plchan->state = PL08X_CHAN_IDLE;
+
+		/*
+		 * And NOW before anyone else can grab that free:d
+		 * up physical channel, see if there is some memcpy
+		 * pending that seriously needs to start because of
+		 * being stacked up while we were choking the
+		 * physical channels with data.
+		 */
+		list_for_each_entry(waiting, &pl08x->memcpy.channels,
+				    chan.device_node) {
+			if (waiting->state == PL08X_CHAN_WAITING &&
+			    waiting->waiting != NULL) {
+				int ret;
+
+				/* This should REALLY not fail now */
+				ret = prep_phy_channel(waiting,
+						       waiting->waiting);
+				BUG_ON(ret);
+				waiting->state = PL08X_CHAN_RUNNING;
+				waiting->waiting = NULL;
+				pl08x_issue_pending(&waiting->chan);
+				break;
+			}
+		}
+	}
+
+	spin_unlock(&plchan->lock);
+}
+
+/*
+ * Interrupt handler of the DMA controller; @dev is the driver data.
+ *
+ * Error interrupts are logged and cleared wholesale. For every channel
+ * flagged in the interrupt status register the tasklet of the virtual
+ * channel currently served by that physical channel is scheduled and
+ * the terminal count interrupt is cleared.
+ * Returns IRQ_HANDLED if at least one channel was flagged, else
+ * IRQ_NONE.
+ */
+static irqreturn_t pl08x_irq(int irq, void *dev)
+{
+	struct pl08x_driver_data *pl08x = dev;
+	u32 mask = 0;
+	u32 val;
+	int i;
+
+	val = readl(pl08x->base + PL080_ERR_STATUS);
+	if (val) {
+		/*
+		 * An error interrupt (on one or more channels)
+		 */
+		dev_err(&pl08x->adev->dev,
+			"%s error interrupt, register value 0x%08x\n",
+				__func__, val);
+		/*
+		 * Simply clear ALL PL08X error interrupts,
+		 * regardless of channel and cause
+		 * FIXME: should be 0x00000003 on PL081 really.
+		 */
+		writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
+	}
+	val = readl(pl08x->base + PL080_INT_STATUS);
+	for (i = 0; i < pl08x->vd->channels; i++) {
+		struct pl08x_phy_chan *phychan;
+		struct pl08x_dma_chan *plchan;
+
+		if (!((1 << i) & val))
+			continue;
+
+		/* Locate physical channel */
+		phychan = &pl08x->phy_chans[i];
+		plchan = phychan->serving;
+
+		/* Clear the interrupt below even if nobody owns the channel */
+		mask |= (1 << i);
+
+		if (!plchan) {
+			/* No virtual channel attached, so no tasklet to run */
+			dev_err(&pl08x->adev->dev,
+				"%s TC interrupt on unused channel %d\n",
+				__func__, i);
+			continue;
+		}
+
+		/* Schedule tasklet on this channel */
+		tasklet_schedule(&plchan->tasklet);
+	}
+	/*
+	 * Clear only the terminal interrupts on channels we processed
+	 */
+	writel(mask, pl08x->base + PL080_TC_CLEAR);
+
+	return mask ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/*
+ * Initialise the DMAC memcpy/slave channels.
+ * Make a local wrapper to hold required data
+ *
+ * Creates @channels virtual channels and appends them to the channel
+ * list of @dmadev. Slave channels take name and channel data from the
+ * platform's slave_channels[] array, memcpy channels share the
+ * platform's memcpy channel data and are named "memcpy<n>".
+ * Returns the number of channels created, or -ENOMEM; channels added
+ * before a failure stay on the list of @dmadev.
+ */
+static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
+					   struct dma_device *dmadev,
+					   unsigned int channels,
+					   bool slave)
+{
+	struct pl08x_dma_chan *vchan;
+	int idx;
+
+	INIT_LIST_HEAD(&dmadev->channels);
+
+	/*
+	 * For memcpy as many virtual channels as there are physical
+	 * ones get registered; not all of them will always be usable
+	 * and the code has to cope with that situation.
+	 */
+	for (idx = 0; idx < channels; idx++) {
+		vchan = kzalloc(sizeof(struct pl08x_dma_chan), GFP_KERNEL);
+		if (!vchan) {
+			dev_err(&pl08x->adev->dev,
+				"%s no memory for channel\n", __func__);
+			return -ENOMEM;
+		}
+
+		vchan->host = pl08x;
+		vchan->state = PL08X_CHAN_IDLE;
+
+		if (!slave) {
+			vchan->cd = &pl08x->pd->memcpy_channel;
+			vchan->name = kasprintf(GFP_KERNEL, "memcpy%d", idx);
+			if (!vchan->name) {
+				kfree(vchan);
+				return -ENOMEM;
+			}
+		} else {
+			vchan->slave = true;
+			vchan->name = pl08x->pd->slave_channels[idx].bus_id;
+			vchan->cd = &pl08x->pd->slave_channels[idx];
+		}
+		dev_info(&pl08x->adev->dev,
+			 "initialize virtual channel \"%s\"\n",
+			 vchan->name);
+
+		vchan->chan.device = dmadev;
+
+		/* Both cookie counters start out at zero */
+		atomic_set(&vchan->last_issued, 0);
+		vchan->lc = atomic_read(&vchan->last_issued);
+
+		spin_lock_init(&vchan->lock);
+		INIT_LIST_HEAD(&vchan->desc_list);
+		tasklet_init(&vchan->tasklet, pl08x_tasklet,
+			     (unsigned long) vchan);
+
+		list_add_tail(&vchan->chan.device_node, &dmadev->channels);
+	}
+
+	dev_info(&pl08x->adev->dev, "initialized %d virtual %s channels\n",
+		 idx, slave ? "slave" : "memcpy");
+	return idx;
+}
+
+/*
+ * Unlink all virtual channels from the channel list of @dmadev and
+ * free the channel structures.
+ */
+static void pl08x_free_virtual_channels(struct dma_device *dmadev)
+{
+	struct pl08x_dma_chan *vchan, *tmp;
+
+	list_for_each_entry_safe(vchan, tmp, &dmadev->channels,
+				 chan.device_node) {
+		list_del(&vchan->chan.device_node);
+		kfree(vchan);
+	}
+}
+
+#ifdef CONFIG_DEBUG_FS
+/* Translate a virtual channel state into a printable string */
+static const char *pl08x_state_str(enum pl08x_dma_chan_state state)
+{
+	/* Used for any value that is not one of the known states */
+	const char *label = "UNKNOWN STATE";
+
+	switch (state) {
+	case PL08X_CHAN_IDLE:
+		label = "idle";
+		break;
+	case PL08X_CHAN_RUNNING:
+		label = "running";
+		break;
+	case PL08X_CHAN_PAUSED:
+		label = "paused";
+		break;
+	case PL08X_CHAN_WAITING:
+		label = "waiting";
+		break;
+	default:
+		break;
+	}
+
+	return label;
+}
+
+/*
+ * seq_file show callback: prints which virtual channel each physical
+ * channel is serving, followed by the state of every virtual memcpy and
+ * slave channel. Always returns 0.
+ */
+static int pl08x_debugfs_show(struct seq_file *s, void *data)
+{
+	struct pl08x_driver_data *pl08x = s->private;
+	struct pl08x_dma_chan *chan;
+	struct pl08x_phy_chan *ch;
+	unsigned long flags;
+	int i;
+
+	seq_printf(s, "PL08x physical channels:\n");
+	seq_printf(s, "CHANNEL:\tUSER:\n");
+	seq_printf(s, "--------\t-----\n");
+	for (i = 0; i < pl08x->vd->channels; i++) {
+		struct pl08x_dma_chan *virt_chan;
+
+		ch = &pl08x->phy_chans[i];
+
+		/* hold the channel lock while reading and printing ->serving */
+		spin_lock_irqsave(&ch->lock, flags);
+		virt_chan = ch->serving;
+
+		seq_printf(s, "%d\t\t%s\n",
+			   ch->id, virt_chan ? virt_chan->name : "(none)");
+
+		spin_unlock_irqrestore(&ch->lock, flags);
+	}
+
+	seq_printf(s, "\nPL08x virtual memcpy channels:\n");
+	seq_printf(s, "CHANNEL:\tSTATE:\n");
+	seq_printf(s, "--------\t------\n");
+	list_for_each_entry(chan, &pl08x->memcpy.channels, chan.device_node) {
+		/* "\%" is not a valid C escape; a plain "%s" is what is meant */
+		seq_printf(s, "%s\t\t%s\n", chan->name,
+			   pl08x_state_str(chan->state));
+	}
+
+	seq_printf(s, "\nPL08x virtual slave channels:\n");
+	seq_printf(s, "CHANNEL:\tSTATE:\n");
+	seq_printf(s, "--------\t------\n");
+	list_for_each_entry(chan, &pl08x->slave.channels, chan.device_node) {
+		seq_printf(s, "%s\t\t%s\n", chan->name,
+			   pl08x_state_str(chan->state));
+	}
+
+	return 0;
+}
+
+/* debugfs open: bind the show routine to the data stored in the inode */
+static int pl08x_debugfs_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, pl08x_debugfs_show, priv);
+}
+
+static const struct file_operations pl08x_debugfs_operations = {
+	.open		= pl08x_debugfs_open,	/* wires up pl08x_debugfs_show via single_open() */
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,	/* counterpart of single_open() */
+};
+
+static void init_pl08x_debugfs(struct pl08x_driver_data *pl08x)
+{
+	/*
+	 * Expose a simple read-only debugfs file, named after the device,
+	 * to view the physical channel users and virtual channel states.
+	 * The return value is deliberately ignored.
+	 */
+	(void) debugfs_create_file(dev_name(&pl08x->adev->dev), S_IFREG | S_IRUGO,
+				   NULL, pl08x,
+				   &pl08x_debugfs_operations);
+}
+
+#else
+static inline void init_pl08x_debugfs(struct pl08x_driver_data *pl08x)
+{	/* CONFIG_DEBUG_FS is off: nothing to expose */
+}
+#endif
+
+/*
+ * Probe one PL08x instance: claim its regions, set up the memcpy and slave
+ * dmaengine devices, map the registers, hook the interrupt, create the
+ * physical and virtual channels and register both engines.
+ *
+ * Returns 0 on success or a negative errno. Every failure path sets an
+ * error code before unwinding, so the bus never sees "success" for a
+ * device whose state has already been freed.
+ */
+static int pl08x_probe(struct amba_device *adev, struct amba_id *id)
+{
+	struct pl08x_driver_data *pl08x;
+	struct vendor_data *vd = id->data;
+	int ret = 0;
+	int i;
+
+	ret = amba_request_regions(adev, NULL);
+	if (ret)
+		return ret;
+
+	/* Create the driver state holder */
+	pl08x = kzalloc(sizeof(struct pl08x_driver_data), GFP_KERNEL);
+	if (!pl08x) {
+		ret = -ENOMEM;
+		goto out_no_pl08x;
+	}
+
+	/* Initialize memcpy engine */
+	dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask);
+	pl08x->memcpy.dev = &adev->dev;
+	pl08x->memcpy.device_alloc_chan_resources = pl08x_alloc_chan_resources;
+	pl08x->memcpy.device_free_chan_resources = pl08x_free_chan_resources;
+	pl08x->memcpy.device_prep_dma_memcpy = pl08x_prep_dma_memcpy;
+	pl08x->memcpy.device_prep_dma_interrupt = pl08x_prep_dma_interrupt;
+	pl08x->memcpy.device_tx_status = pl08x_dma_tx_status;
+	pl08x->memcpy.device_issue_pending = pl08x_issue_pending;
+	pl08x->memcpy.device_control = pl08x_control;
+
+	/* Initialize slave engine */
+	dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask);
+	pl08x->slave.dev = &adev->dev;
+	pl08x->slave.device_alloc_chan_resources = pl08x_alloc_chan_resources;
+	pl08x->slave.device_free_chan_resources = pl08x_free_chan_resources;
+	pl08x->slave.device_prep_dma_interrupt = pl08x_prep_dma_interrupt;
+	pl08x->slave.device_tx_status = pl08x_dma_tx_status;
+	pl08x->slave.device_issue_pending = pl08x_issue_pending;
+	pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg;
+	pl08x->slave.device_control = pl08x_control;
+
+	/* Get the platform data */
+	pl08x->pd = dev_get_platdata(&adev->dev);
+	if (!pl08x->pd) {
+		dev_err(&adev->dev, "no platform data supplied\n");
+		ret = -EINVAL;
+		goto out_no_platdata;
+	}
+
+	/* Assign useful pointers to the driver state */
+	pl08x->adev = adev;
+	pl08x->vd = vd;
+
+	/* A DMA memory pool for LLIs, align on 1-byte boundary */
+	pl08x->pool = dma_pool_create(DRIVER_NAME, &pl08x->adev->dev,
+			PL08X_LLI_TSFR_SIZE, PL08X_ALIGN, 0);
+	if (!pl08x->pool) {
+		ret = -ENOMEM;
+		goto out_no_lli_pool;
+	}
+
+	spin_lock_init(&pl08x->lock);
+
+	pl08x->base = ioremap(adev->res.start, resource_size(&adev->res));
+	if (!pl08x->base) {
+		ret = -ENOMEM;
+		goto out_no_ioremap;
+	}
+
+	/* Turn on the PL08x */
+	pl08x_ensure_on(pl08x);
+
+	/*
+	 * Write the error and terminal count clear registers, then
+	 * attach the interrupt handler
+	 */
+	writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
+	writel(0x000000FF, pl08x->base + PL080_TC_CLEAR);
+
+	ret = request_irq(adev->irq[0], pl08x_irq, IRQF_DISABLED,
+			  vd->name, pl08x);
+	if (ret) {
+		dev_err(&adev->dev, "%s failed to request interrupt %d\n",
+			__func__, adev->irq[0]);
+		goto out_no_irq;
+	}
+
+	/* Initialize physical channels */
+	pl08x->phy_chans = kmalloc((vd->channels * sizeof(struct pl08x_phy_chan)),
+			GFP_KERNEL);
+	if (!pl08x->phy_chans) {
+		dev_err(&adev->dev, "%s failed to allocate "
+			"physical channel holders\n",
+			__func__);
+		/* ret still holds 0 from request_irq() at this point */
+		ret = -ENOMEM;
+		goto out_no_phychans;
+	}
+
+	for (i = 0; i < vd->channels; i++) {
+		struct pl08x_phy_chan *ch = &pl08x->phy_chans[i];
+
+		ch->id = i;
+		ch->base = pl08x->base + PL080_Cx_BASE(i);
+		spin_lock_init(&ch->lock);
+		ch->serving = NULL;
+		ch->signal = -1;
+		dev_info(&adev->dev,
+			 "physical channel %d is %s\n", i,
+			 pl08x_phy_channel_busy(ch) ? "BUSY" : "FREE");
+	}
+
+	/* Register as many memcpy channels as there are physical channels */
+	ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->memcpy,
+					      pl08x->vd->channels, false);
+	if (ret <= 0) {
+		dev_warn(&pl08x->adev->dev,
+			 "%s failed to enumerate memcpy channels - %d\n",
+			 __func__, ret);
+		/* zero channels is a failure too: do not report success */
+		if (!ret)
+			ret = -ENODEV;
+		goto out_no_memcpy;
+	}
+	pl08x->memcpy.chancnt = ret;
+
+	/* Register slave channels */
+	ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave,
+					      pl08x->pd->num_slave_channels,
+					      true);
+	if (ret <= 0) {
+		dev_warn(&pl08x->adev->dev,
+			"%s failed to enumerate slave channels - %d\n",
+				__func__, ret);
+		if (!ret)
+			ret = -ENODEV;
+		goto out_no_slave;
+	}
+	pl08x->slave.chancnt = ret;
+
+	ret = dma_async_device_register(&pl08x->memcpy);
+	if (ret) {
+		dev_warn(&pl08x->adev->dev,
+			"%s failed to register memcpy as an async device - %d\n",
+			__func__, ret);
+		goto out_no_memcpy_reg;
+	}
+
+	ret = dma_async_device_register(&pl08x->slave);
+	if (ret) {
+		dev_warn(&pl08x->adev->dev,
+			"%s failed to register slave as an async device - %d\n",
+			__func__, ret);
+		goto out_no_slave_reg;
+	}
+
+	amba_set_drvdata(adev, pl08x);
+	init_pl08x_debugfs(pl08x);
+	dev_info(&pl08x->adev->dev, "ARM(R) %s DMA block initialized @%08x\n",
+		vd->name, adev->res.start);
+	return 0;
+
+	/*
+	 * The channel list heads are initialised at the very start of
+	 * pl08x_dma_init_virtual_channels(), so a list whose setup failed
+	 * half way can be walked safely and must be freed here as well.
+	 */
+out_no_slave_reg:
+	dma_async_device_unregister(&pl08x->memcpy);
+out_no_memcpy_reg:
+out_no_slave:
+	pl08x_free_virtual_channels(&pl08x->slave);
+out_no_memcpy:
+	pl08x_free_virtual_channels(&pl08x->memcpy);
+	kfree(pl08x->phy_chans);
+out_no_phychans:
+	free_irq(adev->irq[0], pl08x);
+out_no_irq:
+	iounmap(pl08x->base);
+out_no_ioremap:
+	dma_pool_destroy(pl08x->pool);
+out_no_lli_pool:
+out_no_platdata:
+	kfree(pl08x);
+out_no_pl08x:
+	amba_release_regions(adev);
+	return ret;
+}
+
+/* The PL080 has 8 physical channels, the PL081 has just 2 */
+static struct vendor_data vendor_pl080 = {
+	.name = "PL080",
+	.channels = 8,
+	.dualmaster = true,
+};
+
+static struct vendor_data vendor_pl081 = {
+	.name = "PL081",	/* also used as the interrupt name in probe */
+	.channels = 2,		/* number of physical channels set up in probe */
+	.dualmaster = false,
+};
+
+static struct amba_id pl08x_ids[] = {
+	/* PL080 */
+	{
+		.id	= 0x00041080,
+		.mask	= 0x000fffff,
+		.data	= &vendor_pl080,
+	},
+	/* PL081 */
+	{
+		.id	= 0x00041081,
+		.mask	= 0x000fffff,
+		.data	= &vendor_pl081,
+	},
+	/* Nomadik 8815 PL080 variant, shares the PL080 vendor data */
+	{
+		.id	= 0x00280880,
+		.mask	= 0x00ffffff,
+		.data	= &vendor_pl080,
+	},
+	{ 0, 0 },	/* all-zero entry ends the table */
+};
+
+static struct amba_driver pl08x_amba_driver = {
+	.drv.name	= DRIVER_NAME,
+	.id_table	= pl08x_ids,	/* PL080, PL081 and the Nomadik PL080 variant */
+	.probe		= pl08x_probe,	/* no .remove callback is provided */
+};
+
+/*
+ * Register the PL08x AMBA driver. Returns the result of
+ * amba_driver_register() and logs a warning when it is non-zero.
+ */
+static int __init pl08x_init(void)
+{
+	int retval;
+	retval = amba_driver_register(&pl08x_amba_driver);
+	if (retval)
+		/* separate the driver name from the message text */
+		printk(KERN_WARNING DRIVER_NAME
+		       ": failed to register as an amba device (%d)\n",
+		       retval);
+	return retval;
+}
+subsys_initcall(pl08x_init);
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 9d31d5e..235153c 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -690,8 +690,12 @@
 		!device->device_prep_dma_memset);
 	BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
 		!device->device_prep_dma_interrupt);
+	BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) &&
+		!device->device_prep_dma_sg);
 	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
 		!device->device_prep_slave_sg);
+	BUG_ON(dma_has_cap(DMA_CYCLIC, device->cap_mask) &&
+		!device->device_prep_dma_cyclic);
 	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
 		!device->device_control);
 
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index cea08be..286c3ac 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -35,9 +35,10 @@
 #include <linux/dmapool.h>
 #include <linux/of_platform.h>
 
-#include <asm/fsldma.h>
 #include "fsldma.h"
 
+static const char msg_ld_oom[] = "No free memory for link descriptor\n";
+
 static void dma_init(struct fsldma_chan *chan)
 {
 	/* Reset the channel */
@@ -499,7 +500,7 @@
 
 	new = fsl_dma_alloc_descriptor(chan);
 	if (!new) {
-		dev_err(chan->dev, "No free memory for link descriptor\n");
+		dev_err(chan->dev, msg_ld_oom);
 		return NULL;
 	}
 
@@ -536,8 +537,7 @@
 		/* Allocate the link descriptor from DMA pool */
 		new = fsl_dma_alloc_descriptor(chan);
 		if (!new) {
-			dev_err(chan->dev,
-					"No free memory for link descriptor\n");
+			dev_err(chan->dev, msg_ld_oom);
 			goto fail;
 		}
 #ifdef FSL_DMA_LD_DEBUG
@@ -583,6 +583,125 @@
 	return NULL;
 }
 
+/**
+ * fsl_dma_prep_sg - prepare descriptors for a scatterlist to scatterlist copy
+ * @dchan: DMA channel
+ * @dst_sg: destination scatterlist
+ * @dst_nents: number of entries of @dst_sg to use
+ * @src_sg: source scatterlist
+ * @src_nents: number of entries of @src_sg to use
+ * @flags: flags stored in the last descriptor of the chain
+ *
+ * Walks both scatterlists in lock step and emits one link descriptor per
+ * chunk, each chunk being limited by the remaining source entry, the
+ * remaining destination entry and FSL_DMA_BCR_MAX_CNT. The walk stops as
+ * soon as either list is exhausted.
+ *
+ * Return: the first descriptor of the chain, or NULL on invalid arguments,
+ * allocation failure, or when there was nothing to copy.
+ */
+static struct dma_async_tx_descriptor *fsl_dma_prep_sg(struct dma_chan *dchan,
+	struct scatterlist *dst_sg, unsigned int dst_nents,
+	struct scatterlist *src_sg, unsigned int src_nents,
+	unsigned long flags)
+{
+	struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+	size_t dst_avail, src_avail;
+	dma_addr_t dst, src;
+	size_t len;
+
+	/* basic sanity checks */
+	if (dst_nents == 0 || src_nents == 0)
+		return NULL;
+
+	if (dst_sg == NULL || src_sg == NULL)
+		return NULL;
+
+	/*
+	 * TODO: should we check that both scatterlists have the same
+	 * TODO: number of bytes in total? Is that really an error?
+	 */
+
+	/* get prepared for the loop */
+	dst_avail = sg_dma_len(dst_sg);
+	src_avail = sg_dma_len(src_sg);
+
+	/* run until we are out of scatterlist entries */
+	while (true) {
+
+		/* create the largest transaction possible */
+		len = min_t(size_t, src_avail, dst_avail);
+		len = min_t(size_t, len, FSL_DMA_BCR_MAX_CNT);
+		if (len == 0)
+			goto fetch;
+
+		dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - dst_avail;
+		src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - src_avail;
+
+		/* allocate and populate the descriptor */
+		new = fsl_dma_alloc_descriptor(chan);
+		if (!new) {
+			dev_err(chan->dev, msg_ld_oom);
+			goto fail;
+		}
+#ifdef FSL_DMA_LD_DEBUG
+		dev_dbg(chan->dev, "new link desc alloc %p\n", new);
+#endif
+
+		set_desc_cnt(chan, &new->hw, len);
+		set_desc_src(chan, &new->hw, src);
+		set_desc_dst(chan, &new->hw, dst);
+
+		if (!first)
+			first = new;
+		else
+			set_desc_next(chan, &prev->hw, new->async_tx.phys);
+
+		new->async_tx.cookie = 0;
+		async_tx_ack(&new->async_tx);
+		prev = new;
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->tx_list);
+
+		/* update metadata */
+		dst_avail -= len;
+		src_avail -= len;
+
+fetch:
+		/* fetch the next dst scatterlist entry */
+		if (dst_avail == 0) {
+
+			/*
+			 * The current entry is used up. @dst_nents counts it,
+			 * so stop once it was the last one the caller gave us
+			 * rather than stepping one entry too far.
+			 */
+			if (--dst_nents == 0)
+				break;
+
+			/* fetch the next entry: if there are no more: done */
+			dst_sg = sg_next(dst_sg);
+			if (dst_sg == NULL)
+				break;
+
+			dst_avail = sg_dma_len(dst_sg);
+		}
+
+		/* fetch the next src scatterlist entry */
+		if (src_avail == 0) {
+
+			/* same accounting as for the destination list */
+			if (--src_nents == 0)
+				break;
+
+			/* fetch the next entry: if there are no more: done */
+			src_sg = sg_next(src_sg);
+			if (src_sg == NULL)
+				break;
+
+			src_avail = sg_dma_len(src_sg);
+		}
+	}
+
+	/* every entry had zero length: no descriptor was built */
+	if (!first)
+		return NULL;
+
+	new->async_tx.flags = flags; /* client is in control of this ack */
+	new->async_tx.cookie = -EBUSY;
+
+	/* Set End-of-link to the last link descriptor of new list */
+	set_ld_eol(chan, new);
+
+	return &first->async_tx;
+
+fail:
+	/* nothing was queued on a list yet, so there is nothing to free */
+	if (!first)
+		return NULL;
+
+	fsldma_free_desc_list_reverse(chan, &first->tx_list);
+	return NULL;
+}
+
 /**
  * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
  * @chan: DMA channel
@@ -599,207 +718,70 @@
 	struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
 	enum dma_data_direction direction, unsigned long flags)
 {
-	struct fsldma_chan *chan;
-	struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
-	struct fsl_dma_slave *slave;
-	size_t copy;
-
-	int i;
-	struct scatterlist *sg;
-	size_t sg_used;
-	size_t hw_used;
-	struct fsl_dma_hw_addr *hw;
-	dma_addr_t dma_dst, dma_src;
-
-	if (!dchan)
-		return NULL;
-
-	if (!dchan->private)
-		return NULL;
-
-	chan = to_fsl_chan(dchan);
-	slave = dchan->private;
-
-	if (list_empty(&slave->addresses))
-		return NULL;
-
-	hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry);
-	hw_used = 0;
-
 	/*
-	 * Build the hardware transaction to copy from the scatterlist to
-	 * the hardware, or from the hardware to the scatterlist
+	 * This operation is not supported on the Freescale DMA controller
 	 *
-	 * If you are copying from the hardware to the scatterlist and it
-	 * takes two hardware entries to fill an entire page, then both
-	 * hardware entries will be coalesced into the same page
-	 *
-	 * If you are copying from the scatterlist to the hardware and a
-	 * single page can fill two hardware entries, then the data will
-	 * be read out of the page into the first hardware entry, and so on
+	 * However, we need to provide the function pointer to allow the
+	 * device_control() method to work.
 	 */
-	for_each_sg(sgl, sg, sg_len, i) {
-		sg_used = 0;
-
-		/* Loop until the entire scatterlist entry is used */
-		while (sg_used < sg_dma_len(sg)) {
-
-			/*
-			 * If we've used up the current hardware address/length
-			 * pair, we need to load a new one
-			 *
-			 * This is done in a while loop so that descriptors with
-			 * length == 0 will be skipped
-			 */
-			while (hw_used >= hw->length) {
-
-				/*
-				 * If the current hardware entry is the last
-				 * entry in the list, we're finished
-				 */
-				if (list_is_last(&hw->entry, &slave->addresses))
-					goto finished;
-
-				/* Get the next hardware address/length pair */
-				hw = list_entry(hw->entry.next,
-						struct fsl_dma_hw_addr, entry);
-				hw_used = 0;
-			}
-
-			/* Allocate the link descriptor from DMA pool */
-			new = fsl_dma_alloc_descriptor(chan);
-			if (!new) {
-				dev_err(chan->dev, "No free memory for "
-						       "link descriptor\n");
-				goto fail;
-			}
-#ifdef FSL_DMA_LD_DEBUG
-			dev_dbg(chan->dev, "new link desc alloc %p\n", new);
-#endif
-
-			/*
-			 * Calculate the maximum number of bytes to transfer,
-			 * making sure it is less than the DMA controller limit
-			 */
-			copy = min_t(size_t, sg_dma_len(sg) - sg_used,
-					     hw->length - hw_used);
-			copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT);
-
-			/*
-			 * DMA_FROM_DEVICE
-			 * from the hardware to the scatterlist
-			 *
-			 * DMA_TO_DEVICE
-			 * from the scatterlist to the hardware
-			 */
-			if (direction == DMA_FROM_DEVICE) {
-				dma_src = hw->address + hw_used;
-				dma_dst = sg_dma_address(sg) + sg_used;
-			} else {
-				dma_src = sg_dma_address(sg) + sg_used;
-				dma_dst = hw->address + hw_used;
-			}
-
-			/* Fill in the descriptor */
-			set_desc_cnt(chan, &new->hw, copy);
-			set_desc_src(chan, &new->hw, dma_src);
-			set_desc_dst(chan, &new->hw, dma_dst);
-
-			/*
-			 * If this is not the first descriptor, chain the
-			 * current descriptor after the previous descriptor
-			 */
-			if (!first) {
-				first = new;
-			} else {
-				set_desc_next(chan, &prev->hw,
-					      new->async_tx.phys);
-			}
-
-			new->async_tx.cookie = 0;
-			async_tx_ack(&new->async_tx);
-
-			prev = new;
-			sg_used += copy;
-			hw_used += copy;
-
-			/* Insert the link descriptor into the LD ring */
-			list_add_tail(&new->node, &first->tx_list);
-		}
-	}
-
-finished:
-
-	/* All of the hardware address/length pairs had length == 0 */
-	if (!first || !new)
-		return NULL;
-
-	new->async_tx.flags = flags;
-	new->async_tx.cookie = -EBUSY;
-
-	/* Set End-of-link to the last link descriptor of new list */
-	set_ld_eol(chan, new);
-
-	/* Enable extra controller features */
-	if (chan->set_src_loop_size)
-		chan->set_src_loop_size(chan, slave->src_loop_size);
-
-	if (chan->set_dst_loop_size)
-		chan->set_dst_loop_size(chan, slave->dst_loop_size);
-
-	if (chan->toggle_ext_start)
-		chan->toggle_ext_start(chan, slave->external_start);
-
-	if (chan->toggle_ext_pause)
-		chan->toggle_ext_pause(chan, slave->external_pause);
-
-	if (chan->set_request_count)
-		chan->set_request_count(chan, slave->request_count);
-
-	return &first->async_tx;
-
-fail:
-	/* If first was not set, then we failed to allocate the very first
-	 * descriptor, and we're done */
-	if (!first)
-		return NULL;
-
-	/*
-	 * First is set, so all of the descriptors we allocated have been added
-	 * to first->tx_list, INCLUDING "first" itself. Therefore we
-	 * must traverse the list backwards freeing each descriptor in turn
-	 *
-	 * We're re-using variables for the loop, oh well
-	 */
-	fsldma_free_desc_list_reverse(chan, &first->tx_list);
 	return NULL;
 }
 
 static int fsl_dma_device_control(struct dma_chan *dchan,
 				  enum dma_ctrl_cmd cmd, unsigned long arg)
 {
+	struct dma_slave_config *config;
 	struct fsldma_chan *chan;
 	unsigned long flags;
-
-	/* Only supports DMA_TERMINATE_ALL */
-	if (cmd != DMA_TERMINATE_ALL)
-		return -ENXIO;
+	int size;
 
 	if (!dchan)
 		return -EINVAL;
 
 	chan = to_fsl_chan(dchan);
 
-	/* Halt the DMA engine */
-	dma_halt(chan);
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		/* Halt the DMA engine */
+		dma_halt(chan);
 
-	spin_lock_irqsave(&chan->desc_lock, flags);
+		spin_lock_irqsave(&chan->desc_lock, flags);
 
-	/* Remove and free all of the descriptors in the LD queue */
-	fsldma_free_desc_list(chan, &chan->ld_pending);
-	fsldma_free_desc_list(chan, &chan->ld_running);
+		/* Remove and free all of the descriptors in the LD queue */
+		fsldma_free_desc_list(chan, &chan->ld_pending);
+		fsldma_free_desc_list(chan, &chan->ld_running);
 
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+		spin_unlock_irqrestore(&chan->desc_lock, flags);
+		return 0;
+
+	case DMA_SLAVE_CONFIG:
+		config = (struct dma_slave_config *)arg;
+
+		/* make sure the channel supports setting burst size */
+		if (!chan->set_request_count)
+			return -ENXIO;
+
+		/* we set the controller burst size depending on direction */
+		if (config->direction == DMA_TO_DEVICE)
+			size = config->dst_addr_width * config->dst_maxburst;
+		else
+			size = config->src_addr_width * config->src_maxburst;
+
+		chan->set_request_count(chan, size);
+		return 0;
+
+	case FSLDMA_EXTERNAL_START:
+
+		/* make sure the channel supports external start */
+		if (!chan->toggle_ext_start)
+			return -ENXIO;
+
+		chan->toggle_ext_start(chan, arg);
+		return 0;
+
+	default:
+		return -ENXIO;
+	}
 
 	return 0;
 }
@@ -1327,11 +1309,13 @@
 
 	dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
 	dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
+	dma_cap_set(DMA_SG, fdev->common.cap_mask);
 	dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
 	fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
 	fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
 	fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
 	fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
+	fdev->common.device_prep_dma_sg = fsl_dma_prep_sg;
 	fdev->common.device_tx_status = fsl_tx_status;
 	fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
 	fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg;
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
new file mode 100644
index 0000000..346be62
--- /dev/null
+++ b/drivers/dma/imx-dma.c
@@ -0,0 +1,422 @@
+/*
+ * drivers/dma/imx-dma.c
+ *
+ * This file contains a driver for the Freescale i.MX DMA engine
+ * found on i.MX1/21/27
+ *
+ * Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/dmaengine.h>
+
+#include <asm/irq.h>
+#include <mach/dma-v1.h>
+#include <mach/hardware.h>
+
+struct imxdma_channel {
+	struct imxdma_engine		*imxdma;	/* owning engine */
+	unsigned int			channel;	/* index in the engine's channel array */
+	unsigned int			imxdma_channel;	/* value returned by imx_dma_request_by_prio() */
+
+	enum dma_slave_buswidth		word_size;	/* set by DMA_SLAVE_CONFIG */
+	dma_addr_t			per_address;	/* set by DMA_SLAVE_CONFIG */
+	u32				watermark_level; /* maxburst from DMA_SLAVE_CONFIG */
+	struct dma_chan			chan;
+	spinlock_t			lock;		/* taken in tx_submit */
+	struct dma_async_tx_descriptor	desc;		/* the single descriptor of this channel */
+	dma_cookie_t			last_completed;	/* cookie of the last handled descriptor */
+	enum dma_status			status;
+	int				dma_request;	/* copied from chan->private on allocation */
+	struct scatterlist		*sg_list;	/* allocated by the cyclic prep function */
+};
+
+#define MAX_DMA_CHANNELS 8
+
+struct imxdma_engine {
+	struct device			*dev;		/* the platform device's struct device */
+	struct dma_device		dma_device;	/* registered with the dmaengine core */
+	struct imxdma_channel		channel[MAX_DMA_CHANNELS];
+};
+
+/* Get the driver channel that embeds the given dmaengine channel */
+static struct imxdma_channel *to_imxdma_chan(struct dma_chan *chan)
+{
+	struct imxdma_channel *imxdmac;
+
+	imxdmac = container_of(chan, struct imxdma_channel, chan);
+
+	return imxdmac;
+}
+
+/*
+ * Run the client callback of the channel's descriptor, if one is set,
+ * then record the descriptor's cookie as the last completed one.
+ */
+static void imxdma_handle(struct imxdma_channel *imxdmac)
+{
+	struct dma_async_tx_descriptor *txd = &imxdmac->desc;
+
+	if (txd->callback)
+		txd->callback(txd->callback_param);
+
+	imxdmac->last_completed = txd->cookie;
+}
+
+/* Completion handler: mark the transfer successful and notify the client */
+static void imxdma_irq_handler(int channel, void *data)
+{
+	struct imxdma_channel *dmac = data;
+
+	dmac->status = DMA_SUCCESS;
+
+	imxdma_handle(dmac);
+}
+
+/* Error handler: flag the channel as failed, then notify the client */
+static void imxdma_err_handler(int channel, void *data, int error)
+{
+	struct imxdma_channel *dmac = data;
+
+	dmac->status = DMA_ERROR;
+
+	imxdma_handle(dmac);
+}
+
+/*
+ * Progression handler installed for cyclic transfers: treated exactly
+ * like a successful completion.
+ */
+static void imxdma_progression(int channel, void *data,
+		struct scatterlist *sg)
+{
+	struct imxdma_channel *dmac = data;
+
+	dmac->status = DMA_SUCCESS;
+
+	imxdma_handle(dmac);
+}
+
+/*
+ * dmaengine control hook.
+ *
+ * DMA_TERMINATE_ALL marks the channel as DMA_ERROR and disables it.
+ * DMA_SLAVE_CONFIG stores address, burst and bus width for the configured
+ * direction and programs the channel accordingly. Any other command
+ * returns -ENOSYS.
+ */
+static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		unsigned long arg)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct dma_slave_config *cfg = (void *)arg;
+	unsigned int mem_size;
+	int err;
+
+	if (cmd == DMA_TERMINATE_ALL) {
+		imxdmac->status = DMA_ERROR;
+		imx_dma_disable(imxdmac->imxdma_channel);
+		return 0;
+	}
+
+	if (cmd != DMA_SLAVE_CONFIG)
+		return -ENOSYS;
+
+	/* pick the device side parameters of the configured direction */
+	if (cfg->direction == DMA_FROM_DEVICE) {
+		imxdmac->per_address = cfg->src_addr;
+		imxdmac->watermark_level = cfg->src_maxburst;
+		imxdmac->word_size = cfg->src_addr_width;
+	} else {
+		imxdmac->per_address = cfg->dst_addr;
+		imxdmac->watermark_level = cfg->dst_maxburst;
+		imxdmac->word_size = cfg->dst_addr_width;
+	}
+
+	/* anything but 1 or 2 byte accesses is treated as 32 bit */
+	if (imxdmac->word_size == DMA_SLAVE_BUSWIDTH_1_BYTE)
+		mem_size = IMX_DMA_MEMSIZE_8;
+	else if (imxdmac->word_size == DMA_SLAVE_BUSWIDTH_2_BYTES)
+		mem_size = IMX_DMA_MEMSIZE_16;
+	else
+		mem_size = IMX_DMA_MEMSIZE_32;
+
+	err = imx_dma_config_channel(imxdmac->imxdma_channel,
+			mem_size | IMX_DMA_TYPE_FIFO,
+			IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR,
+			imxdmac->dma_request, 1);
+	if (err)
+		return err;
+
+	imx_dma_config_burstlen(imxdmac->imxdma_channel,
+			imxdmac->watermark_level);
+
+	return 0;
+}
+
+/*
+ * Report the state of @cookie relative to the channel's last completed
+ * and last issued cookies, and fill @txstate with both.
+ */
+static enum dma_status imxdma_tx_status(struct dma_chan *chan,
+					    dma_cookie_t cookie,
+					    struct dma_tx_state *txstate)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	dma_cookie_t issued = chan->cookie;
+	enum dma_status state;
+
+	state = dma_async_is_complete(cookie, imxdmac->last_completed, issued);
+	dma_set_tx_state(txstate, imxdmac->last_completed, issued, 0);
+
+	return state;
+}
+
+/*
+ * Advance the channel cookie, restarting at 1 when the increment turns
+ * negative, and store it in both the channel and its descriptor.
+ */
+static dma_cookie_t imxdma_assign_cookie(struct imxdma_channel *imxdma)
+{
+	dma_cookie_t next = imxdma->chan.cookie;
+
+	next++;
+	if (next < 0)
+		next = 1;
+
+	imxdma->chan.cookie = next;
+	imxdma->desc.cookie = next;
+
+	return next;
+}
+
+/*
+ * Submit hook of the channel descriptor: assign a new cookie and enable
+ * the channel, both under the channel lock. Returns the new cookie.
+ */
+static dma_cookie_t imxdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct imxdma_channel *dmac = to_imxdma_chan(tx->chan);
+	dma_cookie_t assigned;
+
+	spin_lock_irq(&dmac->lock);
+	assigned = imxdma_assign_cookie(dmac);
+	imx_dma_enable(dmac->imxdma_channel);
+	spin_unlock_irq(&dmac->lock);
+
+	return assigned;
+}
+
+/*
+ * Prepare a channel for a client: take the DMA request line from
+ * chan->private and initialise the channel's descriptor. Always returns 0.
+ */
+static int imxdma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct imxdma_channel *dmac = to_imxdma_chan(chan);
+	struct imx_dma_data *client_data = chan->private;
+	struct dma_async_tx_descriptor *txd = &dmac->desc;
+
+	dmac->dma_request = client_data->dma_request;
+
+	dma_async_tx_descriptor_init(txd, chan);
+	txd->tx_submit = imxdma_tx_submit;
+	/* txd.flags will be overwritten in prep funcs */
+	txd->flags = DMA_CTRL_ACK;
+
+	dmac->status = DMA_SUCCESS;
+
+	return 0;
+}
+
+/* Disable the channel and drop the scatterlist kept for cyclic transfers */
+static void imxdma_free_chan_resources(struct dma_chan *chan)
+{
+	struct imxdma_channel *dmac = to_imxdma_chan(chan);
+
+	imx_dma_disable(dmac->imxdma_channel);
+
+	/* kfree(NULL) is a no-op, so no guard is needed */
+	kfree(dmac->sg_list);
+	dmac->sg_list = NULL;
+}
+
+/*
+ * Prepare a slave transfer over @sgl.
+ *
+ * Sums up the scatterlist length, programs the channel through
+ * imx_dma_setup_sg() and hands out the channel's single descriptor.
+ * Returns NULL while a transfer is in progress or when the setup fails.
+ *
+ * The channel is only marked DMA_IN_PROGRESS once the setup succeeded;
+ * marking it earlier would leave it stuck (every later prep call sees
+ * DMA_IN_PROGRESS and bails out) after a failed setup.
+ */
+static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned long flags)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct scatterlist *sg;
+	int i, ret, dma_length = 0;
+	unsigned int dmamode;
+
+	if (imxdmac->status == DMA_IN_PROGRESS)
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma_length += sg->length;
+	}
+
+	if (direction == DMA_FROM_DEVICE)
+		dmamode = DMA_MODE_READ;
+	else
+		dmamode = DMA_MODE_WRITE;
+
+	ret = imx_dma_setup_sg(imxdmac->imxdma_channel, sgl, sg_len,
+		 dma_length, imxdmac->per_address, dmamode);
+	if (ret)
+		return NULL;
+
+	imxdmac->status = DMA_IN_PROGRESS;
+
+	return &imxdmac->desc;
+}
+
+/*
+ * Prepare a cyclic transfer of @buf_len bytes split into periods of
+ * @period_len bytes.
+ *
+ * Builds a scatterlist with one entry per period plus a trailing chain
+ * entry pointing back at the first one, installs the progression handler
+ * and programs the channel with IMX_DMA_LENGTH_LOOP. Returns the channel's
+ * single descriptor, or NULL when @period_len is zero, a transfer is in
+ * progress, or any setup step fails.
+ *
+ * As in the slave_sg variant, the channel is only marked DMA_IN_PROGRESS
+ * after everything succeeded so that a failure does not block later calls.
+ */
+static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_data_direction direction)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int i, ret;
+	unsigned int periods;
+	unsigned int dmamode;
+
+	/* buf_len and period_len are size_t, which needs %zu */
+	dev_dbg(imxdma->dev, "%s channel: %d buf_len=%zu period_len=%zu\n",
+			__func__, imxdmac->channel, buf_len, period_len);
+
+	/* a zero period length would divide by zero below */
+	if (!period_len)
+		return NULL;
+	periods = buf_len / period_len;
+
+	if (imxdmac->status == DMA_IN_PROGRESS)
+		return NULL;
+
+	ret = imx_dma_setup_progression_handler(imxdmac->imxdma_channel,
+			imxdma_progression);
+	if (ret) {
+		dev_err(imxdma->dev, "Failed to setup the DMA handler\n");
+		return NULL;
+	}
+
+	/* replace the list of a previous cyclic transfer, if any */
+	kfree(imxdmac->sg_list);
+
+	imxdmac->sg_list = kcalloc(periods + 1,
+			sizeof(struct scatterlist), GFP_KERNEL);
+	if (!imxdmac->sg_list)
+		return NULL;
+
+	sg_init_table(imxdmac->sg_list, periods);
+
+	for (i = 0; i < periods; i++) {
+		imxdmac->sg_list[i].page_link = 0;
+		imxdmac->sg_list[i].offset = 0;
+		imxdmac->sg_list[i].dma_address = dma_addr;
+		imxdmac->sg_list[i].length = period_len;
+		dma_addr += period_len;
+	}
+
+	/* close the loop */
+	imxdmac->sg_list[periods].offset = 0;
+	imxdmac->sg_list[periods].length = 0;
+	imxdmac->sg_list[periods].page_link =
+		((unsigned long)imxdmac->sg_list | 0x01) & ~0x02;
+
+	if (direction == DMA_FROM_DEVICE)
+		dmamode = DMA_MODE_READ;
+	else
+		dmamode = DMA_MODE_WRITE;
+
+	ret = imx_dma_setup_sg(imxdmac->imxdma_channel, imxdmac->sg_list, periods,
+		 IMX_DMA_LENGTH_LOOP, imxdmac->per_address, dmamode);
+	if (ret)
+		return NULL;
+
+	imxdmac->status = DMA_IN_PROGRESS;
+
+	return &imxdmac->desc;
+}
+
+static void imxdma_issue_pending(struct dma_chan *chan)
+{
+	/*
+	 * Nothing to do. We only have a single descriptor, and the
+	 * channel is already enabled from the tx_submit hook.
+	 */
+}
+
+/*
+ * Probe the i.MX DMA engine: request MAX_DMA_CHANNELS channels from the
+ * platform DMA code, hook up their handlers and register the result as a
+ * DMA_SLAVE/DMA_CYCLIC capable dmaengine device.
+ *
+ * Returns 0 on success or a negative errno; on failure every channel
+ * requested so far is released again.
+ */
+static int __init imxdma_probe(struct platform_device *pdev)
+{
+	struct imxdma_engine *imxdma;
+	int ret, i;
+
+	imxdma = kzalloc(sizeof(*imxdma), GFP_KERNEL);
+	if (!imxdma)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&imxdma->dma_device.channels);
+
+	/* device wide capabilities, independent of the channel loop */
+	dma_cap_set(DMA_SLAVE, imxdma->dma_device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, imxdma->dma_device.cap_mask);
+
+	/* Initialize channel parameters */
+	for (i = 0; i < MAX_DMA_CHANNELS; i++) {
+		struct imxdma_channel *imxdmac = &imxdma->channel[i];
+		int hw_channel;
+
+		/*
+		 * Check the signed return value before storing it: the
+		 * channel fields are unsigned and can never compare < 0.
+		 */
+		hw_channel = imx_dma_request_by_prio("dmaengine",
+				DMA_PRIO_MEDIUM);
+		if (hw_channel < 0) {
+			ret = hw_channel;
+			goto err_init;
+		}
+		imxdmac->imxdma_channel = hw_channel;
+
+		imx_dma_setup_handlers(imxdmac->imxdma_channel,
+		       imxdma_irq_handler, imxdma_err_handler, imxdmac);
+
+		imxdmac->imxdma = imxdma;
+		spin_lock_init(&imxdmac->lock);
+
+		imxdmac->chan.device = &imxdma->dma_device;
+		imxdmac->chan.chan_id = i;
+		imxdmac->channel = i;
+
+		/* Add the channel to the DMAC list */
+		list_add_tail(&imxdmac->chan.device_node, &imxdma->dma_device.channels);
+	}
+
+	imxdma->dev = &pdev->dev;
+	imxdma->dma_device.dev = &pdev->dev;
+
+	imxdma->dma_device.device_alloc_chan_resources = imxdma_alloc_chan_resources;
+	imxdma->dma_device.device_free_chan_resources = imxdma_free_chan_resources;
+	imxdma->dma_device.device_tx_status = imxdma_tx_status;
+	imxdma->dma_device.device_prep_slave_sg = imxdma_prep_slave_sg;
+	imxdma->dma_device.device_prep_dma_cyclic = imxdma_prep_dma_cyclic;
+	imxdma->dma_device.device_control = imxdma_control;
+	imxdma->dma_device.device_issue_pending = imxdma_issue_pending;
+
+	platform_set_drvdata(pdev, imxdma);
+
+	ret = dma_async_device_register(&imxdma->dma_device);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to register\n");
+		goto err_init;
+	}
+
+	return 0;
+
+err_init:
+	/*
+	 * i is the index of the first channel that was NOT requested
+	 * (MAX_DMA_CHANNELS when registration failed), so release
+	 * channels i-1 down to 0 and never touch channel[-1].
+	 */
+	while (--i >= 0) {
+		struct imxdma_channel *imxdmac = &imxdma->channel[i];
+		imx_dma_free(imxdmac->imxdma_channel);
+	}
+
+	kfree(imxdma);
+	return ret;
+}
+
+/*
+ * Unregister the dmaengine device, release every platform DMA channel
+ * and free the engine. Always returns 0.
+ */
+static int __exit imxdma_remove(struct platform_device *pdev)
+{
+	struct imxdma_engine *engine = platform_get_drvdata(pdev);
+	int ch;
+
+	dma_async_device_unregister(&engine->dma_device);
+
+	for (ch = 0; ch < MAX_DMA_CHANNELS; ch++)
+		imx_dma_free(engine->channel[ch].imxdma_channel);
+
+	kfree(engine);
+
+	return 0;
+}
+
+static struct platform_driver imxdma_driver = {
+	.driver		= {
+		.name	= "imx-dma",
+	},
+	.remove		= __exit_p(imxdma_remove),	/* probe is passed to platform_driver_probe() instead */
+};
+
+/* Register the platform driver together with its probe routine */
+static int __init imxdma_module_init(void)
+{
+	int err;
+
+	err = platform_driver_probe(&imxdma_driver, imxdma_probe);
+
+	return err;
+}
+subsys_initcall(imxdma_module_init);
+
+MODULE_AUTHOR("Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>");
+MODULE_DESCRIPTION("i.MX dma driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
new file mode 100644
index 0000000..0834323
--- /dev/null
+++ b/drivers/dma/imx-sdma.c
@@ -0,0 +1,1392 @@
+/*
+ * drivers/dma/imx-sdma.c
+ *
+ * This file contains a driver for the Freescale Smart DMA engine
+ *
+ * Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+ *
+ * Based on code from Freescale:
+ *
+ * Copyright 2004-2009 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/dmaengine.h>
+
+#include <asm/irq.h>
+#include <mach/sdma.h>
+#include <mach/dma.h>
+#include <mach/hardware.h>
+
+/* SDMA registers */
+#define SDMA_H_C0PTR		0x000
+#define SDMA_H_INTR		0x004
+#define SDMA_H_STATSTOP		0x008
+#define SDMA_H_START		0x00c
+#define SDMA_H_EVTOVR		0x010
+#define SDMA_H_DSPOVR		0x014
+#define SDMA_H_HOSTOVR		0x018
+#define SDMA_H_EVTPEND		0x01c
+#define SDMA_H_DSPENBL		0x020
+#define SDMA_H_RESET		0x024
+#define SDMA_H_EVTERR		0x028
+#define SDMA_H_INTRMSK		0x02c
+#define SDMA_H_PSW		0x030
+#define SDMA_H_EVTERRDBG	0x034
+#define SDMA_H_CONFIG		0x038
+#define SDMA_ONCE_ENB		0x040
+#define SDMA_ONCE_DATA		0x044
+#define SDMA_ONCE_INSTR		0x048
+#define SDMA_ONCE_STAT		0x04c
+#define SDMA_ONCE_CMD		0x050
+#define SDMA_EVT_MIRROR		0x054
+#define SDMA_ILLINSTADDR	0x058
+#define SDMA_CHN0ADDR		0x05c
+#define SDMA_ONCE_RTB		0x060
+#define SDMA_XTRIG_CONF1	0x070
+#define SDMA_XTRIG_CONF2	0x074
+#define SDMA_CHNENBL0_V2	0x200
+#define SDMA_CHNENBL0_V1	0x080
+#define SDMA_CHNPRI_0		0x100
+
+/*
+ * Buffer descriptor status values.
+ */
+#define BD_DONE  0x01
+#define BD_WRAP  0x02
+#define BD_CONT  0x04
+#define BD_INTR  0x08
+#define BD_RROR  0x10
+#define BD_LAST  0x20
+#define BD_EXTD  0x80
+
+/*
+ * Data Node descriptor status values.
+ */
+#define DND_END_OF_FRAME  0x80
+#define DND_END_OF_XFER   0x40
+#define DND_DONE          0x20
+#define DND_UNUSED        0x01
+
+/*
+ * IPCV2 descriptor status values.
+ */
+#define BD_IPCV2_END_OF_FRAME  0x40
+
+#define IPCV2_MAX_NODES        50
+/*
+ * Error bit set in the CCB status field by the SDMA,
+ * in setbd routine, in case of a transfer error
+ */
+#define DATA_ERROR  0x10000000
+
+/*
+ * Buffer descriptor commands.
+ */
+#define C0_ADDR             0x01
+#define C0_LOAD             0x02
+#define C0_DUMP             0x03
+#define C0_SETCTX           0x07
+#define C0_GETCTX           0x03
+#define C0_SETDM            0x01
+#define C0_SETPM            0x04
+#define C0_GETDM            0x02
+#define C0_GETPM            0x08
+/*
+ * Change endianness indicator in the BD command field
+ */
+#define CHANGE_ENDIANNESS   0x80
+
+/*
+ * Mode/Count of data node descriptors - IPCv2
+ */
+struct sdma_mode_count {
+	u32 count   : 16; /* size of the buffer pointed to by this BD */
+	u32 status  :  8; /* E,R,I,C,W,D status bits (BD_* values) stored here */
+	u32 command :  8; /* command, mostly used for channel 0 */
+};
+
+/*
+ * Buffer descriptor; each channel's bd member is an array of these
+ */
+struct sdma_buffer_descriptor {
+	struct sdma_mode_count  mode;
+	u32 buffer_addr;	/* address of the buffer described */
+	u32 ext_buffer_addr;	/* extended buffer address; channel 0 commands put their target address here */
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_channel_control - Channel control Block
+ *
+ * @current_bd_ptr:	current buffer descriptor processed
+ * @base_bd_ptr:	first element of buffer descriptor array
+ * @unused:		padding up to four u32, i.e. 16 bytes (128 bits) per block.
+ *			NOTE(review): this used to read "128 byte" - confirm
+ */
+struct sdma_channel_control {
+	u32 current_bd_ptr;
+	u32 base_bd_ptr;
+	u32 unused[2];
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_state_registers - SDMA context for a channel
+ *
+ * @pc:		program counter
+ * @t:		test bit: status of arithmetic & test instruction
+ * @rpc:	return program counter
+ * @sf:		source fault while loading data
+ * @spc:	loop start program counter
+ * @df:		destination fault while storing data
+ * @epc:	loop end program counter
+ * @lm:		loop mode
+ */
+struct sdma_state_registers {
+	u32 pc     :14;
+	u32 unused1: 1;	/* filler bit, not described above */
+	u32 t      : 1;
+	u32 rpc    :14;
+	u32 unused0: 1;	/* filler bit, not described above */
+	u32 sf     : 1;	/* widths up to here add up to 32 bits */
+	u32 spc    :14;
+	u32 unused2: 1;	/* filler bit, not described above */
+	u32 df     : 1;
+	u32 epc    :14;
+	u32 lm     : 2;	/* all widths together add up to 64 bits */
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_context_data - sdma context specific to a channel
+ *
+ * @channel_state:	channel state bits
+ * @gReg:		general registers
+ * @mda:		burst dma destination address register
+ * @msa:		burst dma source address register
+ * @ms:			burst dma status register
+ * @md:			burst dma data register
+ * @pda:		peripheral dma destination address register
+ * @psa:		peripheral dma source address register
+ * @ps:			peripheral dma status register
+ * @pd:			peripheral dma data register
+ * @ca:			CRC polynomial register
+ * @cs:			CRC accumulator register
+ * @dda:		dedicated core destination address register
+ * @dsa:		dedicated core source address register
+ * @ds:			dedicated core status register
+ * @dd:			dedicated core data register
+ */
+struct sdma_context_data {
+	struct sdma_state_registers  channel_state;
+	u32  gReg[8];	/* sdma_load_context() fills [0], [1], [2], [6] and [7] */
+	u32  mda;
+	u32  msa;
+	u32  ms;
+	u32  md;
+	u32  pda;
+	u32  psa;
+	u32  ps;
+	u32  pd;
+	u32  ca;
+	u32  cs;
+	u32  dda;
+	u32  dsa;
+	u32  ds;
+	u32  dd;
+	u32  scratch0;	/* scratch0..scratch7 are not described above */
+	u32  scratch1;
+	u32  scratch2;
+	u32  scratch3;
+	u32  scratch4;
+	u32  scratch5;
+	u32  scratch6;
+	u32  scratch7;
+} __attribute__ ((packed));
+
+#define NUM_BD (int)(PAGE_SIZE / sizeof(struct sdma_buffer_descriptor))
+
+struct sdma_engine;
+
+/**
+ * struct sdma_channel - housekeeping for a SDMA channel
+ *
+ * @sdma:		pointer to the SDMA engine for this channel
+ * @channel:		the channel number, matches dmaengine chan_id
+ * @direction:		transfer type. Needed for setting SDMA script
+ * @peripheral_type:	Peripheral type. Needed for setting SDMA script
+ * @event_id0:		aka dma request line
+ * @event_id1:		for channels that use 2 events
+ * @word_size:		peripheral access size
+ * @buf_tail:		ID of the buffer that was processed
+ * @done:		channel completion
+ * @num_bd:		max NUM_BD. number of descriptors currently handling
+ */
+struct sdma_channel {
+	struct sdma_engine		*sdma;
+	unsigned int			channel;
+	enum dma_data_direction		direction;
+	enum sdma_peripheral_type	peripheral_type;
+	unsigned int			event_id0;
+	unsigned int			event_id1;
+	enum dma_slave_buswidth		word_size;
+	unsigned int			buf_tail;
+	struct completion		done;
+	unsigned int			num_bd;
+	struct sdma_buffer_descriptor	*bd;		/* descriptor array, one coherent page */
+	dma_addr_t			bd_phys;	/* bus address of @bd */
+	unsigned int			pc_from_device, pc_to_device; /* script start per direction */
+	unsigned long			flags;		/* IMX_DMA_SG_LOOP when cyclic */
+	dma_addr_t			per_address;	/* peripheral address from the slave config */
+	u32				event_mask0, event_mask1; /* loaded into the context */
+	u32				watermark_level; /* loaded into the context */
+	u32				shp_addr, per_addr; /* loaded into the context */
+	struct dma_chan			chan;		/* embedded dmaengine channel */
+	spinlock_t			lock;		/* taken in sdma_tx_submit() */
+	struct dma_async_tx_descriptor	desc;		/* the channel's only descriptor */
+	dma_cookie_t			last_completed;	/* cookie of the last finished transfer */
+	enum dma_status			status;		/* DMA_IN_PROGRESS while a transfer is prepared */
+};
+
+#define IMX_DMA_SG_LOOP		(1 << 0)
+
+#define MAX_DMA_CHANNELS 32
+#define MXC_SDMA_DEFAULT_PRIORITY 1
+#define MXC_SDMA_MIN_PRIORITY 1
+#define MXC_SDMA_MAX_PRIORITY 7
+
+/**
+ * struct sdma_script_start_addrs - SDMA script start pointers
+ *
+ * Script start addresses (in SDMA address space). sdma_probe() copies this
+ * struct verbatim out of the firmware image, so the field order is fixed.
+ */
+struct sdma_script_start_addrs {
+	u32 ap_2_ap_addr;
+	u32 ap_2_bp_addr;
+	u32 ap_2_ap_fixed_addr;
+	u32 bp_2_ap_addr;
+	u32 loopback_on_dsp_side_addr;
+	u32 mcu_interrupt_only_addr;
+	u32 firi_2_per_addr;
+	u32 firi_2_mcu_addr;
+	u32 per_2_firi_addr;
+	u32 mcu_2_firi_addr;
+	u32 uart_2_per_addr;
+	u32 uart_2_mcu_addr;
+	u32 per_2_app_addr;
+	u32 mcu_2_app_addr;
+	u32 per_2_per_addr;
+	u32 uartsh_2_per_addr;
+	u32 uartsh_2_mcu_addr;
+	u32 per_2_shp_addr;
+	u32 mcu_2_shp_addr;
+	u32 ata_2_mcu_addr;
+	u32 mcu_2_ata_addr;
+	u32 app_2_per_addr;
+	u32 app_2_mcu_addr;
+	u32 shp_2_per_addr;
+	u32 shp_2_mcu_addr;
+	u32 mshc_2_mcu_addr;
+	u32 mcu_2_mshc_addr;
+	u32 spdif_2_mcu_addr;
+	u32 mcu_2_spdif_addr;
+	u32 asrc_2_mcu_addr;
+	u32 ext_mem_2_ipu_addr;
+	u32 descrambler_addr;
+	u32 dptc_dvfs_addr;
+	u32 utra_addr;
+	u32 ram_code_start_addr;	/* target address passed to sdma_load_script() */
+};
+
+#define SDMA_FIRMWARE_MAGIC 0x414d4453
+
+/**
+ * struct sdma_firmware_header - Layout of the firmware image
+ *
+ * @magic:		"SDMA"
+ * @version_major:	increased whenever layout of struct sdma_script_start_addrs
+ *			changes.
+ * @version_minor:	firmware minor version (for binary compatible changes)
+ * @script_addrs_start:	offset of struct sdma_script_start_addrs in this image
+ * @num_script_addrs:	Number of script addresses in this image
+ * @ram_code_start:	offset of SDMA ram image in this firmware image
+ * @ram_code_size:	size of SDMA ram image
+ * The script start addresses (in SDMA memory space) are not a member of this
+ * header; they sit in the image at offset @script_addrs_start.
+ */
+struct sdma_firmware_header {
+	u32	magic;
+	u32	version_major;
+	u32	version_minor;
+	u32	script_addrs_start;
+	u32	num_script_addrs;
+	u32	ram_code_start;
+	u32	ram_code_size;
+};
+
+struct sdma_engine {
+	struct device			*dev;
+	struct sdma_channel		channel[MAX_DMA_CHANNELS]; /* [0] is the command channel */
+	struct sdma_channel_control	*channel_control; /* one CCB per channel, coherent memory */
+	void __iomem			*regs;
+	unsigned int			version;	/* 1 or 2, see sdma_init() */
+	unsigned int			num_events;	/* 32 for version 1, 48 for version 2 */
+	struct sdma_context_data	*context;	/* staging buffer behind the CCB array */
+	dma_addr_t			context_phys;	/* bus address of @context */
+	struct dma_device		dma_device;
+	struct clk			*clk;
+	struct sdma_script_start_addrs	*script_addrs;	/* copy taken from the firmware image */
+};
+
+#define SDMA_H_CONFIG_DSPDMA	(1 << 12) /* indicates if the DSPDMA is used */
+#define SDMA_H_CONFIG_RTD_PINS	(1 << 11) /* indicates if Real-Time Debug pins are enabled */
+#define SDMA_H_CONFIG_ACR	(1 << 4)  /* indicates if AHB freq /core freq = 2 or 1 */
+#define SDMA_H_CONFIG_CSM	(3)       /* indicates which context switch mode is selected*/
+
+/* Register offset of the channel-enable word for @event on this SDMA version */
+static inline u32 chnenbl_ofs(struct sdma_engine *sdma, unsigned int event)
+{
+	return (sdma->version == 2 ? SDMA_CHNENBL0_V2 : SDMA_CHNENBL0_V1) +
+		event * 4;
+}
+
+/*
+ * sdma_config_ownership - program the override bits of one channel
+ *
+ * Read-modify-writes the channel's bit in SDMA_H_EVTOVR, SDMA_H_HOSTOVR and
+ * SDMA_H_DSPOVR: a true *_override argument clears the bit in the matching
+ * register, a false one sets it.
+ *
+ * Returns 0, or -EINVAL (registers untouched) when all three are requested.
+ */
+static int sdma_config_ownership(struct sdma_channel *sdmac,
+		bool event_override, bool mcu_override, bool dsp_override)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	u32 bit = 1 << channel;
+	u32 evt, mcu, dsp;
+
+	if (event_override && mcu_override && dsp_override)
+		return -EINVAL;
+
+	evt = __raw_readl(sdma->regs + SDMA_H_EVTOVR);
+	mcu = __raw_readl(sdma->regs + SDMA_H_HOSTOVR);
+	dsp = __raw_readl(sdma->regs + SDMA_H_DSPOVR);
+
+	/* override requested: clear the channel bit, otherwise set it */
+	dsp = dsp_override ? (dsp & ~bit) : (dsp | bit);
+	evt = event_override ? (evt & ~bit) : (evt | bit);
+	mcu = mcu_override ? (mcu & ~bit) : (mcu | bit);
+
+	__raw_writel(evt, sdma->regs + SDMA_H_EVTOVR);
+	__raw_writel(mcu, sdma->regs + SDMA_H_HOSTOVR);
+	__raw_writel(dsp, sdma->regs + SDMA_H_DSPOVR);
+
+	return 0;
+}
+
+/*
+ * sdma_run_channel - start a channel and sleep until its completion fires
+ *
+ * Returns 0 on completion, -ETIMEDOUT if nothing arrived within HZ jiffies.
+ */
+static int sdma_run_channel(struct sdma_channel *sdmac)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+
+	init_completion(&sdmac->done);
+	__raw_writel(1 << sdmac->channel, sdma->regs + SDMA_H_START);
+
+	if (!wait_for_completion_timeout(&sdmac->done, HZ))
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/*
+ * Hand @size bytes from @buf to the SDMA at @address: the data is staged in
+ * a coherent bounce buffer and moved by a C0_SETPM command on channel 0.
+ */
+static int sdma_load_script(struct sdma_engine *sdma, void *buf, int size,
+		u32 address)
+{
+	struct sdma_buffer_descriptor *bd0 = sdma->channel[0].bd;
+	dma_addr_t bounce_phys;
+	void *bounce;
+	int ret;
+
+	bounce = dma_alloc_coherent(NULL, size, &bounce_phys, GFP_KERNEL);
+	if (!bounce)
+		return -ENOMEM;
+
+	memcpy(bounce, buf, size);
+
+	bd0->mode.command = C0_SETPM;
+	bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
+	bd0->mode.count = size / 2;	/* presumably counted in 16-bit units */
+	bd0->buffer_addr = bounce_phys;
+	bd0->ext_buffer_addr = address;
+
+	ret = sdma_run_channel(&sdma->channel[0]);
+	dma_free_coherent(NULL, size, bounce, bounce_phys);
+	return ret;
+}
+
+/* Set this channel's bit in the enable register belonging to @event */
+static void sdma_event_enable(struct sdma_channel *sdmac, unsigned int event)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	void __iomem *reg = sdma->regs + chnenbl_ofs(sdma, event);
+	u32 mask;
+
+	mask = __raw_readl(reg);
+	mask |= 1 << sdmac->channel;
+	__raw_writel(mask, reg);
+}
+
+/* Clear this channel's bit in the enable register belonging to @event */
+static void sdma_event_disable(struct sdma_channel *sdmac, unsigned int event)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	void __iomem *reg = sdma->regs + chnenbl_ofs(sdma, event);
+	u32 mask;
+
+	mask = __raw_readl(reg);
+	mask &= ~(1 << sdmac->channel);
+	__raw_writel(mask, reg);
+}
+
+/* Interrupt-time handling of a channel running in IMX_DMA_SG_LOOP mode */
+static void sdma_handle_channel_loop(struct sdma_channel *sdmac)
+{
+	struct sdma_buffer_descriptor *bd;
+
+	/*
+	 * Walk the ring from buf_tail while descriptors have BD_DONE cleared,
+	 * re-arm each of them and invoke the client callback once per
+	 * descriptor.
+	 */
+	for (;;) {
+		bd = &sdmac->bd[sdmac->buf_tail];
+		/* BD_DONE still set: this one has not been processed yet */
+		if (bd->mode.status & BD_DONE)
+			break;
+
+		sdmac->status = (bd->mode.status & BD_RROR) ?
+				DMA_ERROR : DMA_SUCCESS;
+
+		/* set BD_DONE again so the descriptor can be reused */
+		bd->mode.status |= BD_DONE;
+		sdmac->buf_tail = (sdmac->buf_tail + 1) % sdmac->num_bd;
+
+		if (sdmac->desc.callback)
+			sdmac->desc.callback(sdmac->desc.callback_param);
+	}
+}
+
+/*
+ * Completion handling for a non-cyclic transfer: derive the channel status
+ * from the descriptors, run the client callback, then record the cookie.
+ */
+static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac)
+{
+	enum dma_status result = DMA_SUCCESS;
+	int i;
+
+	/*
+	 * Any descriptor that still carries BD_DONE or has BD_RROR set marks
+	 * the whole transfer as failed.
+	 */
+	for (i = 0; i < sdmac->num_bd; i++) {
+		if (sdmac->bd[i].mode.status & (BD_DONE | BD_RROR))
+			result = DMA_ERROR;
+	}
+
+	sdmac->status = result;
+
+	if (sdmac->desc.callback)
+		sdmac->desc.callback(sdmac->desc.callback_param);
+	/* the cookie is recorded after the callback has run */
+	sdmac->last_completed = sdmac->desc.cookie;
+}
+
+/* Per-channel interrupt work: wake sdma_run_channel() waiters, then dispatch */
+static void mxc_sdma_handle_channel(struct sdma_channel *sdmac)
+{
+	complete(&sdmac->done);
+	/* channel 0 interrupts need nothing beyond the completion */
+	if (sdmac->channel == 0)
+		return;
+
+	if (!(sdmac->flags & IMX_DMA_SG_LOOP))
+		mxc_sdma_handle_channel_normal(sdmac);
+	else
+		sdma_handle_channel_loop(sdmac);
+}
+
+static irqreturn_t sdma_int_handler(int irq, void *dev_id)
+{
+	struct sdma_engine *sdma = dev_id;
+	u32 pending;
+
+	/* read the pending set and write it back (presumably acknowledges it) */
+	pending = __raw_readl(sdma->regs + SDMA_H_INTR);
+	__raw_writel(pending, sdma->regs + SDMA_H_INTR);
+
+	/* service channels from the highest set bit downwards */
+	while (pending) {
+		int channel = fls(pending) - 1;
+
+		mxc_sdma_handle_channel(&sdma->channel[channel]);
+		pending &= ~(1 << channel);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Store the script entry points for @peripheral_type in pc_{from,to}_device
+ */
+static void sdma_get_pc(struct sdma_channel *sdmac,
+		enum sdma_peripheral_type peripheral_type)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int per_2_emi = 0, emi_2_per = 0;
+	/*
+	 * Assigned below but not read yet: kept for future support of
+	 * peripheral-to-peripheral and memory-to-memory transfers
+	 */
+	int per_2_per = 0, emi_2_emi = 0;
+
+	sdmac->pc_from_device = 0;
+	sdmac->pc_to_device = 0;
+
+	switch (peripheral_type) {
+	case IMX_DMATYPE_MEMORY:
+		emi_2_emi = sdma->script_addrs->ap_2_ap_addr;
+		break;
+	case IMX_DMATYPE_DSP:
+		emi_2_per = sdma->script_addrs->bp_2_ap_addr;
+		per_2_emi = sdma->script_addrs->ap_2_bp_addr;
+		break;
+	case IMX_DMATYPE_FIRI:
+		per_2_emi = sdma->script_addrs->firi_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_firi_addr;
+		break;
+	case IMX_DMATYPE_UART:
+		per_2_emi = sdma->script_addrs->uart_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_app_addr;
+		break;
+	case IMX_DMATYPE_UART_SP:
+		per_2_emi = sdma->script_addrs->uartsh_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_shp_addr;
+		break;
+	case IMX_DMATYPE_ATA:
+		per_2_emi = sdma->script_addrs->ata_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_ata_addr;
+		break;
+	case IMX_DMATYPE_CSPI:
+	case IMX_DMATYPE_EXT:
+	case IMX_DMATYPE_SSI:
+		per_2_emi = sdma->script_addrs->app_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_app_addr;
+		break;
+	case IMX_DMATYPE_SSI_SP:
+	case IMX_DMATYPE_MMC:
+	case IMX_DMATYPE_SDHC:
+	case IMX_DMATYPE_CSPI_SP:
+	case IMX_DMATYPE_ESAI:
+	case IMX_DMATYPE_MSHC_SP:
+		per_2_emi = sdma->script_addrs->shp_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_shp_addr;
+		break;
+	case IMX_DMATYPE_ASRC:
+		per_2_emi = sdma->script_addrs->asrc_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->asrc_2_mcu_addr; /* NOTE(review): same script as per_2_emi - confirm */
+		per_2_per = sdma->script_addrs->per_2_per_addr;
+		break;
+	case IMX_DMATYPE_MSHC:
+		per_2_emi = sdma->script_addrs->mshc_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_mshc_addr;
+		break;
+	case IMX_DMATYPE_CCM:
+		per_2_emi = sdma->script_addrs->dptc_dvfs_addr;
+		break;
+	case IMX_DMATYPE_SPDIF:
+		per_2_emi = sdma->script_addrs->spdif_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_spdif_addr;
+		break;
+	case IMX_DMATYPE_IPU_MEMORY:
+		emi_2_per = sdma->script_addrs->ext_mem_2_ipu_addr;
+		break;
+	default:
+		break;	/* unlisted type: both entry points stay 0 */
+	}
+
+	sdmac->pc_from_device = per_2_emi;
+	sdmac->pc_to_device = emi_2_per;
+}
+
+/*
+ * sdma_load_context - download the channel's context through channel 0
+ *
+ * Picks the script for the current transfer direction, fills the shared
+ * context buffer and has channel 0 transfer it with a C0_SETDM command.
+ *
+ * Returns 0, a negative script address unchanged, or the error reported by
+ * sdma_run_channel().
+ */
+static int sdma_load_context(struct sdma_channel *sdmac)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	struct sdma_context_data *ctx = sdma->context;
+	struct sdma_buffer_descriptor *bd0 = sdma->channel[0].bd;
+	int channel = sdmac->channel;
+	int load_address;
+
+	load_address = (sdmac->direction == DMA_FROM_DEVICE) ?
+			sdmac->pc_from_device : sdmac->pc_to_device;
+	if (load_address < 0)
+		return load_address;
+
+	dev_dbg(sdma->dev, "load_address = %d\n", load_address);
+	dev_dbg(sdma->dev, "wml = 0x%08x\n", sdmac->watermark_level);
+	dev_dbg(sdma->dev, "shp_addr = 0x%08x\n", sdmac->shp_addr);
+	dev_dbg(sdma->dev, "per_addr = 0x%08x\n", sdmac->per_addr);
+	dev_dbg(sdma->dev, "event_mask0 = 0x%08x\n", sdmac->event_mask0);
+	dev_dbg(sdma->dev, "event_mask1 = 0x%08x\n", sdmac->event_mask1);
+
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->channel_state.pc = load_address;
+
+	/* event masks, peripheral addresses and watermark travel in gReg[] */
+	ctx->gReg[0] = sdmac->event_mask1;
+	ctx->gReg[1] = sdmac->event_mask0;
+	ctx->gReg[2] = sdmac->per_addr;
+	ctx->gReg[6] = sdmac->shp_addr;
+	ctx->gReg[7] = sdmac->watermark_level;
+
+	bd0->mode.command = C0_SETDM;
+	bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
+	bd0->mode.count = sizeof(*ctx) / 4;
+	bd0->buffer_addr = sdma->context_phys;
+	bd0->ext_buffer_addr = 2048 + (sizeof(*ctx) / 4) * channel;
+
+	return sdma_run_channel(&sdma->channel[0]);
+}
+
+/* Request a stop through SDMA_H_STATSTOP and flag the channel DMA_ERROR */
+static void sdma_disable_channel(struct sdma_channel *sdmac)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+
+	__raw_writel(1 << sdmac->channel, sdma->regs + SDMA_H_STATSTOP);
+	sdmac->status = DMA_ERROR;
+}
+
+/*
+ * (Re)program a channel: stop it, enable its request event, set ownership and
+ * script addresses, derive event masks, then download the context.
+ */
+static int sdma_config_channel(struct sdma_channel *sdmac)
+{
+	sdma_disable_channel(sdmac);
+
+	sdmac->event_mask0 = 0;
+	sdmac->event_mask1 = 0;
+	sdmac->shp_addr = 0;
+	sdmac->per_addr = 0;
+
+	if (sdmac->event_id0) {
+		/* valid events are 0 .. num_events - 1 for this SDMA version */
+		if (sdmac->event_id0 >= sdmac->sdma->num_events)
+			return -EINVAL;
+		sdma_event_enable(sdmac, sdmac->event_id0);
+	}
+
+	switch (sdmac->peripheral_type) {
+	case IMX_DMATYPE_DSP:
+		sdma_config_ownership(sdmac, false, true, true);
+		break;
+	case IMX_DMATYPE_MEMORY:
+		sdma_config_ownership(sdmac, false, true, false);
+		break;
+	default:
+		sdma_config_ownership(sdmac, true, true, false);
+		break;
+	}
+
+	sdma_get_pc(sdmac, sdmac->peripheral_type);
+
+	if ((sdmac->peripheral_type != IMX_DMATYPE_MEMORY) &&
+			(sdmac->peripheral_type != IMX_DMATYPE_DSP)) {
+		/* Handle multiple event channels differently */
+		if (sdmac->event_id1) {
+			sdmac->event_mask1 = 1U << (sdmac->event_id1 % 32);
+			if (sdmac->event_id1 > 31)
+				sdmac->watermark_level |= 1U << 31;
+			sdmac->event_mask0 = 1U << (sdmac->event_id0 % 32);
+			if (sdmac->event_id0 > 31)
+				sdmac->watermark_level |= 1U << 30;
+		} else if (sdmac->event_id0 > 31) {
+			/* events 32.. live in mask1; never shift by 32 or more */
+			sdmac->event_mask1 = 1U << (sdmac->event_id0 - 32);
+		} else {
+			sdmac->event_mask0 = 1U << sdmac->event_id0;
+		}
+		sdmac->shp_addr = sdmac->per_address;
+	} else {
+		sdmac->watermark_level = 0; /* FIXME: M3_BASE_ADDRESS */
+	}
+
+	return sdma_load_context(sdmac);
+}
+
+/* Program @priority for the channel; values outside MIN..MAX yield -EINVAL */
+static int sdma_set_channel_priority(struct sdma_channel *sdmac,
+		unsigned int priority)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+
+	if (priority < MXC_SDMA_MIN_PRIORITY)
+		return -EINVAL;
+	if (priority > MXC_SDMA_MAX_PRIORITY)
+		return -EINVAL;
+
+	__raw_writel(priority, sdma->regs + SDMA_CHNPRI_0 + 4 * channel);
+	return 0;
+}
+
+/*
+ * sdma_request_channel - give a channel its descriptor page and a clock ref
+ *
+ * Allocates one zeroed, coherent page of buffer descriptors, points the
+ * channel control block at it, enables the SDMA clock and applies the
+ * default priority. Returns 0 or -ENOMEM.
+ */
+static int sdma_request_channel(struct sdma_channel *sdmac)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	struct sdma_channel_control *ccb = &sdma->channel_control[sdmac->channel];
+
+	sdmac->bd = dma_alloc_coherent(NULL, PAGE_SIZE, &sdmac->bd_phys,
+				       GFP_KERNEL);
+	if (!sdmac->bd)
+		return -ENOMEM;
+
+	memset(sdmac->bd, 0, PAGE_SIZE);
+
+	ccb->base_bd_ptr = sdmac->bd_phys;
+	ccb->current_bd_ptr = sdmac->bd_phys;
+
+	clk_enable(sdma->clk);
+	sdma_set_channel_priority(sdmac, MXC_SDMA_DEFAULT_PRIORITY);
+
+	init_completion(&sdmac->done);
+	sdmac->buf_tail = 0;
+
+	return 0;
+}
+
+static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
+{
+	__raw_writel(1 << channel, sdma->regs + SDMA_H_START); /* writes only this channel's bit */
+}
+
+/* Hand out the next cookie; a negative result restarts the sequence at 1 */
+static dma_cookie_t sdma_assign_cookie(struct sdma_channel *sdma)
+{
+	dma_cookie_t cookie = sdma->chan.cookie + 1;
+
+	if (cookie < 0)
+		cookie = 1;
+
+	sdma->desc.cookie = cookie;
+	sdma->chan.cookie = cookie;
+	return cookie;
+}
+
+static struct sdma_channel *to_sdma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct sdma_channel, chan); /* chan is embedded in sdma_channel */
+}
+
+/* tx_submit hook: assign a cookie and start the channel right away */
+static dma_cookie_t sdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(tx->chan);
+	dma_cookie_t cookie;
+
+	/* cookie update and start request both happen under the channel lock */
+	spin_lock_irq(&sdmac->lock);
+
+	cookie = sdma_assign_cookie(sdmac);
+	sdma_enable_channel(sdmac->sdma, tx->chan->chan_id);
+
+	spin_unlock_irq(&sdmac->lock);
+
+	return cookie;
+}
+
+/*
+ * sdma_alloc_chan_resources - dmaengine hook, set a channel up for a client
+ *
+ * chan->private must point to a struct imx_dma_data giving peripheral type,
+ * DMA request line and priority. Returns 0 or a negative error code.
+ */
+static int sdma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct imx_dma_data *data = chan->private;
+	int prio, ret;
+
+	/* No need to execute this for internal channel 0 */
+	if (chan->chan_id == 0)
+		return 0;
+
+	if (!data)
+		return -EINVAL;
+
+	if (data->priority == DMA_PRIO_HIGH)
+		prio = 3;
+	else if (data->priority == DMA_PRIO_MEDIUM)
+		prio = 2;
+	else
+		prio = 1;
+
+	sdmac->peripheral_type = data->peripheral_type;
+	sdmac->event_id0 = data->dma_request;
+
+	ret = sdma_request_channel(sdmac);
+	if (ret)
+		return ret;
+
+	/* after sdma_request_channel(), which resets the priority to default */
+	sdma_set_channel_priority(sdmac, prio);	/* 1..3: always in range */
+
+	dma_async_tx_descriptor_init(&sdmac->desc, chan);
+	sdmac->desc.tx_submit = sdma_tx_submit;
+	/* txd.flags will be overwritten in prep funcs */
+	sdmac->desc.flags = DMA_CTRL_ACK;
+
+	return 0;
+}
+
+/* dmaengine hook: quiesce the channel and undo sdma_alloc_chan_resources() */
+static void sdma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+
+	if (chan->chan_id == 0)
+		return;	/* alloc set nothing up for internal channel 0 either */
+	sdma_disable_channel(sdmac);
+	if (sdmac->event_id0)
+		sdma_event_disable(sdmac, sdmac->event_id0);
+	if (sdmac->event_id1)
+		sdma_event_disable(sdmac, sdmac->event_id1);
+	sdmac->event_id0 = 0;
+	sdmac->event_id1 = 0;
+
+	/* 0 is below MXC_SDMA_MIN_PRIORITY; the checked setter would refuse it */
+	__raw_writel(0, sdma->regs + SDMA_CHNPRI_0 + 4 * sdmac->channel);
+	dma_free_coherent(NULL, PAGE_SIZE, sdmac->bd, sdmac->bd_phys);
+	clk_disable(sdma->clk);
+}
+
+/*
+ * dmaengine hook: prepare a one-shot scatter/gather transfer, one BD per
+ * entry. Returns the channel's only descriptor, or NULL if busy or invalid.
+ */
+static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned long flags)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+	int ret, i, count;
+	int channel = chan->chan_id;
+	struct scatterlist *sg;
+
+	if (sdmac->status == DMA_IN_PROGRESS)
+		return NULL;
+	sdmac->status = DMA_IN_PROGRESS;
+	sdmac->flags = 0;	/* not cyclic: IMX_DMA_SG_LOOP stays clear */
+
+	dev_dbg(sdma->dev, "setting up %d entries for channel %d.\n",
+			sg_len, channel);
+
+	sdmac->direction = direction;
+	ret = sdma_load_context(sdmac);
+	if (ret)
+		goto err_out;
+
+	if (sg_len > NUM_BD) {
+		dev_err(sdma->dev, "SDMA channel %d: maximum number of sg exceeded: %d > %d\n",
+				channel, sg_len, NUM_BD);
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		struct sdma_buffer_descriptor *bd = &sdmac->bd[i];
+		int param;
+
+		/* address of this entry, not of the list head */
+		bd->buffer_addr = sg->dma_address;
+
+		count = sg->length;
+		if (count > 0xffff) {
+			dev_err(sdma->dev, "SDMA channel %d: maximum bytes for sg entry exceeded: %d > %d\n",
+					channel, count, 0xffff);
+			ret = -EINVAL;
+			goto err_out;
+		}
+		bd->mode.count = count;
+
+		if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES) {
+			ret =  -EINVAL;
+			goto err_out;
+		}
+		if (sdmac->word_size == DMA_SLAVE_BUSWIDTH_4_BYTES)
+			bd->mode.command = 0;
+		else
+			bd->mode.command = sdmac->word_size;
+
+		param = BD_DONE | BD_EXTD | BD_CONT;
+		/* last BD: interrupt, mark the end, stop fetching further BDs */
+		if (i + 1 == sg_len) {
+			param |= BD_INTR | BD_LAST;
+			param &= ~BD_CONT;
+		}
+
+		dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n",
+				i, count, sg->dma_address,
+				param & BD_WRAP ? "wrap" : "",
+				param & BD_INTR ? " intr" : "");
+
+		bd->mode.status = param;
+	}
+
+	sdmac->num_bd = sg_len;
+	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
+
+	return &sdmac->desc;
+err_out:
+	/* never leave DMA_IN_PROGRESS behind: later prep calls would all fail */
+	sdmac->status = DMA_ERROR;
+	return NULL;
+}
+
+/*
+ * dmaengine hook: prepare a cyclic transfer of buf_len / period_len periods,
+ * one BD and one interrupt each. Returns NULL if busy or out of range.
+ */
+static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_data_direction direction)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = chan->chan_id;
+	int num_periods, ret, i;
+
+	dev_dbg(sdma->dev, "%s channel: %d\n", __func__, channel);
+
+	/* busy, or less than one whole period (also avoids dividing by zero) */
+	if (sdmac->status == DMA_IN_PROGRESS || !period_len || buf_len < period_len)
+		return NULL;
+	num_periods = buf_len / period_len;
+
+	sdmac->status = DMA_IN_PROGRESS;
+	sdmac->flags |= IMX_DMA_SG_LOOP;
+	sdmac->direction = direction;
+	ret = sdma_load_context(sdmac);
+	if (ret)
+		goto err_out;
+
+	if (num_periods > NUM_BD) {
+		dev_err(sdma->dev, "SDMA channel %d: maximum number of sg exceeded: %d > %d\n",
+				channel, num_periods, NUM_BD);
+		goto err_out;
+	}
+
+	if (period_len > 0xffff) {
+		dev_err(sdma->dev, "SDMA channel %d: maximum period size exceeded: %zu > %d\n",
+				channel, period_len, 0xffff);
+		goto err_out;
+	}
+
+	/* exactly num_periods BDs; a trailing partial period is not queued */
+	for (i = 0; i < num_periods; i++) {
+		struct sdma_buffer_descriptor *bd = &sdmac->bd[i];
+		int param;
+
+		bd->buffer_addr = dma_addr;
+		bd->mode.count = period_len;
+
+		if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES)
+			goto err_out;
+		if (sdmac->word_size == DMA_SLAVE_BUSWIDTH_4_BYTES)
+			bd->mode.command = 0;
+		else
+			bd->mode.command = sdmac->word_size;
+
+		param = BD_DONE | BD_EXTD | BD_CONT | BD_INTR;
+		if (i + 1 == num_periods)
+			param |= BD_WRAP;
+
+		dev_dbg(sdma->dev, "entry %d: count: %zu dma: 0x%08x %s%s\n",
+				i, period_len, dma_addr,
+				param & BD_WRAP ? "wrap" : "",
+				param & BD_INTR ? " intr" : "");
+
+		bd->mode.status = param;
+		dma_addr += period_len;
+	}
+
+	sdmac->num_bd = num_periods;
+	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
+
+	return &sdmac->desc;
+err_out:
+	sdmac->status = DMA_ERROR;
+	return NULL;
+}
+
+/* dmaengine control hook: terminate, or apply a dma_slave_config from @arg */
+static int sdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		unsigned long arg)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct dma_slave_config *dmaengine_cfg = (void *)arg;
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		sdma_disable_channel(sdmac);
+		return 0;
+	case DMA_SLAVE_CONFIG:
+		if (dmaengine_cfg->direction == DMA_FROM_DEVICE) {
+			sdmac->per_address = dmaengine_cfg->src_addr;
+			sdmac->watermark_level = dmaengine_cfg->src_maxburst;
+			sdmac->word_size = dmaengine_cfg->src_addr_width;
+		} else {
+			sdmac->per_address = dmaengine_cfg->dst_addr;
+			sdmac->watermark_level = dmaengine_cfg->dst_maxburst;
+			sdmac->word_size = dmaengine_cfg->dst_addr_width;
+		}
+		sdmac->direction = dmaengine_cfg->direction; /* picks the script in sdma_load_context() */
+		return sdma_config_channel(sdmac);
+	default:
+		return -ENOSYS;
+	}
+}
+
+/* tx_status hook: classify @cookie against the last completed/used cookies */
+static enum dma_status sdma_tx_status(struct dma_chan *chan,
+					    dma_cookie_t cookie,
+					    struct dma_tx_state *txstate)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	dma_cookie_t used = chan->cookie;
+	enum dma_status state;
+
+	state = dma_async_is_complete(cookie, sdmac->last_completed, used);
+	/* the residue is always reported as 0 */
+	dma_set_tx_state(txstate, sdmac->last_completed, used, 0);
+
+	return state;
+}
+
+static void sdma_issue_pending(struct dma_chan *chan)
+{
+	/*
+	 * Nothing to do: the single descriptor is started in sdma_tx_submit()
+	 */
+}
+
+/*
+ * One-time engine bring-up: allocate CCBs and context, clear event enables and
+ * priorities, set up command channel 0 and download the RAM scripts.
+ */
+static int __init sdma_init(struct sdma_engine *sdma,
+		void *ram_code, int ram_code_size)
+{
+	const size_t ccb_size = MAX_DMA_CHANNELS * sizeof(*sdma->channel_control);
+	int i, ret;
+	dma_addr_t ccb_phys;
+
+	switch (sdma->version) {
+	case 1:
+		sdma->num_events = 32;
+		break;
+	case 2:
+		sdma->num_events = 48;
+		break;
+	default:
+		dev_err(sdma->dev, "Unknown version %d. aborting\n", sdma->version);
+		return -ENODEV;
+	}
+
+	clk_enable(sdma->clk);
+
+	/* Be sure SDMA has not started yet */
+	__raw_writel(0, sdma->regs + SDMA_H_C0PTR);
+
+	sdma->channel_control = dma_alloc_coherent(NULL,
+			ccb_size + sizeof(struct sdma_context_data),
+			&ccb_phys, GFP_KERNEL);
+	if (!sdma->channel_control) {
+		ret = -ENOMEM;
+		goto err_dma_alloc;
+	}
+
+	sdma->context = (void *)sdma->channel_control + ccb_size;
+	sdma->context_phys = ccb_phys + ccb_size;
+
+	/* Zero-out the CCB structures array just allocated */
+	memset(sdma->channel_control, 0, ccb_size);
+
+	/* disable all channels */
+	for (i = 0; i < sdma->num_events; i++)
+		__raw_writel(0, sdma->regs + chnenbl_ofs(sdma, i));
+
+	/* All channels have priority 0 */
+	for (i = 0; i < MAX_DMA_CHANNELS; i++)
+		__raw_writel(0, sdma->regs + SDMA_CHNPRI_0 + i * 4);
+
+	ret = sdma_request_channel(&sdma->channel[0]);
+	if (ret)
+		goto err_request_channel;
+
+	sdma_config_ownership(&sdma->channel[0], false, true, false);
+
+	/* Set Command Channel (Channel Zero) */
+	__raw_writel(0x4050, sdma->regs + SDMA_CHN0ADDR);
+
+	/* Set bits of CONFIG register but with static context switching */
+	/* FIXME: Check whether to set ACR bit depending on clock ratios */
+	__raw_writel(0, sdma->regs + SDMA_H_CONFIG);
+	__raw_writel(ccb_phys, sdma->regs + SDMA_H_C0PTR);
+
+	/* download the RAM image for SDMA */
+	sdma_load_script(sdma, ram_code, ram_code_size,
+			sdma->script_addrs->ram_code_start_addr);
+
+	/* Set bits of CONFIG register with given context switching mode */
+	__raw_writel(SDMA_H_CONFIG_CSM, sdma->regs + SDMA_H_CONFIG);
+
+	/* Initializes channel's priorities */
+	sdma_set_channel_priority(&sdma->channel[0], 7);
+
+	clk_disable(sdma->clk);
+	return 0;
+
+err_request_channel:
+	/* channel 0 could not be set up: give the CCB/context block back */
+	dma_free_coherent(NULL, ccb_size + sizeof(struct sdma_context_data),
+			sdma->channel_control, ccb_phys);
+err_dma_alloc:
+	clk_disable(sdma->clk);
+	dev_err(sdma->dev, "initialisation failed with %d\n", ret);
+	return ret;
+}
+
+/*
+ * Probe the SDMA engine: claim the register window, clock and interrupt, load
+ * the firmware image, run sdma_init() and register the dmaengine device.
+ * Returns 0 on success or a negative errno on failure.
+ */
+static int __init sdma_probe(struct platform_device *pdev)
+{
+	int ret, irq, i;
+	const struct firmware *fw;
+	const struct sdma_firmware_header *header;
+	const struct sdma_script_start_addrs *addr;
+	unsigned short *ram_code;
+	struct resource *iores;
+	struct sdma_platform_data *pdata = pdev->dev.platform_data;
+	char *fwname;
+	dma_cap_mask_t mask;
+	struct sdma_engine *sdma;
+
+	sdma = kzalloc(sizeof(*sdma), GFP_KERNEL);
+	if (!sdma)
+		return -ENOMEM;
+
+	sdma->dev = &pdev->dev;
+
+	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	irq = platform_get_irq(pdev, 0);
+	if (!iores || irq < 0 || !pdata) {
+		ret = -EINVAL;
+		goto err_irq;
+	}
+
+	if (!request_mem_region(iores->start, resource_size(iores), pdev->name)) {
+		ret = -EBUSY;
+		goto err_request_region;
+	}
+
+	sdma->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(sdma->clk)) {
+		ret = PTR_ERR(sdma->clk);
+		goto err_clk;
+	}
+
+	sdma->regs = ioremap(iores->start, resource_size(iores));
+	if (!sdma->regs) {
+		ret = -ENOMEM;
+		goto err_ioremap;
+	}
+
+	ret = request_irq(irq, sdma_int_handler, 0, "sdma", sdma);
+	if (ret)
+		goto err_request_irq;
+
+	fwname = kasprintf(GFP_KERNEL, "sdma-%s-to%d.bin",
+			pdata->cpu_name, pdata->to_version);
+	if (!fwname) {
+		ret = -ENOMEM;
+		goto err_cputype;
+	}
+
+	ret = request_firmware(&fw, fwname, &pdev->dev);
+	if (ret) {
+		dev_err(&pdev->dev, "request firmware \"%s\" failed with %d\n",
+				fwname, ret);
+		kfree(fwname);
+		goto err_cputype;
+	}
+	kfree(fwname);
+
+	/* Every image sanity check below fails with -EINVAL. */
+	ret = -EINVAL;
+	if (fw->size < sizeof(*header))
+		goto err_firmware;
+
+	header = (struct sdma_firmware_header *)fw->data;
+	if (header->magic != SDMA_FIRMWARE_MAGIC)
+		goto err_firmware;
+	if (header->ram_code_start + header->ram_code_size > fw->size)
+		goto err_firmware;
+
+	addr = (void *)header + header->script_addrs_start;
+	ram_code = (void *)header + header->ram_code_start;
+	sdma->script_addrs = kmalloc(sizeof(*addr), GFP_KERNEL);
+	if (!sdma->script_addrs) {
+		ret = -ENOMEM;
+		goto err_firmware;
+	}
+	memcpy(sdma->script_addrs, addr, sizeof(*addr));
+
+	sdma->version = pdata->sdma_version;
+	INIT_LIST_HEAD(&sdma->dma_device.channels);
+	/* Initialize channel parameters */
+	for (i = 0; i < MAX_DMA_CHANNELS; i++) {
+		struct sdma_channel *sdmac = &sdma->channel[i];
+
+		sdmac->sdma = sdma;
+		spin_lock_init(&sdmac->lock);
+		dma_cap_set(DMA_SLAVE, sdma->dma_device.cap_mask);
+		dma_cap_set(DMA_CYCLIC, sdma->dma_device.cap_mask);
+		sdmac->chan.device = &sdma->dma_device;
+		sdmac->chan.chan_id = i;
+		sdmac->channel = i;
+
+		/* Add the channel to the DMAC list */
+		list_add_tail(&sdmac->chan.device_node, &sdma->dma_device.channels);
+	}
+
+	ret = sdma_init(sdma, ram_code, header->ram_code_size);
+	if (ret)
+		goto err_init;
+
+	sdma->dma_device.dev = &pdev->dev;
+	sdma->dma_device.device_alloc_chan_resources = sdma_alloc_chan_resources;
+	sdma->dma_device.device_free_chan_resources = sdma_free_chan_resources;
+	sdma->dma_device.device_tx_status = sdma_tx_status;
+	sdma->dma_device.device_prep_slave_sg = sdma_prep_slave_sg;
+	sdma->dma_device.device_prep_dma_cyclic = sdma_prep_dma_cyclic;
+	sdma->dma_device.device_control = sdma_control;
+	sdma->dma_device.device_issue_pending = sdma_issue_pending;
+
+	ret = dma_async_device_register(&sdma->dma_device);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to register\n");
+		goto err_init;
+	}
+
+	dev_info(&pdev->dev, "initialized (firmware %d.%d)\n",
+			header->version_major, header->version_minor);
+
+	/* request channel 0. This is an internal control channel
+	 * to the SDMA engine and not available to clients.
+	 */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	dma_request_channel(mask, NULL, NULL);
+	release_firmware(fw);
+
+	return 0;
+
+err_init:
+	kfree(sdma->script_addrs);
+err_firmware:
+	release_firmware(fw);
+err_cputype:
+	free_irq(irq, sdma);
+err_request_irq:
+	iounmap(sdma->regs);
+err_ioremap:
+	clk_put(sdma->clk);
+err_clk:
+	release_mem_region(iores->start, resource_size(iores));
+err_request_region:
+err_irq:
+	kfree(sdma);
+	return ret;
+}
+
+static int __exit sdma_remove(struct platform_device *pdev)
+{
+	return -EBUSY;	/* removal is always refused; nothing is torn down */
+}
+
+static struct platform_driver sdma_driver = {
+	.driver		= {
+		.name	= "imx-sdma",
+	},
+	.remove		= __exit_p(sdma_remove),	/* no .probe member is set */
+};
+
+static int __init sdma_module_init(void)
+{
+	return platform_driver_probe(&sdma_driver, sdma_probe);	/* probe fn passed here */
+}
+subsys_initcall(sdma_module_init);
+
+MODULE_AUTHOR("Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>");
+MODULE_DESCRIPTION("i.MX SDMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 17e2600..3f76cd9 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -1,11 +1,8 @@
 /*
- * driver/dma/ste_dma40.c
- *
- * Copyright (C) ST-Ericsson 2007-2010
+ * Copyright (C) ST-Ericsson SA 2007-2010
+ * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
  * License terms: GNU General Public License (GPL) version 2
- * Author: Per Friden <per.friden@stericsson.com>
- * Author: Jonas Aaberg <jonas.aberg@stericsson.com>
- *
  */
 
 #include <linux/kernel.h>
@@ -14,6 +11,7 @@
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/err.h>
 
 #include <plat/ste_dma40.h>
 
@@ -32,6 +30,11 @@
 
 /* Hardware requirement on LCLA alignment */
 #define LCLA_ALIGNMENT 0x40000
+
+/* Max number of links per event group */
+#define D40_LCLA_LINK_PER_EVENT_GRP 128
+#define D40_LCLA_END D40_LCLA_LINK_PER_EVENT_GRP
+
 /* Attempts before giving up to trying to get pages that are aligned */
 #define MAX_LCLA_ALLOC_ATTEMPTS 256
 
@@ -41,7 +44,7 @@
 #define D40_ALLOC_LOG_FREE	0
 
 /* Hardware designer of the block */
-#define D40_PERIPHID2_DESIGNER 0x8
+#define D40_HW_DESIGNER 0x8
 
 /**
  * enum 40_command - The different commands and/or statuses.
@@ -84,18 +87,17 @@
  * @lli_log: Same as above but for logical channels.
  * @lli_pool: The pool with two entries pre-allocated.
  * @lli_len: Number of llis of current descriptor.
- * @lli_count: Number of transfered llis.
- * @lli_tx_len: Max number of LLIs per transfer, there can be
- * many transfer for one descriptor.
+ * @lli_current: Number of transferred llis.
+ * @lcla_alloc: Number of LCLA entries allocated.
  * @txd: DMA engine struct. Used for among other things for communication
  * during a transfer.
  * @node: List entry.
- * @dir: The transfer direction of this job.
  * @is_in_client_list: true if the client owns this descriptor.
+ * @is_hw_linked: true if this job has been linked in hardware to the
+ * previous one, so it continues automatically after it.
  *
  * This descriptor is used for both logical and physical transfers.
  */
-
 struct d40_desc {
 	/* LLI physical */
 	struct d40_phy_lli_bidir	 lli_phy;
@@ -104,14 +106,14 @@
 
 	struct d40_lli_pool		 lli_pool;
 	int				 lli_len;
-	int				 lli_count;
-	u32				 lli_tx_len;
+	int				 lli_current;
+	int				 lcla_alloc;
 
 	struct dma_async_tx_descriptor	 txd;
 	struct list_head		 node;
 
-	enum dma_data_direction		 dir;
 	bool				 is_in_client_list;
+	bool				 is_hw_linked;
 };
 
 /**
@@ -123,17 +125,14 @@
  * @pages: The number of pages needed for all physical channels.
  * Only used later for clean-up on error
  * @lock: Lock to protect the content in this struct.
- * @alloc_map: Bitmap mapping between physical channel and LCLA entries.
- * @num_blocks: The number of entries of alloc_map. Equals to the
- * number of physical channels.
+ * @alloc_map: Map recording which job owns each LCLA entry.
  */
 struct d40_lcla_pool {
 	void		*base;
 	void		*base_unaligned;
 	int		 pages;
 	spinlock_t	 lock;
-	u32		*alloc_map;
-	int		 num_blocks;
+	struct d40_desc	**alloc_map;
 };
 
 /**
@@ -146,9 +145,7 @@
  * this physical channel. Can also be free or physically allocated.
  * @allocated_dst: Same as for src but is dst.
  * allocated_dst and allocated_src uses the D40_ALLOC* defines as well as
- * event line number. Both allocated_src and allocated_dst can not be
- * allocated to a physical channel, since the interrupt handler has then
- * no way of figure out which one the interrupt belongs to.
+ * event line number.
  */
 struct d40_phy_res {
 	spinlock_t lock;
@@ -206,7 +203,6 @@
 	u32				 src_def_cfg;
 	u32				 dst_def_cfg;
 	struct d40_def_lcsp		 log_def;
-	struct d40_lcla_elem		 lcla;
 	struct d40_log_lli_full		*lcpa;
 	/* Runtime reconfiguration */
 	dma_addr_t			runtime_addr;
@@ -234,7 +230,6 @@
  * @dma_both: dma_device channels that can do both memcpy and slave transfers.
  * @dma_slave: dma_device channels that can do only do slave transfers.
  * @dma_memcpy: dma_device channels that can do only do memcpy transfers.
- * @phy_chans: Room for all possible physical channels in system.
  * @log_chans: Room for all possible logical channels in system.
  * @lookup_log_chans: Used to map interrupt number to logical channel. Points
  * to log_chans entries.
@@ -340,9 +335,6 @@
 					      align);
 		d40d->lli_phy.dst = PTR_ALIGN(d40d->lli_phy.src + lli_len,
 					      align);
-
-		d40d->lli_phy.src_addr = virt_to_phys(d40d->lli_phy.src);
-		d40d->lli_phy.dst_addr = virt_to_phys(d40d->lli_phy.dst);
 	}
 
 	return 0;
@@ -357,22 +349,67 @@
 	d40d->lli_log.dst = NULL;
 	d40d->lli_phy.src = NULL;
 	d40d->lli_phy.dst = NULL;
-	d40d->lli_phy.src_addr = 0;
-	d40d->lli_phy.dst_addr = 0;
 }
 
-static dma_cookie_t d40_assign_cookie(struct d40_chan *d40c,
-				      struct d40_desc *desc)
+static int d40_lcla_alloc_one(struct d40_chan *d40c,
+			      struct d40_desc *d40d)
 {
-	dma_cookie_t cookie = d40c->chan.cookie;
+	unsigned long flags;
+	int i;
+	int ret = -EINVAL;
+	int p;
 
-	if (++cookie < 0)
-		cookie = 1;
+	spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
 
-	d40c->chan.cookie = cookie;
-	desc->txd.cookie = cookie;
+	p = d40c->phy_chan->num * D40_LCLA_LINK_PER_EVENT_GRP;
 
-	return cookie;
+	/*
+	 * Each slot covers a src/dst pair, so only half of the group is
+	 * scanned.  Index 0 is skipped because zero is the end marker.
+	 */
+	for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) {
+		if (!d40c->base->lcla_pool.alloc_map[p + i]) {
+			d40c->base->lcla_pool.alloc_map[p + i] = d40d;
+			d40d->lcla_alloc++;
+			ret = i;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
+
+	return ret;
+}
+
+/* Free the LCLA slots owned by d40d; 0 when its count drops to zero. */
+static int d40_lcla_free_all(struct d40_chan *d40c,
+			     struct d40_desc *d40d)
+{
+	struct d40_lcla_pool *pool = &d40c->base->lcla_pool;
+	struct d40_desc **slot;
+	unsigned long flags;
+	int ret = -EINVAL;
+	int i;
+
+	if (d40c->log_num == D40_PHY_CHAN)
+		return 0;	/* nothing is released for a physical channel */
+
+	spin_lock_irqsave(&pool->lock, flags);
+	/* Slots of this physical channel's group; index 0 is not scanned. */
+	slot = &pool->alloc_map[d40c->phy_chan->num *
+				D40_LCLA_LINK_PER_EVENT_GRP];
+	for (i = 1; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) {
+		if (slot[i] != d40d)
+			continue;
+		slot[i] = NULL;
+		if (--d40d->lcla_alloc == 0) {
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	return ret;
 }
 
 static void d40_desc_remove(struct d40_desc *d40d)
@@ -382,28 +419,35 @@
 
 static struct d40_desc *d40_desc_get(struct d40_chan *d40c)
 {
-	struct d40_desc *d;
-	struct d40_desc *_d;
+	struct d40_desc *desc = NULL;
 
 	if (!list_empty(&d40c->client)) {
+		struct d40_desc *d;
+		struct d40_desc *_d;
+
 		list_for_each_entry_safe(d, _d, &d40c->client, node)
 			if (async_tx_test_ack(&d->txd)) {
 				d40_pool_lli_free(d);
 				d40_desc_remove(d);
+				desc = d;
+				memset(desc, 0, sizeof(*desc));
 				break;
 			}
-	} else {
-		d = kmem_cache_alloc(d40c->base->desc_slab, GFP_NOWAIT);
-		if (d != NULL) {
-			memset(d, 0, sizeof(struct d40_desc));
-			INIT_LIST_HEAD(&d->node);
-		}
 	}
-	return d;
+
+	if (!desc)
+		desc = kmem_cache_zalloc(d40c->base->desc_slab, GFP_NOWAIT);
+
+	if (desc)
+		INIT_LIST_HEAD(&desc->node);
+
+	return desc;
 }
 
 static void d40_desc_free(struct d40_chan *d40c, struct d40_desc *d40d)
 {
+
+	d40_lcla_free_all(d40c, d40d);
 	kmem_cache_free(d40c->base->desc_slab, d40d);
 }
 
@@ -412,6 +456,59 @@
 	list_add_tail(&desc->node, &d40c->active);
 }
 
+/*
+ * Load the next part of a job.  A physical channel takes the whole LLI chain;
+ * a logical channel takes one LLI in LCPA plus as many as LCLA entries can be
+ * allocated for.  lli_current is advanced past every LLI that was loaded.
+ */
+static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d)
+{
+	int curr_lcla = -EINVAL, next_lcla;
+
+	if (d40c->log_num == D40_PHY_CHAN) {
+		d40_phy_lli_write(d40c->base->virtbase,
+				  d40c->phy_chan->num,
+				  d40d->lli_phy.dst,
+				  d40d->lli_phy.src);
+		d40d->lli_current = d40d->lli_len;
+		return;
+	}
+
+	if ((d40d->lli_len - d40d->lli_current) > 1)
+		curr_lcla = d40_lcla_alloc_one(d40c, d40d);
+
+	d40_log_lli_lcpa_write(d40c->lcpa,
+			       &d40d->lli_log.dst[d40d->lli_current],
+			       &d40d->lli_log.src[d40d->lli_current],
+			       curr_lcla);
+	d40d->lli_current++;
+
+	/*
+	 * Stop when there is no LCLA entry to fill (curr_lcla < 0); otherwise
+	 * a failed allocation would be used as an index into the LCLA area.
+	 */
+	for (; curr_lcla >= 0 && d40d->lli_current < d40d->lli_len;
+	     d40d->lli_current++) {
+		struct d40_log_lli *lcla;
+
+		if (d40d->lli_current + 1 < d40d->lli_len)
+			next_lcla = d40_lcla_alloc_one(d40c, d40d);
+		else
+			next_lcla = -EINVAL;
+
+		lcla = d40c->base->lcla_pool.base +
+			d40c->phy_chan->num * 1024 + 8 * curr_lcla * 2;
+		d40_log_lli_lcla_write(lcla,
+				       &d40d->lli_log.dst[d40d->lli_current],
+				       &d40d->lli_log.src[d40d->lli_current],
+				       next_lcla);
+		(void) dma_map_single(d40c->base->dev, lcla,
+				      2 * sizeof(struct d40_log_lli),
+				      DMA_TO_DEVICE);
+		curr_lcla = next_lcla;
+	}
+}
+
 static struct d40_desc *d40_first_active_get(struct d40_chan *d40c)
 {
 	struct d40_desc *d;
@@ -443,68 +540,26 @@
 	return d;
 }
 
-/* Support functions for logical channels */
-
-static int d40_lcla_id_get(struct d40_chan *d40c)
+static struct d40_desc *d40_last_queued(struct d40_chan *d40c)
 {
-	int src_id = 0;
-	int dst_id = 0;
-	struct d40_log_lli *lcla_lidx_base =
-		d40c->base->lcla_pool.base + d40c->phy_chan->num * 1024;
-	int i;
-	int lli_per_log = d40c->base->plat_data->llis_per_log;
-	unsigned long flags;
+	struct d40_desc *d;
 
-	if (d40c->lcla.src_id >= 0 && d40c->lcla.dst_id >= 0)
-		return 0;
-
-	if (d40c->base->lcla_pool.num_blocks > 32)
-		return -EINVAL;
-
-	spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
-
-	for (i = 0; i < d40c->base->lcla_pool.num_blocks; i++) {
-		if (!(d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] &
-		      (0x1 << i))) {
-			d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] |=
-				(0x1 << i);
+	if (list_empty(&d40c->queue))
+		return NULL;
+	list_for_each_entry(d, &d40c->queue, node)
+		if (list_is_last(&d->node, &d40c->queue))
 			break;
-		}
-	}
-	src_id = i;
-	if (src_id >= d40c->base->lcla_pool.num_blocks)
-		goto err;
-
-	for (; i < d40c->base->lcla_pool.num_blocks; i++) {
-		if (!(d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] &
-		      (0x1 << i))) {
-			d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] |=
-				(0x1 << i);
-			break;
-		}
-	}
-
-	dst_id = i;
-	if (dst_id == src_id)
-		goto err;
-
-	d40c->lcla.src_id = src_id;
-	d40c->lcla.dst_id = dst_id;
-	d40c->lcla.dst = lcla_lidx_base + dst_id * lli_per_log + 1;
-	d40c->lcla.src = lcla_lidx_base + src_id * lli_per_log + 1;
-
-	spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
-	return 0;
-err:
-	spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
-	return -EINVAL;
+	return d;
 }
 
+/* Support functions for logical channels */
+
 
 static int d40_channel_execute_command(struct d40_chan *d40c,
 				       enum d40_command command)
 {
-	int status, i;
+	u32 status;
+	int i;
 	void __iomem *active_reg;
 	int ret = 0;
 	unsigned long flags;
@@ -567,35 +622,19 @@
 static void d40_term_all(struct d40_chan *d40c)
 {
 	struct d40_desc *d40d;
-	unsigned long flags;
 
 	/* Release active descriptors */
 	while ((d40d = d40_first_active_get(d40c))) {
 		d40_desc_remove(d40d);
-
-		/* Return desc to free-list */
 		d40_desc_free(d40c, d40d);
 	}
 
 	/* Release queued descriptors waiting for transfer */
 	while ((d40d = d40_first_queued(d40c))) {
 		d40_desc_remove(d40d);
-
-		/* Return desc to free-list */
 		d40_desc_free(d40c, d40d);
 	}
 
-	spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
-
-	d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] &=
-		(~(0x1 << d40c->lcla.dst_id));
-	d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] &=
-		(~(0x1 << d40c->lcla.src_id));
-
-	d40c->lcla.src_id = -1;
-	d40c->lcla.dst_id = -1;
-
-	spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
 
 	d40c->pending_tx = 0;
 	d40c->busy = false;
@@ -640,45 +679,22 @@
 
 static u32 d40_chan_has_events(struct d40_chan *d40c)
 {
-	u32 val = 0;
+	u32 val;
 
-	/* If SSLNK or SDLNK is zero all events are disabled */
-	if ((d40c->dma_cfg.dir ==  STEDMA40_PERIPH_TO_MEM) ||
-	    (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH))
-		val = readl(d40c->base->virtbase + D40_DREG_PCBASE +
-			    d40c->phy_chan->num * D40_DREG_PCDELTA +
-			    D40_CHAN_REG_SSLNK);
+	val = readl(d40c->base->virtbase + D40_DREG_PCBASE +
+		    d40c->phy_chan->num * D40_DREG_PCDELTA +
+		    D40_CHAN_REG_SSLNK);
 
-	if (d40c->dma_cfg.dir !=  STEDMA40_PERIPH_TO_MEM)
-		val = readl(d40c->base->virtbase + D40_DREG_PCBASE +
-			    d40c->phy_chan->num * D40_DREG_PCDELTA +
-			    D40_CHAN_REG_SDLNK);
+	val |= readl(d40c->base->virtbase + D40_DREG_PCBASE +
+		     d40c->phy_chan->num * D40_DREG_PCDELTA +
+		     D40_CHAN_REG_SDLNK);
 	return val;
 }
 
-static void d40_config_enable_lidx(struct d40_chan *d40c)
-{
-	/* Set LIDX for lcla */
-	writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) &
-	       D40_SREG_ELEM_LOG_LIDX_MASK,
-	       d40c->base->virtbase + D40_DREG_PCBASE +
-	       d40c->phy_chan->num * D40_DREG_PCDELTA + D40_CHAN_REG_SDELT);
-
-	writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) &
-	       D40_SREG_ELEM_LOG_LIDX_MASK,
-	       d40c->base->virtbase + D40_DREG_PCBASE +
-	       d40c->phy_chan->num * D40_DREG_PCDELTA + D40_CHAN_REG_SSELT);
-}
-
-static int d40_config_write(struct d40_chan *d40c)
+static void d40_config_write(struct d40_chan *d40c)
 {
 	u32 addr_base;
 	u32 var;
-	int res;
-
-	res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
-	if (res)
-		return res;
 
 	/* Odd addresses are even addresses + 4 */
 	addr_base = (d40c->phy_chan->num % 2) * 4;
@@ -704,41 +720,181 @@
 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
 		       D40_CHAN_REG_SDCFG);
 
-		d40_config_enable_lidx(d40c);
+		/* Set LIDX for lcla */
+		writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) &
+		       D40_SREG_ELEM_LOG_LIDX_MASK,
+		       d40c->base->virtbase + D40_DREG_PCBASE +
+		       d40c->phy_chan->num * D40_DREG_PCDELTA +
+		       D40_CHAN_REG_SDELT);
+
+		writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) &
+		       D40_SREG_ELEM_LOG_LIDX_MASK,
+		       d40c->base->virtbase + D40_DREG_PCBASE +
+		       d40c->phy_chan->num * D40_DREG_PCDELTA +
+		       D40_CHAN_REG_SSELT);
+
 	}
+}
+
+static u32 d40_residue(struct d40_chan *d40c)
+{
+	u32 num_elt;
+
+	if (d40c->log_num != D40_PHY_CHAN)	/* logical: ECNT field of lcsp2 in LCPA */
+		num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK)
+			>> D40_MEM_LCSP2_ECNT_POS;
+	else	/* physical: ECNT field of the SDELT register */
+		num_elt = (readl(d40c->base->virtbase + D40_DREG_PCBASE +
+				 d40c->phy_chan->num * D40_DREG_PCDELTA +
+				 D40_CHAN_REG_SDELT) &
+			   D40_SREG_ELEM_PHY_ECNT_MASK) >>
+			D40_SREG_ELEM_PHY_ECNT_POS;
+	return num_elt * (1 << d40c->dma_cfg.dst_info.data_width);	/* presumably bytes -- TODO confirm */
+}
+
+static bool d40_tx_is_linked(struct d40_chan *d40c)
+{
+	bool is_link;
+
+	if (d40c->log_num != D40_PHY_CHAN)	/* logical: DLOS bits of lcsp3 in LCPA */
+		is_link = readl(&d40c->lcpa->lcsp3) &  D40_MEM_LCSP3_DLOS_MASK;
+	else	/* physical: link bits of the SDLNK register */
+		is_link = readl(d40c->base->virtbase + D40_DREG_PCBASE +
+				d40c->phy_chan->num * D40_DREG_PCDELTA +
+				D40_CHAN_REG_SDLNK) &
+			D40_SREG_LNK_PHYS_LNK_MASK;
+	return is_link;	/* true when any masked bit is set */
+}
+
+static int d40_pause(struct dma_chan *chan)
+{
+	struct d40_chan *d40c =
+		container_of(chan, struct d40_chan, chan);
+	int res = 0;
+	unsigned long flags;
+
+	if (!d40c->busy)	/* busy is read before d40c->lock is taken */
+		return 0;
+
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
+	if (res == 0) {
+		if (d40c->log_num != D40_PHY_CHAN) {
+			d40_config_set_event(d40c, false);
+			/* Resume the other logical channels if any */
+			if (d40_chan_has_events(d40c))
+				res = d40_channel_execute_command(d40c,
+								  D40_DMA_RUN);
+		}
+	}
+
+	spin_unlock_irqrestore(&d40c->lock, flags);
 	return res;
 }
 
-static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d)
+static int d40_resume(struct dma_chan *chan)
 {
-	if (d40d->lli_phy.dst && d40d->lli_phy.src) {
-		d40_phy_lli_write(d40c->base->virtbase,
-				  d40c->phy_chan->num,
-				  d40d->lli_phy.dst,
-				  d40d->lli_phy.src);
-	} else if (d40d->lli_log.dst && d40d->lli_log.src) {
-		struct d40_log_lli *src = d40d->lli_log.src;
-		struct d40_log_lli *dst = d40d->lli_log.dst;
-		int s;
+	struct d40_chan *d40c =
+		container_of(chan, struct d40_chan, chan);
+	int res = 0;
+	unsigned long flags;
 
-		src += d40d->lli_count;
-		dst += d40d->lli_count;
-		s = d40_log_lli_write(d40c->lcpa,
-				      d40c->lcla.src, d40c->lcla.dst,
-				      dst, src,
-				      d40c->base->plat_data->llis_per_log);
+	if (!d40c->busy)
+		return 0;
 
-		/* If s equals to zero, the job is not linked */
-		if (s > 0) {
-			(void) dma_map_single(d40c->base->dev, d40c->lcla.src,
-					      s * sizeof(struct d40_log_lli),
-					      DMA_TO_DEVICE);
-			(void) dma_map_single(d40c->base->dev, d40c->lcla.dst,
-					      s * sizeof(struct d40_log_lli),
-					      DMA_TO_DEVICE);
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	if (d40c->base->rev == 0)
+		if (d40c->log_num != D40_PHY_CHAN) {
+			res = d40_channel_execute_command(d40c,
+							  D40_DMA_SUSPEND_REQ);
+			goto no_suspend;
 		}
+
+	/* If bytes left to transfer or linked tx resume job */
+	if (d40_residue(d40c) || d40_tx_is_linked(d40c)) {
+
+		if (d40c->log_num != D40_PHY_CHAN)
+			d40_config_set_event(d40c, true);
+
+		res = d40_channel_execute_command(d40c, D40_DMA_RUN);
 	}
-	d40d->lli_count += d40d->lli_tx_len;
+
+no_suspend:
+	spin_unlock_irqrestore(&d40c->lock, flags);
+	return res;
+}
+
+static void d40_tx_submit_log(struct d40_chan *d40c, struct d40_desc *d40d)
+{
+	/* TODO: not implemented; this stub never sets d40d->is_hw_linked */
+}
+
+static void d40_tx_submit_phy(struct d40_chan *d40c, struct d40_desc *d40d)
+{
+	struct d40_desc *d40d_prev = NULL;
+	int i;
+	u32 val;
+
+	if (!list_empty(&d40c->queue))
+		d40d_prev = d40_last_queued(d40c);
+	else if (!list_empty(&d40c->active))
+		d40d_prev = d40_first_active_get(d40c);
+
+	if (!d40d_prev)
+		return;	/* no earlier job to link this one to */
+
+	/* Try to join this job with d40d_prev: read the current src link */
+	val = readl(d40c->base->virtbase + D40_DREG_PCBASE +
+		    d40c->phy_chan->num * D40_DREG_PCDELTA +
+		    D40_CHAN_REG_SSLNK);
+
+	/* Find the LLI of d40d_prev whose reg_lnk matches the register */
+	for (i = 0; i < d40d_prev->lli_len; i++)
+		if (val == d40d_prev->lli_phy.src[i].reg_lnk)
+			break;
+	/* NOTE(review): LOG_ECNT_POS is used on this physical path -- confirm */
+	val = readl(d40c->base->virtbase + D40_DREG_PCBASE +
+		    d40c->phy_chan->num * D40_DREG_PCDELTA +
+		    D40_CHAN_REG_SSELT) >> D40_SREG_ELEM_LOG_ECNT_POS;
+
+	if (i == (d40d_prev->lli_len - 1) && val > 0) {
+		/* Register matches the last LLI and val > 0: relink registers */
+		writel(virt_to_phys(d40d->lli_phy.src),
+		       d40c->base->virtbase + D40_DREG_PCBASE +
+		       d40c->phy_chan->num * D40_DREG_PCDELTA +
+		       D40_CHAN_REG_SSLNK);
+		writel(virt_to_phys(d40d->lli_phy.dst),
+		       d40c->base->virtbase + D40_DREG_PCBASE +
+		       d40c->phy_chan->num * D40_DREG_PCDELTA +
+		       D40_CHAN_REG_SDLNK);
+
+		d40d->is_hw_linked = true;
+
+	} else if (i < d40d_prev->lli_len) {
+		(void) dma_unmap_single(d40c->base->dev,
+					virt_to_phys(d40d_prev->lli_phy.src),
+					d40d_prev->lli_pool.size,
+					DMA_TO_DEVICE);
+
+		/* Keep the non-link bits of the last LLI, replace its link */
+		val = d40d_prev->lli_phy.src[d40d_prev->lli_len - 1].reg_lnk &
+			~D40_SREG_LNK_PHYS_LNK_MASK;
+		d40d_prev->lli_phy.src[d40d_prev->lli_len - 1].reg_lnk =
+			val | virt_to_phys(d40d->lli_phy.src);
+
+		val = d40d_prev->lli_phy.dst[d40d_prev->lli_len - 1].reg_lnk &
+			~D40_SREG_LNK_PHYS_LNK_MASK;
+		d40d_prev->lli_phy.dst[d40d_prev->lli_len - 1].reg_lnk =
+			val | virt_to_phys(d40d->lli_phy.dst);
+
+		(void) dma_map_single(d40c->base->dev,
+				      d40d_prev->lli_phy.src,
+				      d40d_prev->lli_pool.size,
+				      DMA_TO_DEVICE);
+		d40d->is_hw_linked = true;
+	}
 }
 
 static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx)
@@ -749,14 +905,28 @@
 	struct d40_desc *d40d = container_of(tx, struct d40_desc, txd);
 	unsigned long flags;
 
+	(void) d40_pause(&d40c->chan);
+
 	spin_lock_irqsave(&d40c->lock, flags);
 
-	tx->cookie = d40_assign_cookie(d40c, d40d);
+	d40c->chan.cookie++;
+
+	if (d40c->chan.cookie < 0)
+		d40c->chan.cookie = 1;
+
+	d40d->txd.cookie = d40c->chan.cookie;
+
+	if (d40c->log_num == D40_PHY_CHAN)
+		d40_tx_submit_phy(d40c, d40d);
+	else
+		d40_tx_submit_log(d40c, d40d);
 
 	d40_desc_queue(d40c, d40d);
 
 	spin_unlock_irqrestore(&d40c->lock, flags);
 
+	(void) d40_resume(&d40c->chan);
+
 	return tx->cookie;
 }
 
@@ -796,14 +966,21 @@
 		/* Add to active queue */
 		d40_desc_submit(d40c, d40d);
 
-		/* Initiate DMA job */
-		d40_desc_load(d40c, d40d);
+		/*
+		 * If this job is already linked in hw,
+		 * do not submit it.
+		 */
 
-		/* Start dma job */
-		err = d40_start(d40c);
+		if (!d40d->is_hw_linked) {
+			/* Initiate DMA job */
+			d40_desc_load(d40c, d40d);
 
-		if (err)
-			return NULL;
+			/* Start dma job */
+			err = d40_start(d40c);
+
+			if (err)
+				return NULL;
+		}
 	}
 
 	return d40d;
@@ -814,17 +991,15 @@
 {
 	struct d40_desc *d40d;
 
-	if (!d40c->phy_chan)
-		return;
-
 	/* Get first active entry from list */
 	d40d = d40_first_active_get(d40c);
 
 	if (d40d == NULL)
 		return;
 
-	if (d40d->lli_count < d40d->lli_len) {
+	d40_lcla_free_all(d40c, d40d);
 
+	if (d40d->lli_current < d40d->lli_len) {
 		d40_desc_load(d40c, d40d);
 		/* Start dma job */
 		(void) d40_start(d40c);
@@ -842,7 +1017,7 @@
 static void dma_tasklet(unsigned long data)
 {
 	struct d40_chan *d40c = (struct d40_chan *) data;
-	struct d40_desc *d40d_fin;
+	struct d40_desc *d40d;
 	unsigned long flags;
 	dma_async_tx_callback callback;
 	void *callback_param;
@@ -850,12 +1025,12 @@
 	spin_lock_irqsave(&d40c->lock, flags);
 
 	/* Get first active entry from list */
-	d40d_fin = d40_first_active_get(d40c);
+	d40d = d40_first_active_get(d40c);
 
-	if (d40d_fin == NULL)
+	if (d40d == NULL)
 		goto err;
 
-	d40c->completed = d40d_fin->txd.cookie;
+	d40c->completed = d40d->txd.cookie;
 
 	/*
 	 * If terminating a channel pending_tx is set to zero.
@@ -867,19 +1042,19 @@
 	}
 
 	/* Callback to client */
-	callback = d40d_fin->txd.callback;
-	callback_param = d40d_fin->txd.callback_param;
+	callback = d40d->txd.callback;
+	callback_param = d40d->txd.callback_param;
 
-	if (async_tx_test_ack(&d40d_fin->txd)) {
-		d40_pool_lli_free(d40d_fin);
-		d40_desc_remove(d40d_fin);
-		/* Return desc to free-list */
-		d40_desc_free(d40c, d40d_fin);
+	if (async_tx_test_ack(&d40d->txd)) {
+		d40_pool_lli_free(d40d);
+		d40_desc_remove(d40d);
+		d40_desc_free(d40c, d40d);
 	} else {
-		if (!d40d_fin->is_in_client_list) {
-			d40_desc_remove(d40d_fin);
-			list_add_tail(&d40d_fin->node, &d40c->client);
-			d40d_fin->is_in_client_list = true;
+		if (!d40d->is_in_client_list) {
+			d40_desc_remove(d40d);
+			d40_lcla_free_all(d40c, d40d);
+			list_add_tail(&d40d->node, &d40c->client);
+			d40d->is_in_client_list = true;
 		}
 	}
 
@@ -890,7 +1065,7 @@
 
 	spin_unlock_irqrestore(&d40c->lock, flags);
 
-	if (callback)
+	if (callback && (d40d->txd.flags & DMA_PREP_INTERRUPT))
 		callback(callback_param);
 
 	return;
@@ -919,7 +1094,6 @@
 
 	int i;
 	u32 regs[ARRAY_SIZE(il)];
-	u32 tmp;
 	u32 idx;
 	u32 row;
 	long chan = -1;
@@ -946,9 +1120,7 @@
 		idx = chan & (BITS_PER_LONG - 1);
 
 		/* ACK interrupt */
-		tmp = readl(base->virtbase + il[row].clr);
-		tmp |= 1 << idx;
-		writel(tmp, base->virtbase + il[row].clr);
+		writel(1 << idx, base->virtbase + il[row].clr);
 
 		if (il[row].offset == D40_PHY_CHAN)
 			d40c = base->lookup_phy_chans[idx];
@@ -971,7 +1143,6 @@
 	return IRQ_HANDLED;
 }
 
-
 static int d40_validate_conf(struct d40_chan *d40c,
 			     struct stedma40_chan_cfg *conf)
 {
@@ -981,14 +1152,39 @@
 	bool is_log = (conf->channel_type & STEDMA40_CHANNEL_IN_OPER_MODE)
 		== STEDMA40_CHANNEL_IN_LOG_MODE;
 
-	if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH &&
+	if (!conf->dir) {
+		dev_err(&d40c->chan.dev->device, "[%s] Invalid direction.\n",
+			__func__);
+		res = -EINVAL;
+	}
+
+	if (conf->dst_dev_type != STEDMA40_DEV_DST_MEMORY &&
+	    d40c->base->plat_data->dev_tx[conf->dst_dev_type] == 0 &&
+	    d40c->runtime_addr == 0) {
+
+		dev_err(&d40c->chan.dev->device,
+			"[%s] Invalid TX channel address (%d)\n",
+			__func__, conf->dst_dev_type);
+		res = -EINVAL;
+	}
+
+	if (conf->src_dev_type != STEDMA40_DEV_SRC_MEMORY &&
+	    d40c->base->plat_data->dev_rx[conf->src_dev_type] == 0 &&
+	    d40c->runtime_addr == 0) {
+		dev_err(&d40c->chan.dev->device,
+			"[%s] Invalid RX channel address (%d)\n",
+			__func__, conf->src_dev_type);
+		res = -EINVAL;
+	}
+
+	if (conf->dir == STEDMA40_MEM_TO_PERIPH &&
 	    dst_event_group == STEDMA40_DEV_DST_MEMORY) {
 		dev_err(&d40c->chan.dev->device, "[%s] Invalid dst\n",
 			__func__);
 		res = -EINVAL;
 	}
 
-	if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM &&
+	if (conf->dir == STEDMA40_PERIPH_TO_MEM &&
 	    src_event_group == STEDMA40_DEV_SRC_MEMORY) {
 		dev_err(&d40c->chan.dev->device, "[%s] Invalid src\n",
 			__func__);
@@ -1082,7 +1278,6 @@
 
 	spin_lock_irqsave(&phy->lock, flags);
 	if (!log_event_line) {
-		/* Physical interrupts are masked per physical full channel */
 		phy->allocated_dst = D40_ALLOC_FREE;
 		phy->allocated_src = D40_ALLOC_FREE;
 		is_free = true;
@@ -1251,7 +1446,6 @@
 		list_for_each_entry_safe(d, _d, &d40c->client, node) {
 			d40_pool_lli_free(d);
 			d40_desc_remove(d);
-			/* Return desc to free-list */
 			d40_desc_free(d40c, d);
 		}
 
@@ -1331,30 +1525,6 @@
 	return 0;
 }
 
-static int d40_pause(struct dma_chan *chan)
-{
-	struct d40_chan *d40c =
-		container_of(chan, struct d40_chan, chan);
-	int res;
-	unsigned long flags;
-
-	spin_lock_irqsave(&d40c->lock, flags);
-
-	res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
-	if (res == 0) {
-		if (d40c->log_num != D40_PHY_CHAN) {
-			d40_config_set_event(d40c, false);
-			/* Resume the other logical channels if any */
-			if (d40_chan_has_events(d40c))
-				res = d40_channel_execute_command(d40c,
-								  D40_DMA_RUN);
-		}
-	}
-
-	spin_unlock_irqrestore(&d40c->lock, flags);
-	return res;
-}
-
 static bool d40_is_paused(struct d40_chan *d40c)
 {
 	bool is_paused = false;
@@ -1381,16 +1551,22 @@
 	}
 
 	if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH ||
-	    d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM)
+	    d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
 		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type);
-	else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM)
+		status = readl(d40c->base->virtbase + D40_DREG_PCBASE +
+			       d40c->phy_chan->num * D40_DREG_PCDELTA +
+			       D40_CHAN_REG_SDLNK);
+	} else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) {
 		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type);
-	else {
+		status = readl(d40c->base->virtbase + D40_DREG_PCBASE +
+			       d40c->phy_chan->num * D40_DREG_PCDELTA +
+			       D40_CHAN_REG_SSLNK);
+	} else {
 		dev_err(&d40c->chan.dev->device,
 			"[%s] Unknown direction\n", __func__);
 		goto _exit;
 	}
-	status = d40_chan_has_events(d40c);
+
 	status = (status & D40_EVENTLINE_MASK(event)) >>
 		D40_EVENTLINE_POS(event);
 
@@ -1403,64 +1579,6 @@
 }
 
 
-static bool d40_tx_is_linked(struct d40_chan *d40c)
-{
-	bool is_link;
-
-	if (d40c->log_num != D40_PHY_CHAN)
-		is_link = readl(&d40c->lcpa->lcsp3) &  D40_MEM_LCSP3_DLOS_MASK;
-	else
-		is_link = readl(d40c->base->virtbase + D40_DREG_PCBASE +
-				d40c->phy_chan->num * D40_DREG_PCDELTA +
-				D40_CHAN_REG_SDLNK) &
-			D40_SREG_LNK_PHYS_LNK_MASK;
-	return is_link;
-}
-
-static u32 d40_residue(struct d40_chan *d40c)
-{
-	u32 num_elt;
-
-	if (d40c->log_num != D40_PHY_CHAN)
-		num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK)
-			>> D40_MEM_LCSP2_ECNT_POS;
-	else
-		num_elt = (readl(d40c->base->virtbase + D40_DREG_PCBASE +
-				 d40c->phy_chan->num * D40_DREG_PCDELTA +
-				 D40_CHAN_REG_SDELT) &
-			   D40_SREG_ELEM_PHY_ECNT_MASK) >>
-			D40_SREG_ELEM_PHY_ECNT_POS;
-	return num_elt * (1 << d40c->dma_cfg.dst_info.data_width);
-}
-
-static int d40_resume(struct dma_chan *chan)
-{
-	struct d40_chan *d40c =
-		container_of(chan, struct d40_chan, chan);
-	int res = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&d40c->lock, flags);
-
-	if (d40c->base->rev == 0)
-		if (d40c->log_num != D40_PHY_CHAN) {
-			res = d40_channel_execute_command(d40c,
-							  D40_DMA_SUSPEND_REQ);
-			goto no_suspend;
-		}
-
-	/* If bytes left to transfer or linked tx resume job */
-	if (d40_residue(d40c) || d40_tx_is_linked(d40c)) {
-		if (d40c->log_num != D40_PHY_CHAN)
-			d40_config_set_event(d40c, true);
-		res = d40_channel_execute_command(d40c, D40_DMA_RUN);
-	}
-
-no_suspend:
-	spin_unlock_irqrestore(&d40c->lock, flags);
-	return res;
-}
-
 static u32 stedma40_residue(struct dma_chan *chan)
 {
 	struct d40_chan *d40c =
@@ -1475,51 +1593,6 @@
 	return bytes_left;
 }
 
-/* Public DMA functions in addition to the DMA engine framework */
-
-int stedma40_set_psize(struct dma_chan *chan,
-		       int src_psize,
-		       int dst_psize)
-{
-	struct d40_chan *d40c =
-		container_of(chan, struct d40_chan, chan);
-	unsigned long flags;
-
-	spin_lock_irqsave(&d40c->lock, flags);
-
-	if (d40c->log_num != D40_PHY_CHAN) {
-		d40c->log_def.lcsp1 &= ~D40_MEM_LCSP1_SCFG_PSIZE_MASK;
-		d40c->log_def.lcsp3 &= ~D40_MEM_LCSP1_SCFG_PSIZE_MASK;
-		d40c->log_def.lcsp1 |= src_psize <<
-			D40_MEM_LCSP1_SCFG_PSIZE_POS;
-		d40c->log_def.lcsp3 |= dst_psize <<
-			D40_MEM_LCSP1_SCFG_PSIZE_POS;
-		goto out;
-	}
-
-	if (src_psize == STEDMA40_PSIZE_PHY_1)
-		d40c->src_def_cfg &= ~(1 << D40_SREG_CFG_PHY_PEN_POS);
-	else {
-		d40c->src_def_cfg |= 1 << D40_SREG_CFG_PHY_PEN_POS;
-		d40c->src_def_cfg &= ~(STEDMA40_PSIZE_PHY_16 <<
-				       D40_SREG_CFG_PSIZE_POS);
-		d40c->src_def_cfg |= src_psize << D40_SREG_CFG_PSIZE_POS;
-	}
-
-	if (dst_psize == STEDMA40_PSIZE_PHY_1)
-		d40c->dst_def_cfg &= ~(1 << D40_SREG_CFG_PHY_PEN_POS);
-	else {
-		d40c->dst_def_cfg |= 1 << D40_SREG_CFG_PHY_PEN_POS;
-		d40c->dst_def_cfg &= ~(STEDMA40_PSIZE_PHY_16 <<
-				       D40_SREG_CFG_PSIZE_POS);
-		d40c->dst_def_cfg |= dst_psize << D40_SREG_CFG_PSIZE_POS;
-	}
-out:
-	spin_unlock_irqrestore(&d40c->lock, flags);
-	return 0;
-}
-EXPORT_SYMBOL(stedma40_set_psize);
-
 struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan,
 						   struct scatterlist *sgl_dst,
 						   struct scatterlist *sgl_src,
@@ -1545,21 +1618,10 @@
 		goto err;
 
 	d40d->lli_len = sgl_len;
-	d40d->lli_tx_len = d40d->lli_len;
+	d40d->lli_current = 0;
 	d40d->txd.flags = dma_flags;
 
 	if (d40c->log_num != D40_PHY_CHAN) {
-		if (d40d->lli_len > d40c->base->plat_data->llis_per_log)
-			d40d->lli_tx_len = d40c->base->plat_data->llis_per_log;
-
-		if (sgl_len > 1)
-			/*
-			 * Check if there is space available in lcla. If not,
-			 * split list into 1-length and run only in lcpa
-			 * space.
-			 */
-			if (d40_lcla_id_get(d40c) != 0)
-				d40d->lli_tx_len = 1;
 
 		if (d40_pool_lli_alloc(d40d, sgl_len, true) < 0) {
 			dev_err(&d40c->chan.dev->device,
@@ -1567,27 +1629,17 @@
 			goto err;
 		}
 
-		(void) d40_log_sg_to_lli(d40c->lcla.src_id,
-					 sgl_src,
+		(void) d40_log_sg_to_lli(sgl_src,
 					 sgl_len,
 					 d40d->lli_log.src,
 					 d40c->log_def.lcsp1,
-					 d40c->dma_cfg.src_info.data_width,
-					 dma_flags & DMA_PREP_INTERRUPT,
-					 d40d->lli_tx_len,
-					 d40c->base->plat_data->llis_per_log);
+					 d40c->dma_cfg.src_info.data_width);
 
-		(void) d40_log_sg_to_lli(d40c->lcla.dst_id,
-					 sgl_dst,
+		(void) d40_log_sg_to_lli(sgl_dst,
 					 sgl_len,
 					 d40d->lli_log.dst,
 					 d40c->log_def.lcsp3,
-					 d40c->dma_cfg.dst_info.data_width,
-					 dma_flags & DMA_PREP_INTERRUPT,
-					 d40d->lli_tx_len,
-					 d40c->base->plat_data->llis_per_log);
-
-
+					 d40c->dma_cfg.dst_info.data_width);
 	} else {
 		if (d40_pool_lli_alloc(d40d, sgl_len, false) < 0) {
 			dev_err(&d40c->chan.dev->device,
@@ -1599,11 +1651,10 @@
 					sgl_len,
 					0,
 					d40d->lli_phy.src,
-					d40d->lli_phy.src_addr,
+					virt_to_phys(d40d->lli_phy.src),
 					d40c->src_def_cfg,
 					d40c->dma_cfg.src_info.data_width,
-					d40c->dma_cfg.src_info.psize,
-					true);
+					d40c->dma_cfg.src_info.psize);
 
 		if (res < 0)
 			goto err;
@@ -1612,11 +1663,10 @@
 					sgl_len,
 					0,
 					d40d->lli_phy.dst,
-					d40d->lli_phy.dst_addr,
+					virt_to_phys(d40d->lli_phy.dst),
 					d40c->dst_def_cfg,
 					d40c->dma_cfg.dst_info.data_width,
-					d40c->dma_cfg.dst_info.psize,
-					true);
+					d40c->dma_cfg.dst_info.psize);
 
 		if (res < 0)
 			goto err;
@@ -1633,6 +1683,8 @@
 
 	return &d40d->txd;
 err:
+	if (d40d)
+		d40_desc_free(d40c, d40d);
 	spin_unlock_irqrestore(&d40c->lock, flags);
 	return NULL;
 }
@@ -1673,6 +1725,7 @@
 	 * use default configuration (memcpy)
 	 */
 	if (d40c->dma_cfg.channel_type == 0) {
+
 		err = d40_config_memcpy(d40c);
 		if (err) {
 			dev_err(&d40c->chan.dev->device,
@@ -1712,14 +1765,8 @@
 	 * resource is free. In case of multiple logical channels
 	 * on the same physical resource, only the first write is necessary.
 	 */
-	if (is_free_phy) {
-		err = d40_config_write(d40c);
-		if (err) {
-			dev_err(&d40c->chan.dev->device,
-				"[%s] Failed to configure channel\n",
-				__func__);
-		}
-	}
+	if (is_free_phy)
+		d40_config_write(d40c);
 fail:
 	spin_unlock_irqrestore(&d40c->lock, flags);
 	return err;
@@ -1790,23 +1837,21 @@
 			goto err;
 		}
 		d40d->lli_len = 1;
-		d40d->lli_tx_len = 1;
+		d40d->lli_current = 0;
 
 		d40_log_fill_lli(d40d->lli_log.src,
 				 src,
 				 size,
-				 0,
 				 d40c->log_def.lcsp1,
 				 d40c->dma_cfg.src_info.data_width,
-				 false, true);
+				 true);
 
 		d40_log_fill_lli(d40d->lli_log.dst,
 				 dst,
 				 size,
-				 0,
 				 d40c->log_def.lcsp3,
 				 d40c->dma_cfg.dst_info.data_width,
-				 true, true);
+				 true);
 
 	} else {
 
@@ -1851,12 +1896,25 @@
 err_fill_lli:
 	dev_err(&d40c->chan.dev->device,
 		"[%s] Failed filling in PHY LLI\n", __func__);
-	d40_pool_lli_free(d40d);
 err:
+	if (d40d)
+		d40_desc_free(d40c, d40d);
 	spin_unlock_irqrestore(&d40c->lock, flags);
 	return NULL;
 }
 
+static struct dma_async_tx_descriptor *
+d40_prep_sg(struct dma_chan *chan,
+	    struct scatterlist *dst_sg, unsigned int dst_nents,
+	    struct scatterlist *src_sg, unsigned int src_nents,
+	    unsigned long dma_flags)
+{
+	if (dst_nents != src_nents)
+		return NULL;
+
+	return stedma40_memcpy_sg(chan, dst_sg, src_sg, dst_nents, dma_flags);
+}
+
 static int d40_prep_slave_sg_log(struct d40_desc *d40d,
 				 struct d40_chan *d40c,
 				 struct scatterlist *sgl,
@@ -1874,19 +1932,7 @@
 	}
 
 	d40d->lli_len = sg_len;
-	if (d40d->lli_len <= d40c->base->plat_data->llis_per_log)
-		d40d->lli_tx_len = d40d->lli_len;
-	else
-		d40d->lli_tx_len = d40c->base->plat_data->llis_per_log;
-
-	if (sg_len > 1)
-		/*
-		 * Check if there is space available in lcla.
-		 * If not, split list into 1-length and run only
-		 * in lcpa space.
-		 */
-		if (d40_lcla_id_get(d40c) != 0)
-			d40d->lli_tx_len = 1;
+	d40d->lli_current = 0;
 
 	if (direction == DMA_FROM_DEVICE)
 		if (d40c->runtime_addr)
@@ -1902,16 +1948,13 @@
 	else
 		return -EINVAL;
 
-	total_size = d40_log_sg_to_dev(&d40c->lcla,
-				       sgl, sg_len,
+	total_size = d40_log_sg_to_dev(sgl, sg_len,
 				       &d40d->lli_log,
 				       &d40c->log_def,
 				       d40c->dma_cfg.src_info.data_width,
 				       d40c->dma_cfg.dst_info.data_width,
 				       direction,
-				       dma_flags & DMA_PREP_INTERRUPT,
-				       dev_addr, d40d->lli_tx_len,
-				       d40c->base->plat_data->llis_per_log);
+				       dev_addr);
 
 	if (total_size < 0)
 		return -EINVAL;
@@ -1937,7 +1980,7 @@
 	}
 
 	d40d->lli_len = sgl_len;
-	d40d->lli_tx_len = sgl_len;
+	d40d->lli_current = 0;
 
 	if (direction == DMA_FROM_DEVICE) {
 		dst_dev_addr = 0;
@@ -1958,11 +2001,10 @@
 				sgl_len,
 				src_dev_addr,
 				d40d->lli_phy.src,
-				d40d->lli_phy.src_addr,
+				virt_to_phys(d40d->lli_phy.src),
 				d40c->src_def_cfg,
 				d40c->dma_cfg.src_info.data_width,
-				d40c->dma_cfg.src_info.psize,
-				true);
+				d40c->dma_cfg.src_info.psize);
 	if (res < 0)
 		return res;
 
@@ -1970,11 +2012,10 @@
 				sgl_len,
 				dst_dev_addr,
 				d40d->lli_phy.dst,
-				d40d->lli_phy.dst_addr,
+				virt_to_phys(d40d->lli_phy.dst),
 				d40c->dst_def_cfg,
 				d40c->dma_cfg.dst_info.data_width,
-				d40c->dma_cfg.dst_info.psize,
-				 true);
+				d40c->dma_cfg.dst_info.psize);
 	if (res < 0)
 		return res;
 
@@ -2001,17 +2042,11 @@
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (d40c->dma_cfg.pre_transfer)
-		d40c->dma_cfg.pre_transfer(chan,
-					   d40c->dma_cfg.pre_transfer_data,
-					   sg_dma_len(sgl));
-
 	spin_lock_irqsave(&d40c->lock, flags);
 	d40d = d40_desc_get(d40c);
-	spin_unlock_irqrestore(&d40c->lock, flags);
 
 	if (d40d == NULL)
-		return NULL;
+		goto err;
 
 	if (d40c->log_num != D40_PHY_CHAN)
 		err = d40_prep_slave_sg_log(d40d, d40c, sgl, sg_len,
@@ -2024,7 +2059,7 @@
 			"[%s] Failed to prepare %s slave sg job: %d\n",
 			__func__,
 			d40c->log_num != D40_PHY_CHAN ? "log" : "phy", err);
-		return NULL;
+		goto err;
 	}
 
 	d40d->txd.flags = dma_flags;
@@ -2033,7 +2068,14 @@
 
 	d40d->txd.tx_submit = d40_tx_submit;
 
+	spin_unlock_irqrestore(&d40c->lock, flags);
 	return &d40d->txd;
+
+err:
+	if (d40d)
+		d40_desc_free(d40c, d40d);
+	spin_unlock_irqrestore(&d40c->lock, flags);
+	return NULL;
 }
 
 static enum dma_status d40_tx_status(struct dma_chan *chan,
@@ -2166,14 +2208,25 @@
 		return;
 	}
 
-	if (config_maxburst >= 16)
-		psize = STEDMA40_PSIZE_LOG_16;
-	else if (config_maxburst >= 8)
-		psize = STEDMA40_PSIZE_LOG_8;
-	else if (config_maxburst >= 4)
-		psize = STEDMA40_PSIZE_LOG_4;
-	else
-		psize = STEDMA40_PSIZE_LOG_1;
+	if (d40c->log_num != D40_PHY_CHAN) {
+		if (config_maxburst >= 16)
+			psize = STEDMA40_PSIZE_LOG_16;
+		else if (config_maxburst >= 8)
+			psize = STEDMA40_PSIZE_LOG_8;
+		else if (config_maxburst >= 4)
+			psize = STEDMA40_PSIZE_LOG_4;
+		else
+			psize = STEDMA40_PSIZE_LOG_1;
+	} else {
+		if (config_maxburst >= 16)
+			psize = STEDMA40_PSIZE_PHY_16;
+		else if (config_maxburst >= 8)
+			psize = STEDMA40_PSIZE_PHY_8;
+		else if (config_maxburst >= 4)
+			psize = STEDMA40_PSIZE_PHY_4;
+		else
+			psize = STEDMA40_PSIZE_PHY_1;
+	}
 
 	/* Set up all the endpoint configs */
 	cfg->src_info.data_width = addr_width;
@@ -2185,6 +2238,13 @@
 	cfg->dst_info.endianess = STEDMA40_LITTLE_ENDIAN;
 	cfg->dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL;
 
+	/* Fill in register values */
+	if (d40c->log_num != D40_PHY_CHAN)
+		d40_log_cfg(cfg, &d40c->log_def.lcsp1, &d40c->log_def.lcsp3);
+	else
+		d40_phy_cfg(cfg, &d40c->src_def_cfg,
+			    &d40c->dst_def_cfg, false);
+
 	/* These settings will take precedence later */
 	d40c->runtime_addr = config_addr;
 	d40c->runtime_direction = config->direction;
@@ -2247,10 +2307,6 @@
 		d40c->base = base;
 		d40c->chan.device = dma;
 
-		/* Invalidate lcla element */
-		d40c->lcla.src_id = -1;
-		d40c->lcla.dst_id = -1;
-
 		spin_lock_init(&d40c->lock);
 
 		d40c->log_num = D40_PHY_CHAN;
@@ -2281,6 +2337,7 @@
 	base->dma_slave.device_alloc_chan_resources = d40_alloc_chan_resources;
 	base->dma_slave.device_free_chan_resources = d40_free_chan_resources;
 	base->dma_slave.device_prep_dma_memcpy = d40_prep_memcpy;
+	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
 	base->dma_slave.device_prep_slave_sg = d40_prep_slave_sg;
 	base->dma_slave.device_tx_status = d40_tx_status;
 	base->dma_slave.device_issue_pending = d40_issue_pending;
@@ -2301,10 +2358,12 @@
 
 	dma_cap_zero(base->dma_memcpy.cap_mask);
 	dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask);
+	dma_cap_set(DMA_SG, base->dma_memcpy.cap_mask);
 
 	base->dma_memcpy.device_alloc_chan_resources = d40_alloc_chan_resources;
 	base->dma_memcpy.device_free_chan_resources = d40_free_chan_resources;
 	base->dma_memcpy.device_prep_dma_memcpy = d40_prep_memcpy;
+	base->dma_memcpy.device_prep_dma_sg = d40_prep_sg;
 	base->dma_memcpy.device_prep_slave_sg = d40_prep_slave_sg;
 	base->dma_memcpy.device_tx_status = d40_tx_status;
 	base->dma_memcpy.device_issue_pending = d40_issue_pending;
@@ -2331,10 +2390,12 @@
 	dma_cap_zero(base->dma_both.cap_mask);
 	dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask);
 	dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask);
+	dma_cap_set(DMA_SG, base->dma_both.cap_mask);
 
 	base->dma_both.device_alloc_chan_resources = d40_alloc_chan_resources;
 	base->dma_both.device_free_chan_resources = d40_free_chan_resources;
 	base->dma_both.device_prep_dma_memcpy = d40_prep_memcpy;
+	base->dma_both.device_prep_dma_sg = d40_prep_sg;
 	base->dma_both.device_prep_slave_sg = d40_prep_slave_sg;
 	base->dma_both.device_tx_status = d40_tx_status;
 	base->dma_both.device_issue_pending = d40_issue_pending;
@@ -2387,9 +2448,11 @@
 
 	/* Mark disabled channels as occupied */
 	for (i = 0; base->plat_data->disabled_channels[i] != -1; i++) {
-			base->phy_res[i].allocated_src = D40_ALLOC_PHY;
-			base->phy_res[i].allocated_dst = D40_ALLOC_PHY;
-			num_phy_chans_avail--;
+		int chan = base->plat_data->disabled_channels[i];
+
+		base->phy_res[chan].allocated_src = D40_ALLOC_PHY;
+		base->phy_res[chan].allocated_dst = D40_ALLOC_PHY;
+		num_phy_chans_avail--;
 	}
 
 	dev_info(base->dev, "%d of %d physical DMA channels available\n",
@@ -2441,6 +2504,7 @@
 	int num_phy_chans;
 	int i;
 	u32 val;
+	u32 rev;
 
 	clk = clk_get(&pdev->dev, NULL);
 
@@ -2479,21 +2543,26 @@
 		}
 	}
 
-	/* Get silicon revision */
+	/* Get silicon revision and designer */
 	val = readl(virtbase + D40_DREG_PERIPHID2);
 
-	if ((val & 0xf) != D40_PERIPHID2_DESIGNER) {
+	if ((val & D40_DREG_PERIPHID2_DESIGNER_MASK) !=
+	    D40_HW_DESIGNER) {
 		dev_err(&pdev->dev,
 			"[%s] Unknown designer! Got %x wanted %x\n",
-			__func__, val & 0xf, D40_PERIPHID2_DESIGNER);
+			__func__, val & D40_DREG_PERIPHID2_DESIGNER_MASK,
+			D40_HW_DESIGNER);
 		goto failure;
 	}
 
+	rev = (val & D40_DREG_PERIPHID2_REV_MASK) >>
+		D40_DREG_PERIPHID2_REV_POS;
+
 	/* The number of physical channels on this HW */
 	num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4;
 
 	dev_info(&pdev->dev, "hardware revision: %d @ 0x%x\n",
-		 (val >> 4) & 0xf, res->start);
+		 rev, res->start);
 
 	plat_data = pdev->dev.platform_data;
 
@@ -2515,7 +2584,7 @@
 		goto failure;
 	}
 
-	base->rev = (val >> 4) & 0xf;
+	base->rev = rev;
 	base->clk = clk;
 	base->num_phy_chans = num_phy_chans;
 	base->num_log_chans = num_log_chans;
@@ -2549,7 +2618,10 @@
 		if (!base->lookup_log_chans)
 			goto failure;
 	}
-	base->lcla_pool.alloc_map = kzalloc(num_phy_chans * sizeof(u32),
+
+	base->lcla_pool.alloc_map = kzalloc(num_phy_chans *
+					    sizeof(struct d40_desc *) *
+					    D40_LCLA_LINK_PER_EVENT_GRP,
 					    GFP_KERNEL);
 	if (!base->lcla_pool.alloc_map)
 		goto failure;
@@ -2563,7 +2635,7 @@
 	return base;
 
 failure:
-	if (clk) {
+	if (!IS_ERR(clk)) {
 		clk_disable(clk);
 		clk_put(clk);
 	}
@@ -2700,8 +2772,10 @@
 	if (i < MAX_LCLA_ALLOC_ATTEMPTS) {
 		base->lcla_pool.base = (void *)page_list[i];
 	} else {
-		/* After many attempts, no succees with finding the correct
-		 * alignment try with allocating a big buffer */
+		/*
+		 * After many attempts and no success with finding the correct
+		 * alignment, try with allocating a big buffer.
+		 */
 		dev_warn(base->dev,
 			 "[%s] Failed to get %d pages @ 18 bit align.\n",
 			 __func__, base->lcla_pool.pages);
@@ -2794,8 +2868,6 @@
 
 	spin_lock_init(&base->lcla_pool.lock);
 
-	base->lcla_pool.num_blocks = base->num_phy_chans;
-
 	base->irq = platform_get_irq(pdev, 0);
 
 	ret = request_irq(base->irq, d40_handle_interrupt, 0, D40_NAME, base);
@@ -2823,8 +2895,9 @@
 		if (!base->lcla_pool.base_unaligned && base->lcla_pool.base)
 			free_pages((unsigned long)base->lcla_pool.base,
 				   base->lcla_pool.pages);
-		if (base->lcla_pool.base_unaligned)
-			kfree(base->lcla_pool.base_unaligned);
+
+		kfree(base->lcla_pool.base_unaligned);
+
 		if (base->phy_lcpa)
 			release_mem_region(base->phy_lcpa,
 					   base->lcpa_size);
diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c
index d937f76..86a306d 100644
--- a/drivers/dma/ste_dma40_ll.c
+++ b/drivers/dma/ste_dma40_ll.c
@@ -1,10 +1,8 @@
 /*
- * driver/dma/ste_dma40_ll.c
- *
- * Copyright (C) ST-Ericsson 2007-2010
+ * Copyright (C) ST-Ericsson SA 2007-2010
+ * Author: Per Friden <per.friden@stericsson.com> for ST-Ericsson
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
  * License terms: GNU General Public License (GPL) version 2
- * Author: Per Friden <per.friden@stericsson.com>
- * Author: Jonas Aaberg <jonas.aberg@stericsson.com>
  */
 
 #include <linux/kernel.h>
@@ -39,16 +37,13 @@
 	    cfg->dir ==  STEDMA40_PERIPH_TO_PERIPH)
 		l3 |= 1 << D40_MEM_LCSP3_DCFG_MST_POS;
 
-	l3 |= 1 << D40_MEM_LCSP3_DCFG_TIM_POS;
 	l3 |= 1 << D40_MEM_LCSP3_DCFG_EIM_POS;
 	l3 |= cfg->dst_info.psize << D40_MEM_LCSP3_DCFG_PSIZE_POS;
 	l3 |= cfg->dst_info.data_width << D40_MEM_LCSP3_DCFG_ESIZE_POS;
-	l3 |= 1 << D40_MEM_LCSP3_DTCP_POS;
 
 	l1 |= 1 << D40_MEM_LCSP1_SCFG_EIM_POS;
 	l1 |= cfg->src_info.psize << D40_MEM_LCSP1_SCFG_PSIZE_POS;
 	l1 |= cfg->src_info.data_width << D40_MEM_LCSP1_SCFG_ESIZE_POS;
-	l1 |= 1 << D40_MEM_LCSP1_STCP_POS;
 
 	*lcsp1 = l1;
 	*lcsp3 = l3;
@@ -197,8 +192,7 @@
 		      dma_addr_t lli_phys,
 		      u32 reg_cfg,
 		      u32 data_width,
-		      int psize,
-		      bool term_int)
+		      int psize)
 {
 	int total_size = 0;
 	int i;
@@ -238,7 +232,7 @@
 	}
 
 	return total_size;
- err:
+err:
 	return err;
 }
 
@@ -271,11 +265,59 @@
 
 /* DMA logical lli operations */
 
+static void d40_log_lli_link(struct d40_log_lli *lli_dst,
+			     struct d40_log_lli *lli_src,
+			     int next)
+{
+	u32 slos = 0;
+	u32 dlos = 0;
+
+	if (next != -EINVAL) {
+		slos = next * 2;
+		dlos = next * 2 + 1;
+	} else {
+		lli_dst->lcsp13 |= D40_MEM_LCSP1_SCFG_TIM_MASK;
+		lli_dst->lcsp13 |= D40_MEM_LCSP3_DTCP_MASK;
+	}
+
+	lli_src->lcsp13 = (lli_src->lcsp13 & ~D40_MEM_LCSP1_SLOS_MASK) |
+		(slos << D40_MEM_LCSP1_SLOS_POS);
+
+	lli_dst->lcsp13 = (lli_dst->lcsp13 & ~D40_MEM_LCSP1_SLOS_MASK) |
+		(dlos << D40_MEM_LCSP1_SLOS_POS);
+}
+
+void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa,
+			   struct d40_log_lli *lli_dst,
+			   struct d40_log_lli *lli_src,
+			   int next)
+{
+	d40_log_lli_link(lli_dst, lli_src, next);
+
+	writel(lli_src->lcsp02, &lcpa[0].lcsp0);
+	writel(lli_src->lcsp13, &lcpa[0].lcsp1);
+	writel(lli_dst->lcsp02, &lcpa[0].lcsp2);
+	writel(lli_dst->lcsp13, &lcpa[0].lcsp3);
+}
+
+void d40_log_lli_lcla_write(struct d40_log_lli *lcla,
+			   struct d40_log_lli *lli_dst,
+			   struct d40_log_lli *lli_src,
+			   int next)
+{
+	d40_log_lli_link(lli_dst, lli_src, next);
+
+	writel(lli_src->lcsp02, &lcla[0].lcsp02);
+	writel(lli_src->lcsp13, &lcla[0].lcsp13);
+	writel(lli_dst->lcsp02, &lcla[1].lcsp02);
+	writel(lli_dst->lcsp13, &lcla[1].lcsp13);
+}
+
 void d40_log_fill_lli(struct d40_log_lli *lli,
 		      dma_addr_t data, u32 data_size,
-		      u32 lli_next_off, u32 reg_cfg,
+		      u32 reg_cfg,
 		      u32 data_width,
-		      bool term_int, bool addr_inc)
+		      bool addr_inc)
 {
 	lli->lcsp13 = reg_cfg;
 
@@ -290,165 +332,69 @@
 	if (addr_inc)
 		lli->lcsp13 |= D40_MEM_LCSP1_SCFG_INCR_MASK;
 
-	lli->lcsp13 |= D40_MEM_LCSP3_DTCP_MASK;
-	/* If this scatter list entry is the last one, no next link */
-	lli->lcsp13 |= (lli_next_off << D40_MEM_LCSP1_SLOS_POS) &
-		D40_MEM_LCSP1_SLOS_MASK;
-
-	if (term_int)
-		lli->lcsp13 |= D40_MEM_LCSP1_SCFG_TIM_MASK;
-	else
-		lli->lcsp13 &= ~D40_MEM_LCSP1_SCFG_TIM_MASK;
 }
 
-int d40_log_sg_to_dev(struct d40_lcla_elem *lcla,
-		      struct scatterlist *sg,
+int d40_log_sg_to_dev(struct scatterlist *sg,
 		      int sg_len,
 		      struct d40_log_lli_bidir *lli,
 		      struct d40_def_lcsp *lcsp,
 		      u32 src_data_width,
 		      u32 dst_data_width,
 		      enum dma_data_direction direction,
-		      bool term_int, dma_addr_t dev_addr, int max_len,
-		      int llis_per_log)
+		      dma_addr_t dev_addr)
 {
 	int total_size = 0;
 	struct scatterlist *current_sg = sg;
 	int i;
-	u32 next_lli_off_dst = 0;
-	u32 next_lli_off_src = 0;
 
 	for_each_sg(sg, current_sg, sg_len, i) {
 		total_size += sg_dma_len(current_sg);
 
-		/*
-		 * If this scatter list entry is the last one or
-		 * max length, terminate link.
-		 */
-		if (sg_len - 1 == i || ((i+1) % max_len == 0)) {
-			next_lli_off_src = 0;
-			next_lli_off_dst = 0;
-		} else {
-			if (next_lli_off_dst == 0 &&
-			    next_lli_off_src == 0) {
-				/* The first lli will be at next_lli_off */
-				next_lli_off_dst = (lcla->dst_id *
-						    llis_per_log + 1);
-				next_lli_off_src = (lcla->src_id *
-						    llis_per_log + 1);
-			} else {
-				next_lli_off_dst++;
-				next_lli_off_src++;
-			}
-		}
-
 		if (direction == DMA_TO_DEVICE) {
 			d40_log_fill_lli(&lli->src[i],
 					 sg_phys(current_sg),
 					 sg_dma_len(current_sg),
-					 next_lli_off_src,
 					 lcsp->lcsp1, src_data_width,
-					 false,
 					 true);
 			d40_log_fill_lli(&lli->dst[i],
 					 dev_addr,
 					 sg_dma_len(current_sg),
-					 next_lli_off_dst,
 					 lcsp->lcsp3, dst_data_width,
-					 /* No next == terminal interrupt */
-					 term_int && !next_lli_off_dst,
 					 false);
 		} else {
 			d40_log_fill_lli(&lli->dst[i],
 					 sg_phys(current_sg),
 					 sg_dma_len(current_sg),
-					 next_lli_off_dst,
 					 lcsp->lcsp3, dst_data_width,
-					 /* No next == terminal interrupt */
-					 term_int && !next_lli_off_dst,
 					 true);
 			d40_log_fill_lli(&lli->src[i],
 					 dev_addr,
 					 sg_dma_len(current_sg),
-					 next_lli_off_src,
 					 lcsp->lcsp1, src_data_width,
-					 false,
 					 false);
 		}
 	}
 	return total_size;
 }
 
-int d40_log_sg_to_lli(int lcla_id,
-		      struct scatterlist *sg,
+int d40_log_sg_to_lli(struct scatterlist *sg,
 		      int sg_len,
 		      struct d40_log_lli *lli_sg,
 		      u32 lcsp13, /* src or dst*/
-		      u32 data_width,
-		      bool term_int, int max_len, int llis_per_log)
+		      u32 data_width)
 {
 	int total_size = 0;
 	struct scatterlist *current_sg = sg;
 	int i;
-	u32 next_lli_off = 0;
 
 	for_each_sg(sg, current_sg, sg_len, i) {
 		total_size += sg_dma_len(current_sg);
 
-		/*
-		 * If this scatter list entry is the last one or
-		 * max length, terminate link.
-		 */
-		if (sg_len - 1 == i || ((i+1) % max_len == 0))
-			next_lli_off = 0;
-		else {
-			if (next_lli_off == 0)
-				/* The first lli will be at next_lli_off */
-				next_lli_off = lcla_id * llis_per_log + 1;
-			else
-				next_lli_off++;
-		}
-
 		d40_log_fill_lli(&lli_sg[i],
 				 sg_phys(current_sg),
 				 sg_dma_len(current_sg),
-				 next_lli_off,
 				 lcsp13, data_width,
-				 term_int && !next_lli_off,
 				 true);
 	}
 	return total_size;
 }
-
-int d40_log_lli_write(struct d40_log_lli_full *lcpa,
-		       struct d40_log_lli *lcla_src,
-		       struct d40_log_lli *lcla_dst,
-		       struct d40_log_lli *lli_dst,
-		       struct d40_log_lli *lli_src,
-		       int llis_per_log)
-{
-	u32 slos;
-	u32 dlos;
-	int i;
-
-	writel(lli_src->lcsp02, &lcpa->lcsp0);
-	writel(lli_src->lcsp13, &lcpa->lcsp1);
-	writel(lli_dst->lcsp02, &lcpa->lcsp2);
-	writel(lli_dst->lcsp13, &lcpa->lcsp3);
-
-	slos = lli_src->lcsp13 & D40_MEM_LCSP1_SLOS_MASK;
-	dlos = lli_dst->lcsp13 & D40_MEM_LCSP3_DLOS_MASK;
-
-	for (i = 0; (i < llis_per_log) && slos && dlos; i++) {
-		writel(lli_src[i + 1].lcsp02, &lcla_src[i].lcsp02);
-		writel(lli_src[i + 1].lcsp13, &lcla_src[i].lcsp13);
-		writel(lli_dst[i + 1].lcsp02, &lcla_dst[i].lcsp02);
-		writel(lli_dst[i + 1].lcsp13, &lcla_dst[i].lcsp13);
-
-		slos = lli_src[i + 1].lcsp13 & D40_MEM_LCSP1_SLOS_MASK;
-		dlos = lli_dst[i + 1].lcsp13 & D40_MEM_LCSP3_DLOS_MASK;
-	}
-
-	return i;
-
-}
diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h
index 9c0fa2f..37f81e8 100644
--- a/drivers/dma/ste_dma40_ll.h
+++ b/drivers/dma/ste_dma40_ll.h
@@ -1,10 +1,8 @@
 /*
- * driver/dma/ste_dma40_ll.h
- *
- * Copyright (C) ST-Ericsson 2007-2010
+ * Copyright (C) ST-Ericsson SA 2007-2010
+ * Author: Per Friden <per.friden@stericsson.com> for ST-Ericsson SA
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson SA
  * License terms: GNU General Public License (GPL) version 2
- * Author: Per Friden <per.friden@stericsson.com>
- * Author: Jonas Aaberg <jonas.aberg@stericsson.com>
  */
 #ifndef STE_DMA40_LL_H
 #define STE_DMA40_LL_H
@@ -163,6 +161,9 @@
 #define D40_DREG_PERIPHID0	0xFE0
 #define D40_DREG_PERIPHID1	0xFE4
 #define D40_DREG_PERIPHID2	0xFE8
+#define D40_DREG_PERIPHID2_REV_POS 4
+#define D40_DREG_PERIPHID2_REV_MASK (0xf << D40_DREG_PERIPHID2_REV_POS)
+#define D40_DREG_PERIPHID2_DESIGNER_MASK 0xf
 #define D40_DREG_PERIPHID3	0xFEC
 #define D40_DREG_CELLID0	0xFF0
 #define D40_DREG_CELLID1	0xFF4
@@ -199,8 +200,6 @@
  *
  * @src: Register settings for src channel.
  * @dst: Register settings for dst channel.
- * @dst_addr: Physical destination address.
- * @src_addr: Physical source address.
  *
  * All DMA transfers have a source and a destination.
  */
@@ -208,8 +207,6 @@
 struct d40_phy_lli_bidir {
 	struct d40_phy_lli	*src;
 	struct d40_phy_lli	*dst;
-	dma_addr_t		 dst_addr;
-	dma_addr_t		 src_addr;
 };
 
 
@@ -271,29 +268,16 @@
 	u32 lcsp1;
 };
 
-/**
- * struct d40_lcla_elem - Info for one LCA element.
- *
- * @src_id: logical channel src id
- * @dst_id: logical channel dst id
- * @src: LCPA formated src parameters
- * @dst: LCPA formated dst parameters
- *
- */
-struct d40_lcla_elem {
-	int			src_id;
-	int			dst_id;
-	struct d40_log_lli     *src;
-	struct d40_log_lli     *dst;
-};
-
 /* Physical channels */
 
 void d40_phy_cfg(struct stedma40_chan_cfg *cfg,
-		 u32 *src_cfg, u32 *dst_cfg, bool is_log);
+		 u32 *src_cfg,
+		 u32 *dst_cfg,
+		 bool is_log);
 
 void d40_log_cfg(struct stedma40_chan_cfg *cfg,
-		 u32 *lcsp1, u32 *lcsp2);
+		 u32 *lcsp1,
+		 u32 *lcsp3);
 
 int d40_phy_sg_to_lli(struct scatterlist *sg,
 		      int sg_len,
@@ -302,8 +286,7 @@
 		      dma_addr_t lli_phys,
 		      u32 reg_cfg,
 		      u32 data_width,
-		      int psize,
-		      bool term_int);
+		      int psize);
 
 int d40_phy_fill_lli(struct d40_phy_lli *lli,
 		     dma_addr_t data,
@@ -323,35 +306,35 @@
 /* Logical channels */
 
 void d40_log_fill_lli(struct d40_log_lli *lli,
-		      dma_addr_t data, u32 data_size,
-		      u32 lli_next_off, u32 reg_cfg,
+		      dma_addr_t data,
+		      u32 data_size,
+		      u32 reg_cfg,
 		      u32 data_width,
-		      bool term_int, bool addr_inc);
+		      bool addr_inc);
 
-int d40_log_sg_to_dev(struct d40_lcla_elem *lcla,
-		      struct scatterlist *sg,
+int d40_log_sg_to_dev(struct scatterlist *sg,
 		      int sg_len,
 		      struct d40_log_lli_bidir *lli,
 		      struct d40_def_lcsp *lcsp,
 		      u32 src_data_width,
 		      u32 dst_data_width,
 		      enum dma_data_direction direction,
-		      bool term_int, dma_addr_t dev_addr, int max_len,
-		      int llis_per_log);
+		      dma_addr_t dev_addr);
 
-int d40_log_lli_write(struct d40_log_lli_full *lcpa,
-		      struct d40_log_lli *lcla_src,
-		      struct d40_log_lli *lcla_dst,
-		      struct d40_log_lli *lli_dst,
-		      struct d40_log_lli *lli_src,
-		      int llis_per_log);
-
-int d40_log_sg_to_lli(int lcla_id,
-		      struct scatterlist *sg,
+int d40_log_sg_to_lli(struct scatterlist *sg,
 		      int sg_len,
 		      struct d40_log_lli *lli_sg,
 		      u32 lcsp13, /* src or dst*/
-		      u32 data_width,
-		      bool term_int, int max_len, int llis_per_log);
+		      u32 data_width);
+
+void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa,
+			    struct d40_log_lli *lli_dst,
+			    struct d40_log_lli *lli_src,
+			    int next);
+
+void d40_log_lli_lcla_write(struct d40_log_lli *lcla,
+			    struct d40_log_lli *lli_dst,
+			    struct d40_log_lli *lli_src,
+			    int next);
 
 #endif /* STE_DMA40_LLI_H */
diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
new file mode 100644
index 0000000..521a0f8
--- /dev/null
+++ b/include/linux/amba/pl08x.h
@@ -0,0 +1,222 @@
+/*
+ * linux/amba/pl08x.h - ARM PrimeCell DMA Controller driver
+ *
+ * Copyright (C) 2005 ARM Ltd
+ * Copyright (C) 2010 ST-Ericsson SA
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * pl08x information required by platform code
+ *
+ * Please credit ARM.com
+ * Documentation: ARM DDI 0196D
+ *
+ */
+
+#ifndef AMBA_PL08X_H
+#define AMBA_PL08X_H
+
+/* We need sizes of structs from this header */
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+/**
+ * struct pl08x_channel_data - data structure to pass info between
+ * platform and PL08x driver regarding channel configuration
+ * @bus_id: name of this device channel, not just a device name since
+ * devices may have more than one channel e.g. "foo_tx"
+ * @min_signal: the minimum DMA signal number to be muxed in for this
+ * channel (for platforms supporting muxed signals). If you have
+ * static assignments, make sure this is set to the assigned signal
+ * number. PL08x has 16 possible signals, numbered 0 thru 15, so
+ * when these are not enough they often get muxed (in hardware)
+ * disabling simultaneous use of the same channel for two devices.
+ * @max_signal: the maximum DMA signal number to be muxed in for
+ * the channel. Set to the same as min_signal for
+ * devices with static assignments
+ * @muxval: a number usually used to poke into some mux register to
+ * mux in the signal to this channel
+ * @cctl: default options for the channel control register
+ * @addr: source/target address in physical memory for this DMA channel,
+ * can be the address of a FIFO register for burst requests for example.
+ * This can be left undefined if the PrimeCell API is used for configuring
+ * this.
+ * @circular_buffer: whether the buffer passed in is circular and
+ * shall simply be looped round round (like a record baby round
+ * round round round)
+ * @single: the device connected to this channel will request single
+ * DMA transfers, not bursts. (Bursts are default.)
+ */
+struct pl08x_channel_data {
+	char *bus_id;
+	int min_signal;
+	int max_signal;
+	u32 muxval;
+	u32 cctl;
+	u32 ccfg;
+	dma_addr_t addr;
+	bool circular_buffer;
+	bool single;
+};
+
+/**
+ * struct pl08x_bus_data - information of source or destination
+ * busses for a transfer
+ * @addr: current address
+ * @maxwidth: the maximum width of a transfer on this bus
+ * @buswidth: the width of this bus in bytes: 1, 2 or 4
+ * @fill_bytes: bytes required to fill to the next bus memory
+ * boundary
+ */
+struct pl08x_bus_data {
+	dma_addr_t addr;
+	u8 maxwidth;
+	u8 buswidth;
+	u32 fill_bytes;
+};
+
+/**
+ * struct pl08x_phy_chan - holder for the physical channels
+ * @id: physical index to this channel
+ * @lock: a lock to use when altering an instance of this struct
+ * @signal: the physical signal (aka channel) serving this
+ * physical channel right now
+ * @serving: the virtual channel currently being served by this
+ * physical channel
+ */
+struct pl08x_phy_chan {
+	unsigned int id;
+	void __iomem *base;
+	spinlock_t lock;
+	int signal;
+	struct pl08x_dma_chan *serving;
+	u32 csrc;
+	u32 cdst;
+	u32 clli;
+	u32 cctl;
+	u32 ccfg;
+};
+
+/**
+ * struct pl08x_txd - wrapper for struct dma_async_tx_descriptor
+ * @llis_bus: DMA memory address (physical) start for the LLIs
+ * @llis_va: virtual memory address start for the LLIs
+ */
+struct pl08x_txd {
+	struct dma_async_tx_descriptor tx;
+	struct list_head node;
+	enum dma_data_direction	direction;
+	struct pl08x_bus_data srcbus;
+	struct pl08x_bus_data dstbus;
+	int len;
+	dma_addr_t llis_bus;
+	void *llis_va;
+	struct pl08x_channel_data *cd;
+	bool active;
+	/*
+	 * Settings to be put into the physical channel when we
+	 * trigger this txd
+	 */
+	u32 csrc;
+	u32 cdst;
+	u32 clli;
+	u32 cctl;
+};
+
+/**
+ * enum pl08x_dma_chan_state - holds the PL08x specific virtual
+ * channel states
+ * @PL08X_CHAN_IDLE: the channel is idle
+ * @PL08X_CHAN_RUNNING: the channel has allocated a physical transport
+ * channel and is running a transfer on it
+ * @PL08X_CHAN_PAUSED: the channel has allocated a physical transport
+ * channel, but the transfer is currently paused
+ * @PL08X_CHAN_WAITING: the channel is waiting for a physical transport
+ * channel to become available (only pertains to memcpy channels)
+ */
+enum pl08x_dma_chan_state {
+	PL08X_CHAN_IDLE,
+	PL08X_CHAN_RUNNING,
+	PL08X_CHAN_PAUSED,
+	PL08X_CHAN_WAITING,
+};
+
+/**
+ * struct pl08x_dma_chan - this structure wraps a DMA ENGINE channel
+ * @chan: wrapped abstract channel
+ * @phychan: the physical channel utilized by this channel, if there is one
+ * @tasklet: tasklet scheduled by the IRQ to handle actual work etc
+ * @name: name of channel
+ * @cd: channel platform data
+ * @runtime_addr: address for RX/TX according to the runtime config
+ * @runtime_direction: current direction of this channel according to
+ * runtime config
+ * @lc: last completed transaction on this channel
+ * @desc_list: queued transactions pending on this channel
+ * @at: active transaction on this channel
+ * @lockflags: sometimes we let a lock last between two function calls,
+ * especially prep/submit, and then we need to store the IRQ flags
+ * in the channel state, here
+ * @lock: a lock for this channel data
+ * @host: a pointer to the host (internal use)
+ * @state: whether the channel is idle, paused, running etc
+ * @slave: whether this channel is a device (slave) or for memcpy
+ * @waiting: a TX descriptor on this channel which is waiting for
+ * a physical channel to become available
+ */
+struct pl08x_dma_chan {
+	struct dma_chan chan;
+	struct pl08x_phy_chan *phychan;
+	struct tasklet_struct tasklet;
+	char *name;
+	struct pl08x_channel_data *cd;
+	dma_addr_t runtime_addr;
+	enum dma_data_direction	runtime_direction;
+	atomic_t last_issued;
+	dma_cookie_t lc;
+	struct list_head desc_list;
+	struct pl08x_txd *at;
+	unsigned long lockflags;
+	spinlock_t lock;
+	void *host;
+	enum pl08x_dma_chan_state state;
+	bool slave;
+	struct pl08x_txd *waiting;
+};
+
+/**
+ * struct pl08x_platform_data - the platform configuration for the
+ * PL08x PrimeCells.
+ * @slave_channels: the channels defined for the different devices on the
+ * platform, all inclusive, including multiplexed channels. The available
+ * physical channels will be multiplexed around these signals as they
+ * are requested, just enumerate all possible channels.
+ * @get_signal: request a physical signal to be used for a DMA
+ * transfer immediately: if there is some multiplexing or similar blocking
+ * the use of the channel the transfer can be denied by returning
+ * less than zero, else it returns the allocated signal number
+ * @put_signal: indicate to the platform that this physical signal is not
+ * running any DMA transfer and multiplexing can be recycled
+ * @bus_bit_lli: Bit[0] of the address indicates which AHB bus master the
+ * LLI addresses are on: 0 = Master 1, 1 = Master 2.
+ */
+struct pl08x_platform_data {
+	struct pl08x_channel_data *slave_channels;
+	unsigned int num_slave_channels;
+	struct pl08x_channel_data memcpy_channel;
+	int (*get_signal)(struct pl08x_dma_chan *);
+	void (*put_signal)(struct pl08x_dma_chan *);
+};
+
+#ifdef CONFIG_AMBA_PL08X
+bool pl08x_filter_id(struct dma_chan *chan, void *chan_id);
+#else
+static inline bool pl08x_filter_id(struct dma_chan *chan, void *chan_id)
+{
+	return false;
+}
+#endif
+
+#endif	/* AMBA_PL08X_H */
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index e210649..3934ebd 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -64,13 +64,15 @@
 	DMA_PQ_VAL,
 	DMA_MEMSET,
 	DMA_INTERRUPT,
+	DMA_SG,
 	DMA_PRIVATE,
 	DMA_ASYNC_TX,
 	DMA_SLAVE,
+	DMA_CYCLIC,
 };
 
 /* last transaction type for creation of the capabilities mask */
-#define DMA_TX_TYPE_END (DMA_SLAVE + 1)
+#define DMA_TX_TYPE_END (DMA_CYCLIC + 1)
 
 
 /**
@@ -119,12 +121,15 @@
  * configuration data in statically from the platform). An additional
  * argument of struct dma_slave_config must be passed in with this
  * command.
+ * @FSLDMA_EXTERNAL_START: this command will put the Freescale DMA controller
+ * into external start mode.
  */
 enum dma_ctrl_cmd {
 	DMA_TERMINATE_ALL,
 	DMA_PAUSE,
 	DMA_RESUME,
 	DMA_SLAVE_CONFIG,
+	FSLDMA_EXTERNAL_START,
 };
 
 /**
@@ -422,6 +427,9 @@
  * @device_prep_dma_memset: prepares a memset operation
  * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
  * @device_prep_slave_sg: prepares a slave dma operation
+ * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio.
+ *	The function takes a buffer of size buf_len. The callback function will
+ *	be called after period_len bytes have been transferred.
  * @device_control: manipulate all pending operations on a channel, returns
  *	zero or error code
  * @device_tx_status: poll for transaction completion, the optional
@@ -473,11 +481,19 @@
 		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
 		struct dma_chan *chan, unsigned long flags);
+	struct dma_async_tx_descriptor *(*device_prep_dma_sg)(
+		struct dma_chan *chan,
+		struct scatterlist *dst_sg, unsigned int dst_nents,
+		struct scatterlist *src_sg, unsigned int src_nents,
+		unsigned long flags);
 
 	struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_data_direction direction,
 		unsigned long flags);
+	struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)(
+		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+		size_t period_len, enum dma_data_direction direction);
 	int (*device_control)(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		unsigned long arg);
 
@@ -487,6 +503,40 @@
 	void (*device_issue_pending)(struct dma_chan *chan);
 };
 
+static inline int dmaengine_device_control(struct dma_chan *chan,
+					   enum dma_ctrl_cmd cmd,
+					   unsigned long arg)
+{
+	return chan->device->device_control(chan, cmd, arg);
+}
+
+static inline int dmaengine_slave_config(struct dma_chan *chan,
+					  struct dma_slave_config *config)
+{
+	return dmaengine_device_control(chan, DMA_SLAVE_CONFIG,
+			(unsigned long)config);
+}
+
+static inline int dmaengine_terminate_all(struct dma_chan *chan)
+{
+	return dmaengine_device_control(chan, DMA_TERMINATE_ALL, 0);
+}
+
+static inline int dmaengine_pause(struct dma_chan *chan)
+{
+	return dmaengine_device_control(chan, DMA_PAUSE, 0);
+}
+
+static inline int dmaengine_resume(struct dma_chan *chan)
+{
+	return dmaengine_device_control(chan, DMA_RESUME, 0);
+}
+
+static inline int dmaengine_submit(struct dma_async_tx_descriptor *desc)
+{
+	return desc->tx_submit(desc);
+}
+
 static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len)
 {
 	size_t mask;