Merge branch 'topic/rcar' into for-linus
diff --git a/Documentation/dmaengine/client.txt b/Documentation/dmaengine/client.txt
index 11fb87f..9e33189 100644
--- a/Documentation/dmaengine/client.txt
+++ b/Documentation/dmaengine/client.txt
@@ -22,25 +22,14 @@
   Channel allocation is slightly different in the slave DMA context:
   client drivers typically need a channel from a particular DMA
   controller only, and in some cases even a specific channel is desired.
-   To request a channel dma_request_channel() API is used.
+   To request a channel the dma_request_chan() API is used.
 
    Interface:
-	struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
-			dma_filter_fn filter_fn,
-			void *filter_param);
-   where dma_filter_fn is defined as:
-	typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
+	struct dma_chan *dma_request_chan(struct device *dev, const char *name);
 
-   The 'filter_fn' parameter is optional, but highly recommended for
-   slave and cyclic channels as they typically need to obtain a specific
-   DMA channel.
-
-   When the optional 'filter_fn' parameter is NULL, dma_request_channel()
-   simply returns the first channel that satisfies the capability mask.
-
-   Otherwise, the 'filter_fn' routine will be called once for each free
-   channel which has a capability in 'mask'.  'filter_fn' is expected to
-   return 'true' when the desired DMA channel is found.
+   This returns the 'name' DMA channel associated with the 'dev' device. The
+   association is done via DT, ACPI or a board-file based dma_slave_map
+   matching table.
 
    A channel allocated via this interface is exclusive to the caller,
    until dma_release_channel() is called.
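As an illustration, here is a minimal sketch of a client requesting and
releasing a channel this way; the "tx" channel name and the error handling
shown are assumptions for the example, not part of this patch:

	struct dma_chan *chan;

	chan = dma_request_chan(dev, "tx");
	if (IS_ERR(chan))
		return PTR_ERR(chan); /* may be -EPROBE_DEFER; propagate it */

	/* ... configure and use the channel ... */

	dma_release_channel(chan);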
@@ -128,7 +117,7 @@
 	transaction.
 
 	For cyclic DMA, a callback function may wish to terminate the
-	DMA via dmaengine_terminate_all().
+	DMA via dmaengine_terminate_async().
 
 	Therefore, it is important that DMA engine drivers drop any
 	locks before calling the callback function which may cause a
@@ -166,12 +155,29 @@
 
 Further APIs:
 
-1. int dmaengine_terminate_all(struct dma_chan *chan)
+1. int dmaengine_terminate_sync(struct dma_chan *chan)
+   int dmaengine_terminate_async(struct dma_chan *chan)
+   int dmaengine_terminate_all(struct dma_chan *chan) /* DEPRECATED */
 
    This causes all activity for the DMA channel to be stopped, and may
    discard data in the DMA FIFO which hasn't been fully transferred.
    No callback functions will be called for any incomplete transfers.
 
+   Two variants of this function are available.
+
+   dmaengine_terminate_async() might not wait until the DMA has been fully
+   stopped or until any running complete callbacks have finished. But it is
+   possible to call dmaengine_terminate_async() from atomic context or from
+   within a complete callback. dmaengine_synchronize() must be called before it
+   is safe to free the memory accessed by the DMA transfer or free resources
+   accessed from within the complete callback.
+
+   dmaengine_terminate_sync() will wait for the transfer and any running
+   complete callbacks to finish before it returns. But the function must not be
+   called from atomic context or from within a complete callback.
+
+   dmaengine_terminate_all() is deprecated and should not be used in new code.
+
 2. int dmaengine_pause(struct dma_chan *chan)
 
    This pauses activity on the DMA channel without data loss.
@@ -197,3 +203,20 @@
 	a running DMA channel.  It is recommended that DMA engine users
 	pause or stop (via dmaengine_terminate_all()) the channel before
 	using this API.
+
+5. void dmaengine_synchronize(struct dma_chan *chan)
+
+  Synchronize the termination of the DMA channel to the current context.
+
+  This function should be used after dmaengine_terminate_async() to synchronize
+  the termination of the DMA channel to the current context. The function will
+  wait for the transfer and any running complete callbacks to finish before it
+  returns.
+
+  If dmaengine_terminate_async() is used to stop the DMA channel, this function
+  must be called before it is safe to free memory accessed by previously
+  submitted descriptors or to free any resources accessed within the complete
+  callback of previously submitted descriptors.
+
+  The behavior of this function is undefined if dma_async_issue_pending() has
+  been called between dmaengine_terminate_async() and this function.
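A minimal sketch of the intended usage of the asynchronous variant; the
surrounding driver context (chan, dev, buf, dma_handle, size) is hypothetical:

	/* May be called from atomic context or from a complete callback */
	dmaengine_terminate_async(chan);

	...

	/* Later, from process context, before the DMA buffer is freed */
	dmaengine_synchronize(chan);
	dma_free_coherent(dev, size, buf, dma_handle);

When neither atomic context nor a complete callback is involved, the two steps
can be replaced by a single dmaengine_terminate_sync() call.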
diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt
index 67d4ce4..122b7f4 100644
--- a/Documentation/dmaengine/provider.txt
+++ b/Documentation/dmaengine/provider.txt
@@ -327,8 +327,24 @@
 
    * device_terminate_all
      - Aborts all the pending and ongoing transfers on the channel
-     - This command should operate synchronously on the channel,
-       terminating right away all the channels
+     - For aborted transfers the complete callback should not be called
+     - Can be called from atomic context or from within a complete
+       callback of a descriptor. Must not sleep. Drivers must be able
+       to handle this correctly.
+     - Termination may be asynchronous. The driver does not have to
+       wait until the currently active transfer has completely stopped.
+       See device_synchronize.
+
+   * device_synchronize
+     - Must synchronize the termination of a channel to the current
+       context.
+     - Must make sure that memory for previously submitted
+       descriptors is no longer accessed by the DMA controller.
+     - Must make sure that all complete callbacks for previously
+       submitted descriptors have finished running and none are
+       scheduled to run.
+     - May sleep.
+
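For providers built on the virt-dma helpers, wiring this up can be as simple
as the sketch below; it mirrors the dma-axi-dmac change in this series, and
the foo_* names are illustrative:

	static void foo_dma_synchronize(struct dma_chan *c)
	{
		struct foo_dma_chan *chan = to_foo_dma_chan(c);

		/*
		 * vchan_synchronize() kills the vchan tasklet, so no
		 * completion callback is still scheduled or running.
		 */
		vchan_synchronize(&chan->vchan);
	}

	...
	dma_dev->device_terminate_all = foo_dma_terminate_all;
	dma_dev->device_synchronize = foo_dma_synchronize;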
 
 Misc notes (stuff that should be documented, but don't really know
 where to put them)
diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c
index 819dfda..7afbb28 100644
--- a/drivers/dca/dca-core.c
+++ b/drivers/dca/dca-core.c
@@ -321,7 +321,8 @@
  * @ops - pointer to struct of dca operation function pointers
  * @priv_size - size of extra mem to be added for provider's needs
  */
-struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size)
+struct dca_provider *alloc_dca_provider(const struct dca_ops *ops,
+					int priv_size)
 {
 	struct dca_provider *dca;
 	int alloc_size;
diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c
index 16d0daa..eed6bda 100644
--- a/drivers/dma/acpi-dma.c
+++ b/drivers/dma/acpi-dma.c
@@ -15,6 +15,7 @@
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
@@ -72,7 +73,9 @@
 	si = (const struct acpi_csrt_shared_info *)&grp[1];
 
 	/* Match device by MMIO and IRQ */
-	if (si->mmio_base_low != mem || si->gsi_interrupt != irq)
+	if (si->mmio_base_low != lower_32_bits(mem) ||
+	    si->mmio_base_high != upper_32_bits(mem) ||
+	    si->gsi_interrupt != irq)
 		return 0;
 
 	dev_dbg(&adev->dev, "matches with %.4s%04X (rev %u)\n",
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index b5e132d..d0ae461 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -863,8 +863,12 @@
 	 * access. Hopefully we can access DDR through both ports (at least on
 	 * SAMA5D4x), so we can use the same interface for source and dest,
 	 * that solves the fact we don't know the direction.
+	 * ERRATA: Even if it is useless for memory transfers, the PERID must
+	 * not match that of another channel. Otherwise it could lead to
+	 * spurious flag status.
 	 */
-	u32			chan_cc = AT_XDMAC_CC_DIF(0)
+	u32			chan_cc = AT_XDMAC_CC_PERID(0x3f)
+					| AT_XDMAC_CC_DIF(0)
 					| AT_XDMAC_CC_SIF(0)
 					| AT_XDMAC_CC_MBSIZE_SIXTEEN
 					| AT_XDMAC_CC_TYPE_MEM_TRAN;
@@ -1039,8 +1043,12 @@
 	 * access DDR through both ports (at least on SAMA5D4x), so we can use
 	 * the same interface for source and dest, that solves the fact we
 	 * don't know the direction.
+	 * ERRATA: Even if it is useless for memory transfers, the PERID must
+	 * not match that of another channel. Otherwise it could lead to
+	 * spurious flag status.
 	 */
-	u32			chan_cc = AT_XDMAC_CC_DAM_INCREMENTED_AM
+	u32			chan_cc = AT_XDMAC_CC_PERID(0x3f)
+					| AT_XDMAC_CC_DAM_INCREMENTED_AM
 					| AT_XDMAC_CC_SAM_INCREMENTED_AM
 					| AT_XDMAC_CC_DIF(0)
 					| AT_XDMAC_CC_SIF(0)
@@ -1140,8 +1148,12 @@
 	 * access. Hopefully we can access DDR through both ports (at least on
 	 * SAMA5D4x), so we can use the same interface for source and dest,
 	 * that solves the fact we don't know the direction.
+	 * ERRATA: Even if it is useless for memory transfers, the PERID must
+	 * not match that of another channel. Otherwise it could lead to
+	 * spurious flag status.
 	 */
-	u32			chan_cc = AT_XDMAC_CC_DAM_UBS_AM
+	u32			chan_cc = AT_XDMAC_CC_PERID(0x3f)
+					| AT_XDMAC_CC_DAM_UBS_AM
 					| AT_XDMAC_CC_SAM_INCREMENTED_AM
 					| AT_XDMAC_CC_DIF(0)
 					| AT_XDMAC_CC_SIF(0)
@@ -1995,8 +2007,6 @@
 	dma_async_device_unregister(&atxdmac->dma);
 	clk_disable_unprepare(atxdmac->clk);
 
-	synchronize_irq(atxdmac->irq);
-
 	free_irq(atxdmac->irq, atxdmac->dma.dev);
 
 	for (i = 0; i < atxdmac->dma.chancnt; i++) {
diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
index 5b2395e..c346809 100644
--- a/drivers/dma/dma-axi-dmac.c
+++ b/drivers/dma/dma-axi-dmac.c
@@ -307,6 +307,13 @@
 	return 0;
 }
 
+static void axi_dmac_synchronize(struct dma_chan *c)
+{
+	struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
+
+	vchan_synchronize(&chan->vchan);
+}
+
 static void axi_dmac_issue_pending(struct dma_chan *c)
 {
 	struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
@@ -613,6 +620,7 @@
 	dma_dev->device_prep_dma_cyclic = axi_dmac_prep_dma_cyclic;
 	dma_dev->device_prep_interleaved_dma = axi_dmac_prep_interleaved;
 	dma_dev->device_terminate_all = axi_dmac_terminate_all;
+	dma_dev->device_synchronize = axi_dmac_synchronize;
 	dma_dev->dev = &pdev->dev;
 	dma_dev->chancnt = 1;
 	dma_dev->src_addr_widths = BIT(dmac->chan.src_width);
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 3ecec14..c50a247 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -43,6 +43,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -265,8 +266,11 @@
 	module_put(dma_chan_to_owner(chan));
 
 	/* This channel is not in use anymore, free it */
-	if (!chan->client_count && chan->device->device_free_chan_resources)
+	if (!chan->client_count && chan->device->device_free_chan_resources) {
+		/* Make sure all operations have completed */
+		dmaengine_synchronize(chan);
 		chan->device->device_free_chan_resources(chan);
+	}
 
 	/* If the channel is used via a DMA request router, free the mapping */
 	if (chan->router && chan->router->route_free) {
@@ -493,6 +497,7 @@
 	caps->dst_addr_widths = device->dst_addr_widths;
 	caps->directions = device->directions;
 	caps->residue_granularity = device->residue_granularity;
+	caps->descriptor_reuse = device->descriptor_reuse;
 
 	/*
 	 * Some devices implement only pause (e.g. to get residuum) but no
@@ -511,7 +516,7 @@
 {
 	struct dma_chan *chan;
 
-	if (!__dma_device_satisfies_mask(dev, mask)) {
+	if (mask && !__dma_device_satisfies_mask(dev, mask)) {
 		pr_debug("%s: wrong capabilities\n", __func__);
 		return NULL;
 	}
@@ -542,6 +547,42 @@
 	return NULL;
 }
 
+static struct dma_chan *find_candidate(struct dma_device *device,
+				       const dma_cap_mask_t *mask,
+				       dma_filter_fn fn, void *fn_param)
+{
+	struct dma_chan *chan = private_candidate(mask, device, fn, fn_param);
+	int err;
+
+	if (chan) {
+		/* Found a suitable channel, try to grab, prep, and return it.
+		 * We first set DMA_PRIVATE to disable balance_ref_count as this
+		 * channel will not be published in the general-purpose
+		 * allocator
+		 */
+		dma_cap_set(DMA_PRIVATE, device->cap_mask);
+		device->privatecnt++;
+		err = dma_chan_get(chan);
+
+		if (err) {
+			if (err == -ENODEV) {
+				pr_debug("%s: %s module removed\n", __func__,
+					 dma_chan_name(chan));
+				list_del_rcu(&device->global_node);
+			} else
+				pr_debug("%s: failed to get %s: (%d)\n",
+					 __func__, dma_chan_name(chan), err);
+
+			if (--device->privatecnt == 0)
+				dma_cap_clear(DMA_PRIVATE, device->cap_mask);
+
+			chan = ERR_PTR(err);
+		}
+	}
+
+	return chan ? chan : ERR_PTR(-EPROBE_DEFER);
+}
+
 /**
  * dma_get_slave_channel - try to get specific channel exclusively
  * @chan: target channel
@@ -580,7 +621,6 @@
 {
 	dma_cap_mask_t mask;
 	struct dma_chan *chan;
-	int err;
 
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
@@ -588,23 +628,11 @@
 	/* lock against __dma_request_channel */
 	mutex_lock(&dma_list_mutex);
 
-	chan = private_candidate(&mask, device, NULL, NULL);
-	if (chan) {
-		dma_cap_set(DMA_PRIVATE, device->cap_mask);
-		device->privatecnt++;
-		err = dma_chan_get(chan);
-		if (err) {
-			pr_debug("%s: failed to get %s: (%d)\n",
-				__func__, dma_chan_name(chan), err);
-			chan = NULL;
-			if (--device->privatecnt == 0)
-				dma_cap_clear(DMA_PRIVATE, device->cap_mask);
-		}
-	}
+	chan = find_candidate(device, &mask, NULL, NULL);
 
 	mutex_unlock(&dma_list_mutex);
 
-	return chan;
+	return IS_ERR(chan) ? NULL : chan;
 }
 EXPORT_SYMBOL_GPL(dma_get_any_slave_channel);
 
@@ -621,35 +649,15 @@
 {
 	struct dma_device *device, *_d;
 	struct dma_chan *chan = NULL;
-	int err;
 
 	/* Find a channel */
 	mutex_lock(&dma_list_mutex);
 	list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
-		chan = private_candidate(mask, device, fn, fn_param);
-		if (chan) {
-			/* Found a suitable channel, try to grab, prep, and
-			 * return it.  We first set DMA_PRIVATE to disable
-			 * balance_ref_count as this channel will not be
-			 * published in the general-purpose allocator
-			 */
-			dma_cap_set(DMA_PRIVATE, device->cap_mask);
-			device->privatecnt++;
-			err = dma_chan_get(chan);
+		chan = find_candidate(device, mask, fn, fn_param);
+		if (!IS_ERR(chan))
+			break;
 
-			if (err == -ENODEV) {
-				pr_debug("%s: %s module removed\n",
-					 __func__, dma_chan_name(chan));
-				list_del_rcu(&device->global_node);
-			} else if (err)
-				pr_debug("%s: failed to get %s: (%d)\n",
-					 __func__, dma_chan_name(chan), err);
-			else
-				break;
-			if (--device->privatecnt == 0)
-				dma_cap_clear(DMA_PRIVATE, device->cap_mask);
-			chan = NULL;
-		}
+		chan = NULL;
 	}
 	mutex_unlock(&dma_list_mutex);
 
@@ -662,27 +670,73 @@
 }
 EXPORT_SYMBOL_GPL(__dma_request_channel);
 
+static const struct dma_slave_map *dma_filter_match(struct dma_device *device,
+						    const char *name,
+						    struct device *dev)
+{
+	int i;
+
+	if (!device->filter.mapcnt)
+		return NULL;
+
+	for (i = 0; i < device->filter.mapcnt; i++) {
+		const struct dma_slave_map *map = &device->filter.map[i];
+
+		if (!strcmp(map->devname, dev_name(dev)) &&
+		    !strcmp(map->slave, name))
+			return map;
+	}
+
+	return NULL;
+}
+
 /**
- * dma_request_slave_channel_reason - try to allocate an exclusive slave channel
+ * dma_request_chan - try to allocate an exclusive slave channel
  * @dev:	pointer to client device structure
  * @name:	slave channel name
  *
  * Returns pointer to appropriate DMA channel on success or an error pointer.
  */
-struct dma_chan *dma_request_slave_channel_reason(struct device *dev,
-						  const char *name)
+struct dma_chan *dma_request_chan(struct device *dev, const char *name)
 {
+	struct dma_device *d, *_d;
+	struct dma_chan *chan = NULL;
+
 	/* If device-tree is present get slave info from here */
 	if (dev->of_node)
-		return of_dma_request_slave_channel(dev->of_node, name);
+		chan = of_dma_request_slave_channel(dev->of_node, name);
 
 	/* If device was enumerated by ACPI get slave info from here */
-	if (ACPI_HANDLE(dev))
-		return acpi_dma_request_slave_chan_by_name(dev, name);
+	if (has_acpi_companion(dev) && !chan)
+		chan = acpi_dma_request_slave_chan_by_name(dev, name);
 
-	return ERR_PTR(-ENODEV);
+	if (chan) {
+		/* Valid channel found or requester needs to be deferred */
+		if (!IS_ERR(chan) || PTR_ERR(chan) == -EPROBE_DEFER)
+			return chan;
+	}
+
+	/* Try to find the channel via the DMA filter map(s) */
+	mutex_lock(&dma_list_mutex);
+	list_for_each_entry_safe(d, _d, &dma_device_list, global_node) {
+		dma_cap_mask_t mask;
+		const struct dma_slave_map *map = dma_filter_match(d, name, dev);
+
+		if (!map)
+			continue;
+
+		dma_cap_zero(mask);
+		dma_cap_set(DMA_SLAVE, mask);
+
+		chan = find_candidate(d, &mask, d->filter.fn, map->param);
+		if (!IS_ERR(chan))
+			break;
+	}
+	mutex_unlock(&dma_list_mutex);
+
+	return chan ? chan : ERR_PTR(-EPROBE_DEFER);
 }
-EXPORT_SYMBOL_GPL(dma_request_slave_channel_reason);
+EXPORT_SYMBOL_GPL(dma_request_chan);
 
 /**
  * dma_request_slave_channel - try to allocate an exclusive slave channel
@@ -694,17 +748,35 @@
 struct dma_chan *dma_request_slave_channel(struct device *dev,
 					   const char *name)
 {
-	struct dma_chan *ch = dma_request_slave_channel_reason(dev, name);
+	struct dma_chan *ch = dma_request_chan(dev, name);
 	if (IS_ERR(ch))
 		return NULL;
 
-	dma_cap_set(DMA_PRIVATE, ch->device->cap_mask);
-	ch->device->privatecnt++;
-
 	return ch;
 }
 EXPORT_SYMBOL_GPL(dma_request_slave_channel);
 
+/**
+ * dma_request_chan_by_mask - allocate a channel satisfying certain capabilities
+ * @mask: capabilities that the channel must satisfy
+ *
+ * Returns pointer to appropriate DMA channel on success or an error pointer.
+ */
+struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask)
+{
+	struct dma_chan *chan;
+
+	if (!mask)
+		return ERR_PTR(-ENODEV);
+
+	chan = __dma_request_channel(mask, NULL, NULL);
+	if (!chan)
+		chan = ERR_PTR(-ENODEV);
+
+	return chan;
+}
+EXPORT_SYMBOL_GPL(dma_request_chan_by_mask);
+
 void dma_release_channel(struct dma_chan *chan)
 {
 	mutex_lock(&dma_list_mutex);
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index 68a4815..5a417bb 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -103,18 +103,21 @@
 	struct device_node *np = pdev->dev.of_node;
 	struct dw_dma_platform_data *pdata;
 	u32 tmp, arr[DW_DMA_MAX_NR_MASTERS];
+	u32 nr_channels;
 
 	if (!np) {
 		dev_err(&pdev->dev, "Missing DT data\n");
 		return NULL;
 	}
 
+	if (of_property_read_u32(np, "dma-channels", &nr_channels))
+		return NULL;
+
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
 		return NULL;
 
-	if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels))
-		return NULL;
+	pdata->nr_channels = nr_channels;
 
 	if (of_property_read_bool(np, "is_private"))
 		pdata->is_private = true;
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 6b03e4e..6b3e9d9 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -2297,6 +2297,10 @@
 		edma_set_chmap(&ecc->slave_chans[i], ecc->dummy_slot);
 	}
 
+	ecc->dma_slave.filter.map = info->slave_map;
+	ecc->dma_slave.filter.mapcnt = info->slavecnt;
+	ecc->dma_slave.filter.fn = edma_filter_fn;
+
 	ret = dma_async_device_register(&ecc->dma_slave);
 	if (ret) {
 		dev_err(dev, "slave ddev registration failed (%d)\n", ret);
@@ -2404,7 +2408,13 @@
 	},
 };
 
+static int edma_tptc_probe(struct platform_device *pdev)
+{
+	return 0;
+}
+
 static struct platform_driver edma_tptc_driver = {
+	.probe		= edma_tptc_probe,
 	.driver = {
 		.name	= "edma3-tptc",
 		.of_match_table = edma_tptc_of_ids,
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c
index 915eec3..be2e62b 100644
--- a/drivers/dma/fsl-edma.c
+++ b/drivers/dma/fsl-edma.c
@@ -116,6 +116,10 @@
 				BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
 				BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
 				BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)
+enum fsl_edma_pm_state {
+	RUNNING = 0,
+	SUSPENDED,
+};
 
 struct fsl_edma_hw_tcd {
 	__le32	saddr;
@@ -147,6 +151,9 @@
 struct fsl_edma_chan {
 	struct virt_dma_chan		vchan;
 	enum dma_status			status;
+	enum fsl_edma_pm_state		pm_state;
+	bool				idle;
+	u32				slave_id;
 	struct fsl_edma_engine		*edma;
 	struct fsl_edma_desc		*edesc;
 	struct fsl_edma_slave_config	fsc;
@@ -298,6 +305,7 @@
 	spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
 	fsl_edma_disable_request(fsl_chan);
 	fsl_chan->edesc = NULL;
+	fsl_chan->idle = true;
 	vchan_get_all_descriptors(&fsl_chan->vchan, &head);
 	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
 	vchan_dma_desc_free_list(&fsl_chan->vchan, &head);
@@ -313,6 +321,7 @@
 	if (fsl_chan->edesc) {
 		fsl_edma_disable_request(fsl_chan);
 		fsl_chan->status = DMA_PAUSED;
+		fsl_chan->idle = true;
 	}
 	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
 	return 0;
@@ -327,6 +336,7 @@
 	if (fsl_chan->edesc) {
 		fsl_edma_enable_request(fsl_chan);
 		fsl_chan->status = DMA_IN_PROGRESS;
+		fsl_chan->idle = false;
 	}
 	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
 	return 0;
@@ -648,6 +658,7 @@
 	fsl_edma_set_tcd_regs(fsl_chan, fsl_chan->edesc->tcd[0].vtcd);
 	fsl_edma_enable_request(fsl_chan);
 	fsl_chan->status = DMA_IN_PROGRESS;
+	fsl_chan->idle = false;
 }
 
 static irqreturn_t fsl_edma_tx_handler(int irq, void *dev_id)
@@ -676,6 +687,7 @@
 				vchan_cookie_complete(&fsl_chan->edesc->vdesc);
 				fsl_chan->edesc = NULL;
 				fsl_chan->status = DMA_COMPLETE;
+				fsl_chan->idle = true;
 			} else {
 				vchan_cyclic_callback(&fsl_chan->edesc->vdesc);
 			}
@@ -704,6 +716,7 @@
 			edma_writeb(fsl_edma, EDMA_CERR_CERR(ch),
 				fsl_edma->membase + EDMA_CERR);
 			fsl_edma->chans[ch].status = DMA_ERROR;
+			fsl_edma->chans[ch].idle = true;
 		}
 	}
 	return IRQ_HANDLED;
@@ -724,6 +737,12 @@
 
 	spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
 
+	if (unlikely(fsl_chan->pm_state != RUNNING)) {
+		spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+		/* cannot submit due to suspend */
+		return;
+	}
+
 	if (vchan_issue_pending(&fsl_chan->vchan) && !fsl_chan->edesc)
 		fsl_edma_xfer_desc(fsl_chan);
 
@@ -735,6 +754,7 @@
 {
 	struct fsl_edma_engine *fsl_edma = ofdma->of_dma_data;
 	struct dma_chan *chan, *_chan;
+	struct fsl_edma_chan *fsl_chan;
 	unsigned long chans_per_mux = fsl_edma->n_chans / DMAMUX_NR;
 
 	if (dma_spec->args_count != 2)
@@ -748,8 +768,10 @@
 			chan = dma_get_slave_channel(chan);
 			if (chan) {
 				chan->device->privatecnt++;
-				fsl_edma_chan_mux(to_fsl_edma_chan(chan),
-					dma_spec->args[1], true);
+				fsl_chan = to_fsl_edma_chan(chan);
+				fsl_chan->slave_id = dma_spec->args[1];
+				fsl_edma_chan_mux(fsl_chan, fsl_chan->slave_id,
+						true);
 				mutex_unlock(&fsl_edma->fsl_edma_mutex);
 				return chan;
 			}
@@ -888,7 +910,9 @@
 		struct fsl_edma_chan *fsl_chan = &fsl_edma->chans[i];
 
 		fsl_chan->edma = fsl_edma;
-
+		fsl_chan->pm_state = RUNNING;
+		fsl_chan->slave_id = 0;
+		fsl_chan->idle = true;
 		fsl_chan->vchan.desc_free = fsl_edma_free_desc;
 		vchan_init(&fsl_chan->vchan, &fsl_edma->dma_dev);
 
@@ -959,6 +983,60 @@
 	return 0;
 }
 
+static int fsl_edma_suspend_late(struct device *dev)
+{
+	struct fsl_edma_engine *fsl_edma = dev_get_drvdata(dev);
+	struct fsl_edma_chan *fsl_chan;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < fsl_edma->n_chans; i++) {
+		fsl_chan = &fsl_edma->chans[i];
+		spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+		/* Make sure the channel is idle; if not, force-disable it. */
+		if (unlikely(!fsl_chan->idle)) {
+			dev_warn(dev, "WARN: There is non-idle channel.");
+			fsl_edma_disable_request(fsl_chan);
+			fsl_edma_chan_mux(fsl_chan, 0, false);
+		}
+
+		fsl_chan->pm_state = SUSPENDED;
+		spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+	}
+
+	return 0;
+}
+
+static int fsl_edma_resume_early(struct device *dev)
+{
+	struct fsl_edma_engine *fsl_edma = dev_get_drvdata(dev);
+	struct fsl_edma_chan *fsl_chan;
+	int i;
+
+	for (i = 0; i < fsl_edma->n_chans; i++) {
+		fsl_chan = &fsl_edma->chans[i];
+		fsl_chan->pm_state = RUNNING;
+		edma_writew(fsl_edma, 0x0, fsl_edma->membase + EDMA_TCD_CSR(i));
+		if (fsl_chan->slave_id != 0)
+			fsl_edma_chan_mux(fsl_chan, fsl_chan->slave_id, true);
+	}
+
+	edma_writel(fsl_edma, EDMA_CR_ERGA | EDMA_CR_ERCA,
+			fsl_edma->membase + EDMA_CR);
+
+	return 0;
+}
+
+/*
+ * eDMA provides a service to other drivers, so it should be suspended late
+ * and resumed early. Before eDMA is suspended, all of its clients should
+ * have stopped DMA transfers and left their channels idle.
+ */
+static const struct dev_pm_ops fsl_edma_pm_ops = {
+	.suspend_late   = fsl_edma_suspend_late,
+	.resume_early   = fsl_edma_resume_early,
+};
+
 static const struct of_device_id fsl_edma_dt_ids[] = {
 	{ .compatible = "fsl,vf610-edma", },
 	{ /* sentinel */ }
@@ -969,6 +1047,7 @@
 	.driver		= {
 		.name	= "fsl-edma",
 		.of_match_table = fsl_edma_dt_ids,
+		.pm     = &fsl_edma_pm_ops,
 	},
 	.probe          = fsl_edma_probe,
 	.remove		= fsl_edma_remove,
diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
index 823ad72..eef145e 100644
--- a/drivers/dma/hsu/hsu.c
+++ b/drivers/dma/hsu/hsu.c
@@ -228,6 +228,8 @@
 	for_each_sg(sgl, sg, sg_len, i) {
 		desc->sg[i].addr = sg_dma_address(sg);
 		desc->sg[i].len = sg_dma_len(sg);
+
+		desc->length += sg_dma_len(sg);
 	}
 
 	desc->nents = sg_len;
@@ -249,21 +251,10 @@
 	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
 }
 
-static size_t hsu_dma_desc_size(struct hsu_dma_desc *desc)
-{
-	size_t bytes = 0;
-	unsigned int i;
-
-	for (i = desc->active; i < desc->nents; i++)
-		bytes += desc->sg[i].len;
-
-	return bytes;
-}
-
 static size_t hsu_dma_active_desc_size(struct hsu_dma_chan *hsuc)
 {
 	struct hsu_dma_desc *desc = hsuc->desc;
-	size_t bytes = hsu_dma_desc_size(desc);
+	size_t bytes = desc->length;
 	int i;
 
 	i = desc->active % HSU_DMA_CHAN_NR_DESC;
@@ -294,7 +285,7 @@
 		dma_set_residue(state, bytes);
 		status = hsuc->desc->status;
 	} else if (vdesc) {
-		bytes = hsu_dma_desc_size(to_hsu_dma_desc(vdesc));
+		bytes = to_hsu_dma_desc(vdesc)->length;
 		dma_set_residue(state, bytes);
 	}
 	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h
index f06579c..578a8ee 100644
--- a/drivers/dma/hsu/hsu.h
+++ b/drivers/dma/hsu/hsu.h
@@ -65,6 +65,7 @@
 	enum dma_transfer_direction direction;
 	struct hsu_dma_sg *sg;
 	unsigned int nents;
+	size_t length;
 	unsigned int active;
 	enum dma_status status;
 };
diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c
index 7d56b47..3cb7b2c 100644
--- a/drivers/dma/idma64.c
+++ b/drivers/dma/idma64.c
@@ -178,20 +178,12 @@
 	if (!status)
 		return IRQ_NONE;
 
-	/* Disable interrupts */
-	channel_clear_bit(idma64, MASK(XFER), idma64->all_chan_mask);
-	channel_clear_bit(idma64, MASK(ERROR), idma64->all_chan_mask);
-
 	status_xfer = dma_readl(idma64, RAW(XFER));
 	status_err = dma_readl(idma64, RAW(ERROR));
 
 	for (i = 0; i < idma64->dma.chancnt; i++)
 		idma64_chan_irq(idma64, i, status_err, status_xfer);
 
-	/* Re-enable interrupts */
-	channel_set_bit(idma64, MASK(XFER), idma64->all_chan_mask);
-	channel_set_bit(idma64, MASK(ERROR), idma64->all_chan_mask);
-
 	return IRQ_HANDLED;
 }
 
@@ -239,7 +231,7 @@
 	idma64_desc_free(idma64c, to_idma64_desc(vdesc));
 }
 
-static u64 idma64_hw_desc_fill(struct idma64_hw_desc *hw,
+static void idma64_hw_desc_fill(struct idma64_hw_desc *hw,
 		struct dma_slave_config *config,
 		enum dma_transfer_direction direction, u64 llp)
 {
@@ -276,26 +268,26 @@
 		     IDMA64C_CTLL_SRC_WIDTH(src_width);
 
 	lli->llp = llp;
-	return hw->llp;
 }
 
 static void idma64_desc_fill(struct idma64_chan *idma64c,
 		struct idma64_desc *desc)
 {
 	struct dma_slave_config *config = &idma64c->config;
-	struct idma64_hw_desc *hw = &desc->hw[desc->ndesc - 1];
+	unsigned int i = desc->ndesc;
+	struct idma64_hw_desc *hw = &desc->hw[i - 1];
 	struct idma64_lli *lli = hw->lli;
 	u64 llp = 0;
-	unsigned int i = desc->ndesc;
 
 	/* Fill the hardware descriptors and link them to a list */
 	do {
 		hw = &desc->hw[--i];
-		llp = idma64_hw_desc_fill(hw, config, desc->direction, llp);
+		idma64_hw_desc_fill(hw, config, desc->direction, llp);
+		llp = hw->llp;
 		desc->length += hw->len;
 	} while (i);
 
-	/* Trigger interrupt after last block */
+	/* Trigger an interrupt after the last block is transferred */
 	lli->ctllo |= IDMA64C_CTLL_INT_EN;
 }
 
@@ -596,6 +588,8 @@
 
 	idma64->dma.dev = chip->dev;
 
+	dma_set_max_seg_size(idma64->dma.dev, IDMA64C_CTLH_BLOCK_TS_MASK);
+
 	ret = dma_async_device_register(&idma64->dma);
 	if (ret)
 		return ret;
diff --git a/drivers/dma/idma64.h b/drivers/dma/idma64.h
index f6aeff0..8423f13 100644
--- a/drivers/dma/idma64.h
+++ b/drivers/dma/idma64.h
@@ -54,7 +54,8 @@
 #define IDMA64C_CTLL_LLP_S_EN		(1 << 28)	/* src block chain */
 
 /* Bitfields in CTL_HI */
-#define IDMA64C_CTLH_BLOCK_TS(x)	((x) & ((1 << 17) - 1))
+#define IDMA64C_CTLH_BLOCK_TS_MASK	((1 << 17) - 1)
+#define IDMA64C_CTLH_BLOCK_TS(x)	((x) & IDMA64C_CTLH_BLOCK_TS_MASK)
 #define IDMA64C_CTLH_DONE		(1 << 17)
 
 /* Bitfields in CFG_LO */
diff --git a/drivers/dma/img-mdc-dma.c b/drivers/dma/img-mdc-dma.c
index 9ca5683..a4c53be 100644
--- a/drivers/dma/img-mdc-dma.c
+++ b/drivers/dma/img-mdc-dma.c
@@ -651,6 +651,48 @@
 	return ret;
 }
 
+static unsigned int mdc_get_new_events(struct mdc_chan *mchan)
+{
+	u32 val, processed, done1, done2;
+	unsigned int ret;
+
+	val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
+	processed = (val >> MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) &
+				MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK;
+	/*
+	 * CMDS_DONE may have incremented between reading CMDS_PROCESSED
+	 * and clearing INT_ACTIVE.  Re-read CMDS_PROCESSED to ensure we
+	 * didn't miss a command completion.
+	 */
+	do {
+		val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
+
+		done1 = (val >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) &
+			MDC_CMDS_PROCESSED_CMDS_DONE_MASK;
+
+		val &= ~((MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK <<
+			  MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) |
+			 MDC_CMDS_PROCESSED_INT_ACTIVE);
+
+		val |= done1 << MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT;
+
+		mdc_chan_writel(mchan, val, MDC_CMDS_PROCESSED);
+
+		val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
+
+		done2 = (val >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) &
+			MDC_CMDS_PROCESSED_CMDS_DONE_MASK;
+	} while (done1 != done2);
+
+	if (done1 >= processed)
+		ret = done1 - processed;
+	else
+		ret = ((MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK + 1) -
+			processed) + done1;
+
+	return ret;
+}
+
 static int mdc_terminate_all(struct dma_chan *chan)
 {
 	struct mdc_chan *mchan = to_mdc_chan(chan);
@@ -667,6 +709,8 @@
 	mchan->desc = NULL;
 	vchan_get_all_descriptors(&mchan->vc, &head);
 
+	mdc_get_new_events(mchan);
+
 	spin_unlock_irqrestore(&mchan->vc.lock, flags);
 
 	if (mdesc)
@@ -703,35 +747,17 @@
 {
 	struct mdc_chan *mchan = (struct mdc_chan *)dev_id;
 	struct mdc_tx_desc *mdesc;
-	u32 val, processed, done1, done2;
-	unsigned int i;
+	unsigned int i, new_events;
 
 	spin_lock(&mchan->vc.lock);
 
-	val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
-	processed = (val >> MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) &
-		MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK;
-	/*
-	 * CMDS_DONE may have incremented between reading CMDS_PROCESSED
-	 * and clearing INT_ACTIVE.  Re-read CMDS_PROCESSED to ensure we
-	 * didn't miss a command completion.
-	 */
-	do {
-		val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
-		done1 = (val >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) &
-			MDC_CMDS_PROCESSED_CMDS_DONE_MASK;
-		val &= ~((MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK <<
-			  MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) |
-			 MDC_CMDS_PROCESSED_INT_ACTIVE);
-		val |= done1 << MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT;
-		mdc_chan_writel(mchan, val, MDC_CMDS_PROCESSED);
-		val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
-		done2 = (val >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) &
-			MDC_CMDS_PROCESSED_CMDS_DONE_MASK;
-	} while (done1 != done2);
-
 	dev_dbg(mdma2dev(mchan->mdma), "IRQ on channel %d\n", mchan->chan_nr);
 
+	new_events = mdc_get_new_events(mchan);
+
+	if (!new_events)
+		goto out;
+
 	mdesc = mchan->desc;
 	if (!mdesc) {
 		dev_warn(mdma2dev(mchan->mdma),
@@ -740,8 +766,7 @@
 		goto out;
 	}
 
-	for (i = processed; i != done1;
-	     i = (i + 1) % (MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK + 1)) {
+	for (i = 0; i < new_events; i++) {
 		/*
 		 * The first interrupt in a transfer indicates that the
 		 * command list has been loaded, not that a command has
@@ -979,7 +1004,6 @@
 				 vc.chan.device_node) {
 		list_del(&mchan->vc.chan.device_node);
 
-		synchronize_irq(mchan->irq);
 		devm_free_irq(&pdev->dev, mchan->irq, mchan);
 
 		tasklet_kill(&mchan->vc.task);
diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c
index 2cb7c30..0b9b6b0 100644
--- a/drivers/dma/ioat/dca.c
+++ b/drivers/dma/ioat/dca.c
@@ -224,7 +224,7 @@
 	return tag;
 }
 
-static struct dca_ops ioat_dca_ops = {
+static const struct dca_ops ioat_dca_ops = {
 	.add_requester		= ioat_dca_add_requester,
 	.remove_requester	= ioat_dca_remove_requester,
 	.get_tag		= ioat_dca_get_tag,
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index 8f4e607..b8f4807 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -235,43 +235,11 @@
 	return ioat_dma->idx[index];
 }
 
-static inline u64 ioat_chansts_32(struct ioatdma_chan *ioat_chan)
-{
-	u8 ver = ioat_chan->ioat_dma->version;
-	u64 status;
-	u32 status_lo;
-
-	/* We need to read the low address first as this causes the
-	 * chipset to latch the upper bits for the subsequent read
-	 */
-	status_lo = readl(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver));
-	status = readl(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver));
-	status <<= 32;
-	status |= status_lo;
-
-	return status;
-}
-
-#if BITS_PER_LONG == 64
-
 static inline u64 ioat_chansts(struct ioatdma_chan *ioat_chan)
 {
-	u8 ver = ioat_chan->ioat_dma->version;
-	u64 status;
-
-	 /* With IOAT v3.3 the status register is 64bit.  */
-	if (ver >= IOAT_VER_3_3)
-		status = readq(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET(ver));
-	else
-		status = ioat_chansts_32(ioat_chan);
-
-	return status;
+	return readq(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET);
 }
 
-#else
-#define ioat_chansts ioat_chansts_32
-#endif
-
 static inline u64 ioat_chansts_to_addr(u64 status)
 {
 	return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
index 909352f..4994a36 100644
--- a/drivers/dma/ioat/registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -99,19 +99,9 @@
 #define IOAT_DMA_COMP_V1			0x0001	/* Compatibility with DMA version 1 */
 #define IOAT_DMA_COMP_V2			0x0002	/* Compatibility with DMA version 2 */
 
-
-#define IOAT1_CHANSTS_OFFSET		0x04	/* 64-bit Channel Status Register */
-#define IOAT2_CHANSTS_OFFSET		0x08	/* 64-bit Channel Status Register */
-#define IOAT_CHANSTS_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
-						? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
-#define IOAT1_CHANSTS_OFFSET_LOW	0x04
-#define IOAT2_CHANSTS_OFFSET_LOW	0x08
-#define IOAT_CHANSTS_OFFSET_LOW(ver)		((ver) < IOAT_VER_2_0 \
-						? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
-#define IOAT1_CHANSTS_OFFSET_HIGH	0x08
-#define IOAT2_CHANSTS_OFFSET_HIGH	0x0C
-#define IOAT_CHANSTS_OFFSET_HIGH(ver)		((ver) < IOAT_VER_2_0 \
-						? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
+/* The IOAT1 define is kept so the i7300_idle driver does not fail to compile */
+#define IOAT1_CHANSTS_OFFSET		0x04
+#define IOAT_CHANSTS_OFFSET		0x08	/* 64-bit Channel Status Register */
 #define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR	(~0x3fULL)
 #define IOAT_CHANSTS_SOFT_ERR			0x10ULL
 #define IOAT_CHANSTS_UNAFFILIATED_ERR		0x8ULL
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index 1dfc71c..9794b07 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -28,8 +28,6 @@
 struct omap_dmadev {
 	struct dma_device ddev;
 	spinlock_t lock;
-	struct tasklet_struct task;
-	struct list_head pending;
 	void __iomem *base;
 	const struct omap_dma_reg *reg_map;
 	struct omap_system_dma_plat_info *plat;
@@ -42,7 +40,6 @@
 
 struct omap_chan {
 	struct virt_dma_chan vc;
-	struct list_head node;
 	void __iomem *channel_base;
 	const struct omap_dma_reg *reg_map;
 	uint32_t ccr;
@@ -454,33 +451,6 @@
 	spin_unlock_irqrestore(&c->vc.lock, flags);
 }
 
-/*
- * This callback schedules all pending channels.  We could be more
- * clever here by postponing allocation of the real DMA channels to
- * this point, and freeing them when our virtual channel becomes idle.
- *
- * We would then need to deal with 'all channels in-use'
- */
-static void omap_dma_sched(unsigned long data)
-{
-	struct omap_dmadev *d = (struct omap_dmadev *)data;
-	LIST_HEAD(head);
-
-	spin_lock_irq(&d->lock);
-	list_splice_tail_init(&d->pending, &head);
-	spin_unlock_irq(&d->lock);
-
-	while (!list_empty(&head)) {
-		struct omap_chan *c = list_first_entry(&head,
-			struct omap_chan, node);
-
-		spin_lock_irq(&c->vc.lock);
-		list_del_init(&c->node);
-		omap_dma_start_desc(c);
-		spin_unlock_irq(&c->vc.lock);
-	}
-}
-
 static irqreturn_t omap_dma_irq(int irq, void *devid)
 {
 	struct omap_dmadev *od = devid;
@@ -703,8 +673,14 @@
 	struct omap_chan *c = to_omap_dma_chan(chan);
 	struct virt_dma_desc *vd;
 	enum dma_status ret;
+	uint32_t ccr;
 	unsigned long flags;
 
+	ccr = omap_dma_chan_read(c, CCR);
+	/* If the channel is no longer active, handle the completion right away */
+	if (!(ccr & CCR_ENABLE))
+		omap_dma_callback(c->dma_ch, 0, c);
+
 	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret == DMA_COMPLETE || !txstate)
 		return ret;
@@ -719,7 +695,7 @@
 
 		if (d->dir == DMA_MEM_TO_DEV)
 			pos = omap_dma_get_src_pos(c);
-		else if (d->dir == DMA_DEV_TO_MEM)
+		else if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM)
 			pos = omap_dma_get_dst_pos(c);
 		else
 			pos = 0;
@@ -739,22 +715,8 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&c->vc.lock, flags);
-	if (vchan_issue_pending(&c->vc) && !c->desc) {
-		/*
-		 * c->cyclic is used only by audio and in this case the DMA need
-		 * to be started without delay.
-		 */
-		if (!c->cyclic) {
-			struct omap_dmadev *d = to_omap_dma_dev(chan->device);
-			spin_lock(&d->lock);
-			if (list_empty(&c->node))
-				list_add_tail(&c->node, &d->pending);
-			spin_unlock(&d->lock);
-			tasklet_schedule(&d->task);
-		} else {
-			omap_dma_start_desc(c);
-		}
-	}
+	if (vchan_issue_pending(&c->vc) && !c->desc)
+		omap_dma_start_desc(c);
 	spin_unlock_irqrestore(&c->vc.lock, flags);
 }
 
@@ -768,7 +730,7 @@
 	struct scatterlist *sgent;
 	struct omap_desc *d;
 	dma_addr_t dev_addr;
-	unsigned i, j = 0, es, en, frame_bytes;
+	unsigned i, es, en, frame_bytes;
 	u32 burst;
 
 	if (dir == DMA_DEV_TO_MEM) {
@@ -845,13 +807,12 @@
 	en = burst;
 	frame_bytes = es_bytes[es] * en;
 	for_each_sg(sgl, sgent, sglen, i) {
-		d->sg[j].addr = sg_dma_address(sgent);
-		d->sg[j].en = en;
-		d->sg[j].fn = sg_dma_len(sgent) / frame_bytes;
-		j++;
+		d->sg[i].addr = sg_dma_address(sgent);
+		d->sg[i].en = en;
+		d->sg[i].fn = sg_dma_len(sgent) / frame_bytes;
 	}
 
-	d->sglen = j;
+	d->sglen = sglen;
 
 	return vchan_tx_prep(&c->vc, &d->vd, tx_flags);
 }
@@ -1018,17 +979,11 @@
 static int omap_dma_terminate_all(struct dma_chan *chan)
 {
 	struct omap_chan *c = to_omap_dma_chan(chan);
-	struct omap_dmadev *d = to_omap_dma_dev(c->vc.chan.device);
 	unsigned long flags;
 	LIST_HEAD(head);
 
 	spin_lock_irqsave(&c->vc.lock, flags);
 
-	/* Prevent this channel being scheduled */
-	spin_lock(&d->lock);
-	list_del_init(&c->node);
-	spin_unlock(&d->lock);
-
 	/*
 	 * Stop DMA activity: we assume the callback will not be called
 	 * after omap_dma_stop() returns (even if it does, it will see
@@ -1102,14 +1057,12 @@
 	c->reg_map = od->reg_map;
 	c->vc.desc_free = omap_dma_desc_free;
 	vchan_init(&c->vc, &od->ddev);
-	INIT_LIST_HEAD(&c->node);
 
 	return 0;
 }
 
 static void omap_dma_free(struct omap_dmadev *od)
 {
-	tasklet_kill(&od->task);
 	while (!list_empty(&od->ddev.channels)) {
 		struct omap_chan *c = list_first_entry(&od->ddev.channels,
 			struct omap_chan, vc.chan.device_node);
@@ -1165,12 +1118,9 @@
 	od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
 	od->ddev.dev = &pdev->dev;
 	INIT_LIST_HEAD(&od->ddev.channels);
-	INIT_LIST_HEAD(&od->pending);
 	spin_lock_init(&od->lock);
 	spin_lock_init(&od->irq_lock);
 
-	tasklet_init(&od->task, omap_dma_sched, (unsigned long)od);
-
 	od->dma_requests = OMAP_SDMA_REQUESTS;
 	if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node,
 						      "dma-requests",
@@ -1203,6 +1153,10 @@
 			return rc;
 	}
 
+	od->ddev.filter.map = od->plat->slave_map;
+	od->ddev.filter.mapcnt = od->plat->slavecnt;
+	od->ddev.filter.fn = omap_dma_filter_fn;
+
 	rc = dma_async_device_register(&od->ddev);
 	if (rc) {
 		pr_warn("OMAP-DMA: failed to register slave DMA engine device: %d\n",
diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
index fc4156a..f2a0310 100644
--- a/drivers/dma/pxa_dma.c
+++ b/drivers/dma/pxa_dma.c
@@ -1414,6 +1414,7 @@
 	pdev->slave.dst_addr_widths = widths;
 	pdev->slave.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
 	pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+	pdev->slave.descriptor_reuse = true;
 
 	pdev->slave.dev = &op->dev;
 	ret = pxad_init_dmadev(op, pdev, dma_channels);
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
index ebd8a5f..16fb330 100644
--- a/drivers/dma/sh/usb-dmac.c
+++ b/drivers/dma/sh/usb-dmac.c
@@ -448,7 +448,7 @@
 static int usb_dmac_chan_terminate_all(struct dma_chan *chan)
 {
 	struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
-	struct usb_dmac_desc *desc;
+	struct usb_dmac_desc *desc, *_desc;
 	unsigned long flags;
 	LIST_HEAD(head);
 	LIST_HEAD(list);
@@ -459,7 +459,7 @@
 	if (uchan->desc)
 		uchan->desc = NULL;
 	list_splice_init(&uchan->desc_got, &list);
-	list_for_each_entry(desc, &list, node)
+	list_for_each_entry_safe(desc, _desc, &list, node)
 		list_move_tail(&desc->node, &uchan->desc_freed);
 	spin_unlock_irqrestore(&uchan->vc.lock, flags);
 	vchan_dma_desc_free_list(&uchan->vc, &head);
diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index 6f80432..a35c211 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -29,7 +29,7 @@
 	spin_lock_irqsave(&vc->lock, flags);
 	cookie = dma_cookie_assign(tx);
 
-	list_add_tail(&vd->node, &vc->desc_submitted);
+	list_move_tail(&vd->node, &vc->desc_submitted);
 	spin_unlock_irqrestore(&vc->lock, flags);
 
 	dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: submitted\n",
@@ -39,6 +39,33 @@
 }
 EXPORT_SYMBOL_GPL(vchan_tx_submit);
 
+/**
+ * vchan_tx_desc_free - free a reusable descriptor
+ * @tx: the transfer
+ *
+ * This function frees a previously allocated reusable descriptor. The only
+ * other way to free it is to clear the DMA_CTRL_REUSE flag and submit the
+ * transfer one last time.
+ *
+ * Returns 0 upon success
+ */
+int vchan_tx_desc_free(struct dma_async_tx_descriptor *tx)
+{
+	struct virt_dma_chan *vc = to_virt_chan(tx->chan);
+	struct virt_dma_desc *vd = to_virt_desc(tx);
+	unsigned long flags;
+
+	spin_lock_irqsave(&vc->lock, flags);
+	list_del(&vd->node);
+	spin_unlock_irqrestore(&vc->lock, flags);
+
+	dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: freeing\n",
+		vc, vd, vd->tx.cookie);
+	vc->desc_free(vd);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vchan_tx_desc_free);
+
 struct virt_dma_desc *vchan_find_desc(struct virt_dma_chan *vc,
 	dma_cookie_t cookie)
 {
@@ -83,8 +110,10 @@
 		cb_data = vd->tx.callback_param;
 
 		list_del(&vd->node);
-
-		vc->desc_free(vd);
+		if (dmaengine_desc_test_reuse(&vd->tx))
+			list_add(&vd->node, &vc->desc_allocated);
+		else
+			vc->desc_free(vd);
 
 		if (cb)
 			cb(cb_data);
@@ -96,9 +125,13 @@
 	while (!list_empty(head)) {
 		struct virt_dma_desc *vd = list_first_entry(head,
 			struct virt_dma_desc, node);
-		list_del(&vd->node);
-		dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
-		vc->desc_free(vd);
+		if (dmaengine_desc_test_reuse(&vd->tx)) {
+			list_move_tail(&vd->node, &vc->desc_allocated);
+		} else {
+			dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
+			list_del(&vd->node);
+			vc->desc_free(vd);
+		}
 	}
 }
 EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list);
@@ -108,6 +141,7 @@
 	dma_cookie_init(&vc->chan);
 
 	spin_lock_init(&vc->lock);
+	INIT_LIST_HEAD(&vc->desc_allocated);
 	INIT_LIST_HEAD(&vc->desc_submitted);
 	INIT_LIST_HEAD(&vc->desc_issued);
 	INIT_LIST_HEAD(&vc->desc_completed);
diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h
index 2fa4774..d9731ca 100644
--- a/drivers/dma/virt-dma.h
+++ b/drivers/dma/virt-dma.h
@@ -29,6 +29,7 @@
 	spinlock_t lock;
 
 	/* protected by vc.lock */
+	struct list_head desc_allocated;
 	struct list_head desc_submitted;
 	struct list_head desc_issued;
 	struct list_head desc_completed;
@@ -55,10 +56,17 @@
 	struct virt_dma_desc *vd, unsigned long tx_flags)
 {
 	extern dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *);
+	extern int vchan_tx_desc_free(struct dma_async_tx_descriptor *);
+	unsigned long flags;
 
 	dma_async_tx_descriptor_init(&vd->tx, &vc->chan);
 	vd->tx.flags = tx_flags;
 	vd->tx.tx_submit = vchan_tx_submit;
+	vd->tx.desc_free = vchan_tx_desc_free;
+
+	spin_lock_irqsave(&vc->lock, flags);
+	list_add_tail(&vd->node, &vc->desc_allocated);
+	spin_unlock_irqrestore(&vc->lock, flags);
 
 	return &vd->tx;
 }
@@ -134,6 +142,7 @@
 static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc,
 	struct list_head *head)
 {
+	list_splice_tail_init(&vc->desc_allocated, head);
 	list_splice_tail_init(&vc->desc_submitted, head);
 	list_splice_tail_init(&vc->desc_issued, head);
 	list_splice_tail_init(&vc->desc_completed, head);
@@ -141,14 +150,30 @@
 
 static inline void vchan_free_chan_resources(struct virt_dma_chan *vc)
 {
+	struct virt_dma_desc *vd;
 	unsigned long flags;
 	LIST_HEAD(head);
 
 	spin_lock_irqsave(&vc->lock, flags);
 	vchan_get_all_descriptors(vc, &head);
+	list_for_each_entry(vd, &head, node)
+		dmaengine_desc_clear_reuse(&vd->tx);
 	spin_unlock_irqrestore(&vc->lock, flags);
 
 	vchan_dma_desc_free_list(vc, &head);
 }
 
+/**
+ * vchan_synchronize() - synchronize callback execution to the current context
+ * @vc: virtual channel to synchronize
+ *
+ * Makes sure that all scheduled or active callbacks have finished running. For
+ * proper operation the caller has to ensure that no new callbacks are scheduled
+ * after the invocation of this function has started.
+ */
+static inline void vchan_synchronize(struct virt_dma_chan *vc)
+{
+	tasklet_kill(&vc->task);
+}
+
 #endif
diff --git a/include/linux/dca.h b/include/linux/dca.h
index d27a7a0..ad956c2 100644
--- a/include/linux/dca.h
+++ b/include/linux/dca.h
@@ -34,7 +34,7 @@
 
 struct dca_provider {
 	struct list_head	node;
-	struct dca_ops		*ops;
+	const struct dca_ops	*ops;
 	struct device 		*cd;
 	int			 id;
 };
@@ -53,7 +53,8 @@
 	int	(*dev_managed)      (struct dca_provider *, struct device *);
 };
 
-struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
+struct dca_provider *alloc_dca_provider(const struct dca_ops *ops,
+					int priv_size);
 void free_dca_provider(struct dca_provider *dca);
 int register_dca_provider(struct dca_provider *dca, struct device *dev);
 void unregister_dca_provider(struct dca_provider *dca, struct device *dev);
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c47c68e..16a1cad 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -607,11 +607,38 @@
 };
 
 /**
+ * struct dma_slave_map - associates a slave device and its slave channel with
+ * a parameter to be used by a filter function
+ * @devname: name of the device
+ * @slave: slave channel name
+ * @param: opaque parameter to pass to struct dma_filter.fn
+ */
+struct dma_slave_map {
+	const char *devname;
+	const char *slave;
+	void *param;
+};
+
+/**
+ * struct dma_filter - information for slave device/channel to filter_fn/param
+ * mapping
+ * @fn: filter function callback
+ * @mapcnt: number of slave device/channel in the map
+ * @map: array of channel to filter mapping data
+ */
+struct dma_filter {
+	dma_filter_fn fn;
+	int mapcnt;
+	const struct dma_slave_map *map;
+};
+
+/**
  * struct dma_device - info on the entity supplying DMA services
  * @chancnt: how many DMA channels are supported
  * @privatecnt: how many DMA channels are requested by dma_request_channel
  * @channels: the list of struct dma_chan
  * @global_node: list_head for global dma_device_list
+ * @filter: information for device/slave to filter function/param mapping
  * @cap_mask: one or more dma_capability flags
  * @max_xor: maximum number of xor sources, 0 if no capability
  * @max_pq: maximum number of PQ sources and PQ-continue capability
@@ -654,11 +681,14 @@
  *	paused. Returns 0 or an error code
  * @device_terminate_all: Aborts all transfers on a channel. Returns 0
  *	or an error code
+ * @device_synchronize: Synchronizes the termination of a transfer to the
+ *	current context.
  * @device_tx_status: poll for transaction completion, the optional
  *	txstate parameter can be supplied with a pointer to get a
  *	struct with auxiliary transfer status information, otherwise the call
  *	will just return a simple status code
  * @device_issue_pending: push pending transactions to hardware
+ * @descriptor_reuse: a submitted transfer can be resubmitted after completion
  */
 struct dma_device {
 
@@ -666,6 +696,7 @@
 	unsigned int privatecnt;
 	struct list_head channels;
 	struct list_head global_node;
+	struct dma_filter filter;
 	dma_cap_mask_t  cap_mask;
 	unsigned short max_xor;
 	unsigned short max_pq;
@@ -681,6 +712,7 @@
 	u32 src_addr_widths;
 	u32 dst_addr_widths;
 	u32 directions;
+	bool descriptor_reuse;
 	enum dma_residue_granularity residue_granularity;
 
 	int (*device_alloc_chan_resources)(struct dma_chan *chan);
@@ -737,6 +769,7 @@
 	int (*device_pause)(struct dma_chan *chan);
 	int (*device_resume)(struct dma_chan *chan);
 	int (*device_terminate_all)(struct dma_chan *chan);
+	void (*device_synchronize)(struct dma_chan *chan);
 
 	enum dma_status (*device_tx_status)(struct dma_chan *chan,
 					    dma_cookie_t cookie,
@@ -828,6 +861,13 @@
 			src_sg, src_nents, flags);
 }
 
+/**
+ * dmaengine_terminate_all() - Terminate all active DMA transfers
+ * @chan: The channel for which to terminate the transfers
+ *
+ * This function is DEPRECATED. Use either dmaengine_terminate_sync() or
+ * dmaengine_terminate_async() instead.
+ */
 static inline int dmaengine_terminate_all(struct dma_chan *chan)
 {
 	if (chan->device->device_terminate_all)
@@ -836,6 +876,88 @@
 	return -ENOSYS;
 }
 
+/**
+ * dmaengine_terminate_async() - Terminate all active DMA transfers
+ * @chan: The channel for which to terminate the transfers
+ *
+ * Calling this function will terminate all active and pending descriptors
+ * that have previously been submitted to the channel. It is not guaranteed
+ * though that the transfer for the active descriptor has stopped when the
+ * function returns. Furthermore it is possible the complete callback of a
+ * submitted transfer is still running when this function returns.
+ *
+ * dmaengine_synchronize() needs to be called before it is safe to free
+ * any memory that is accessed by previously submitted descriptors or before
+ * freeing any resources accessed from within the completion callback of any
+ * previously submitted descriptors.
+ *
+ * This function can be called from atomic context as well as from within a
+ * complete callback of a descriptor submitted on the same channel.
+ *
+ * If neither of the two conditions above applies, consider using
+ * dmaengine_terminate_sync() instead.
+ */
+static inline int dmaengine_terminate_async(struct dma_chan *chan)
+{
+	if (chan->device->device_terminate_all)
+		return chan->device->device_terminate_all(chan);
+
+	return -EINVAL;
+}
+
+/**
+ * dmaengine_synchronize() - Synchronize DMA channel termination
+ * @chan: The channel to synchronize
+ *
+ * Synchronizes the DMA channel termination to the current context. When this
+ * function returns it is guaranteed that all transfers for previously issued
+ * descriptors have stopped and it is safe to free the memory associated
+ * with them. Furthermore it is guaranteed that all complete callback functions
+ * for a previously submitted descriptor have finished running and it is safe to
+ * free resources accessed from within the complete callbacks.
+ *
+ * The behavior of this function is undefined if dma_async_issue_pending() has
+ * been called between dmaengine_terminate_async() and this function.
+ *
+ * This function must only be called from non-atomic context and must not be
+ * called from within a complete callback of a descriptor submitted on the same
+ * channel.
+ */
+static inline void dmaengine_synchronize(struct dma_chan *chan)
+{
+	might_sleep();
+
+	if (chan->device->device_synchronize)
+		chan->device->device_synchronize(chan);
+}
+
+/**
+ * dmaengine_terminate_sync() - Terminate all active DMA transfers
+ * @chan: The channel for which to terminate the transfers
+ *
+ * Calling this function will terminate all active and pending transfers
+ * that have previously been submitted to the channel. It is similar to
+ * dmaengine_terminate_async() but guarantees that the DMA transfer has actually
+ * stopped and that all complete callbacks have finished running when the
+ * function returns.
+ *
+ * This function must only be called from non-atomic context and must not be
+ * called from within a complete callback of a descriptor submitted on the same
+ * channel.
+ */
+static inline int dmaengine_terminate_sync(struct dma_chan *chan)
+{
+	int ret;
+
+	ret = dmaengine_terminate_async(chan);
+	if (ret)
+		return ret;
+
+	dmaengine_synchronize(chan);
+
+	return 0;
+}
+
 static inline int dmaengine_pause(struct dma_chan *chan)
 {
 	if (chan->device->device_pause)
@@ -1140,9 +1262,11 @@
 void dma_issue_pending_all(void);
 struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask,
 					dma_filter_fn fn, void *fn_param);
-struct dma_chan *dma_request_slave_channel_reason(struct device *dev,
-						  const char *name);
 struct dma_chan *dma_request_slave_channel(struct device *dev, const char *name);
+
+struct dma_chan *dma_request_chan(struct device *dev, const char *name);
+struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask);
+
 void dma_release_channel(struct dma_chan *chan);
 int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps);
 #else
@@ -1166,16 +1290,21 @@
 {
 	return NULL;
 }
-static inline struct dma_chan *dma_request_slave_channel_reason(
-					struct device *dev, const char *name)
-{
-	return ERR_PTR(-ENODEV);
-}
 static inline struct dma_chan *dma_request_slave_channel(struct device *dev,
 							 const char *name)
 {
 	return NULL;
 }
+static inline struct dma_chan *dma_request_chan(struct device *dev,
+						const char *name)
+{
+	return ERR_PTR(-ENODEV);
+}
+static inline struct dma_chan *dma_request_chan_by_mask(
+						const dma_cap_mask_t *mask)
+{
+	return ERR_PTR(-ENODEV);
+}
 static inline void dma_release_channel(struct dma_chan *chan)
 {
 }
@@ -1186,6 +1315,8 @@
 }
 #endif
 
+#define dma_request_slave_channel_reason(dev, name) dma_request_chan(dev, name)
+
 static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx)
 {
 	struct dma_slave_caps caps;
diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h
index 88fa8af..1d99b61 100644
--- a/include/linux/omap-dma.h
+++ b/include/linux/omap-dma.h
@@ -267,6 +267,9 @@
 	u8	type;
 };
 
+#define SDMA_FILTER_PARAM(hw_req)	((int[]) { (hw_req) })
+struct dma_slave_map;
+
 /* System DMA platform data structure */
 struct omap_system_dma_plat_info {
 	const struct omap_dma_reg *reg_map;
@@ -278,6 +281,9 @@
 	void (*clear_dma)(int lch);
 	void (*dma_write)(u32 val, int reg, int lch);
 	u32 (*dma_read)(int reg, int lch);
+
+	const struct dma_slave_map *slave_map;
+	int slavecnt;
 };
 
 #ifdef CONFIG_ARCH_OMAP2PLUS
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index e2878ba..105700e 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -53,12 +53,16 @@
 #define EDMA_CTLR(i)			((i) >> 16)
 #define EDMA_CHAN_SLOT(i)		((i) & 0xffff)
 
+#define EDMA_FILTER_PARAM(ctlr, chan)	((int[]) { EDMA_CTLR_CHAN(ctlr, chan) })
+
 struct edma_rsv_info {
 
 	const s16	(*rsv_chans)[2];
 	const s16	(*rsv_slots)[2];
 };
 
+struct dma_slave_map;
+
 /* platform_data for EDMA driver */
 struct edma_soc_info {
 	/*
@@ -76,6 +80,9 @@
 
 	s8	(*queue_priority_mapping)[2];
 	const s16	(*xbar_chans)[2];
+
+	const struct dma_slave_map *slave_map;
+	int slavecnt;
 };
 
 #endif
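For context, a sketch of how board code could fill the new slave_map fields;
the device names and eDMA event numbers below are purely illustrative, not
taken from this series:

	static const struct dma_slave_map board_edma_map[] = {
		/* hypothetical devname/slave/event triplets */
		{ "board-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) },
		{ "board-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) },
	};

	static struct edma_soc_info board_edma_info = {
		/* ... */
		.slave_map	= board_edma_map,
		.slavecnt	= ARRAY_SIZE(board_edma_map),
	};

With such a table registered (the edma probe above copies it into
ecc->dma_slave.filter), dma_request_chan(dev, "rx") can resolve a channel on a
non-DT, non-ACPI platform.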
diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c
index fba365a..697c166 100644
--- a/sound/core/pcm_dmaengine.c
+++ b/sound/core/pcm_dmaengine.c
@@ -202,13 +202,13 @@
 		if (runtime->info & SNDRV_PCM_INFO_PAUSE)
 			dmaengine_pause(prtd->dma_chan);
 		else
-			dmaengine_terminate_all(prtd->dma_chan);
+			dmaengine_terminate_async(prtd->dma_chan);
 		break;
 	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
 		dmaengine_pause(prtd->dma_chan);
 		break;
 	case SNDRV_PCM_TRIGGER_STOP:
-		dmaengine_terminate_all(prtd->dma_chan);
+		dmaengine_terminate_async(prtd->dma_chan);
 		break;
 	default:
 		return -EINVAL;
@@ -346,6 +346,7 @@
 {
 	struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream);
 
+	dmaengine_synchronize(prtd->dma_chan);
 	kfree(prtd);
 
 	return 0;
@@ -362,9 +363,11 @@
 {
 	struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream);
 
+	dmaengine_synchronize(prtd->dma_chan);
 	dma_release_channel(prtd->dma_chan);
+	kfree(prtd);
 
-	return snd_dmaengine_pcm_close(substream);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_close_release_chan);