Merge branch 'topic/at_xdmac' into for-linus
diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
index 4659fd9..dc8d3aa 100644
--- a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
+++ b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
@@ -48,6 +48,7 @@
 	21	ESAI
 	22	SSI Dual FIFO	(needs firmware ver >= 2)
 	23	Shared ASRC
+	24	SAI
 
 The third cell specifies the transfer priority as below.
 
diff --git a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
index d75a9d7..f8c3311 100644
--- a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
+++ b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
@@ -1,7 +1,9 @@
 QCOM BAM DMA controller
 
 Required properties:
-- compatible: must contain "qcom,bam-v1.4.0" for MSM8974
+- compatible: must be one of the following:
+ * "qcom,bam-v1.4.0" for MSM8974, APQ8074 and APQ8084
+ * "qcom,bam-v1.3.0" for APQ8064, IPQ8064 and MSM8960
 - reg: Address range for DMA registers
 - interrupts: Should contain the one interrupt shared by all channels
 - #dma-cells: must be <1>, the cell in the dmas property of the client device
diff --git a/Documentation/devicetree/bindings/dma/sun6i-dma.txt b/Documentation/devicetree/bindings/dma/sun6i-dma.txt
index 3e145c1..9cdcba24d 100644
--- a/Documentation/devicetree/bindings/dma/sun6i-dma.txt
+++ b/Documentation/devicetree/bindings/dma/sun6i-dma.txt
@@ -4,7 +4,7 @@
 
 Required properties:
 
-- compatible:	Must be "allwinner,sun6i-a31-dma"
+- compatible:	Must be "allwinner,sun6i-a31-dma" or "allwinner,sun8i-a23-dma"
 - reg:		Should contain the registers base address and length
 - interrupts:	Should contain a reference to the interrupt used by this device
 - clocks:	Should contain a reference to the parent AHB clock
diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine/client.txt
similarity index 100%
rename from Documentation/dmaengine.txt
rename to Documentation/dmaengine/client.txt
diff --git a/Documentation/dmatest.txt b/Documentation/dmaengine/dmatest.txt
similarity index 100%
rename from Documentation/dmatest.txt
rename to Documentation/dmaengine/dmatest.txt
diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt
new file mode 100644
index 0000000..766658c
--- /dev/null
+++ b/Documentation/dmaengine/provider.txt
@@ -0,0 +1,366 @@
+DMAengine controller documentation
+==================================
+
+Hardware Introduction
++++++++++++++++++++++
+
+Most of the slave DMA controllers follow the same general principles
+of operation.
+
+They have a given number of channels to use for the DMA transfers, and
+a given number of request lines.
+
+Requests and channels are pretty much orthogonal: a channel can serve
+any request. To simplify, channels are the entities that do the
+copying, and requests define which endpoints are involved.
+
+The request lines correspond to physical lines going from the
+DMA-eligible devices to the controller itself. Whenever a device
+wants to start a transfer, it asserts a DMA request (DRQ) on its
+request line.
+
+A very simple DMA controller would only take into account a single
+parameter: the transfer size. At each clock cycle, it would transfer a
+byte of data from one buffer to another, until the transfer size has
+been reached.
+
+That wouldn't work well in the real world, since slave devices might
+require a specific number of bits to be transferred in a single
+cycle. For example, we may want to transfer as much data as the
+physical bus allows to maximize performance when doing a simple
+memory copy operation, but our audio device could have a narrower FIFO
+that requires data to be written exactly 16 or 24 bits at a time. This
+is why most, if not all, DMA controllers can adjust this, using a
+parameter called the transfer width.
+
+Moreover, some DMA controllers, whenever the RAM is used as a source
+or destination, can group the reads or writes in memory into a buffer,
+so instead of having a lot of small memory accesses, which is not
+really efficient, you'll get several bigger transfers. This is done
+using a parameter called the burst size, which defines how many single
+reads/writes the controller is allowed to do in a row before splitting
+the transfer into smaller sub-transfers.
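+
+Jumping ahead to the Linux API for a moment: a client driver typically
+expresses these two constraints through a struct dma_slave_config. A
+minimal sketch, assuming an already-requested channel chan and a
+made-up FIFO address FOO_TX_FIFO:
+
+	struct dma_slave_config cfg = {
+		.direction	= DMA_MEM_TO_DEV,
+		.dst_addr	= FOO_TX_FIFO,
+		/* write exactly 16 bits at a time to the FIFO */
+		.dst_addr_width	= DMA_SLAVE_BUSWIDTH_2_BYTES,
+		/* group up to 8 words into a single burst */
+		.dst_maxburst	= 8,
+	};
+
+	dmaengine_slave_config(chan, &cfg);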
+
+Our theoretical DMA controller would then only be able to do transfers
+that involve a single contiguous block of data. However, some of the
+transfers we usually have are not contiguous, and we want to copy data
+from non-contiguous buffers to a contiguous buffer, which is called
+scatter-gather.
+
+DMAEngine, at least for mem2dev transfers, requires support for
+scatter-gather. So we're left with two cases here: either we have a
+quite simple DMA controller that doesn't support it, and we'll have to
+implement it in software, or we have a more advanced DMA controller
+that implements scatter-gather in hardware.
+
+The latter are usually programmed using a collection of chunks to
+transfer, and whenever the transfer is started, the controller will go
+over that collection, doing whatever we programmed there.
+
+This collection is usually either a table or a linked list. You then
+push either the address of the table and its number of elements, or
+the first item of the list, to one channel of the DMA controller, and
+whenever a DRQ is asserted, the controller goes through the collection
+to know where to fetch the data from.
+
+Either way, the format of this collection is completely dependent on
+your hardware. Each DMA controller will require a different structure,
+but all of them will require, for every chunk, at least the source and
+destination addresses, whether it should increment these addresses or
+not, and the three parameters we saw earlier: the burst size, the
+transfer width and the transfer size.
+
+One last thing: slave devices usually won't issue a DRQ by default,
+and you first have to enable this in your slave device driver whenever
+you want to use DMA.
+
+These were just the general memory-to-memory (also called mem2mem) or
+memory-to-device (mem2dev) kinds of transfers. Most devices support
+other kinds of transfers or memory operations that dmaengine supports;
+these will be detailed later in this document.
+
+DMA Support in Linux
+++++++++++++++++++++
+
+Historically, DMA controller drivers have been implemented using the
+async TX API, to offload operations such as memory copy, XOR,
+cryptography, etc., basically any memory to memory operation.
+
+Over time, the need for memory to device transfers arose, and
+dmaengine was extended. Nowadays, the async TX API is written as a
+layer on top of dmaengine, and acts as a client. Still, dmaengine
+accommodates that API in some cases, and made some design choices to
+ensure that it stayed compatible.
+
+For more information on the Async TX API, please refer to the relevant
+documentation file in Documentation/crypto/async-tx-api.txt.
+
+DMAEngine Registration
+++++++++++++++++++++++
+
+struct dma_device Initialization
+--------------------------------
+
+Just like any other kernel framework, the whole DMAEngine registration
+relies on the driver filling a structure and registering against the
+framework. In our case, that structure is dma_device.
+
+The first thing you need to do in your driver is to allocate this
+structure. Any of the usual memory allocators will do, but you'll also
+need to initialize a few fields in there:
+
+  * channels:	should be initialized as a list using the
+		INIT_LIST_HEAD macro for example
+
+  * dev: 	should hold the pointer to the struct device associated
+		with your current driver instance.
+
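+For example, a minimal probe sketch, where foo_dma_dev is a
+hypothetical driver-private structure embedding a struct dma_device as
+its ddev member:
+
+	struct foo_dma_dev *fd;
+
+	fd = devm_kzalloc(&pdev->dev, sizeof(*fd), GFP_KERNEL);
+	if (!fd)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&fd->ddev.channels);
+	fd->ddev.dev = &pdev->dev;
+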
+Supported transaction types
+---------------------------
+
+The next thing you need is to set which transaction types your device
+(and driver) supports.
+
+Our dma_device structure has a field called cap_mask that holds the
+various types of transactions supported, and you need to modify this
+mask using the dma_cap_set function, passing the flags for the
+transaction types you support as arguments.
+
+All those capabilities are defined in the dma_transaction_type enum,
+in include/linux/dmaengine.h
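+
+Continuing the hypothetical foo_dma_dev from above, a driver
+supporting slave and cyclic transfers would do something like:
+
+	dma_cap_zero(fd->ddev.cap_mask);
+	dma_cap_set(DMA_SLAVE, fd->ddev.cap_mask);
+	dma_cap_set(DMA_CYCLIC, fd->ddev.cap_mask);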
+
+Currently, the types available are:
+  * DMA_MEMCPY
+    - The device is able to do memory to memory copies
+
+  * DMA_XOR
+    - The device is able to perform XOR operations on memory areas
+    - Used to accelerate XOR intensive tasks, such as RAID5
+
+  * DMA_XOR_VAL
+    - The device is able to perform parity check using the XOR
+      algorithm against a memory buffer.
+
+  * DMA_PQ
+    - The device is able to perform RAID6 P+Q computations, P being a
+      simple XOR, and Q being a Reed-Solomon algorithm.
+
+  * DMA_PQ_VAL
+    - The device is able to perform parity check using RAID6 P+Q
+      algorithm against a memory buffer.
+
+  * DMA_INTERRUPT
+    - The device is able to trigger a dummy transfer that will
+      generate periodic interrupts
+    - Used by the client drivers to register a callback that will be
+      called on a regular basis through the DMA controller interrupt
+
+  * DMA_SG
+    - The device supports memory to memory scatter-gather
+      transfers.
+    - Even though a plain memcpy can look like a particular case of a
+      scatter-gather transfer, with a single chunk to transfer, it's a
+      distinct transaction type in the mem2mem transfers case
+
+  * DMA_PRIVATE
+    - The device only supports slave transfers, and as such isn't
+      available for async transfers.
+
+  * DMA_ASYNC_TX
+    - Must not be set by the device, and will be set by the framework
+      if needed
+    - /* TODO: What is it about? */
+
+  * DMA_SLAVE
+    - The device can handle device to memory transfers, including
+      scatter-gather transfers.
+    - While in the mem2mem case we had two distinct types to deal with
+      a single chunk to copy or a collection of them, here, we just
+      have a single transaction type that is supposed to handle both.
+    - If you want to transfer a single contiguous memory buffer,
+      simply build a scatter list with only one item (see the example
+      after this list).
+
+  * DMA_CYCLIC
+    - The device can handle cyclic transfers.
+    - A cyclic transfer is a transfer where the chunk collection will
+      loop over itself, with the last item pointing to the first.
+    - It's usually used for audio transfers, where you want to operate
+      on a single ring buffer that you will fill with your audio data.
+
+  * DMA_INTERLEAVE
+    - The device supports interleaved transfer.
+    - These transfers can transfer data from a non-contiguous buffer
+      to a non-contiguous buffer, as opposed to DMA_SLAVE, which can
+      transfer data from a non-contiguous data set to a contiguous
+      destination buffer.
+    - It's usually used for 2D content transfers, in which case you
+      want to transfer a portion of uncompressed data directly to the
+      display to show it
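+
+A client-side example of the single-buffer DMA_SLAVE case mentioned
+above, as a sketch: chan, buf and len are assumed to already exist,
+and error handling is omitted.
+
+	struct dma_async_tx_descriptor *desc;
+	struct scatterlist sg;
+
+	sg_init_one(&sg, buf, len);
+	dma_map_sg(chan->device->dev, &sg, 1, DMA_TO_DEVICE);
+
+	desc = dmaengine_prep_slave_sg(chan, &sg, 1, DMA_MEM_TO_DEV,
+				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);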
+
+These various types will also affect how the source and destination
+addresses change over time.
+
+Addresses pointing to RAM are typically incremented (or decremented)
+after each transfer. In case of a ring buffer, they may loop
+(DMA_CYCLIC). Addresses pointing to a device's register (e.g. a FIFO)
+are typically fixed.
+
+Device operations
+-----------------
+
+Now that we have described the operations we are able to perform, our
+dma_device structure also requires a few function pointers in order to
+implement the actual logic.
+
+The functions that you have to fill in there, and hence have to
+implement, obviously depend on the transaction types you reported as
+supported.
+
+   * device_alloc_chan_resources
+   * device_free_chan_resources
+     - These functions are called whenever a driver calls
+       dma_request_channel or dma_release_channel for the first/last
+       time on the channel associated with that driver.
+     - They are in charge of allocating/freeing all the needed
+       resources in order for that channel to be useful for your
+       driver.
+     - These functions can sleep.
+
+   * device_prep_dma_*
+     - These functions match the capabilities you registered
+       previously.
+     - These functions all take the buffer or the scatterlist relevant
+       for the transfer being prepared, and should create a hardware
+       descriptor or a list of hardware descriptors from it
+     - These functions can be called from an interrupt context
+     - Any allocation you might do should use the GFP_NOWAIT
+       flag, in order not to potentially sleep, but without depleting
+       the emergency pool either.
+     - Drivers should try to pre-allocate any memory they might need
+       during the transfer setup at probe time to avoid putting too
+       much pressure on the nowait allocator.
+
+     - It should return a unique instance of the
+       dma_async_tx_descriptor structure, which further represents
+       this particular transfer.
+
+     - This structure can be initialized using the function
+       dma_async_tx_descriptor_init.
+     - You'll also need to set two fields in this structure:
+       + flags:
+		TODO: Can it be modified by the driver itself, or
+		should it be always the flags passed in the arguments
+
+       + tx_submit:	A pointer to a function you have to implement,
+			that is supposed to push the current
+			transaction descriptor to a pending queue, waiting
+			for issue_pending to be called.
+
+   * device_issue_pending
+     - Takes the first transaction descriptor in the pending queue,
+       and starts the transfer. Whenever that transfer is done, it
+       should move to the next transaction in the list.
+     - This function can be called in an interrupt context
+
+   * device_tx_status
+     - Should report the number of bytes left to go over on the given
+       channel
+     - Should only care about the transaction descriptor passed as
+       argument, not the currently active one on a given channel
+     - The tx_state argument might be NULL
+     - Should use dma_set_residue to report it
+     - In the case of a cyclic transfer, it should only take into
+       account the current period.
+     - This function can be called in an interrupt context.
+     - A typical implementation is sketched after this list.
+
+   * device_control
+     - Used by client drivers to control and configure the channel
+       they have a handle on.
+     - Called with a command and an argument
+       + The command is one of the values listed by the enum
+         dma_ctrl_cmd. The valid commands are:
+         + DMA_PAUSE
+           + Pauses a transfer on the channel
+           + This command should operate synchronously on the channel,
+             pausing right away the work of the given channel
+         + DMA_RESUME
+           + Restarts a transfer on the channel
+           + This command should operate synchronously on the channel,
+             resuming right away the work of the given channel
+         + DMA_TERMINATE_ALL
+           + Aborts all the pending and ongoing transfers on the
+             channel
+           + This command should operate synchronously on the channel,
+             terminating all the work on the channel right away
+         + DMA_SLAVE_CONFIG
+           + Reconfigures the channel with the passed configuration
+           + This command should NOT operate synchronously, nor on any
+             currently queued transfers, but only on subsequent ones
+           + In this case, the function will receive a
+             dma_slave_config structure pointer as an argument, that
+             will detail which configuration to use.
+           + Even though that structure contains a direction field,
+             this field is deprecated in favor of the direction
+             argument given to the prep_* functions
+         + FSLDMA_EXTERNAL_START
+           + TODO: Why does that even exist?
+       + The argument is an opaque unsigned long. This actually is a
+         pointer to a struct dma_slave_config that should be used only
+         in the DMA_SLAVE_CONFIG case.
+
+  * device_slave_caps
+    - Called through the framework by client drivers in order to get
+      an idea of the properties of the channel allocated to them.
+    - Such properties are the buswidth, available directions, etc.
+    - Required for every generic layer doing DMA transfers, such as
+      ASoC.
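+
+A minimal sketch of such a device_tx_status implementation, using the
+dma_cookie helpers from drivers/dma/dmaengine.h (foo_chan, to_foo_chan
+and foo_desc_residue are hypothetical):
+
+	static enum dma_status foo_tx_status(struct dma_chan *chan,
+					     dma_cookie_t cookie,
+					     struct dma_tx_state *txstate)
+	{
+		struct foo_chan *fchan = to_foo_chan(chan);
+		enum dma_status ret;
+		unsigned long flags;
+
+		/* nothing left to report once the transaction is complete */
+		ret = dma_cookie_status(chan, cookie, txstate);
+		if (ret == DMA_COMPLETE || !txstate)
+			return ret;
+
+		spin_lock_irqsave(&fchan->lock, flags);
+		/* report the bytes left for that particular descriptor */
+		dma_set_residue(txstate, foo_desc_residue(fchan, cookie));
+		spin_unlock_irqrestore(&fchan->lock, flags);
+
+		return ret;
+	}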
+
+Misc notes (stuff that should be documented, but we don't really know
+where to put them)
+------------------------------------------------------------------
+  * dma_run_dependencies
+    - Should be called at the end of an async TX transfer, and can be
+      ignored in the slave transfers case.
+    - Makes sure that dependent operations are run before marking it
+      as complete.
+
+  * dma_cookie_t
+    - It's a DMA transaction ID that will increment over time.
+    - Not really relevant any more since the introduction of virt-dma,
+      which abstracts it away.
+
+  * DMA_CTRL_ACK
+    - Undocumented feature
+    - No one really has an idea of what it's about, besides being
+      related to reusing the DMA transaction descriptors or having
+      additional transactions added to it in the async-tx API
+    - Useless in the case of the slave API
+
+General Design Notes
+--------------------
+
+Most of the DMAEngine drivers you'll see are based on a similar design
+that handles the end-of-transfer interrupts in the handler, but defers
+most work to a tasklet, including the start of a new transfer whenever
+the previous transfer ended.
+
+This is a rather inefficient design though, because the inter-transfer
+latency will be not only the interrupt latency, but also the
+scheduling latency of the tasklet. This leaves the channel idle in
+between, which slows down the global transfer rate.
+
+You should avoid this kind of practice, and instead of electing a new
+transfer in your tasklet, move that part to the interrupt handler in
+order to have a shorter idle window (that we can't really avoid
+anyway).
+
+Glossary
+--------
+
+Burst: 		A number of consecutive read or write operations
+		that can be queued to buffers before being flushed to
+		memory.
+Chunk:		A contiguous collection of bursts
+Transfer:	A collection of chunks (be it contiguous or not)
diff --git a/MAINTAINERS b/MAINTAINERS
index fd3771c..b3f5493 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3080,7 +3080,8 @@
 S:	Maintained
 F:	drivers/dma/
 F:	include/linux/dma*
-T:	git git://git.infradead.org/users/vkoul/slave-dma.git (slave-dma)
+F:	Documentation/dmaengine/
+T:	git git://git.infradead.org/users/vkoul/slave-dma.git
 
 DME1737 HARDWARE MONITOR DRIVER
 M:	Juerg Haefliger <juergh@gmail.com>
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 8b6fb0f..f2b2c4e 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -402,12 +402,12 @@
 
 config DMA_SUN6I
 	tristate "Allwinner A31 SoCs DMA support"
-	depends on MACH_SUN6I || COMPILE_TEST
+	depends on MACH_SUN6I || MACH_SUN8I || COMPILE_TEST
 	depends on RESET_CONTROLLER
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
 	help
-	  Support for the DMA engine for Allwinner A31 SoCs.
+	  Support for the DMA engine first found in Allwinner A31 SoCs.
 
 config NBPFAXI_DMA
 	tristate "Renesas Type-AXI NBPF DMA support"
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index e34024b..1364d00 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -2164,7 +2164,6 @@
 			 __func__, ret);
 		goto out_no_memcpy;
 	}
-	pl08x->memcpy.chancnt = ret;
 
 	/* Register slave channels */
 	ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave,
@@ -2175,7 +2174,6 @@
 				__func__, ret);
 		goto out_no_slave;
 	}
-	pl08x->slave.chancnt = ret;
 
 	ret = dma_async_device_register(&pl08x->memcpy);
 	if (ret) {
diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
index 6800797..918b7b3 100644
--- a/drivers/dma/bcm2835-dma.c
+++ b/drivers/dma/bcm2835-dma.c
@@ -525,8 +525,6 @@
 	vchan_init(&c->vc, &d->ddev);
 	INIT_LIST_HEAD(&c->node);
 
-	d->ddev.chancnt++;
-
 	c->chan_base = BCM2835_DMA_CHANIO(d->base, chan_id);
 	c->ch = chan_id;
 	c->irq_number = irq;
@@ -694,7 +692,6 @@
 	.remove	= bcm2835_dma_remove,
 	.driver = {
 		.name = "bcm2835-dma",
-		.owner = THIS_MODULE,
 		.of_match_table = of_match_ptr(bcm2835_dma_of_match),
 	},
 };
diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index a58eec3..cf7633f 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -1088,7 +1088,6 @@
 	.remove = cppi41_dma_remove,
 	.driver = {
 		.name = "cppi41-dma-engine",
-		.owner = THIS_MODULE,
 		.pm = &cppi41_pm_ops,
 		.of_match_table = of_match_ptr(cppi41_dma_ids),
 	},
diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c
index ae2ab14..bdeafee 100644
--- a/drivers/dma/dma-jz4740.c
+++ b/drivers/dma/dma-jz4740.c
@@ -563,10 +563,9 @@
 	dd->device_prep_dma_cyclic = jz4740_dma_prep_dma_cyclic;
 	dd->device_control = jz4740_dma_control;
 	dd->dev = &pdev->dev;
-	dd->chancnt = JZ_DMA_NR_CHANS;
 	INIT_LIST_HEAD(&dd->channels);
 
-	for (i = 0; i < dd->chancnt; i++) {
+	for (i = 0; i < JZ_DMA_NR_CHANS; i++) {
 		chan = &dmadev->chan[i];
 		chan->id = i;
 		chan->vchan.desc_free = jz4740_dma_desc_free;
@@ -608,7 +607,6 @@
 	.remove = jz4740_dma_remove,
 	.driver = {
 		.name = "jz4740-dma",
-		.owner = THIS_MODULE,
 	},
 };
 module_platform_driver(jz4740_dma_driver);
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 994bcb2..3d8feb5 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -1337,7 +1337,6 @@
 
 	/* Add the channel to DMA device channel list */
 	list_add_tail(&chan->common.device_node, &fdev->common.channels);
-	fdev->common.chancnt++;
 
 	dev_info(fdev->dev, "#%d (%s), irq %d\n", chan->id, compatible,
 		 chan->irq != NO_IRQ ? chan->irq : fdev->irq);
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 88afc48..5b38f2b 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -729,6 +729,7 @@
 	case IMX_DMATYPE_CSPI:
 	case IMX_DMATYPE_EXT:
 	case IMX_DMATYPE_SSI:
+	case IMX_DMATYPE_SAI:
 		per_2_emi = sdma->script_addrs->app_2_mcu_addr;
 		emi_2_per = sdma->script_addrs->mcu_2_app_addr;
 		break;
@@ -1346,7 +1347,7 @@
 	return ret;
 }
 
-static int __init sdma_init(struct sdma_engine *sdma)
+static int sdma_init(struct sdma_engine *sdma)
 {
 	int i, ret;
 	dma_addr_t ccb_phys;
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index c56137b..263d9f6 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -1557,7 +1557,6 @@
 	.probe		= iop_adma_probe,
 	.remove		= iop_adma_remove,
 	.driver		= {
-		.owner	= THIS_MODULE,
 		.name	= "iop-adma",
 	},
 };
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
index a1f911a..a1de14a 100644
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -722,7 +722,6 @@
 	d->slave.device_issue_pending = k3_dma_issue_pending;
 	d->slave.device_control = k3_dma_control;
 	d->slave.copy_align = DMA_ALIGN;
-	d->slave.chancnt = d->dma_requests;
 
 	/* init virtual channel */
 	d->chans = devm_kzalloc(&op->dev,
@@ -787,6 +786,7 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int k3_dma_suspend(struct device *dev)
 {
 	struct k3_dma_dev *d = dev_get_drvdata(dev);
@@ -816,13 +816,13 @@
 	k3_dma_enable_dma(d, true);
 	return 0;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(k3_dma_pmops, k3_dma_suspend, k3_dma_resume);
 
 static struct platform_driver k3_pdma_driver = {
 	.driver		= {
 		.name	= DRIVER_NAME,
-		.owner  = THIS_MODULE,
 		.pm	= &k3_dma_pmops,
 		.of_match_table = k3_pdma_dt_ids,
 	},
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
index a1a4db5..8b8952f 100644
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c
@@ -1098,7 +1098,6 @@
 static struct platform_driver mmp_pdma_driver = {
 	.driver		= {
 		.name	= "mmp-pdma",
-		.owner  = THIS_MODULE,
 		.of_match_table = mmp_pdma_dt_ids,
 	},
 	.id_table	= mmp_pdma_id_table,
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
index c6bd015..bfb4695 100644
--- a/drivers/dma/mmp_tdma.c
+++ b/drivers/dma/mmp_tdma.c
@@ -703,7 +703,6 @@
 static struct platform_driver mmp_tdma_driver = {
 	.driver		= {
 		.name	= "mmp-tdma",
-		.owner  = THIS_MODULE,
 		.of_match_table = mmp_tdma_dt_ids,
 	},
 	.id_table	= mmp_tdma_id_table,
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 881db2b..01bec40 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -885,6 +885,7 @@
 	struct resource res;
 	ulong regs_start, regs_size;
 	int retval, i;
+	u8 chancnt;
 
 	mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL);
 	if (!mdma) {
@@ -956,10 +957,6 @@
 
 	dma = &mdma->dma;
 	dma->dev = dev;
-	if (mdma->is_mpc8308)
-		dma->chancnt = MPC8308_DMACHAN_MAX;
-	else
-		dma->chancnt = MPC512x_DMACHAN_MAX;
 	dma->device_alloc_chan_resources = mpc_dma_alloc_chan_resources;
 	dma->device_free_chan_resources = mpc_dma_free_chan_resources;
 	dma->device_issue_pending = mpc_dma_issue_pending;
@@ -972,7 +969,12 @@
 	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
 	dma_cap_set(DMA_SLAVE, dma->cap_mask);
 
-	for (i = 0; i < dma->chancnt; i++) {
+	if (mdma->is_mpc8308)
+		chancnt = MPC8308_DMACHAN_MAX;
+	else
+		chancnt = MPC512x_DMACHAN_MAX;
+
+	for (i = 0; i < chancnt; i++) {
 		mchan = &mdma->channels[i];
 
 		mchan->chan.device = dma;
@@ -1090,7 +1092,6 @@
 	.remove		= mpc_dma_remove,
 	.driver = {
 		.name = DRV_NAME,
-		.owner = THIS_MODULE,
 		.of_match_table	= mpc_dma_match,
 	},
 };
diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c
index 5aeada5..a7a3683 100644
--- a/drivers/dma/nbpfaxi.c
+++ b/drivers/dma/nbpfaxi.c
@@ -1500,7 +1500,6 @@
 
 static struct platform_driver nbpf_driver = {
 	.driver = {
-		.owner = THIS_MODULE,
 		.name = "dma-nbpf",
 		.of_match_table = nbpf_match,
 		.pm = &nbpf_pm_ops,
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index bbea824..6ea1ade 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -1074,8 +1074,6 @@
 	vchan_init(&c->vc, &od->ddev);
 	INIT_LIST_HEAD(&c->node);
 
-	od->ddev.chancnt++;
-
 	return 0;
 }
 
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 4839bfa..83e2257 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2610,6 +2610,9 @@
 		return -ENOMEM;
 	}
 
+	pd = &pl330->ddma;
+	pd->dev = &adev->dev;
+
 	pl330->mcbufsz = pdat ? pdat->mcbuf_sz : 0;
 
 	res = &adev->res;
@@ -2646,7 +2649,6 @@
 	if (!add_desc(pl330, GFP_KERNEL, NR_DEFAULT_DESC))
 		dev_warn(&adev->dev, "unable to allocate desc\n");
 
-	pd = &pl330->ddma;
 	INIT_LIST_HEAD(&pd->channels);
 
 	/* Initialize channel parameters */
@@ -2683,7 +2685,6 @@
 		list_add_tail(&pch->chan.device_node, &pd->channels);
 	}
 
-	pd->dev = &adev->dev;
 	if (pdat) {
 		pd->cap_mask = pdat->cap_mask;
 	} else {
diff --git a/drivers/dma/qcom_bam_dma.c b/drivers/dma/qcom_bam_dma.c
index 7a4bbb0..3122a99 100644
--- a/drivers/dma/qcom_bam_dma.c
+++ b/drivers/dma/qcom_bam_dma.c
@@ -79,35 +79,97 @@
 	struct bam_desc_hw desc[0];
 };
 
-#define BAM_CTRL			0x0000
-#define BAM_REVISION			0x0004
-#define BAM_SW_REVISION			0x0080
-#define BAM_NUM_PIPES			0x003C
-#define BAM_TIMER			0x0040
-#define BAM_TIMER_CTRL			0x0044
-#define BAM_DESC_CNT_TRSHLD		0x0008
-#define BAM_IRQ_SRCS			0x000C
-#define BAM_IRQ_SRCS_MSK		0x0010
-#define BAM_IRQ_SRCS_UNMASKED		0x0030
-#define BAM_IRQ_STTS			0x0014
-#define BAM_IRQ_CLR			0x0018
-#define BAM_IRQ_EN			0x001C
-#define BAM_CNFG_BITS			0x007C
-#define BAM_IRQ_SRCS_EE(ee)		(0x0800 + ((ee) * 0x80))
-#define BAM_IRQ_SRCS_MSK_EE(ee)		(0x0804 + ((ee) * 0x80))
-#define BAM_P_CTRL(pipe)		(0x1000 + ((pipe) * 0x1000))
-#define BAM_P_RST(pipe)			(0x1004 + ((pipe) * 0x1000))
-#define BAM_P_HALT(pipe)		(0x1008 + ((pipe) * 0x1000))
-#define BAM_P_IRQ_STTS(pipe)		(0x1010 + ((pipe) * 0x1000))
-#define BAM_P_IRQ_CLR(pipe)		(0x1014 + ((pipe) * 0x1000))
-#define BAM_P_IRQ_EN(pipe)		(0x1018 + ((pipe) * 0x1000))
-#define BAM_P_EVNT_DEST_ADDR(pipe)	(0x182C + ((pipe) * 0x1000))
-#define BAM_P_EVNT_REG(pipe)		(0x1818 + ((pipe) * 0x1000))
-#define BAM_P_SW_OFSTS(pipe)		(0x1800 + ((pipe) * 0x1000))
-#define BAM_P_DATA_FIFO_ADDR(pipe)	(0x1824 + ((pipe) * 0x1000))
-#define BAM_P_DESC_FIFO_ADDR(pipe)	(0x181C + ((pipe) * 0x1000))
-#define BAM_P_EVNT_TRSHLD(pipe)		(0x1828 + ((pipe) * 0x1000))
-#define BAM_P_FIFO_SIZES(pipe)		(0x1820 + ((pipe) * 0x1000))
+enum bam_reg {
+	BAM_CTRL,
+	BAM_REVISION,
+	BAM_NUM_PIPES,
+	BAM_DESC_CNT_TRSHLD,
+	BAM_IRQ_SRCS,
+	BAM_IRQ_SRCS_MSK,
+	BAM_IRQ_SRCS_UNMASKED,
+	BAM_IRQ_STTS,
+	BAM_IRQ_CLR,
+	BAM_IRQ_EN,
+	BAM_CNFG_BITS,
+	BAM_IRQ_SRCS_EE,
+	BAM_IRQ_SRCS_MSK_EE,
+	BAM_P_CTRL,
+	BAM_P_RST,
+	BAM_P_HALT,
+	BAM_P_IRQ_STTS,
+	BAM_P_IRQ_CLR,
+	BAM_P_IRQ_EN,
+	BAM_P_EVNT_DEST_ADDR,
+	BAM_P_EVNT_REG,
+	BAM_P_SW_OFSTS,
+	BAM_P_DATA_FIFO_ADDR,
+	BAM_P_DESC_FIFO_ADDR,
+	BAM_P_EVNT_GEN_TRSHLD,
+	BAM_P_FIFO_SIZES,
+};
+
+struct reg_offset_data {
+	u32 base_offset;
+	unsigned int pipe_mult, evnt_mult, ee_mult;
+};
+
+static const struct reg_offset_data bam_v1_3_reg_info[] = {
+	[BAM_CTRL]		= { 0x0F80, 0x00, 0x00, 0x00 },
+	[BAM_REVISION]		= { 0x0F84, 0x00, 0x00, 0x00 },
+	[BAM_NUM_PIPES]		= { 0x0FBC, 0x00, 0x00, 0x00 },
+	[BAM_DESC_CNT_TRSHLD]	= { 0x0F88, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS]		= { 0x0F8C, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_MSK]	= { 0x0F90, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_UNMASKED]	= { 0x0FB0, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_STTS]		= { 0x0F94, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_CLR]		= { 0x0F98, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_EN]		= { 0x0F9C, 0x00, 0x00, 0x00 },
+	[BAM_CNFG_BITS]		= { 0x0FFC, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_EE]	= { 0x1800, 0x00, 0x00, 0x80 },
+	[BAM_IRQ_SRCS_MSK_EE]	= { 0x1804, 0x00, 0x00, 0x80 },
+	[BAM_P_CTRL]		= { 0x0000, 0x80, 0x00, 0x00 },
+	[BAM_P_RST]		= { 0x0004, 0x80, 0x00, 0x00 },
+	[BAM_P_HALT]		= { 0x0008, 0x80, 0x00, 0x00 },
+	[BAM_P_IRQ_STTS]	= { 0x0010, 0x80, 0x00, 0x00 },
+	[BAM_P_IRQ_CLR]		= { 0x0014, 0x80, 0x00, 0x00 },
+	[BAM_P_IRQ_EN]		= { 0x0018, 0x80, 0x00, 0x00 },
+	[BAM_P_EVNT_DEST_ADDR]	= { 0x102C, 0x00, 0x40, 0x00 },
+	[BAM_P_EVNT_REG]	= { 0x1018, 0x00, 0x40, 0x00 },
+	[BAM_P_SW_OFSTS]	= { 0x1000, 0x00, 0x40, 0x00 },
+	[BAM_P_DATA_FIFO_ADDR]	= { 0x1024, 0x00, 0x40, 0x00 },
+	[BAM_P_DESC_FIFO_ADDR]	= { 0x101C, 0x00, 0x40, 0x00 },
+	[BAM_P_EVNT_GEN_TRSHLD]	= { 0x1028, 0x00, 0x40, 0x00 },
+	[BAM_P_FIFO_SIZES]	= { 0x1020, 0x00, 0x40, 0x00 },
+};
+
+static const struct reg_offset_data bam_v1_4_reg_info[] = {
+	[BAM_CTRL]		= { 0x0000, 0x00, 0x00, 0x00 },
+	[BAM_REVISION]		= { 0x0004, 0x00, 0x00, 0x00 },
+	[BAM_NUM_PIPES]		= { 0x003C, 0x00, 0x00, 0x00 },
+	[BAM_DESC_CNT_TRSHLD]	= { 0x0008, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS]		= { 0x000C, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_MSK]	= { 0x0010, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_UNMASKED]	= { 0x0030, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_STTS]		= { 0x0014, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_CLR]		= { 0x0018, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_EN]		= { 0x001C, 0x00, 0x00, 0x00 },
+	[BAM_CNFG_BITS]		= { 0x007C, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_EE]	= { 0x0800, 0x00, 0x00, 0x80 },
+	[BAM_IRQ_SRCS_MSK_EE]	= { 0x0804, 0x00, 0x00, 0x80 },
+	[BAM_P_CTRL]		= { 0x1000, 0x1000, 0x00, 0x00 },
+	[BAM_P_RST]		= { 0x1004, 0x1000, 0x00, 0x00 },
+	[BAM_P_HALT]		= { 0x1008, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_STTS]	= { 0x1010, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_CLR]		= { 0x1014, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_EN]		= { 0x1018, 0x1000, 0x00, 0x00 },
+	[BAM_P_EVNT_DEST_ADDR]	= { 0x102C, 0x00, 0x1000, 0x00 },
+	[BAM_P_EVNT_REG]	= { 0x1018, 0x00, 0x1000, 0x00 },
+	[BAM_P_SW_OFSTS]	= { 0x1000, 0x00, 0x1000, 0x00 },
+	[BAM_P_DATA_FIFO_ADDR]	= { 0x1824, 0x00, 0x1000, 0x00 },
+	[BAM_P_DESC_FIFO_ADDR]	= { 0x181C, 0x00, 0x1000, 0x00 },
+	[BAM_P_EVNT_GEN_TRSHLD]	= { 0x1828, 0x00, 0x1000, 0x00 },
+	[BAM_P_FIFO_SIZES]	= { 0x1820, 0x00, 0x1000, 0x00 },
+};
 
 /* BAM CTRL */
 #define BAM_SW_RST			BIT(0)
@@ -297,6 +359,8 @@
 	/* execution environment ID, from DT */
 	u32 ee;
 
+	const struct reg_offset_data *layout;
+
 	struct clk *bamclk;
 	int irq;
 
@@ -305,6 +369,23 @@
 };
 
 /**
+ * bam_addr - returns BAM register address
+ * @bdev: bam device
+ * @pipe: pipe instance (ignored when register doesn't have multiple instances)
+ * @reg:  register enum
+ */
+static inline void __iomem *bam_addr(struct bam_device *bdev, u32 pipe,
+		enum bam_reg reg)
+{
+	const struct reg_offset_data r = bdev->layout[reg];
+
+	return bdev->regs + r.base_offset +
+		r.pipe_mult * pipe +
+		r.evnt_mult * pipe +
+		r.ee_mult * bdev->ee;
+}
+
+/**
  * bam_reset_channel - Reset individual BAM DMA channel
  * @bchan: bam channel
  *
@@ -317,8 +398,8 @@
 	lockdep_assert_held(&bchan->vc.lock);
 
 	/* reset channel */
-	writel_relaxed(1, bdev->regs + BAM_P_RST(bchan->id));
-	writel_relaxed(0, bdev->regs + BAM_P_RST(bchan->id));
+	writel_relaxed(1, bam_addr(bdev, bchan->id, BAM_P_RST));
+	writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_RST));
 
 	/* don't allow cpu to reorder BAM register accesses done after this */
 	wmb();
@@ -347,17 +428,18 @@
 	 * because we allocated 1 more descriptor (8 bytes) than we can use
 	 */
 	writel_relaxed(ALIGN(bchan->fifo_phys, sizeof(struct bam_desc_hw)),
-			bdev->regs + BAM_P_DESC_FIFO_ADDR(bchan->id));
-	writel_relaxed(BAM_DESC_FIFO_SIZE, bdev->regs +
-			BAM_P_FIFO_SIZES(bchan->id));
+			bam_addr(bdev, bchan->id, BAM_P_DESC_FIFO_ADDR));
+	writel_relaxed(BAM_DESC_FIFO_SIZE,
+			bam_addr(bdev, bchan->id, BAM_P_FIFO_SIZES));
 
 	/* enable the per pipe interrupts, enable EOT, ERR, and INT irqs */
-	writel_relaxed(P_DEFAULT_IRQS_EN, bdev->regs + BAM_P_IRQ_EN(bchan->id));
+	writel_relaxed(P_DEFAULT_IRQS_EN,
+			bam_addr(bdev, bchan->id, BAM_P_IRQ_EN));
 
 	/* unmask the specific pipe and EE combo */
-	val = readl_relaxed(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 	val |= BIT(bchan->id);
-	writel_relaxed(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 
 	/* don't allow cpu to reorder the channel enable done below */
 	wmb();
@@ -367,7 +449,7 @@
 	if (dir == DMA_DEV_TO_MEM)
 		val |= P_DIRECTION;
 
-	writel_relaxed(val, bdev->regs + BAM_P_CTRL(bchan->id));
+	writel_relaxed(val, bam_addr(bdev, bchan->id, BAM_P_CTRL));
 
 	bchan->initialized = 1;
 
@@ -432,12 +514,12 @@
 	bchan->fifo_virt = NULL;
 
 	/* mask irq for pipe/channel */
-	val = readl_relaxed(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 	val &= ~BIT(bchan->id);
-	writel_relaxed(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 
 	/* disable irq */
-	writel_relaxed(0, bdev->regs + BAM_P_IRQ_EN(bchan->id));
+	writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_IRQ_EN));
 }
 
 /**
@@ -583,14 +665,14 @@
 	switch (cmd) {
 	case DMA_PAUSE:
 		spin_lock_irqsave(&bchan->vc.lock, flag);
-		writel_relaxed(1, bdev->regs + BAM_P_HALT(bchan->id));
+		writel_relaxed(1, bam_addr(bdev, bchan->id, BAM_P_HALT));
 		bchan->paused = 1;
 		spin_unlock_irqrestore(&bchan->vc.lock, flag);
 		break;
 
 	case DMA_RESUME:
 		spin_lock_irqsave(&bchan->vc.lock, flag);
-		writel_relaxed(0, bdev->regs + BAM_P_HALT(bchan->id));
+		writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_HALT));
 		bchan->paused = 0;
 		spin_unlock_irqrestore(&bchan->vc.lock, flag);
 		break;
@@ -626,7 +708,7 @@
 	unsigned long flags;
 	struct bam_async_desc *async_desc;
 
-	srcs = readl_relaxed(bdev->regs + BAM_IRQ_SRCS_EE(bdev->ee));
+	srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
 
 	/* return early if no pipe/channel interrupts are present */
 	if (!(srcs & P_IRQ))
@@ -639,11 +721,9 @@
 			continue;
 
 		/* clear pipe irq */
-		pipe_stts = readl_relaxed(bdev->regs +
-			BAM_P_IRQ_STTS(i));
+		pipe_stts = readl_relaxed(bam_addr(bdev, i, BAM_P_IRQ_STTS));
 
-		writel_relaxed(pipe_stts, bdev->regs +
-				BAM_P_IRQ_CLR(i));
+		writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
 
 		spin_lock_irqsave(&bchan->vc.lock, flags);
 		async_desc = bchan->curr_txd;
@@ -694,12 +774,12 @@
 		tasklet_schedule(&bdev->task);
 
 	if (srcs & BAM_IRQ)
-		clr_mask = readl_relaxed(bdev->regs + BAM_IRQ_STTS);
+		clr_mask = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_STTS));
 
 	/* don't allow reorder of the various accesses to the BAM registers */
 	mb();
 
-	writel_relaxed(clr_mask, bdev->regs + BAM_IRQ_CLR);
+	writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR));
 
 	return IRQ_HANDLED;
 }
@@ -763,7 +843,7 @@
 	else
 		maxburst = bchan->slave.dst_maxburst;
 
-	writel_relaxed(maxburst, bdev->regs + BAM_DESC_CNT_TRSHLD);
+	writel_relaxed(maxburst, bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
 
 	bchan->reconfigure = 0;
 }
@@ -830,7 +910,7 @@
 	/* ensure descriptor writes and dma start not reordered */
 	wmb();
 	writel_relaxed(bchan->tail * sizeof(struct bam_desc_hw),
-			bdev->regs + BAM_P_EVNT_REG(bchan->id));
+			bam_addr(bdev, bchan->id, BAM_P_EVNT_REG));
 }
 
 /**
@@ -918,43 +998,44 @@
 	u32 val;
 
 	/* read revision and configuration information */
-	val = readl_relaxed(bdev->regs + BAM_REVISION) >> NUM_EES_SHIFT;
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)) >> NUM_EES_SHIFT;
 	val &= NUM_EES_MASK;
 
 	/* check that configured EE is within range */
 	if (bdev->ee >= val)
 		return -EINVAL;
 
-	val = readl_relaxed(bdev->regs + BAM_NUM_PIPES);
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
 	bdev->num_channels = val & BAM_NUM_PIPES_MASK;
 
 	/* s/w reset bam */
 	/* after reset all pipes are disabled and idle */
-	val = readl_relaxed(bdev->regs + BAM_CTRL);
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_CTRL));
 	val |= BAM_SW_RST;
-	writel_relaxed(val, bdev->regs + BAM_CTRL);
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
 	val &= ~BAM_SW_RST;
-	writel_relaxed(val, bdev->regs + BAM_CTRL);
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
 
 	/* make sure previous stores are visible before enabling BAM */
 	wmb();
 
 	/* enable bam */
 	val |= BAM_EN;
-	writel_relaxed(val, bdev->regs + BAM_CTRL);
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
 
 	/* set descriptor threshhold, start with 4 bytes */
-	writel_relaxed(DEFAULT_CNT_THRSHLD, bdev->regs + BAM_DESC_CNT_TRSHLD);
+	writel_relaxed(DEFAULT_CNT_THRSHLD,
+			bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
 
 	/* Enable default set of h/w workarounds, ie all except BAM_FULL_PIPE */
-	writel_relaxed(BAM_CNFG_BITS_DEFAULT, bdev->regs + BAM_CNFG_BITS);
+	writel_relaxed(BAM_CNFG_BITS_DEFAULT, bam_addr(bdev, 0, BAM_CNFG_BITS));
 
 	/* enable irqs for errors */
 	writel_relaxed(BAM_ERROR_EN | BAM_HRESP_ERR_EN,
-				bdev->regs + BAM_IRQ_EN);
+			bam_addr(bdev, 0, BAM_IRQ_EN));
 
 	/* unmask global bam interrupt */
-	writel_relaxed(BAM_IRQ_MSK, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	writel_relaxed(BAM_IRQ_MSK, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 
 	return 0;
 }
@@ -969,9 +1050,18 @@
 	bchan->vc.desc_free = bam_dma_free_desc;
 }
 
+static const struct of_device_id bam_of_match[] = {
+	{ .compatible = "qcom,bam-v1.3.0", .data = &bam_v1_3_reg_info },
+	{ .compatible = "qcom,bam-v1.4.0", .data = &bam_v1_4_reg_info },
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, bam_of_match);
+
 static int bam_dma_probe(struct platform_device *pdev)
 {
 	struct bam_device *bdev;
+	const struct of_device_id *match;
 	struct resource *iores;
 	int ret, i;
 
@@ -981,6 +1071,14 @@
 
 	bdev->dev = &pdev->dev;
 
+	match = of_match_node(bam_of_match, pdev->dev.of_node);
+	if (!match) {
+		dev_err(&pdev->dev, "Unsupported BAM module\n");
+		return -ENODEV;
+	}
+
+	bdev->layout = match->data;
+
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	bdev->regs = devm_ioremap_resource(&pdev->dev, iores);
 	if (IS_ERR(bdev->regs))
@@ -1084,7 +1182,7 @@
 	dma_async_device_unregister(&bdev->common);
 
 	/* mask all interrupts for this execution environment */
-	writel_relaxed(0, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	writel_relaxed(0, bam_addr(bdev, 0,  BAM_IRQ_SRCS_MSK_EE));
 
 	devm_free_irq(bdev->dev, bdev->irq, bdev);
 
@@ -1104,18 +1202,11 @@
 	return 0;
 }
 
-static const struct of_device_id bam_of_match[] = {
-	{ .compatible = "qcom,bam-v1.4.0", },
-	{}
-};
-MODULE_DEVICE_TABLE(of, bam_of_match);
-
 static struct platform_driver bam_dma_driver = {
 	.probe = bam_dma_probe,
 	.remove = bam_dma_remove,
 	.driver = {
 		.name = "bam-dma-engine",
-		.owner = THIS_MODULE,
 		.of_match_table = bam_of_match,
 	},
 };
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
index 7416572..6941a77 100644
--- a/drivers/dma/s3c24xx-dma.c
+++ b/drivers/dma/s3c24xx-dma.c
@@ -1402,7 +1402,6 @@
 static struct platform_driver s3c24xx_dma_driver = {
 	.driver		= {
 		.name	= "s3c24xx-dma",
-		.owner	= THIS_MODULE,
 	},
 	.id_table	= s3c24xx_dma_driver_ids,
 	.probe		= s3c24xx_dma_probe,
diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c
index 4b0ef04..2329d29 100644
--- a/drivers/dma/sa11x0-dma.c
+++ b/drivers/dma/sa11x0-dma.c
@@ -829,7 +829,6 @@
 {
 	unsigned i;
 
-	dmadev->chancnt = ARRAY_SIZE(chan_desc);
 	INIT_LIST_HEAD(&dmadev->channels);
 	dmadev->dev = dev;
 	dmadev->device_alloc_chan_resources = sa11x0_dma_alloc_chan_resources;
@@ -838,7 +837,7 @@
 	dmadev->device_tx_status = sa11x0_dma_tx_status;
 	dmadev->device_issue_pending = sa11x0_dma_issue_pending;
 
-	for (i = 0; i < dmadev->chancnt; i++) {
+	for (i = 0; i < ARRAY_SIZE(chan_desc); i++) {
 		struct sa11x0_dma_chan *c;
 
 		c = kzalloc(sizeof(*c), GFP_KERNEL);
diff --git a/drivers/dma/sh/rcar-audmapp.c b/drivers/dma/sh/rcar-audmapp.c
index 80fd2ae..d95bbdd 100644
--- a/drivers/dma/sh/rcar-audmapp.c
+++ b/drivers/dma/sh/rcar-audmapp.c
@@ -253,7 +253,6 @@
 
 static void audmapp_chan_remove(struct audmapp_device *audev)
 {
-	struct dma_device *dma_dev = &audev->shdma_dev.dma_dev;
 	struct shdma_chan *schan;
 	int i;
 
@@ -261,7 +260,6 @@
 		BUG_ON(!schan);
 		shdma_chan_remove(schan);
 	}
-	dma_dev->chancnt = 0;
 }
 
 static struct dma_chan *audmapp_of_xlate(struct of_phandle_args *dma_spec,
@@ -367,7 +365,6 @@
 	.probe		= audmapp_probe,
 	.remove		= audmapp_remove,
 	.driver		= {
-		.owner	= THIS_MODULE,
 		.name	= "rcar-audmapp-engine",
 		.of_match_table = audmapp_of_match,
 	},
diff --git a/drivers/dma/sh/rcar-hpbdma.c b/drivers/dma/sh/rcar-hpbdma.c
index b212d94..20a6f6f 100644
--- a/drivers/dma/sh/rcar-hpbdma.c
+++ b/drivers/dma/sh/rcar-hpbdma.c
@@ -619,7 +619,6 @@
 
 static void hpb_dmae_chan_remove(struct hpb_dmae_device *hpbdev)
 {
-	struct dma_device *dma_dev = &hpbdev->shdma_dev.dma_dev;
 	struct shdma_chan *schan;
 	int i;
 
@@ -628,7 +627,6 @@
 
 		shdma_chan_remove(schan);
 	}
-	dma_dev->chancnt = 0;
 }
 
 static int hpb_dmae_remove(struct platform_device *pdev)
@@ -655,7 +653,6 @@
 	.remove		= hpb_dmae_remove,
 	.shutdown	= hpb_dmae_shutdown,
 	.driver = {
-		.owner	= THIS_MODULE,
 		.name	= "hpb-dma-engine",
 	},
 };
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
index 42d4974..3a2adb1 100644
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -391,6 +391,8 @@
 				dev_dbg(schan->dev, "Bring down channel %d\n", schan->id);
 				pm_runtime_put(schan->dev);
 				schan->pm_state = SHDMA_PM_ESTABLISHED;
+			} else if (schan->pm_state == SHDMA_PM_PENDING) {
+				shdma_chan_xfer_ld_queue(schan);
 			}
 		}
 	}
@@ -951,7 +953,7 @@
 	/* Add the channel to DMA device channel list */
 	list_add_tail(&schan->dma_chan.device_node,
 			&sdev->dma_dev.channels);
-	sdev->schan[sdev->dma_dev.chancnt++] = schan;
+	sdev->schan[id] = schan;
 }
 EXPORT_SYMBOL(shdma_chan_probe);
 
diff --git a/drivers/dma/sh/shdma-of.c b/drivers/dma/sh/shdma-of.c
index b4ff9d3..f999f9b 100644
--- a/drivers/dma/sh/shdma-of.c
+++ b/drivers/dma/sh/shdma-of.c
@@ -66,7 +66,6 @@
 
 static struct platform_driver shdma_of = {
 	.driver		= {
-		.owner	= THIS_MODULE,
 		.name	= "shdma-of",
 		.of_match_table = shdma_of_match,
 	},
diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c
index 58eb857..b65317c 100644
--- a/drivers/dma/sh/shdmac.c
+++ b/drivers/dma/sh/shdmac.c
@@ -572,7 +572,6 @@
 
 static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
 {
-	struct dma_device *dma_dev = &shdev->shdma_dev.dma_dev;
 	struct shdma_chan *schan;
 	int i;
 
@@ -581,7 +580,6 @@
 
 		shdma_chan_remove(schan);
 	}
-	dma_dev->chancnt = 0;
 }
 
 static void sh_dmae_shutdown(struct platform_device *pdev)
diff --git a/drivers/dma/sh/sudmac.c b/drivers/dma/sh/sudmac.c
index 3ce1039..6da2eaa 100644
--- a/drivers/dma/sh/sudmac.c
+++ b/drivers/dma/sh/sudmac.c
@@ -295,7 +295,6 @@
 
 static void sudmac_chan_remove(struct sudmac_device *su_dev)
 {
-	struct dma_device *dma_dev = &su_dev->shdma_dev.dma_dev;
 	struct shdma_chan *schan;
 	int i;
 
@@ -304,7 +303,6 @@
 
 		shdma_chan_remove(schan);
 	}
-	dma_dev->chancnt = 0;
 }
 
 static dma_addr_t sudmac_slave_addr(struct shdma_chan *schan)
@@ -411,7 +409,6 @@
 
 static struct platform_driver sudmac_driver = {
 	.driver		= {
-		.owner	= THIS_MODULE,
 		.name	= SUDMAC_DRV_NAME,
 	},
 	.probe		= sudmac_probe,
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
index aac03ab..feb1e8a 100644
--- a/drivers/dma/sirf-dma.c
+++ b/drivers/dma/sirf-dma.c
@@ -735,7 +735,6 @@
 
 	dma = &sdma->dma;
 	dma->dev = dev;
-	dma->chancnt = SIRFSOC_DMA_CHANNELS;
 
 	dma->device_alloc_chan_resources = sirfsoc_dma_alloc_chan_resources;
 	dma->device_free_chan_resources = sirfsoc_dma_free_chan_resources;
@@ -752,7 +751,7 @@
 	dma_cap_set(DMA_INTERLEAVE, dma->cap_mask);
 	dma_cap_set(DMA_PRIVATE, dma->cap_mask);
 
-	for (i = 0; i < dma->chancnt; i++) {
+	for (i = 0; i < SIRFSOC_DMA_CHANNELS; i++) {
 		schan = &sdma->channels[i];
 
 		schan->chan.device = dma;
@@ -835,6 +834,7 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int sirfsoc_dma_pm_suspend(struct device *dev)
 {
 	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
@@ -916,6 +916,7 @@
 
 	return 0;
 }
+#endif
 
 static const struct dev_pm_ops sirfsoc_dma_pm_ops = {
 	SET_RUNTIME_PM_OPS(sirfsoc_dma_runtime_suspend, sirfsoc_dma_runtime_resume, NULL)
diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c
index 3aa10b3..f9f8f4d 100644
--- a/drivers/dma/sun6i-dma.c
+++ b/drivers/dma/sun6i-dma.c
@@ -18,6 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of_dma.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/slab.h>
@@ -26,24 +27,6 @@
 #include "virt-dma.h"
 
 /*
- * There's 16 physical channels that can work in parallel.
- *
- * However we have 30 different endpoints for our requests.
- *
- * Since the channels are able to handle only an unidirectional
- * transfer, we need to allocate more virtual channels so that
- * everyone can grab one channel.
- *
- * Some devices can't work in both direction (mostly because it
- * wouldn't make sense), so we have a bit fewer virtual channels than
- * 2 channels per endpoints.
- */
-
-#define NR_MAX_CHANNELS		16
-#define NR_MAX_REQUESTS		30
-#define NR_MAX_VCHANS		53
-
-/*
  * Common registers
  */
 #define DMA_IRQ_EN(x)		((x) * 0x04)
@@ -60,6 +43,12 @@
 #define DMA_STAT		0x30
 
 /*
+ * sun8i specific registers
+ */
+#define SUN8I_DMA_GATE		0x20
+#define SUN8I_DMA_GATE_ENABLE	0x4
+
+/*
  * Channels specific registers
  */
 #define DMA_CHAN_ENABLE		0x00
@@ -102,6 +91,19 @@
 #define DRQ_SDRAM	1
 
 /*
+ * Hardware channels / ports representation
+ *
+ * The hardware is used in several SoCs, with differing numbers
+ * of channels and endpoints. This structure ties those numbers
+ * to a certain compatible string.
+ */
+struct sun6i_dma_config {
+	u32 nr_max_channels;
+	u32 nr_max_requests;
+	u32 nr_max_vchans;
+};
+
+/*
  * Hardware representation of the LLI
  *
  * The hardware will be fed the physical address of this structure,
@@ -159,6 +161,7 @@
 	struct dma_pool		*pool;
 	struct sun6i_pchan	*pchans;
 	struct sun6i_vchan	*vchans;
+	const struct sun6i_dma_config *cfg;
 };
 
 static struct device *chan2dev(struct dma_chan *chan)
@@ -432,6 +435,7 @@
 static void sun6i_dma_tasklet(unsigned long data)
 {
 	struct sun6i_dma_dev *sdev = (struct sun6i_dma_dev *)data;
+	const struct sun6i_dma_config *cfg = sdev->cfg;
 	struct sun6i_vchan *vchan;
 	struct sun6i_pchan *pchan;
 	unsigned int pchan_alloc = 0;
@@ -459,7 +463,7 @@
 	}
 
 	spin_lock_irq(&sdev->lock);
-	for (pchan_idx = 0; pchan_idx < NR_MAX_CHANNELS; pchan_idx++) {
+	for (pchan_idx = 0; pchan_idx < cfg->nr_max_channels; pchan_idx++) {
 		pchan = &sdev->pchans[pchan_idx];
 
 		if (pchan->vchan || list_empty(&sdev->pending))
@@ -480,7 +484,7 @@
 	}
 	spin_unlock_irq(&sdev->lock);
 
-	for (pchan_idx = 0; pchan_idx < NR_MAX_CHANNELS; pchan_idx++) {
+	for (pchan_idx = 0; pchan_idx < cfg->nr_max_channels; pchan_idx++) {
 		if (!(pchan_alloc & BIT(pchan_idx)))
 			continue;
 
@@ -502,7 +506,7 @@
 	int i, j, ret = IRQ_NONE;
 	u32 status;
 
-	for (i = 0; i < 2; i++) {
+	for (i = 0; i < sdev->cfg->nr_max_channels / DMA_IRQ_CHAN_NR; i++) {
 		status = readl(sdev->base + DMA_IRQ_STAT(i));
 		if (!status)
 			continue;
@@ -512,7 +516,7 @@
 
 		writel(status, sdev->base + DMA_IRQ_STAT(i));
 
-		for (j = 0; (j < 8) && status; j++) {
+		for (j = 0; (j < DMA_IRQ_CHAN_NR) && status; j++) {
 			if (status & DMA_IRQ_QUEUE) {
 				pchan = sdev->pchans + j;
 				vchan = pchan->vchan;
@@ -525,7 +529,7 @@
 				}
 			}
 
-			status = status >> 4;
+			status = status >> DMA_IRQ_CHAN_WIDTH;
 		}
 
 		if (!atomic_read(&sdev->tasklet_shutdown))
@@ -817,7 +821,7 @@
 	struct dma_chan *chan;
 	u8 port = dma_spec->args[0];
 
-	if (port > NR_MAX_REQUESTS)
+	if (port > sdev->cfg->nr_max_requests)
 		return NULL;
 
 	chan = dma_get_any_slave_channel(&sdev->slave);
@@ -850,7 +854,7 @@
 {
 	int i;
 
-	for (i = 0; i < NR_MAX_VCHANS; i++) {
+	for (i = 0; i < sdev->cfg->nr_max_vchans; i++) {
 		struct sun6i_vchan *vchan = &sdev->vchans[i];
 
 		list_del(&vchan->vc.chan.device_node);
@@ -858,8 +862,48 @@
 	}
 }
 
+/*
+ * For A31:
+ *
+ * There's 16 physical channels that can work in parallel.
+ *
+ * However we have 30 different endpoints for our requests.
+ *
+ * Since the channels are able to handle only an unidirectional
+ * transfer, we need to allocate more virtual channels so that
+ * everyone can grab one channel.
+ *
+ * Some devices can't work in both direction (mostly because it
+ * wouldn't make sense), so we have a bit fewer virtual channels than
+ * 2 channels per endpoints.
+ */
+
+static struct sun6i_dma_config sun6i_a31_dma_cfg = {
+	.nr_max_channels = 16,
+	.nr_max_requests = 30,
+	.nr_max_vchans   = 53,
+};
+
+/*
+ * The A23 only has 8 physical channels, a maximum DRQ port id of 24,
+ * and a total of 37 usable source and destination endpoints.
+ */
+
+static struct sun6i_dma_config sun8i_a23_dma_cfg = {
+	.nr_max_channels = 8,
+	.nr_max_requests = 24,
+	.nr_max_vchans   = 37,
+};
+
+static struct of_device_id sun6i_dma_match[] = {
+	{ .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg },
+	{ .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg },
+	{ /* sentinel */ }
+};
+
 static int sun6i_dma_probe(struct platform_device *pdev)
 {
+	const struct of_device_id *device;
 	struct sun6i_dma_dev *sdc;
 	struct resource *res;
 	int ret, i;
@@ -868,6 +912,11 @@
 	if (!sdc)
 		return -ENOMEM;
 
+	device = of_match_device(sun6i_dma_match, &pdev->dev);
+	if (!device)
+		return -ENODEV;
+	sdc->cfg = device->data;
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	sdc->base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(sdc->base))
@@ -914,30 +963,29 @@
 	sdc->slave.device_prep_slave_sg		= sun6i_dma_prep_slave_sg;
 	sdc->slave.device_prep_dma_memcpy	= sun6i_dma_prep_dma_memcpy;
 	sdc->slave.device_control		= sun6i_dma_control;
-	sdc->slave.chancnt			= NR_MAX_VCHANS;
 
 	sdc->slave.dev = &pdev->dev;
 
-	sdc->pchans = devm_kcalloc(&pdev->dev, NR_MAX_CHANNELS,
+	sdc->pchans = devm_kcalloc(&pdev->dev, sdc->cfg->nr_max_channels,
 				   sizeof(struct sun6i_pchan), GFP_KERNEL);
 	if (!sdc->pchans)
 		return -ENOMEM;
 
-	sdc->vchans = devm_kcalloc(&pdev->dev, NR_MAX_VCHANS,
+	sdc->vchans = devm_kcalloc(&pdev->dev, sdc->cfg->nr_max_vchans,
 				   sizeof(struct sun6i_vchan), GFP_KERNEL);
 	if (!sdc->vchans)
 		return -ENOMEM;
 
 	tasklet_init(&sdc->task, sun6i_dma_tasklet, (unsigned long)sdc);
 
-	for (i = 0; i < NR_MAX_CHANNELS; i++) {
+	for (i = 0; i < sdc->cfg->nr_max_channels; i++) {
 		struct sun6i_pchan *pchan = &sdc->pchans[i];
 
 		pchan->idx = i;
 		pchan->base = sdc->base + 0x100 + i * 0x40;
 	}
 
-	for (i = 0; i < NR_MAX_VCHANS; i++) {
+	for (i = 0; i < sdc->cfg->nr_max_vchans; i++) {
 		struct sun6i_vchan *vchan = &sdc->vchans[i];
 
 		INIT_LIST_HEAD(&vchan->node);
@@ -977,6 +1025,15 @@
 		goto err_dma_unregister;
 	}
 
+	/*
+	 * sun8i variant requires us to toggle a dma gating register,
+	 * as seen in Allwinner's SDK. This register is not documented
+	 * in the A23 user manual.
+	 */
+	if (of_device_is_compatible(pdev->dev.of_node,
+				    "allwinner,sun8i-a23-dma"))
+		writel(SUN8I_DMA_GATE_ENABLE, sdc->base + SUN8I_DMA_GATE);
+
 	return 0;
 
 err_dma_unregister:
@@ -1009,11 +1066,6 @@
 	return 0;
 }
 
-static struct of_device_id sun6i_dma_match[] = {
-	{ .compatible = "allwinner,sun6i-a31-dma" },
-	{ /* sentinel */ }
-};
-
 static struct platform_driver sun6i_dma_driver = {
 	.probe		= sun6i_dma_probe,
 	.remove		= sun6i_dma_remove,
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 16efa60..a6c0b3f 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -1597,7 +1597,6 @@
 static struct platform_driver tegra_dmac_driver = {
 	.driver = {
 		.name	= "tegra-apbdma",
-		.owner = THIS_MODULE,
 		.pm	= &tegra_dma_dev_pm_ops,
 		.of_match_table = tegra_dma_of_match,
 	},
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index 4506a7b..2407ccf 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -783,7 +783,6 @@
 static struct platform_driver td_driver = {
 	.driver = {
 		.name	= DRIVER_NAME,
-		.owner  = THIS_MODULE,
 	},
 	.probe	= td_probe,
 	.remove	= td_remove,
diff --git a/drivers/dma/xilinx/xilinx_vdma.c b/drivers/dma/xilinx/xilinx_vdma.c
index a6e6476..4a3a8f3 100644
--- a/drivers/dma/xilinx/xilinx_vdma.c
+++ b/drivers/dma/xilinx/xilinx_vdma.c
@@ -942,6 +942,9 @@
 	if (!xt->numf || !xt->sgl[0].size)
 		return NULL;
 
+	if (xt->frame_size != 1)
+		return NULL;
+
 	/* Allocate a transaction descriptor. */
 	desc = xilinx_vdma_alloc_tx_descriptor(chan);
 	if (!desc)
@@ -960,7 +963,7 @@
 	hw = &segment->hw;
 	hw->vsize = xt->numf;
 	hw->hsize = xt->sgl[0].size;
-	hw->stride = xt->sgl[0].icg <<
+	hw->stride = (xt->sgl[0].icg + xt->sgl[0].size) <<
 			XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT;
 	hw->stride |= chan->config.frm_dly <<
 			XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT;
@@ -971,9 +974,11 @@
 		hw->buf_addr = xt->src_start;
 
 	/* Link the previous next descriptor to current */
-	prev = list_last_entry(&desc->segments,
-				struct xilinx_vdma_tx_segment, node);
-	prev->hw.next_desc = segment->phys;
+	if (!list_empty(&desc->segments)) {
+		prev = list_last_entry(&desc->segments,
+				       struct xilinx_vdma_tx_segment, node);
+		prev->hw.next_desc = segment->phys;
+	}
 
 	/* Insert the segment into the descriptor segments list. */
 	list_add_tail(&segment->node, &desc->segments);
diff --git a/include/linux/platform_data/dma-imx.h b/include/linux/platform_data/dma-imx.h
index 6a1357d..7d964e7 100644
--- a/include/linux/platform_data/dma-imx.h
+++ b/include/linux/platform_data/dma-imx.h
@@ -41,6 +41,7 @@
 	IMX_DMATYPE_ESAI,	/* ESAI */
 	IMX_DMATYPE_SSI_DUAL,	/* SSI Dual FIFO */
 	IMX_DMATYPE_ASRC_SP,	/* Shared ASRC */
+	IMX_DMATYPE_SAI,	/* SAI */
 };
 
 enum imx_dma_prio {