Merge branch 'next' into v3.1-rc4

Fixed trivial conflicts  in  drivers/dma/amba-pl08x.c

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index be21e3f..3c2cad5 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -66,28 +66,23 @@
  *    after the final transfer signalled by LBREQ or LSREQ.  The DMAC
  *    will then move to the next LLI entry.
  *
- * Only the former works sanely with scatter lists, so we only implement
- * the DMAC flow control method.  However, peripherals which use the LBREQ
- * and LSREQ signals (eg, MMCI) are unable to use this mode, which through
- * these hardware restrictions prevents them from using scatter DMA.
- *
  * Global TODO:
  * - Break out common code from arch/arm/mach-s3c64xx and share
  */
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <linux/dmapool.h>
-#include <linux/dmaengine.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/pl08x.h>
 #include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
 #include <linux/seq_file.h>
-
+#include <linux/slab.h>
 #include <asm/hardware/pl080.h>
 
 #define DRIVER_NAME	"pl08xdmac"
@@ -126,7 +121,8 @@
  * @phy_chans: array of data for the physical channels
  * @pool: a pool for the LLI descriptors
  * @pool_ctr: counter of LLIs in the pool
- * @lli_buses: bitmask to or in to LLI pointer selecting AHB port for LLI fetches
+ * @lli_buses: bitmask to or in to LLI pointer selecting AHB port for LLI
+ * fetches
  * @mem_buses: set to indicate memory transfers on AHB2.
  * @lock: a spinlock for this struct
  */
@@ -149,14 +145,6 @@
  * PL08X specific defines
  */
 
-/*
- * Memory boundaries: the manual for PL08x says that the controller
- * cannot read past a 1KiB boundary, so these defines are used to
- * create transfer LLIs that do not cross such boundaries.
- */
-#define PL08X_BOUNDARY_SHIFT		(10)	/* 1KB 0x400 */
-#define PL08X_BOUNDARY_SIZE		(1 << PL08X_BOUNDARY_SHIFT)
-
 /* Size (bytes) of each LLI buffer allocated for one transfer */
 # define PL08X_LLI_TSFR_SIZE	0x2000
 
@@ -272,7 +260,6 @@
 	writel(val, ch->base + PL080_CH_CONFIG);
 }
 
-
 /*
  * pl08x_terminate_phy_chan() stops the channel, clears the FIFO and
  * clears any pending interrupt status.  This should not be used for
@@ -407,6 +394,7 @@
 		return NULL;
 	}
 
+	pm_runtime_get_sync(&pl08x->adev->dev);
 	return ch;
 }
 
@@ -420,6 +408,8 @@
 	/* Stop the channel and clear its interrupts */
 	pl08x_terminate_phy_chan(pl08x, ch);
 
+	pm_runtime_put(&pl08x->adev->dev);
+
 	/* Mark it as free */
 	ch->serving = NULL;
 	spin_unlock_irqrestore(&ch->lock, flags);
@@ -499,36 +489,30 @@
 };
 
 /*
- * Autoselect a master bus to use for the transfer this prefers the
- * destination bus if both available if fixed address on one bus the
- * other will be chosen
+ * Autoselect a master bus to use for the transfer. Slave will be the chosen as
+ * victim in case src & dest are not similarly aligned. i.e. If after aligning
+ * masters address with width requirements of transfer (by sending few byte by
+ * byte data), slave is still not aligned, then its width will be reduced to
+ * BYTE.
+ * - prefers the destination bus if both available
+ * - prefers bus with fixed address (i.e. peripheral)
  */
 static void pl08x_choose_master_bus(struct pl08x_lli_build_data *bd,
 	struct pl08x_bus_data **mbus, struct pl08x_bus_data **sbus, u32 cctl)
 {
 	if (!(cctl & PL080_CONTROL_DST_INCR)) {
-		*mbus = &bd->srcbus;
-		*sbus = &bd->dstbus;
-	} else if (!(cctl & PL080_CONTROL_SRC_INCR)) {
 		*mbus = &bd->dstbus;
 		*sbus = &bd->srcbus;
+	} else if (!(cctl & PL080_CONTROL_SRC_INCR)) {
+		*mbus = &bd->srcbus;
+		*sbus = &bd->dstbus;
 	} else {
-		if (bd->dstbus.buswidth == 4) {
+		if (bd->dstbus.buswidth >= bd->srcbus.buswidth) {
 			*mbus = &bd->dstbus;
 			*sbus = &bd->srcbus;
-		} else if (bd->srcbus.buswidth == 4) {
-			*mbus = &bd->srcbus;
-			*sbus = &bd->dstbus;
-		} else if (bd->dstbus.buswidth == 2) {
-			*mbus = &bd->dstbus;
-			*sbus = &bd->srcbus;
-		} else if (bd->srcbus.buswidth == 2) {
-			*mbus = &bd->srcbus;
-			*sbus = &bd->dstbus;
 		} else {
-			/* bd->srcbus.buswidth == 1 */
-			*mbus = &bd->dstbus;
-			*sbus = &bd->srcbus;
+			*mbus = &bd->srcbus;
+			*sbus = &bd->dstbus;
 		}
 	}
 }
@@ -547,7 +531,8 @@
 	llis_va[num_llis].cctl = cctl;
 	llis_va[num_llis].src = bd->srcbus.addr;
 	llis_va[num_llis].dst = bd->dstbus.addr;
-	llis_va[num_llis].lli = llis_bus + (num_llis + 1) * sizeof(struct pl08x_lli);
+	llis_va[num_llis].lli = llis_bus + (num_llis + 1) *
+		sizeof(struct pl08x_lli);
 	llis_va[num_llis].lli |= bd->lli_bus;
 
 	if (cctl & PL080_CONTROL_SRC_INCR)
@@ -560,16 +545,12 @@
 	bd->remainder -= len;
 }
 
-/*
- * Return number of bytes to fill to boundary, or len.
- * This calculation works for any value of addr.
- */
-static inline size_t pl08x_pre_boundary(u32 addr, size_t len)
+static inline void prep_byte_width_lli(struct pl08x_lli_build_data *bd,
+		u32 *cctl, u32 len, int num_llis, size_t *total_bytes)
 {
-	size_t boundary_len = PL08X_BOUNDARY_SIZE -
-			(addr & (PL08X_BOUNDARY_SIZE - 1));
-
-	return min(boundary_len, len);
+	*cctl = pl08x_cctl_bits(*cctl, 1, 1, len);
+	pl08x_fill_lli_for_desc(bd, num_llis, len, *cctl);
+	(*total_bytes) += len;
 }
 
 /*
@@ -583,13 +564,11 @@
 	struct pl08x_bus_data *mbus, *sbus;
 	struct pl08x_lli_build_data bd;
 	int num_llis = 0;
-	u32 cctl;
-	size_t max_bytes_per_lli;
-	size_t total_bytes = 0;
+	u32 cctl, early_bytes = 0;
+	size_t max_bytes_per_lli, total_bytes = 0;
 	struct pl08x_lli *llis_va;
 
-	txd->llis_va = dma_pool_alloc(pl08x->pool, GFP_NOWAIT,
-				      &txd->llis_bus);
+	txd->llis_va = dma_pool_alloc(pl08x->pool, GFP_NOWAIT, &txd->llis_bus);
 	if (!txd->llis_va) {
 		dev_err(&pl08x->adev->dev, "%s no memory for llis\n", __func__);
 		return 0;
@@ -619,55 +598,85 @@
 	bd.srcbus.buswidth = bd.srcbus.maxwidth;
 	bd.dstbus.buswidth = bd.dstbus.maxwidth;
 
-	/*
-	 * Bytes transferred == tsize * MIN(buswidths), not max(buswidths)
-	 */
-	max_bytes_per_lli = min(bd.srcbus.buswidth, bd.dstbus.buswidth) *
-		PL080_CONTROL_TRANSFER_SIZE_MASK;
-
 	/* We need to count this down to zero */
 	bd.remainder = txd->len;
 
-	/*
-	 * Choose bus to align to
-	 * - prefers destination bus if both available
-	 * - if fixed address on one bus chooses other
-	 */
 	pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl);
 
-	dev_vdbg(&pl08x->adev->dev, "src=0x%08x%s/%u dst=0x%08x%s/%u len=%zu llimax=%zu\n",
+	dev_vdbg(&pl08x->adev->dev, "src=0x%08x%s/%u dst=0x%08x%s/%u len=%zu\n",
 		 bd.srcbus.addr, cctl & PL080_CONTROL_SRC_INCR ? "+" : "",
 		 bd.srcbus.buswidth,
 		 bd.dstbus.addr, cctl & PL080_CONTROL_DST_INCR ? "+" : "",
 		 bd.dstbus.buswidth,
-		 bd.remainder, max_bytes_per_lli);
+		 bd.remainder);
 	dev_vdbg(&pl08x->adev->dev, "mbus=%s sbus=%s\n",
 		 mbus == &bd.srcbus ? "src" : "dst",
 		 sbus == &bd.srcbus ? "src" : "dst");
 
-	if (txd->len < mbus->buswidth) {
-		/* Less than a bus width available - send as single bytes */
-		while (bd.remainder) {
-			dev_vdbg(&pl08x->adev->dev,
-				 "%s single byte LLIs for a transfer of "
-				 "less than a bus width (remain 0x%08x)\n",
-				 __func__, bd.remainder);
-			cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
-			pl08x_fill_lli_for_desc(&bd, num_llis++, 1, cctl);
-			total_bytes++;
-		}
-	} else {
-		/* Make one byte LLIs until master bus is aligned */
-		while ((mbus->addr) % (mbus->buswidth)) {
-			dev_vdbg(&pl08x->adev->dev,
-				"%s adjustment lli for less than bus width "
-				 "(remain 0x%08x)\n",
-				 __func__, bd.remainder);
-			cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
-			pl08x_fill_lli_for_desc(&bd, num_llis++, 1, cctl);
-			total_bytes++;
+	/*
+	 * Zero length is only allowed if all these requirements are met:
+	 * - flow controller is peripheral.
+	 * - src.addr is aligned to src.width
+	 * - dst.addr is aligned to dst.width
+	 *
+	 * sg_len == 1 should be true, as there can be two cases here:
+	 * - Memory addresses are contiguous and are not scattered. Here, Only
+	 * one sg will be passed by user driver, with memory address and zero
+	 * length. We pass this to controller and after the transfer it will
+	 * receive the last burst request from peripheral and so transfer
+	 * finishes.
+	 *
+	 * - Memory addresses are scattered and are not contiguous. Here,
+	 * Obviously as DMA controller doesn't know when a lli's transfer gets
+	 * over, it can't load next lli. So in this case, there has to be an
+	 * assumption that only one lli is supported. Thus, we can't have
+	 * scattered addresses.
+	 */
+	if (!bd.remainder) {
+		u32 fc = (txd->ccfg & PL080_CONFIG_FLOW_CONTROL_MASK) >>
+			PL080_CONFIG_FLOW_CONTROL_SHIFT;
+		if (!((fc >= PL080_FLOW_SRC2DST_DST) &&
+					(fc <= PL080_FLOW_SRC2DST_SRC))) {
+			dev_err(&pl08x->adev->dev, "%s sg len can't be zero",
+				__func__);
+			return 0;
 		}
 
+		if ((bd.srcbus.addr % bd.srcbus.buswidth) ||
+				(bd.srcbus.addr % bd.srcbus.buswidth)) {
+			dev_err(&pl08x->adev->dev,
+				"%s src & dst address must be aligned to src"
+				" & dst width if peripheral is flow controller",
+				__func__);
+			return 0;
+		}
+
+		cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth,
+				bd.dstbus.buswidth, 0);
+		pl08x_fill_lli_for_desc(&bd, num_llis++, 0, cctl);
+	}
+
+	/*
+	 * Send byte by byte for following cases
+	 * - Less than a bus width available
+	 * - until master bus is aligned
+	 */
+	if (bd.remainder < mbus->buswidth)
+		early_bytes = bd.remainder;
+	else if ((mbus->addr) % (mbus->buswidth)) {
+		early_bytes = mbus->buswidth - (mbus->addr) % (mbus->buswidth);
+		if ((bd.remainder - early_bytes) < mbus->buswidth)
+			early_bytes = bd.remainder;
+	}
+
+	if (early_bytes) {
+		dev_vdbg(&pl08x->adev->dev, "%s byte width LLIs "
+				"(remain 0x%08x)\n", __func__, bd.remainder);
+		prep_byte_width_lli(&bd, &cctl, early_bytes, num_llis++,
+				&total_bytes);
+	}
+
+	if (bd.remainder) {
 		/*
 		 * Master now aligned
 		 * - if slave is not then we must set its width down
@@ -680,138 +689,55 @@
 			sbus->buswidth = 1;
 		}
 
+		/* Bytes transferred = tsize * src width, not MIN(buswidths) */
+		max_bytes_per_lli = bd.srcbus.buswidth *
+			PL080_CONTROL_TRANSFER_SIZE_MASK;
+
 		/*
 		 * Make largest possible LLIs until less than one bus
 		 * width left
 		 */
 		while (bd.remainder > (mbus->buswidth - 1)) {
-			size_t lli_len, target_len, tsize, odd_bytes;
+			size_t lli_len, tsize, width;
 
 			/*
 			 * If enough left try to send max possible,
 			 * otherwise try to send the remainder
 			 */
-			target_len = min(bd.remainder, max_bytes_per_lli);
+			lli_len = min(bd.remainder, max_bytes_per_lli);
 
 			/*
-			 * Set bus lengths for incrementing buses to the
-			 * number of bytes which fill to next memory boundary,
-			 * limiting on the target length calculated above.
+			 * Check against maximum bus alignment: Calculate actual
+			 * transfer size in relation to bus width and get a
+			 * maximum remainder of the highest bus width - 1
 			 */
-			if (cctl & PL080_CONTROL_SRC_INCR)
-				bd.srcbus.fill_bytes =
-					pl08x_pre_boundary(bd.srcbus.addr,
-						target_len);
-			else
-				bd.srcbus.fill_bytes = target_len;
+			width = max(mbus->buswidth, sbus->buswidth);
+			lli_len = (lli_len / width) * width;
+			tsize = lli_len / bd.srcbus.buswidth;
 
-			if (cctl & PL080_CONTROL_DST_INCR)
-				bd.dstbus.fill_bytes =
-					pl08x_pre_boundary(bd.dstbus.addr,
-						target_len);
-			else
-				bd.dstbus.fill_bytes = target_len;
+			dev_vdbg(&pl08x->adev->dev,
+				"%s fill lli with single lli chunk of "
+				"size 0x%08zx (remainder 0x%08zx)\n",
+				__func__, lli_len, bd.remainder);
 
-			/* Find the nearest */
-			lli_len	= min(bd.srcbus.fill_bytes,
-				      bd.dstbus.fill_bytes);
-
-			BUG_ON(lli_len > bd.remainder);
-
-			if (lli_len <= 0) {
-				dev_err(&pl08x->adev->dev,
-					"%s lli_len is %zu, <= 0\n",
-						__func__, lli_len);
-				return 0;
-			}
-
-			if (lli_len == target_len) {
-				/*
-				 * Can send what we wanted.
-				 * Maintain alignment
-				 */
-				lli_len	= (lli_len/mbus->buswidth) *
-							mbus->buswidth;
-				odd_bytes = 0;
-			} else {
-				/*
-				 * So now we know how many bytes to transfer
-				 * to get to the nearest boundary.  The next
-				 * LLI will past the boundary.  However, we
-				 * may be working to a boundary on the slave
-				 * bus.  We need to ensure the master stays
-				 * aligned, and that we are working in
-				 * multiples of the bus widths.
-				 */
-				odd_bytes = lli_len % mbus->buswidth;
-				lli_len -= odd_bytes;
-
-			}
-
-			if (lli_len) {
-				/*
-				 * Check against minimum bus alignment:
-				 * Calculate actual transfer size in relation
-				 * to bus width an get a maximum remainder of
-				 * the smallest bus width - 1
-				 */
-				/* FIXME: use round_down()? */
-				tsize = lli_len / min(mbus->buswidth,
-						      sbus->buswidth);
-				lli_len	= tsize * min(mbus->buswidth,
-						      sbus->buswidth);
-
-				if (target_len != lli_len) {
-					dev_vdbg(&pl08x->adev->dev,
-					"%s can't send what we want. Desired 0x%08zx, lli of 0x%08zx bytes in txd of 0x%08zx\n",
-					__func__, target_len, lli_len, txd->len);
-				}
-
-				cctl = pl08x_cctl_bits(cctl,
-						       bd.srcbus.buswidth,
-						       bd.dstbus.buswidth,
-						       tsize);
-
-				dev_vdbg(&pl08x->adev->dev,
-					"%s fill lli with single lli chunk of size 0x%08zx (remainder 0x%08zx)\n",
-					__func__, lli_len, bd.remainder);
-				pl08x_fill_lli_for_desc(&bd, num_llis++,
-					lli_len, cctl);
-				total_bytes += lli_len;
-			}
-
-
-			if (odd_bytes) {
-				/*
-				 * Creep past the boundary, maintaining
-				 * master alignment
-				 */
-				int j;
-				for (j = 0; (j < mbus->buswidth)
-						&& (bd.remainder); j++) {
-					cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
-					dev_vdbg(&pl08x->adev->dev,
-						"%s align with boundary, single byte (remain 0x%08zx)\n",
-						__func__, bd.remainder);
-					pl08x_fill_lli_for_desc(&bd,
-						num_llis++, 1, cctl);
-					total_bytes++;
-				}
-			}
+			cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth,
+					bd.dstbus.buswidth, tsize);
+			pl08x_fill_lli_for_desc(&bd, num_llis++, lli_len, cctl);
+			total_bytes += lli_len;
 		}
 
 		/*
 		 * Send any odd bytes
 		 */
-		while (bd.remainder) {
-			cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
+		if (bd.remainder) {
 			dev_vdbg(&pl08x->adev->dev,
-				"%s align with boundary, single odd byte (remain %zu)\n",
+				"%s align with boundary, send odd bytes (remain %zu)\n",
 				__func__, bd.remainder);
-			pl08x_fill_lli_for_desc(&bd, num_llis++, 1, cctl);
-			total_bytes++;
+			prep_byte_width_lli(&bd, &cctl, bd.remainder,
+					num_llis++, &total_bytes);
 		}
 	}
+
 	if (total_bytes != txd->len) {
 		dev_err(&pl08x->adev->dev,
 			"%s size of encoded lli:s don't match total txd, transferred 0x%08zx from size 0x%08zx\n",
@@ -917,9 +843,7 @@
 	 * need, but for slaves the physical signals may be muxed!
 	 * Can the platform allow us to use this channel?
 	 */
-	if (plchan->slave &&
-	    ch->signal < 0 &&
-	    pl08x->pd->get_signal) {
+	if (plchan->slave && pl08x->pd->get_signal) {
 		ret = pl08x->pd->get_signal(plchan);
 		if (ret < 0) {
 			dev_dbg(&pl08x->adev->dev,
@@ -1008,10 +932,8 @@
  * If slaves are relying on interrupts to signal completion this function
  * must not be called with interrupts disabled.
  */
-static enum dma_status
-pl08x_dma_tx_status(struct dma_chan *chan,
-		    dma_cookie_t cookie,
-		    struct dma_tx_state *txstate)
+static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
 	dma_cookie_t last_used;
@@ -1253,7 +1175,9 @@
 
 	num_llis = pl08x_fill_llis_for_desc(pl08x, txd);
 	if (!num_llis) {
-		kfree(txd);
+		spin_lock_irqsave(&plchan->lock, flags);
+		pl08x_free_txd(pl08x, txd);
+		spin_unlock_irqrestore(&plchan->lock, flags);
 		return -EINVAL;
 	}
 
@@ -1301,7 +1225,7 @@
 static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan,
 	unsigned long flags)
 {
-	struct pl08x_txd *txd = kzalloc(sizeof(struct pl08x_txd), GFP_NOWAIT);
+	struct pl08x_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
 
 	if (txd) {
 		dma_async_tx_descriptor_init(&txd->tx, &plchan->chan);
@@ -1367,7 +1291,7 @@
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
 	struct pl08x_driver_data *pl08x = plchan->host;
 	struct pl08x_txd *txd;
-	int ret;
+	int ret, tmp;
 
 	/*
 	 * Current implementation ASSUMES only one sg
@@ -1401,12 +1325,10 @@
 	txd->len = sgl->length;
 
 	if (direction == DMA_TO_DEVICE) {
-		txd->ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT;
 		txd->cctl = plchan->dst_cctl;
 		txd->src_addr = sgl->dma_address;
 		txd->dst_addr = plchan->dst_addr;
 	} else if (direction == DMA_FROM_DEVICE) {
-		txd->ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
 		txd->cctl = plchan->src_cctl;
 		txd->src_addr = plchan->src_addr;
 		txd->dst_addr = sgl->dma_address;
@@ -1416,6 +1338,15 @@
 		return NULL;
 	}
 
+	if (plchan->cd->device_fc)
+		tmp = (direction == DMA_TO_DEVICE) ? PL080_FLOW_MEM2PER_PER :
+			PL080_FLOW_PER2MEM_PER;
+	else
+		tmp = (direction == DMA_TO_DEVICE) ? PL080_FLOW_MEM2PER :
+			PL080_FLOW_PER2MEM;
+
+	txd->ccfg |= tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+
 	ret = pl08x_prep_channel_resources(plchan, txd);
 	if (ret)
 		return NULL;
@@ -1507,13 +1438,7 @@
  */
 static void pl08x_ensure_on(struct pl08x_driver_data *pl08x)
 {
-	u32 val;
-
-	val = readl(pl08x->base + PL080_CONFIG);
-	val &= ~(PL080_CONFIG_M2_BE | PL080_CONFIG_M1_BE | PL080_CONFIG_ENABLE);
-	/* We implicitly clear bit 1 and that means little-endian mode */
-	val |= PL080_CONFIG_ENABLE;
-	writel(val, pl08x->base + PL080_CONFIG);
+	writel(PL080_CONFIG_ENABLE, pl08x->base + PL080_CONFIG);
 }
 
 static void pl08x_unmap_buffers(struct pl08x_txd *txd)
@@ -1589,8 +1514,8 @@
 		 */
 		list_for_each_entry(waiting, &pl08x->memcpy.channels,
 				    chan.device_node) {
-		  if (waiting->state == PL08X_CHAN_WAITING &&
-			    waiting->waiting != NULL) {
+			if (waiting->state == PL08X_CHAN_WAITING &&
+				waiting->waiting != NULL) {
 				int ret;
 
 				/* This should REALLY not fail now */
@@ -1630,38 +1555,40 @@
 static irqreturn_t pl08x_irq(int irq, void *dev)
 {
 	struct pl08x_driver_data *pl08x = dev;
-	u32 mask = 0;
-	u32 val;
-	int i;
+	u32 mask = 0, err, tc, i;
 
-	val = readl(pl08x->base + PL080_ERR_STATUS);
-	if (val) {
-		/* An error interrupt (on one or more channels) */
-		dev_err(&pl08x->adev->dev,
-			"%s error interrupt, register value 0x%08x\n",
-				__func__, val);
-		/*
-		 * Simply clear ALL PL08X error interrupts,
-		 * regardless of channel and cause
-		 * FIXME: should be 0x00000003 on PL081 really.
-		 */
-		writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
+	/* check & clear - ERR & TC interrupts */
+	err = readl(pl08x->base + PL080_ERR_STATUS);
+	if (err) {
+		dev_err(&pl08x->adev->dev, "%s error interrupt, register value 0x%08x\n",
+			__func__, err);
+		writel(err, pl08x->base + PL080_ERR_CLEAR);
 	}
-	val = readl(pl08x->base + PL080_INT_STATUS);
+	tc = readl(pl08x->base + PL080_INT_STATUS);
+	if (tc)
+		writel(tc, pl08x->base + PL080_TC_CLEAR);
+
+	if (!err && !tc)
+		return IRQ_NONE;
+
 	for (i = 0; i < pl08x->vd->channels; i++) {
-		if ((1 << i) & val) {
+		if (((1 << i) & err) || ((1 << i) & tc)) {
 			/* Locate physical channel */
 			struct pl08x_phy_chan *phychan = &pl08x->phy_chans[i];
 			struct pl08x_dma_chan *plchan = phychan->serving;
 
+			if (!plchan) {
+				dev_err(&pl08x->adev->dev,
+					"%s Error TC interrupt on unused channel: 0x%08x\n",
+					__func__, i);
+				continue;
+			}
+
 			/* Schedule tasklet on this channel */
 			tasklet_schedule(&plchan->tasklet);
-
 			mask |= (1 << i);
 		}
 	}
-	/* Clear only the terminal interrupts on channels we processed */
-	writel(mask, pl08x->base + PL080_TC_CLEAR);
 
 	return mask ? IRQ_HANDLED : IRQ_NONE;
 }
@@ -1685,9 +1612,7 @@
  * Make a local wrapper to hold required data
  */
 static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
-					   struct dma_device *dmadev,
-					   unsigned int channels,
-					   bool slave)
+		struct dma_device *dmadev, unsigned int channels, bool slave)
 {
 	struct pl08x_dma_chan *chan;
 	int i;
@@ -1700,7 +1625,7 @@
 	 * to cope with that situation.
 	 */
 	for (i = 0; i < channels; i++) {
-		chan = kzalloc(sizeof(struct pl08x_dma_chan), GFP_KERNEL);
+		chan = kzalloc(sizeof(*chan), GFP_KERNEL);
 		if (!chan) {
 			dev_err(&pl08x->adev->dev,
 				"%s no memory for channel\n", __func__);
@@ -1728,7 +1653,7 @@
 			kfree(chan);
 			continue;
 		}
-		dev_info(&pl08x->adev->dev,
+		dev_dbg(&pl08x->adev->dev,
 			 "initialize virtual channel \"%s\"\n",
 			 chan->name);
 
@@ -1837,9 +1762,9 @@
 static void init_pl08x_debugfs(struct pl08x_driver_data *pl08x)
 {
 	/* Expose a simple debugfs interface to view all clocks */
-	(void) debugfs_create_file(dev_name(&pl08x->adev->dev), S_IFREG | S_IRUGO,
-				   NULL, pl08x,
-				   &pl08x_debugfs_operations);
+	(void) debugfs_create_file(dev_name(&pl08x->adev->dev),
+			S_IFREG | S_IRUGO, NULL, pl08x,
+			&pl08x_debugfs_operations);
 }
 
 #else
@@ -1860,12 +1785,15 @@
 		return ret;
 
 	/* Create the driver state holder */
-	pl08x = kzalloc(sizeof(struct pl08x_driver_data), GFP_KERNEL);
+	pl08x = kzalloc(sizeof(*pl08x), GFP_KERNEL);
 	if (!pl08x) {
 		ret = -ENOMEM;
 		goto out_no_pl08x;
 	}
 
+	pm_runtime_set_active(&adev->dev);
+	pm_runtime_enable(&adev->dev);
+
 	/* Initialize memcpy engine */
 	dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask);
 	pl08x->memcpy.dev = &adev->dev;
@@ -1939,7 +1867,7 @@
 	}
 
 	/* Initialize physical channels */
-	pl08x->phy_chans = kmalloc((vd->channels * sizeof(struct pl08x_phy_chan)),
+	pl08x->phy_chans = kmalloc((vd->channels * sizeof(*pl08x->phy_chans)),
 			GFP_KERNEL);
 	if (!pl08x->phy_chans) {
 		dev_err(&adev->dev, "%s failed to allocate "
@@ -1956,9 +1884,8 @@
 		spin_lock_init(&ch->lock);
 		ch->serving = NULL;
 		ch->signal = -1;
-		dev_info(&adev->dev,
-			 "physical channel %d is %s\n", i,
-			 pl08x_phy_channel_busy(ch) ? "BUSY" : "FREE");
+		dev_dbg(&adev->dev, "physical channel %d is %s\n",
+			i, pl08x_phy_channel_busy(ch) ? "BUSY" : "FREE");
 	}
 
 	/* Register as many memcpy channels as there are physical channels */
@@ -1974,8 +1901,7 @@
 
 	/* Register slave channels */
 	ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave,
-					      pl08x->pd->num_slave_channels,
-					      true);
+			pl08x->pd->num_slave_channels, true);
 	if (ret <= 0) {
 		dev_warn(&pl08x->adev->dev,
 			"%s failed to enumerate slave channels - %d\n",
@@ -2005,6 +1931,8 @@
 	dev_info(&pl08x->adev->dev, "DMA: PL%03x rev%u at 0x%08llx irq %d\n",
 		 amba_part(adev), amba_rev(adev),
 		 (unsigned long long)adev->res.start, adev->irq[0]);
+
+	pm_runtime_put(&adev->dev);
 	return 0;
 
 out_no_slave_reg:
@@ -2023,6 +1951,9 @@
 	dma_pool_destroy(pl08x->pool);
 out_no_lli_pool:
 out_no_platdata:
+	pm_runtime_put(&adev->dev);
+	pm_runtime_disable(&adev->dev);
+
 	kfree(pl08x);
 out_no_pl08x:
 	amba_release_regions(adev);