dmaengine: i.MX: Add support for interleaved transfers.

i.MX2 and i.MX1 chips can perform interleaved transfers, subject to
two constraints:
- Only one chunk per frame can be used, i.e. only 2D transfers are
  allowed.
- At most two interleaved (2D) configurations can be active at the
  same time, shared among all channels.
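
A hypothetical client would describe such a transfer with a
single-chunk dma_interleaved_template. The sketch below is
illustrative only and not part of this patch; prep_2d_copy() and its
geometry parameters are made-up names:

	#include <linux/dmaengine.h>
	#include <linux/slab.h>

	/*
	 * Copy 'lines' rows of 'width' bytes each from a source whose
	 * rows are 'stride' bytes apart. frame_size == 1 matches the
	 * single-chunk constraint above.
	 */
	static struct dma_async_tx_descriptor *
	prep_2d_copy(struct dma_chan *chan, dma_addr_t src, dma_addr_t dst,
		     size_t width, size_t stride, size_t lines)
	{
		struct dma_interleaved_template *xt;
		struct dma_async_tx_descriptor *txd;

		xt = kzalloc(sizeof(*xt) + sizeof(struct data_chunk),
			     GFP_KERNEL);
		if (!xt)
			return NULL;

		xt->dir = DMA_MEM_TO_MEM;	/* only direction accepted */
		xt->src_start = src;
		xt->dst_start = dst;
		xt->src_inc = true;		/* both addresses increment */
		xt->dst_inc = true;
		xt->numf = lines;		/* rows per transfer */
		xt->frame_size = 1;		/* one chunk only (2D) */
		xt->sgl[0].size = width;	/* bytes copied per row */
		xt->sgl[0].icg = stride - width; /* gap between rows */
		xt->src_sgl = true;		/* scattered (2D) source */
		xt->dst_sgl = false;		/* packed destination */

		txd = chan->device->device_prep_interleaved_dma(chan, xt, 0);
		kfree(xt);
		return txd;
	}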

This patch adds a new resource, 'slots_2d', which is shared by all
the DMA channels. To avoid nasty locking bugs, the per-channel 'lock'
member has therefore been moved to the global 'imxdma_engine'
structure.
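
For reference, the mapping from the template onto a slot's three
registers, as performed by imxdma_prep_dma_interleaved() in the diff
below, is:

	x = xt->sgl[0].size;	/* XSR: bytes copied per row */
	y = xt->numf;		/* YSR: number of rows */
	w = xt->sgl[0].icg + x;	/* WSR: full row stride in memory */
	len = x * y;		/* total bytes actually transferred */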

Signed-off-by: Javier Martin <javier.martin@vista-silicon.com>
Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index 307cd14..0f698f8 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -36,6 +36,10 @@
 #define IMXDMA_MAX_CHAN_DESCRIPTORS	16
 #define IMX_DMA_CHANNELS  16
 
+#define IMX_DMA_2D_SLOTS	2
+#define IMX_DMA_2D_SLOT_A	0
+#define IMX_DMA_2D_SLOT_B	1
+
 #define IMX_DMA_LENGTH_LOOP	((unsigned int)-1)
 #define IMX_DMA_MEMSIZE_32	(0 << 4)
 #define IMX_DMA_MEMSIZE_8	(1 << 4)
@@ -111,6 +115,13 @@
 	IMXDMA_DESC_CYCLIC,
 };
 
+struct imx_dma_2d_config {
+	u16		xsr;	/* bytes copied per row (XSR) */
+	u16		ysr;	/* number of rows (YSR) */
+	u16		wsr;	/* full row stride in memory (WSR) */
+	int		count;	/* channels currently using this slot */
+};
+
 struct imxdma_desc {
 	struct list_head		node;
 	struct dma_async_tx_descriptor	desc;
@@ -147,13 +158,14 @@
 	dma_addr_t			per_address;
 	u32				watermark_level;
 	struct dma_chan			chan;
-	spinlock_t			lock;
 	struct dma_async_tx_descriptor	desc;
 	enum dma_status			status;
 	int				dma_request;
 	struct scatterlist		*sg_list;
 	u32				ccr_from_device;
 	u32				ccr_to_device;
+	bool				enabled_2d;	/* channel holds a 2D slot */
+	int				slot_2d;	/* index into slots_2d[] */
 };
 
 struct imxdma_engine {
@@ -162,6 +174,8 @@
 	struct dma_device		dma_device;
 	void __iomem			*base;
 	struct clk			*dma_clk;
+	spinlock_t			lock;
+	struct imx_dma_2d_config	slots_2d[IMX_DMA_2D_SLOTS];
 	struct imxdma_channel		channel[IMX_DMA_CHANNELS];
 };
 
@@ -361,16 +375,16 @@
 	int chno = imxdmac->channel;
 	struct imxdma_desc *desc;
 
-	spin_lock(&imxdmac->lock);
+	spin_lock(&imxdma->lock);
 	if (list_empty(&imxdmac->ld_active)) {
-		spin_unlock(&imxdmac->lock);
+		spin_unlock(&imxdma->lock);
 		goto out;
 	}
 
 	desc = list_first_entry(&imxdmac->ld_active,
 				struct imxdma_desc,
 				node);
-	spin_unlock(&imxdmac->lock);
+	spin_unlock(&imxdma->lock);
 
 	if (desc->sg) {
 		u32 tmp;
@@ -442,9 +456,53 @@
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
 	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	unsigned long flags;
+	int slot = -1;
+	int i;
 
 	/* Configure and enable */
 	switch (d->type) {
+	case IMXDMA_DESC_INTERLEAVED:
+		/*
+		 * Try to get a 2D slot: a slot is usable if it is currently
+		 * free (count == 0) or already programmed with the same
+		 * x/y/w geometry; 'count' refcounts the channels sharing it.
+		 */
+		spin_lock_irqsave(&imxdma->lock, flags);
+		for (i = 0; i < IMX_DMA_2D_SLOTS; i++) {
+			if ((imxdma->slots_2d[i].count > 0) &&
+			    ((imxdma->slots_2d[i].xsr != d->x) ||
+			     (imxdma->slots_2d[i].ysr != d->y) ||
+			     (imxdma->slots_2d[i].wsr != d->w)))
+				continue;
+			slot = i;
+			break;
+		}
+		if (slot < 0) {
+			spin_unlock_irqrestore(&imxdma->lock, flags);
+			return -EBUSY;
+		}
+
+		imxdma->slots_2d[slot].xsr = d->x;
+		imxdma->slots_2d[slot].ysr = d->y;
+		imxdma->slots_2d[slot].wsr = d->w;
+		imxdma->slots_2d[slot].count++;
+
+		imxdmac->slot_2d = slot;
+		imxdmac->enabled_2d = true;
+		spin_unlock_irqrestore(&imxdma->lock, flags);
+
+		if (slot == IMX_DMA_2D_SLOT_A) {
+			d->config_mem &= ~CCR_MSEL_B;
+			d->config_port &= ~CCR_MSEL_B;
+			imx_dmav1_writel(imxdma, d->x, DMA_XSRA);
+			imx_dmav1_writel(imxdma, d->y, DMA_YSRA);
+			imx_dmav1_writel(imxdma, d->w, DMA_WSRA);
+		} else {
+			d->config_mem |= CCR_MSEL_B;
+			d->config_port |= CCR_MSEL_B;
+			imx_dmav1_writel(imxdma, d->x, DMA_XSRB);
+			imx_dmav1_writel(imxdma, d->y, DMA_YSRB);
+			imx_dmav1_writel(imxdma, d->w, DMA_WSRB);
+		}
+		/*
+		 * We fall-through here intentionally, since a 2D transfer is
+		 * similar to MEMCPY just adding the 2D slot configuration.
+		 */
 	case IMXDMA_DESC_MEMCPY:
 		imx_dmav1_writel(imxdma, d->src, DMA_SAR(imxdmac->channel));
 		imx_dmav1_writel(imxdma, d->dest, DMA_DAR(imxdmac->channel));
@@ -503,7 +561,7 @@
 	struct imxdma_engine *imxdma = imxdmac->imxdma;
 	struct imxdma_desc *desc;
 
-	spin_lock(&imxdmac->lock);
+	spin_lock(&imxdma->lock);
 
 	if (list_empty(&imxdmac->ld_active)) {
 		/* Someone might have called terminate all */
@@ -520,6 +578,12 @@
 	if (imxdma_chan_is_doing_cyclic(imxdmac))
 		goto out;
 
+	/* Free 2D slot if it was an interleaved transfer */
+	if (imxdmac->enabled_2d) {
+		imxdma->slots_2d[imxdmac->slot_2d].count--;
+		imxdmac->enabled_2d = false;
+	}
+
 	list_move_tail(imxdmac->ld_active.next, &imxdmac->ld_free);
 
 	if (!list_empty(&imxdmac->ld_queue)) {
@@ -531,7 +595,7 @@
 				 __func__, imxdmac->channel);
 	}
 out:
-	spin_unlock(&imxdmac->lock);
+	spin_unlock(&imxdma->lock);
 }
 
 static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
@@ -547,10 +611,10 @@
 	case DMA_TERMINATE_ALL:
 		imxdma_disable_hw(imxdmac);
 
-		spin_lock_irqsave(&imxdmac->lock, flags);
+		spin_lock_irqsave(&imxdma->lock, flags);
 		list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free);
 		list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free);
-		spin_unlock_irqrestore(&imxdmac->lock, flags);
+		spin_unlock_irqrestore(&imxdma->lock, flags);
 		return 0;
 	case DMA_SLAVE_CONFIG:
 		if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
@@ -610,12 +674,13 @@
 static dma_cookie_t imxdma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(tx->chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
 	dma_cookie_t cookie;
 	unsigned long flags;
 
-	spin_lock_irqsave(&imxdmac->lock, flags);
+	spin_lock_irqsave(&imxdma->lock, flags);
 	cookie = dma_cookie_assign(tx);
-	spin_unlock_irqrestore(&imxdmac->lock, flags);
+	spin_unlock_irqrestore(&imxdma->lock, flags);
 
 	return cookie;
 }
@@ -654,16 +719,17 @@
 static void imxdma_free_chan_resources(struct dma_chan *chan)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
 	struct imxdma_desc *desc, *_desc;
 	unsigned long flags;
 
-	spin_lock_irqsave(&imxdmac->lock, flags);
+	spin_lock_irqsave(&imxdma->lock, flags);
 
 	imxdma_disable_hw(imxdmac);
 	list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free);
 	list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free);
 
-	spin_unlock_irqrestore(&imxdmac->lock, flags);
+	spin_unlock_irqrestore(&imxdma->lock, flags);
 
 	list_for_each_entry_safe(desc, _desc, &imxdmac->ld_free, node) {
 		kfree(desc);
@@ -818,6 +884,49 @@
 	return &desc->desc;
 }
 
+static struct dma_async_tx_descriptor *imxdma_prep_dma_interleaved(
+	struct dma_chan *chan, struct dma_interleaved_template *xt,
+	unsigned long flags)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+
+	dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%x dst_start=0x%x\n"
+		"   src_sgl=%s dst_sgl=%s numf=%zu frame_size=%zu\n", __func__,
+		imxdmac->channel, xt->src_start, xt->dst_start,
+		xt->src_sgl ? "true" : "false", xt->dst_sgl ? "true" : "false",
+		xt->numf, xt->frame_size);
+
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
+		return NULL;
+
+	if (xt->frame_size != 1 || !xt->numf || xt->dir != DMA_MEM_TO_MEM)
+		return NULL;
+
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+	desc->type = IMXDMA_DESC_INTERLEAVED;
+	desc->src = xt->src_start;
+	desc->dest = xt->dst_start;
+	desc->x = xt->sgl[0].size;
+	desc->y = xt->numf;
+	desc->w = xt->sgl[0].icg + desc->x;
+	desc->len = desc->x * desc->y;
+	desc->direction = DMA_MEM_TO_MEM;
+	desc->config_port = IMX_DMA_MEMSIZE_32;
+	desc->config_mem = IMX_DMA_MEMSIZE_32;
+	if (xt->src_sgl)
+		desc->config_mem |= IMX_DMA_TYPE_2D;
+	if (xt->dst_sgl)
+		desc->config_port |= IMX_DMA_TYPE_2D;
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
+}
+
 static void imxdma_issue_pending(struct dma_chan *chan)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
@@ -825,7 +934,7 @@
 	struct imxdma_desc *desc;
 	unsigned long flags;
 
-	spin_lock_irqsave(&imxdmac->lock, flags);
+	spin_lock_irqsave(&imxdma->lock, flags);
 	if (list_empty(&imxdmac->ld_active) &&
 	    !list_empty(&imxdmac->ld_queue)) {
 		desc = list_first_entry(&imxdmac->ld_queue,
@@ -840,7 +949,7 @@
 				       &imxdmac->ld_active);
 		}
 	}
-	spin_unlock_irqrestore(&imxdmac->lock, flags);
+	spin_unlock_irqrestore(&imxdma->lock, flags);
 }
 
 static int __init imxdma_probe(struct platform_device *pdev)
@@ -903,6 +1012,13 @@
 	dma_cap_set(DMA_SLAVE, imxdma->dma_device.cap_mask);
 	dma_cap_set(DMA_CYCLIC, imxdma->dma_device.cap_mask);
 	dma_cap_set(DMA_MEMCPY, imxdma->dma_device.cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, imxdma->dma_device.cap_mask);
+
+	/* Initialize 2D global parameters */
+	for (i = 0; i < IMX_DMA_2D_SLOTS; i++)
+		imxdma->slots_2d[i].count = 0;
+
+	spin_lock_init(&imxdma->lock);
 
 	/* Initialize channel parameters */
 	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
@@ -923,7 +1039,6 @@
 		}
 
 		imxdmac->imxdma = imxdma;
-		spin_lock_init(&imxdmac->lock);
 
 		INIT_LIST_HEAD(&imxdmac->ld_queue);
 		INIT_LIST_HEAD(&imxdmac->ld_free);
@@ -949,6 +1064,7 @@
 	imxdma->dma_device.device_prep_slave_sg = imxdma_prep_slave_sg;
 	imxdma->dma_device.device_prep_dma_cyclic = imxdma_prep_dma_cyclic;
 	imxdma->dma_device.device_prep_dma_memcpy = imxdma_prep_dma_memcpy;
+	imxdma->dma_device.device_prep_interleaved_dma = imxdma_prep_dma_interleaved;
 	imxdma->dma_device.device_control = imxdma_control;
 	imxdma->dma_device.device_issue_pending = imxdma_issue_pending;