dmaengine: vdma: Add support for mulit-channel dma mode

This patch adds support for AXI DMA multi-channel dma mode
Multichannel mode enables DMA to connect to multiple masters
and slaves on the streaming side.

In Multichannel mode AXI DMA supports 2D transfers.

Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
diff --git a/drivers/dma/xilinx/xilinx_vdma.c b/drivers/dma/xilinx/xilinx_vdma.c
index 40f754b..0768d9f 100644
--- a/drivers/dma/xilinx/xilinx_vdma.c
+++ b/drivers/dma/xilinx/xilinx_vdma.c
@@ -114,7 +114,7 @@
 #define XILINX_VDMA_REG_START_ADDRESS_64(n)	(0x000c + 8 * (n))
 
 /* HW specific definitions */
-#define XILINX_DMA_MAX_CHANS_PER_DEVICE	0x2
+#define XILINX_DMA_MAX_CHANS_PER_DEVICE	0x20
 
 #define XILINX_DMA_DMAXR_ALL_IRQ_MASK	\
 		(XILINX_DMA_DMASR_FRM_CNT_IRQ | \
@@ -165,6 +165,18 @@
 #define XILINX_DMA_COALESCE_MAX		255
 #define XILINX_DMA_NUM_APP_WORDS	5
 
+/* Multi-Channel DMA Descriptor offsets*/
+#define XILINX_DMA_MCRX_CDESC(x)	(0x40 + (x-1) * 0x20)
+#define XILINX_DMA_MCRX_TDESC(x)	(0x48 + (x-1) * 0x20)
+
+/* Multi-Channel DMA Masks/Shifts */
+#define XILINX_DMA_BD_HSIZE_MASK	GENMASK(15, 0)
+#define XILINX_DMA_BD_STRIDE_MASK	GENMASK(15, 0)
+#define XILINX_DMA_BD_VSIZE_MASK	GENMASK(31, 19)
+#define XILINX_DMA_BD_TDEST_MASK	GENMASK(4, 0)
+#define XILINX_DMA_BD_STRIDE_SHIFT	0
+#define XILINX_DMA_BD_VSIZE_SHIFT	19
+
 /* AXI CDMA Specific Registers/Offsets */
 #define XILINX_CDMA_REG_SRCADDR		0x18
 #define XILINX_CDMA_REG_DSTADDR		0x20
@@ -210,8 +222,8 @@
 	u32 next_desc_msb;
 	u32 buf_addr;
 	u32 buf_addr_msb;
-	u32 pad1;
-	u32 pad2;
+	u32 mcdma_control;
+	u32 vsize_stride;
 	u32 control;
 	u32 status;
 	u32 app[XILINX_DMA_NUM_APP_WORDS];
@@ -349,6 +361,7 @@
 	struct xilinx_axidma_tx_segment *seg_v;
 	struct xilinx_axidma_tx_segment *cyclic_seg_v;
 	void (*start_transfer)(struct xilinx_dma_chan *chan);
+	u16 tdest;
 };
 
 struct xilinx_dma_config {
@@ -365,6 +378,7 @@
  * @common: DMA device structure
  * @chan: Driver specific DMA channel
  * @has_sg: Specifies whether Scatter-Gather is present or not
+ * @mcdma: Specifies whether Multi-Channel is present or not
  * @flush_on_fsync: Flush on frame sync
  * @ext_addr: Indicates 64 bit addressing is supported by dma device
  * @pdev: Platform device structure pointer
@@ -374,6 +388,8 @@
  * @txs_clk: DMA mm2s stream clock
  * @rx_clk: DMA s2mm clock
  * @rxs_clk: DMA s2mm stream clock
+ * @nr_channels: Number of channels DMA device supports
+ * @chan_id: DMA channel identifier
  */
 struct xilinx_dma_device {
 	void __iomem *regs;
@@ -381,6 +397,7 @@
 	struct dma_device common;
 	struct xilinx_dma_chan *chan[XILINX_DMA_MAX_CHANS_PER_DEVICE];
 	bool has_sg;
+	bool mcdma;
 	u32 flush_on_fsync;
 	bool ext_addr;
 	struct platform_device  *pdev;
@@ -390,6 +407,8 @@
 	struct clk *txs_clk;
 	struct clk *rx_clk;
 	struct clk *rxs_clk;
+	u32 nr_channels;
+	u32 chan_id;
 };
 
 /* Macros */
@@ -1196,18 +1215,20 @@
 	tail_segment = list_last_entry(&tail_desc->segments,
 				       struct xilinx_axidma_tx_segment, node);
 
-	old_head = list_first_entry(&head_desc->segments,
-				struct xilinx_axidma_tx_segment, node);
-	new_head = chan->seg_v;
-	/* Copy Buffer Descriptor fields. */
-	new_head->hw = old_head->hw;
+	if (chan->has_sg && !chan->xdev->mcdma) {
+		old_head = list_first_entry(&head_desc->segments,
+					struct xilinx_axidma_tx_segment, node);
+		new_head = chan->seg_v;
+		/* Copy Buffer Descriptor fields. */
+		new_head->hw = old_head->hw;
 
-	/* Swap and save new reserve */
-	list_replace_init(&old_head->node, &new_head->node);
-	chan->seg_v = old_head;
+		/* Swap and save new reserve */
+		list_replace_init(&old_head->node, &new_head->node);
+		chan->seg_v = old_head;
 
-	tail_segment->hw.next_desc = chan->seg_v->phys;
-	head_desc->async_tx.phys = new_head->phys;
+		tail_segment->hw.next_desc = chan->seg_v->phys;
+		head_desc->async_tx.phys = new_head->phys;
+	}
 
 	reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
 
@@ -1218,23 +1239,53 @@
 		dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
 	}
 
-	if (chan->has_sg)
+	if (chan->has_sg && !chan->xdev->mcdma)
 		xilinx_write(chan, XILINX_DMA_REG_CURDESC,
 			     head_desc->async_tx.phys);
 
+	if (chan->has_sg && chan->xdev->mcdma) {
+		if (chan->direction == DMA_MEM_TO_DEV) {
+			dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC,
+				       head_desc->async_tx.phys);
+		} else {
+			if (!chan->tdest) {
+				dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC,
+				       head_desc->async_tx.phys);
+			} else {
+				dma_ctrl_write(chan,
+					XILINX_DMA_MCRX_CDESC(chan->tdest),
+				       head_desc->async_tx.phys);
+			}
+		}
+	}
+
 	xilinx_dma_start(chan);
 
 	if (chan->err)
 		return;
 
 	/* Start the transfer */
-	if (chan->has_sg) {
+	if (chan->has_sg && !chan->xdev->mcdma) {
 		if (chan->cyclic)
 			xilinx_write(chan, XILINX_DMA_REG_TAILDESC,
 				     chan->cyclic_seg_v->phys);
 		else
 			xilinx_write(chan, XILINX_DMA_REG_TAILDESC,
 				     tail_segment->phys);
+	} else if (chan->has_sg && chan->xdev->mcdma) {
+		if (chan->direction == DMA_MEM_TO_DEV) {
+			dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
+			       tail_segment->phys);
+		} else {
+			if (!chan->tdest) {
+				dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
+					       tail_segment->phys);
+			} else {
+				dma_ctrl_write(chan,
+					XILINX_DMA_MCRX_TDESC(chan->tdest),
+					tail_segment->phys);
+			}
+		}
 	} else {
 		struct xilinx_axidma_tx_segment *segment;
 		struct xilinx_axidma_desc_hw *hw;
@@ -1862,6 +1913,90 @@
 }
 
 /**
+ * xilinx_dma_prep_interleaved - prepare a descriptor for a
+ *	DMA_SLAVE transaction
+ * @dchan: DMA channel
+ * @xt: Interleaved template pointer
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+xilinx_dma_prep_interleaved(struct dma_chan *dchan,
+				 struct dma_interleaved_template *xt,
+				 unsigned long flags)
+{
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_tx_descriptor *desc;
+	struct xilinx_axidma_tx_segment *segment;
+	struct xilinx_axidma_desc_hw *hw;
+
+	if (!is_slave_direction(xt->dir))
+		return NULL;
+
+	if (!xt->numf || !xt->sgl[0].size)
+		return NULL;
+
+	if (xt->frame_size != 1)
+		return NULL;
+
+	/* Allocate a transaction descriptor. */
+	desc = xilinx_dma_alloc_tx_descriptor(chan);
+	if (!desc)
+		return NULL;
+
+	chan->direction = xt->dir;
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+	desc->async_tx.tx_submit = xilinx_dma_tx_submit;
+
+	/* Get a free segment */
+	segment = xilinx_axidma_alloc_tx_segment(chan);
+	if (!segment)
+		goto error;
+
+	hw = &segment->hw;
+
+	/* Fill in the descriptor */
+	if (xt->dir != DMA_MEM_TO_DEV)
+		hw->buf_addr = xt->dst_start;
+	else
+		hw->buf_addr = xt->src_start;
+
+	hw->mcdma_control = chan->tdest & XILINX_DMA_BD_TDEST_MASK;
+	hw->vsize_stride = (xt->numf << XILINX_DMA_BD_VSIZE_SHIFT) &
+			    XILINX_DMA_BD_VSIZE_MASK;
+	hw->vsize_stride |= (xt->sgl[0].icg + xt->sgl[0].size) &
+			    XILINX_DMA_BD_STRIDE_MASK;
+	hw->control = xt->sgl[0].size & XILINX_DMA_BD_HSIZE_MASK;
+
+	/*
+	 * Insert the segment into the descriptor segments
+	 * list.
+	 */
+	list_add_tail(&segment->node, &desc->segments);
+
+
+	segment = list_first_entry(&desc->segments,
+				   struct xilinx_axidma_tx_segment, node);
+	desc->async_tx.phys = segment->phys;
+
+	/* For the last DMA_MEM_TO_DEV transfer, set EOP */
+	if (xt->dir == DMA_MEM_TO_DEV) {
+		segment->hw.control |= XILINX_DMA_BD_SOP;
+		segment = list_last_entry(&desc->segments,
+					  struct xilinx_axidma_tx_segment,
+					  node);
+		segment->hw.control |= XILINX_DMA_BD_EOP;
+	}
+
+	return &desc->async_tx;
+
+error:
+	xilinx_dma_free_tx_descriptor(chan, desc);
+	return NULL;
+}
+
+/**
  * xilinx_dma_terminate_all - Halt the channel and free descriptors
  * @chan: Driver specific DMA Channel pointer
  */
@@ -2176,7 +2311,7 @@
  * Return: '0' on success and failure value on error
  */
 static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
-				  struct device_node *node)
+				  struct device_node *node, int chan_id)
 {
 	struct xilinx_dma_chan *chan;
 	bool has_dre = false;
@@ -2220,7 +2355,8 @@
 
 	if (of_device_is_compatible(node, "xlnx,axi-vdma-mm2s-channel")) {
 		chan->direction = DMA_MEM_TO_DEV;
-		chan->id = 0;
+		chan->id = chan_id;
+		chan->tdest = chan_id;
 
 		chan->ctrl_offset = XILINX_DMA_MM2S_CTRL_OFFSET;
 		if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
@@ -2233,7 +2369,8 @@
 	} else if (of_device_is_compatible(node,
 					    "xlnx,axi-vdma-s2mm-channel")) {
 		chan->direction = DMA_DEV_TO_MEM;
-		chan->id = 1;
+		chan->id = chan_id;
+		chan->tdest = chan_id - xdev->nr_channels;
 
 		chan->ctrl_offset = XILINX_DMA_S2MM_CTRL_OFFSET;
 		if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
@@ -2288,6 +2425,32 @@
 }
 
 /**
+ * xilinx_dma_child_probe - Per child node probe
+ * It get number of dma-channels per child node from
+ * device-tree and initializes all the channels.
+ *
+ * @xdev: Driver specific device structure
+ * @node: Device node
+ *
+ * Return: 0 always.
+ */
+static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev,
+				    struct device_node *node) {
+	int ret, i, nr_channels = 1;
+
+	ret = of_property_read_u32(node, "dma-channels", &nr_channels);
+	if ((ret < 0) && xdev->mcdma)
+		dev_warn(xdev->dev, "missing dma-channels property\n");
+
+	for (i = 0; i < nr_channels; i++)
+		xilinx_dma_chan_probe(xdev, node, xdev->chan_id++);
+
+	xdev->nr_channels += nr_channels;
+
+	return 0;
+}
+
+/**
  * of_dma_xilinx_xlate - Translation function
  * @dma_spec: Pointer to DMA specifier as found in the device tree
  * @ofdma: Pointer to DMA controller data
@@ -2300,7 +2463,7 @@
 	struct xilinx_dma_device *xdev = ofdma->of_dma_data;
 	int chan_id = dma_spec->args[0];
 
-	if (chan_id >= XILINX_DMA_MAX_CHANS_PER_DEVICE || !xdev->chan[chan_id])
+	if (chan_id >= xdev->nr_channels || !xdev->chan[chan_id])
 		return NULL;
 
 	return dma_get_slave_channel(&xdev->chan[chan_id]->common);
@@ -2376,6 +2539,8 @@
 
 	/* Retrieve the DMA engine properties from the device tree */
 	xdev->has_sg = of_property_read_bool(node, "xlnx,include-sg");
+	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA)
+		xdev->mcdma = of_property_read_bool(node, "xlnx,mcdma");
 
 	if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
 		err = of_property_read_u32(node, "xlnx,num-fstores",
@@ -2426,6 +2591,8 @@
 		xdev->common.device_prep_slave_sg = xilinx_dma_prep_slave_sg;
 		xdev->common.device_prep_dma_cyclic =
 					  xilinx_dma_prep_dma_cyclic;
+		xdev->common.device_prep_interleaved_dma =
+					xilinx_dma_prep_interleaved;
 		/* Residue calculation is supported by only AXI DMA */
 		xdev->common.residue_granularity =
 					  DMA_RESIDUE_GRANULARITY_SEGMENT;
@@ -2441,13 +2608,13 @@
 
 	/* Initialize the channels */
 	for_each_child_of_node(node, child) {
-		err = xilinx_dma_chan_probe(xdev, child);
+		err = xilinx_dma_child_probe(xdev, child);
 		if (err < 0)
 			goto disable_clks;
 	}
 
 	if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
-		for (i = 0; i < XILINX_DMA_MAX_CHANS_PER_DEVICE; i++)
+		for (i = 0; i < xdev->nr_channels; i++)
 			if (xdev->chan[i])
 				xdev->chan[i]->num_frms = num_frames;
 	}
@@ -2470,7 +2637,7 @@
 disable_clks:
 	xdma_disable_allclks(xdev);
 error:
-	for (i = 0; i < XILINX_DMA_MAX_CHANS_PER_DEVICE; i++)
+	for (i = 0; i < xdev->nr_channels; i++)
 		if (xdev->chan[i])
 			xilinx_dma_chan_remove(xdev->chan[i]);
 
@@ -2492,7 +2659,7 @@
 
 	dma_async_device_unregister(&xdev->common);
 
-	for (i = 0; i < XILINX_DMA_MAX_CHANS_PER_DEVICE; i++)
+	for (i = 0; i < xdev->nr_channels; i++)
 		if (xdev->chan[i])
 			xilinx_dma_chan_remove(xdev->chan[i]);