dmaengine: dmatest: honor alignment restriction for buffers

The existing implementation does not honor the alignment restrictions imposed
by the DMA engines. Allocate buffers with built-in slack for honoring
alignment restrictions. Create new arrays to hold the aligned pointers
and use those pointers for operations.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 3b358c4..451f899 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -164,7 +164,9 @@
 	struct task_struct	*task;
 	struct dma_chan		*chan;
 	u8			**srcs;
+	u8			**usrcs;
 	u8			**dsts;
+	u8			**udsts;
 	enum dma_transaction_type type;
 	bool			done;
 };
@@ -431,6 +433,7 @@
 	ktime_t			comparetime = ktime_set(0, 0);
 	s64			runtime = 0;
 	unsigned long long	total_len = 0;
+	u8			align = 0;
 
 	set_freezable();
 
@@ -441,18 +444,22 @@
 	params = &info->params;
 	chan = thread->chan;
 	dev = chan->device;
-	if (thread->type == DMA_MEMCPY)
+	if (thread->type == DMA_MEMCPY) {
+		align = dev->copy_align;
 		src_cnt = dst_cnt = 1;
-	else if (thread->type == DMA_SG)
+	} else if (thread->type == DMA_SG) {
+		align = dev->copy_align;
 		src_cnt = dst_cnt = sg_buffers;
-	else if (thread->type == DMA_XOR) {
+	} else if (thread->type == DMA_XOR) {
 		/* force odd to ensure dst = src */
 		src_cnt = min_odd(params->xor_sources | 1, dev->max_xor);
 		dst_cnt = 1;
+		align = dev->xor_align;
 	} else if (thread->type == DMA_PQ) {
 		/* force odd to ensure dst = src */
 		src_cnt = min_odd(params->pq_sources | 1, dma_maxpq(dev, 0));
 		dst_cnt = 2;
+		align = dev->pq_align;
 
 		pq_coefs = kmalloc(params->pq_sources + 1, GFP_KERNEL);
 		if (!pq_coefs)
@@ -466,20 +473,44 @@
 	thread->srcs = kcalloc(src_cnt + 1, sizeof(u8 *), GFP_KERNEL);
 	if (!thread->srcs)
 		goto err_srcs;
+
+	thread->usrcs = kcalloc(src_cnt + 1, sizeof(u8 *), GFP_KERNEL);
+	if (!thread->usrcs)
+		goto err_usrcs;
+
 	for (i = 0; i < src_cnt; i++) {
-		thread->srcs[i] = kmalloc(params->buf_size, GFP_KERNEL);
-		if (!thread->srcs[i])
+		thread->usrcs[i] = kmalloc(params->buf_size + align,
+					   GFP_KERNEL);
+		if (!thread->usrcs[i])
 			goto err_srcbuf;
+
+		/* align srcs to alignment restriction */
+		if (align)
+			thread->srcs[i] = PTR_ALIGN(thread->usrcs[i], align);
+		else
+			thread->srcs[i] = thread->usrcs[i];
 	}
 	thread->srcs[i] = NULL;
 
 	thread->dsts = kcalloc(dst_cnt + 1, sizeof(u8 *), GFP_KERNEL);
 	if (!thread->dsts)
 		goto err_dsts;
+
+	thread->udsts = kcalloc(dst_cnt + 1, sizeof(u8 *), GFP_KERNEL);
+	if (!thread->udsts)
+		goto err_udsts;
+
 	for (i = 0; i < dst_cnt; i++) {
-		thread->dsts[i] = kmalloc(params->buf_size, GFP_KERNEL);
-		if (!thread->dsts[i])
+		thread->udsts[i] = kmalloc(params->buf_size + align,
+					   GFP_KERNEL);
+		if (!thread->udsts[i])
 			goto err_dstbuf;
+
+		/* align dsts to alignment restriction */
+		if (align)
+			thread->dsts[i] = PTR_ALIGN(thread->udsts[i], align);
+		else
+			thread->dsts[i] = thread->udsts[i];
 	}
 	thread->dsts[i] = NULL;
 
@@ -498,20 +529,11 @@
 		dma_addr_t srcs[src_cnt];
 		dma_addr_t *dsts;
 		unsigned int src_off, dst_off, len;
-		u8 align = 0;
 		struct scatterlist tx_sg[src_cnt];
 		struct scatterlist rx_sg[src_cnt];
 
 		total_tests++;
 
-		/* honor alignment restrictions */
-		if (thread->type == DMA_MEMCPY || thread->type == DMA_SG)
-			align = dev->copy_align;
-		else if (thread->type == DMA_XOR)
-			align = dev->xor_align;
-		else if (thread->type == DMA_PQ)
-			align = dev->pq_align;
-
 		if (1 << align > params->buf_size) {
 			pr_err("%u-byte buffer too small for %d-byte alignment\n",
 			       params->buf_size, 1 << align);
@@ -729,13 +751,17 @@
 
 	ret = 0;
 err_dstbuf:
-	for (i = 0; thread->dsts[i]; i++)
-		kfree(thread->dsts[i]);
+	for (i = 0; thread->udsts[i]; i++)
+		kfree(thread->udsts[i]);
+	kfree(thread->udsts);
+err_udsts:
 	kfree(thread->dsts);
 err_dsts:
 err_srcbuf:
-	for (i = 0; thread->srcs[i]; i++)
-		kfree(thread->srcs[i]);
+	for (i = 0; thread->usrcs[i]; i++)
+		kfree(thread->usrcs[i]);
+	kfree(thread->usrcs);
+err_usrcs:
 	kfree(thread->srcs);
 err_srcs:
 	kfree(pq_coefs);