spi_qsd: DM mode support for different fifo sizes

SPI driver doesn't support different block size in
DM mode because based on assumption that GSBI tx&rx
FIFOs are of same size. Some GSBI configuration
can have different tx/rx fifo size. Tx/Rx fifo size
is defined based on input/output block size.

Adds support for different rx/tx fifo size in
case of DMA transaction.

CRs-Fixed: 452986
Change-Id: Ie86f487ea97037ed14e9838a3bd52dfe95f56974
Signed-off-by: Alok Chauhan <alokc@codeaurora.org>
diff --git a/drivers/spi/spi_qsd.c b/drivers/spi/spi_qsd.c
index 7f60128..b89f608 100644
--- a/drivers/spi/spi_qsd.c
+++ b/drivers/spi/spi_qsd.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2008-2013, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -276,13 +276,12 @@
 		if (dd->input_block_size == 4 || dd->output_block_size == 4)
 			dd->use_dma = 0;
 
-		/* DM mode is currently unsupported for different block sizes */
-		if (dd->input_block_size != dd->output_block_size)
-			dd->use_dma = 0;
-
-		if (dd->use_dma)
-			dd->burst_size = max(dd->input_block_size,
-							DM_BURST_SIZE);
+		if (dd->use_dma) {
+			dd->input_burst_size = max(dd->input_block_size,
+						DM_BURST_SIZE);
+			dd->output_burst_size = max(dd->output_block_size,
+						DM_BURST_SIZE);
+		}
 	}
 
 	return;
@@ -646,7 +645,8 @@
 static void msm_spi_setup_dm_transfer(struct msm_spi *dd)
 {
 	dmov_box *box;
-	int bytes_to_send, num_rows, bytes_sent;
+	int bytes_to_send, bytes_sent;
+	int tx_num_rows, rx_num_rows;
 	u32 num_transfers;
 
 	atomic_set(&dd->rx_irq_called, 0);
@@ -678,64 +678,80 @@
 			      dd->max_trfr_len);
 
 	num_transfers = DIV_ROUND_UP(bytes_to_send, dd->bytes_per_word);
-	dd->unaligned_len = bytes_to_send % dd->burst_size;
-	num_rows = bytes_to_send / dd->burst_size;
+	dd->tx_unaligned_len = bytes_to_send % dd->output_burst_size;
+	dd->rx_unaligned_len = bytes_to_send % dd->input_burst_size;
+	tx_num_rows = bytes_to_send / dd->output_burst_size;
+	rx_num_rows = bytes_to_send / dd->input_burst_size;
 
 	dd->mode = SPI_DMOV_MODE;
 
-	if (num_rows) {
+	if (tx_num_rows) {
 		/* src in 16 MSB, dst in 16 LSB */
 		box = &dd->tx_dmov_cmd->box;
 		box->src_row_addr = dd->cur_transfer->tx_dma + bytes_sent;
-		box->src_dst_len = (dd->burst_size << 16) | dd->burst_size;
-		box->num_rows = (num_rows << 16) | num_rows;
-		box->row_offset = (dd->burst_size << 16) | 0;
+		box->src_dst_len
+			= (dd->output_burst_size << 16) | dd->output_burst_size;
+		box->num_rows = (tx_num_rows << 16) | tx_num_rows;
+		box->row_offset = (dd->output_burst_size << 16) | 0;
 
+		dd->tx_dmov_cmd->cmd_ptr = CMD_PTR_LP |
+				   DMOV_CMD_ADDR(dd->tx_dmov_cmd_dma +
+				   offsetof(struct spi_dmov_cmd, box));
+	} else {
+		dd->tx_dmov_cmd->cmd_ptr = CMD_PTR_LP |
+				   DMOV_CMD_ADDR(dd->tx_dmov_cmd_dma +
+				   offsetof(struct spi_dmov_cmd, single_pad));
+	}
+
+	if (rx_num_rows) {
+		/* src in 16 MSB, dst in 16 LSB */
 		box = &dd->rx_dmov_cmd->box;
 		box->dst_row_addr = dd->cur_transfer->rx_dma + bytes_sent;
-		box->src_dst_len = (dd->burst_size << 16) | dd->burst_size;
-		box->num_rows = (num_rows << 16) | num_rows;
-		box->row_offset = (0 << 16) | dd->burst_size;
+		box->src_dst_len
+			= (dd->input_burst_size << 16) | dd->input_burst_size;
+		box->num_rows = (rx_num_rows << 16) | rx_num_rows;
+		box->row_offset = (0 << 16) | dd->input_burst_size;
 
-		dd->tx_dmov_cmd->cmd_ptr = CMD_PTR_LP |
-				   DMOV_CMD_ADDR(dd->tx_dmov_cmd_dma +
-				   offsetof(struct spi_dmov_cmd, box));
 		dd->rx_dmov_cmd->cmd_ptr = CMD_PTR_LP |
 				   DMOV_CMD_ADDR(dd->rx_dmov_cmd_dma +
 				   offsetof(struct spi_dmov_cmd, box));
 	} else {
-		dd->tx_dmov_cmd->cmd_ptr = CMD_PTR_LP |
-				   DMOV_CMD_ADDR(dd->tx_dmov_cmd_dma +
-				   offsetof(struct spi_dmov_cmd, single_pad));
 		dd->rx_dmov_cmd->cmd_ptr = CMD_PTR_LP |
 				   DMOV_CMD_ADDR(dd->rx_dmov_cmd_dma +
 				   offsetof(struct spi_dmov_cmd, single_pad));
 	}
 
-	if (!dd->unaligned_len) {
+	if (!dd->tx_unaligned_len) {
 		dd->tx_dmov_cmd->box.cmd |= CMD_LC;
-		dd->rx_dmov_cmd->box.cmd |= CMD_LC;
 	} else {
 		dmov_s *tx_cmd = &(dd->tx_dmov_cmd->single_pad);
-		dmov_s *rx_cmd = &(dd->rx_dmov_cmd->single_pad);
-		u32 offset = dd->cur_transfer->len - dd->unaligned_len;
+		u32 tx_offset = dd->cur_transfer->len - dd->tx_unaligned_len;
 
 		if ((dd->multi_xfr) && (dd->read_len <= 0))
-			offset = dd->cur_msg_len - dd->unaligned_len;
+			tx_offset = dd->cur_msg_len - dd->tx_unaligned_len;
 
 		dd->tx_dmov_cmd->box.cmd &= ~CMD_LC;
-		dd->rx_dmov_cmd->box.cmd &= ~CMD_LC;
 
-		memset(dd->tx_padding, 0, dd->burst_size);
-		memset(dd->rx_padding, 0, dd->burst_size);
+		memset(dd->tx_padding, 0, dd->output_burst_size);
 		if (dd->write_buf)
-			memcpy(dd->tx_padding, dd->write_buf + offset,
-			       dd->unaligned_len);
+			memcpy(dd->tx_padding, dd->write_buf + tx_offset,
+			       dd->tx_unaligned_len);
 
 		tx_cmd->src = dd->tx_padding_dma;
-		rx_cmd->dst = dd->rx_padding_dma;
-		tx_cmd->len = rx_cmd->len = dd->burst_size;
+		tx_cmd->len = dd->output_burst_size;
 	}
+
+	if (!dd->rx_unaligned_len) {
+		dd->rx_dmov_cmd->box.cmd |= CMD_LC;
+	} else {
+		dmov_s *rx_cmd = &(dd->rx_dmov_cmd->single_pad);
+		dd->rx_dmov_cmd->box.cmd &= ~CMD_LC;
+
+		memset(dd->rx_padding, 0, dd->input_burst_size);
+		rx_cmd->dst = dd->rx_padding_dma;
+		rx_cmd->len = dd->input_burst_size;
+	}
+
 	/* This also takes care of the padding dummy buf
 	   Since this is set to the correct length, the
 	   dummy bytes won't be actually sent */
@@ -893,7 +909,7 @@
 		if ((!dd->read_buf || op & SPI_OP_MAX_INPUT_DONE_FLAG) &&
 		    (!dd->write_buf || op & SPI_OP_MAX_OUTPUT_DONE_FLAG)) {
 			msm_spi_ack_transfer(dd);
-			if (dd->unaligned_len == 0) {
+			if (dd->rx_unaligned_len == 0) {
 				if (atomic_inc_return(&dd->rx_irq_called) == 1)
 					return IRQ_HANDLED;
 			}
@@ -1145,11 +1161,11 @@
 						 prev_xfr->len,
 						 DMA_TO_DEVICE);
 			}
-			if (dd->unaligned_len && dd->read_buf) {
-				offset = dd->cur_msg_len - dd->unaligned_len;
+			if (dd->rx_unaligned_len && dd->read_buf) {
+				offset = dd->cur_msg_len - dd->rx_unaligned_len;
 				dma_coherent_post_ops();
 				memcpy(dd->read_buf + offset, dd->rx_padding,
-				       dd->unaligned_len);
+				       dd->rx_unaligned_len);
 				memcpy(dd->cur_transfer->rx_buf,
 				       dd->read_buf + prev_xfr->len,
 				       dd->cur_transfer->len);
@@ -1171,11 +1187,11 @@
 
 unmap_end:
 	/* If we padded the transfer, we copy it from the padding buf */
-	if (dd->unaligned_len && dd->read_buf) {
-		offset = dd->cur_transfer->len - dd->unaligned_len;
+	if (dd->rx_unaligned_len && dd->read_buf) {
+		offset = dd->cur_transfer->len - dd->rx_unaligned_len;
 		dma_coherent_post_ops();
 		memcpy(dd->read_buf + offset, dd->rx_padding,
-		       dd->unaligned_len);
+		       dd->rx_unaligned_len);
 	}
 }
 
@@ -1960,7 +1976,8 @@
 			"use_dma ?    %s\n"
 			"rx block size = %d bytes\n"
 			"tx block size = %d bytes\n"
-			"burst size = %d bytes\n"
+			"input burst size = %d bytes\n"
+			"output burst size = %d bytes\n"
 			"DMA configuration:\n"
 			"tx_ch=%d, rx_ch=%d, tx_crci= %d, rx_crci=%d\n"
 			"--statistics--\n"
@@ -1975,7 +1992,8 @@
 			dd->use_dma ? "yes" : "no",
 			dd->input_block_size,
 			dd->output_block_size,
-			dd->burst_size,
+			dd->input_burst_size,
+			dd->output_burst_size,
 			dd->tx_dma_chan,
 			dd->rx_dma_chan,
 			dd->tx_dma_crci,
@@ -2104,12 +2122,15 @@
 	}
 }
 
-static inline u32 get_chunk_size(struct msm_spi *dd)
+static inline u32 get_chunk_size(struct msm_spi *dd, int input_burst_size,
+			int output_burst_size)
 {
 	u32 cache_line = dma_get_cache_alignment();
+	int burst_size = (input_burst_size > output_burst_size) ?
+		input_burst_size : output_burst_size;
 
 	return (roundup(sizeof(struct spi_dmov_cmd), DM_BYTE_ALIGN) +
-			  roundup(dd->burst_size, cache_line))*2;
+			  roundup(burst_size, cache_line))*2;
 }
 
 static void msm_spi_dmov_teardown(struct msm_spi *dd)
@@ -2125,8 +2146,10 @@
 		msleep(10);
 	}
 
-	dma_free_coherent(NULL, get_chunk_size(dd), dd->tx_dmov_cmd,
-			  dd->tx_dmov_cmd_dma);
+	dma_free_coherent(NULL,
+		get_chunk_size(dd, dd->input_burst_size, dd->output_burst_size),
+		dd->tx_dmov_cmd,
+		dd->tx_dmov_cmd_dma);
 	dd->tx_dmov_cmd = dd->rx_dmov_cmd = NULL;
 	dd->tx_padding = dd->rx_padding = NULL;
 }
@@ -2304,8 +2327,11 @@
 
 	/* We send NULL device, since it requires coherent_dma_mask id
 	   device definition, we're okay with using system pool */
-	dd->tx_dmov_cmd = dma_alloc_coherent(NULL, get_chunk_size(dd),
-					     &dd->tx_dmov_cmd_dma, GFP_KERNEL);
+	dd->tx_dmov_cmd
+		= dma_alloc_coherent(NULL,
+			get_chunk_size(dd, dd->input_burst_size,
+				dd->output_burst_size),
+			&dd->tx_dmov_cmd_dma, GFP_KERNEL);
 	if (dd->tx_dmov_cmd == NULL)
 		return -ENOMEM;
 
@@ -2319,9 +2345,9 @@
 	dd->tx_padding = (u8 *)ALIGN((size_t)&dd->rx_dmov_cmd[1], cache_line);
 	dd->tx_padding_dma = ALIGN(dd->rx_dmov_cmd_dma +
 			      sizeof(struct spi_dmov_cmd), cache_line);
-	dd->rx_padding = (u8 *)ALIGN((size_t)(dd->tx_padding + dd->burst_size),
-				     cache_line);
-	dd->rx_padding_dma = ALIGN(dd->tx_padding_dma + dd->burst_size,
+	dd->rx_padding = (u8 *)ALIGN((size_t)(dd->tx_padding +
+		dd->output_burst_size), cache_line);
+	dd->rx_padding_dma = ALIGN(dd->tx_padding_dma + dd->output_burst_size,
 				      cache_line);
 
 	/* Setup DM commands */
diff --git a/drivers/spi/spi_qsd.h b/drivers/spi/spi_qsd.h
index 7f5b726..b749cc0 100644
--- a/drivers/spi/spi_qsd.h
+++ b/drivers/spi/spi_qsd.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2008-2013, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -318,7 +318,8 @@
 	struct msm_dmov_cmd      rx_hdr;
 	int                      input_block_size;
 	int                      output_block_size;
-	int                      burst_size;
+	int                      input_burst_size;
+	int                      output_burst_size;
 	atomic_t                 rx_irq_called;
 	atomic_t                 tx_irq_called;
 	/* Used to pad messages unaligned to block size */
@@ -326,7 +327,8 @@
 	dma_addr_t               tx_padding_dma;
 	u8                       *rx_padding;
 	dma_addr_t               rx_padding_dma;
-	u32                      unaligned_len;
+	u32                      tx_unaligned_len;
+	u32                      rx_unaligned_len;
 	/* DMA statistics */
 	int                      stat_dmov_tx_err;
 	int                      stat_dmov_rx_err;