spi/spi_sh_msiof: consolidate data in 8-bit mode into 32-bit words

Instead of sending data 8 bits at a time in 8-bit SPI mode, swap bytes
and send and receive them 32 bits at a time. Tested with an SD-card,
with which this patch reduced the number of interrupts by 50%, when
reading 5MiB of data (there are also small service packets, the number
of interrupts, produced by 512-byte sectors should, of course, drop by
75%), and improved throughput by more than 40%.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
diff --git a/drivers/spi/spi_sh_msiof.c b/drivers/spi/spi_sh_msiof.c
index d220cb2..da6e42e 100644
--- a/drivers/spi/spi_sh_msiof.c
+++ b/drivers/spi/spi_sh_msiof.c
@@ -267,6 +267,26 @@
 		sh_msiof_write(p, TFDR, get_unaligned(&buf_32[k]) << fs);
 }
 
+static void sh_msiof_spi_write_fifo_s32(struct sh_msiof_spi_priv *p,
+					const void *tx_buf, int words, int fs)
+{
+	const u32 *buf_32 = tx_buf;
+	int k;
+
+	for (k = 0; k < words; k++)
+		sh_msiof_write(p, TFDR, swab32(buf_32[k] << fs));
+}
+
+static void sh_msiof_spi_write_fifo_s32u(struct sh_msiof_spi_priv *p,
+					 const void *tx_buf, int words, int fs)
+{
+	const u32 *buf_32 = tx_buf;
+	int k;
+
+	for (k = 0; k < words; k++)
+		sh_msiof_write(p, TFDR, swab32(get_unaligned(&buf_32[k]) << fs));
+}
+
 static void sh_msiof_spi_read_fifo_8(struct sh_msiof_spi_priv *p,
 				     void *rx_buf, int words, int fs)
 {
@@ -317,6 +337,26 @@
 		put_unaligned(sh_msiof_read(p, RFDR) >> fs, &buf_32[k]);
 }
 
+static void sh_msiof_spi_read_fifo_s32(struct sh_msiof_spi_priv *p,
+				       void *rx_buf, int words, int fs)
+{
+	u32 *buf_32 = rx_buf;
+	int k;
+
+	for (k = 0; k < words; k++)
+		buf_32[k] = swab32(sh_msiof_read(p, RFDR) >> fs);
+}
+
+static void sh_msiof_spi_read_fifo_s32u(struct sh_msiof_spi_priv *p,
+				       void *rx_buf, int words, int fs)
+{
+	u32 *buf_32 = rx_buf;
+	int k;
+
+	for (k = 0; k < words; k++)
+		put_unaligned(swab32(sh_msiof_read(p, RFDR) >> fs), &buf_32[k]);
+}
+
 static int sh_msiof_spi_bits(struct spi_device *spi, struct spi_transfer *t)
 {
 	int bits;
@@ -468,9 +508,17 @@
 	int bytes_done;
 	int words;
 	int n;
+	bool swab;
 
 	bits = sh_msiof_spi_bits(spi, t);
 
+	if (bits <= 8 && t->len > 15 && !(t->len & 3)) {
+		bits = 32;
+		swab = true;
+	} else {
+		swab = false;
+	}
+
 	/* setup bytes per word and fifo read/write functions */
 	if (bits <= 8) {
 		bytes_per_word = 1;
@@ -487,6 +535,17 @@
 			rx_fifo = sh_msiof_spi_read_fifo_16u;
 		else
 			rx_fifo = sh_msiof_spi_read_fifo_16;
+	} else if (swab) {
+		bytes_per_word = 4;
+		if ((unsigned long)t->tx_buf & 0x03)
+			tx_fifo = sh_msiof_spi_write_fifo_s32u;
+		else
+			tx_fifo = sh_msiof_spi_write_fifo_s32;
+
+		if ((unsigned long)t->rx_buf & 0x03)
+			rx_fifo = sh_msiof_spi_read_fifo_s32u;
+		else
+			rx_fifo = sh_msiof_spi_read_fifo_s32;
 	} else {
 		bytes_per_word = 4;
 		if ((unsigned long)t->tx_buf & 0x03)