spi/bfin_spi: support for multiples of 8bits with hardware CS

We can do multiples of 8bit transfers when using the hardware CS and a
little bit of magic, so make it work.

Signed-off-by: Bob Liu <lliubbo@gmail.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
diff --git a/drivers/spi/spi_bfin5xx.c b/drivers/spi/spi_bfin5xx.c
index 3f22351..e8d68b7 100644
--- a/drivers/spi/spi_bfin5xx.c
+++ b/drivers/spi/spi_bfin5xx.c
@@ -425,6 +425,7 @@
 	struct bfin_spi_slave_data *chip = drv_data->cur_chip;
 	struct spi_message *msg = drv_data->cur_msg;
 	int n_bytes = drv_data->n_bytes;
+	int loop = 0;
 
 	/* wait until transfer finished. */
 	while (!(read_STAT(drv_data) & BIT_STAT_RXS))
@@ -435,10 +436,15 @@
 		/* last read */
 		if (drv_data->rx) {
 			dev_dbg(&drv_data->pdev->dev, "last read\n");
-			if (n_bytes == 2)
-				*(u16 *) (drv_data->rx) = read_RDBR(drv_data);
-			else if (n_bytes == 1)
-				*(u8 *) (drv_data->rx) = read_RDBR(drv_data);
+			if (n_bytes % 2) {
+				u16 *buf = (u16 *)drv_data->rx;
+				for (loop = 0; loop < n_bytes / 2; loop++)
+					*buf++ = read_RDBR(drv_data);
+			} else {
+				u8 *buf = (u8 *)drv_data->rx;
+				for (loop = 0; loop < n_bytes; loop++)
+					*buf++ = read_RDBR(drv_data);
+			}
 			drv_data->rx += n_bytes;
 		}
 
@@ -458,29 +464,53 @@
 	if (drv_data->rx && drv_data->tx) {
 		/* duplex */
 		dev_dbg(&drv_data->pdev->dev, "duplex: write_TDBR\n");
-		if (drv_data->n_bytes == 2) {
-			*(u16 *) (drv_data->rx) = read_RDBR(drv_data);
-			write_TDBR(drv_data, (*(u16 *) (drv_data->tx)));
-		} else if (drv_data->n_bytes == 1) {
-			*(u8 *) (drv_data->rx) = read_RDBR(drv_data);
-			write_TDBR(drv_data, (*(u8 *) (drv_data->tx)));
+		if (n_bytes % 2) {
+			u16 *buf = (u16 *)drv_data->rx;
+			u16 *buf2 = (u16 *)drv_data->tx;
+			for (loop = 0; loop < n_bytes / 2; loop++) {
+				*buf++ = read_RDBR(drv_data);
+				write_TDBR(drv_data, *buf2++);
+			}
+		} else {
+			u8 *buf = (u8 *)drv_data->rx;
+			u8 *buf2 = (u8 *)drv_data->tx;
+			for (loop = 0; loop < n_bytes; loop++) {
+				*buf++ = read_RDBR(drv_data);
+				write_TDBR(drv_data, *buf2++);
+			}
 		}
 	} else if (drv_data->rx) {
 		/* read */
 		dev_dbg(&drv_data->pdev->dev, "read: write_TDBR\n");
-		if (drv_data->n_bytes == 2)
-			*(u16 *) (drv_data->rx) = read_RDBR(drv_data);
-		else if (drv_data->n_bytes == 1)
-			*(u8 *) (drv_data->rx) = read_RDBR(drv_data);
-		write_TDBR(drv_data, chip->idle_tx_val);
+		if (n_bytes % 2) {
+			u16 *buf = (u16 *)drv_data->rx;
+			for (loop = 0; loop < n_bytes / 2; loop++) {
+				*buf++ = read_RDBR(drv_data);
+				write_TDBR(drv_data, chip->idle_tx_val);
+			}
+		} else {
+			u8 *buf = (u8 *)drv_data->rx;
+			for (loop = 0; loop < n_bytes; loop++) {
+				*buf++ = read_RDBR(drv_data);
+				write_TDBR(drv_data, chip->idle_tx_val);
+			}
+		}
 	} else if (drv_data->tx) {
 		/* write */
 		dev_dbg(&drv_data->pdev->dev, "write: write_TDBR\n");
-		bfin_spi_dummy_read(drv_data);
-		if (drv_data->n_bytes == 2)
-			write_TDBR(drv_data, (*(u16 *) (drv_data->tx)));
-		else if (drv_data->n_bytes == 1)
-			write_TDBR(drv_data, (*(u8 *) (drv_data->tx)));
+		if (n_bytes % 2) {
+			u16 *buf = (u16 *)drv_data->tx;
+			for (loop = 0; loop < n_bytes / 2; loop++) {
+				read_RDBR(drv_data);
+				write_TDBR(drv_data, *buf++);
+			}
+		} else {
+			u8 *buf = (u8 *)drv_data->tx;
+			for (loop = 0; loop < n_bytes; loop++) {
+				read_RDBR(drv_data);
+				write_TDBR(drv_data, *buf++);
+			}
+		}
 	}
 
 	if (drv_data->tx)
@@ -651,16 +681,16 @@
 
 	/* Bits per word setup */
 	bits_per_word = transfer->bits_per_word ? : message->spi->bits_per_word;
-	if (bits_per_word == 8) {
-		drv_data->n_bytes = 1;
-		drv_data->len = transfer->len;
-		cr_width = 0;
-		drv_data->ops = &bfin_bfin_spi_transfer_ops_u8;
-	} else if (bits_per_word == 16) {
-		drv_data->n_bytes = 2;
+	if ((bits_per_word > 0) && (bits_per_word % 16 == 0)) {
+		drv_data->n_bytes = bits_per_word/8;
 		drv_data->len = (transfer->len) >> 1;
 		cr_width = BIT_CTL_WORDSIZE;
 		drv_data->ops = &bfin_bfin_spi_transfer_ops_u16;
+	} else if ((bits_per_word > 0) && (bits_per_word % 8 == 0)) {
+		drv_data->n_bytes = bits_per_word/8;
+		drv_data->len = transfer->len;
+		cr_width = 0;
+		drv_data->ops = &bfin_bfin_spi_transfer_ops_u8;
 	} else {
 		dev_err(&drv_data->pdev->dev, "transfer: unsupported bits_per_word\n");
 		message->status = -EINVAL;
@@ -815,10 +845,19 @@
 		if (drv_data->tx == NULL)
 			write_TDBR(drv_data, chip->idle_tx_val);
 		else {
-			if (bits_per_word == 8)
-				write_TDBR(drv_data, (*(u8 *) (drv_data->tx)));
-			else
-				write_TDBR(drv_data, (*(u16 *) (drv_data->tx)));
+			int loop;
+			if (bits_per_word % 16 == 0) {
+				u16 *buf = (u16 *)drv_data->tx;
+				for (loop = 0; loop < bits_per_word / 16;
+						loop++) {
+					write_TDBR(drv_data, *buf++);
+				}
+			} else if (bits_per_word % 8 == 0) {
+				u8 *buf = (u8 *)drv_data->tx;
+				for (loop = 0; loop < bits_per_word / 8; loop++)
+					write_TDBR(drv_data, *buf++);
+			}
+
 			drv_data->tx += drv_data->n_bytes;
 		}
 
@@ -1031,7 +1070,7 @@
 		chip->ctl_reg &= bfin_ctl_reg;
 	}
 
-	if (spi->bits_per_word != 8 && spi->bits_per_word != 16) {
+	if (spi->bits_per_word % 8) {
 		dev_err(&spi->dev, "%d bits_per_word is not supported\n",
 				spi->bits_per_word);
 		goto error;