libata-sff: fix 32-bit PIO ATAPI regression

Commit 871af1210f13966ab911ed2166e4ab2ce775b99d (libata: Add 32bit
PIO support) has caused all kinds of errors on the ATAPI devices, so
it has been empirically proven that one shouldn't try to read/write
an extra data word when a device is not expecting it already. "Don't
do it then"; however, still use a chance to do 32-bit read/write one
last time when there are exactly 3 trailing bytes.

Oh, and stop pointlessly swapping the bytes to and fro on big-endian
machines by using io*_rep() accessors which shouldn't byte-swap.

This patch should fix the kernel.org bug #12609.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 0b299b0..714cb04 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -773,18 +773,32 @@
 	else
 		iowrite32_rep(data_addr, buf, words);
 
+	/* Transfer trailing bytes, if any */
 	if (unlikely(slop)) {
-		__le32 pad;
+		unsigned char pad[4];
+
+		/* Point buf to the tail of buffer */
+		buf += buflen - slop;
+
+		/*
+		 * Use io*_rep() accessors here as well to avoid pointlessly
+		 * swapping bytes to and fro on the big endian machines...
+		 */
 		if (rw == READ) {
-			pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr));
-			memcpy(buf + buflen - slop, &pad, slop);
+			if (slop < 3)
+				ioread16_rep(data_addr, pad, 1);
+			else
+				ioread32_rep(data_addr, pad, 1);
+			memcpy(buf, pad, slop);
 		} else {
-			memcpy(&pad, buf + buflen - slop, slop);
-			iowrite32(le32_to_cpu(pad), ap->ioaddr.data_addr);
+			memcpy(pad, buf, slop);
+			if (slop < 3)
+				iowrite16_rep(data_addr, pad, 1);
+			else
+				iowrite32_rep(data_addr, pad, 1);
 		}
-		words++;
 	}
-	return words << 2;
+	return (buflen + 1) & ~1;
 }
 EXPORT_SYMBOL_GPL(ata_sff_data_xfer32);