[libata ahci, qstor] fix miscount of scatter/gather entries

Don't directly reference qc->n_elem, as that might cause an off-by-one
error for misaligned (padded) ATAPI transfers.
diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c
index b2f6324..4a6d306 100644
--- a/drivers/scsi/sata_qstor.c
+++ b/drivers/scsi/sata_qstor.c
@@ -268,7 +268,7 @@
 	writel(val, (void __iomem *)(ap->ioaddr.scr_addr + (sc_reg * 8)));
 }
 
-static void qs_fill_sg(struct ata_queued_cmd *qc)
+static unsigned int qs_fill_sg(struct ata_queued_cmd *qc)
 {
 	struct scatterlist *sg;
 	struct ata_port *ap = qc->ap;
@@ -296,6 +296,8 @@
 					(unsigned long long)addr, len);
 		nelem++;
 	}
+
+	return nelem;
 }
 
 static void qs_qc_prep(struct ata_queued_cmd *qc)
@@ -304,6 +306,7 @@
 	u8 dflags = QS_DF_PORD, *buf = pp->pkt;
 	u8 hflags = QS_HF_DAT | QS_HF_IEN | QS_HF_VLD;
 	u64 addr;
+	unsigned int nelem;
 
 	VPRINTK("ENTER\n");
 
@@ -313,7 +316,7 @@
 		return;
 	}
 
-	qs_fill_sg(qc);
+	nelem = qs_fill_sg(qc);
 
 	if ((qc->tf.flags & ATA_TFLAG_WRITE))
 		hflags |= QS_HF_DIRO;
@@ -324,7 +327,7 @@
 	buf[ 0] = QS_HCB_HDR;
 	buf[ 1] = hflags;
 	*(__le32 *)(&buf[ 4]) = cpu_to_le32(qc->nsect * ATA_SECT_SIZE);
-	*(__le32 *)(&buf[ 8]) = cpu_to_le32(qc->n_elem);
+	*(__le32 *)(&buf[ 8]) = cpu_to_le32(nelem);
 	addr = ((u64)pp->pkt_dma) + QS_CPB_BYTES;
 	*(__le64 *)(&buf[16]) = cpu_to_le64(addr);