sata_promise: ATAPI support

This patch adds ATAPI support to the sata_promise driver.
This has been tested on both first- and second-generation
chips (20378 and 20575), and with both SATAPI and PATAPI
devices. CD-writing works.

SATAPI DMA works on second-generation chips, but on
first-generation chips SATAPI is limited to PIO due
to what appears to be HW limitations.
PATAPI DMA works on both first- and second-generation
chips, but requires the separate PATA support patch
before it can be used on TX2plus chips.

The functional changes to the driver are:
- remove ATA_FLAG_NO_ATAPI from PDC_COMMON_FLAGS
- add ->check_atapi_dma() operation to enable DMA for bulk data
  transfers but force PIO for other ATAPI commands; this filter
  is from Promise's driver and largely matches pata_pdc207x.c
- use a more restrictive ->check_atapi_dma() on first-generation
  chips to force SATAPI to always use PIO
- add handling of ATAPI protocols to pdc_qc_prep(), pdc_host_intr(),
  and pdc_qc_issue_prot(): ATAPI_DMA is handled by the driver
  while non-DMA protocols are handed over to libata generic code
- add pdc_issue_atapi_pkt_cmd() to handle the initial steps in
  issuing ATAPI DMA commands before sending the actual CDB;
  this procedure was ported from Promise's driver

Signed-off-by: Mikael Pettersson <mikpe@it.uu.se>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 6ab0574..9bd195f 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -39,6 +39,7 @@
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/device.h>
+#include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_cmnd.h>
 #include <linux/libata.h>
@@ -50,6 +51,14 @@
 
 
 enum {
+	/* register offsets */
+	PDC_FEATURE		= 0x04, /* Feature/Error reg (per port) */
+	PDC_SECTOR_COUNT	= 0x08, /* Sector count reg (per port) */
+	PDC_SECTOR_NUMBER	= 0x0C, /* Sector number reg (per port) */
+	PDC_CYLINDER_LOW	= 0x10, /* Cylinder low reg (per port) */
+	PDC_CYLINDER_HIGH	= 0x14, /* Cylinder high reg (per port) */
+	PDC_DEVICE		= 0x18, /* Device/Head reg (per port) */
+	PDC_COMMAND		= 0x1C, /* Command/status reg (per port) */
 	PDC_PKT_SUBMIT		= 0x40, /* Command packet pointer addr */
 	PDC_INT_SEQMASK		= 0x40,	/* Mask of asserted SEQ INTs */
 	PDC_FLASH_CTL		= 0x44, /* Flash control register */
@@ -71,13 +80,23 @@
 
 	PDC_HAS_PATA		= (1 << 1), /* PDC20375/20575 has PATA */
 
+	/* Sequence counter control registers bit definitions */
+	PDC_SEQCNTRL_INT_MASK	= (1 << 5), /* Sequence Interrupt Mask */
+
+	/* Feature register values */
+	PDC_FEATURE_ATAPI_PIO	= 0x00, /* ATAPI data xfer by PIO */
+	PDC_FEATURE_ATAPI_DMA	= 0x01, /* ATAPI data xfer by DMA */
+
+	/* Device/Head register values */
+	PDC_DEVICE_SATA		= 0xE0, /* Device/Head value for SATA devices */
+
 	/* PDC_CTLSTAT bit definitions */
 	PDC_DMA_ENABLE		= (1 << 7),
 	PDC_IRQ_DISABLE		= (1 << 10),
 	PDC_RESET		= (1 << 11), /* HDMA reset */
 
 	PDC_COMMON_FLAGS	= ATA_FLAG_NO_LEGACY |
-				  ATA_FLAG_MMIO | ATA_FLAG_NO_ATAPI |
+				  ATA_FLAG_MMIO |
 				  ATA_FLAG_PIO_POLLING,
 
 	/* hp->flags bits */
@@ -106,6 +125,8 @@
 static void pdc_qc_prep(struct ata_queued_cmd *qc);
 static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf);
 static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf);
+static int pdc_check_atapi_dma(struct ata_queued_cmd *qc);
+static int pdc_old_check_atapi_dma(struct ata_queued_cmd *qc);
 static void pdc_irq_clear(struct ata_port *ap);
 static unsigned int pdc_qc_issue_prot(struct ata_queued_cmd *qc);
 static void pdc_host_stop(struct ata_host *host);
@@ -140,6 +161,34 @@
 	.check_status		= ata_check_status,
 	.exec_command		= pdc_exec_command_mmio,
 	.dev_select		= ata_std_dev_select,
+	.check_atapi_dma	= pdc_check_atapi_dma,
+
+	.qc_prep		= pdc_qc_prep,
+	.qc_issue		= pdc_qc_issue_prot,
+	.freeze			= pdc_freeze,
+	.thaw			= pdc_thaw,
+	.error_handler		= pdc_error_handler,
+	.post_internal_cmd	= pdc_post_internal_cmd,
+	.data_xfer		= ata_mmio_data_xfer,
+	.irq_handler		= pdc_interrupt,
+	.irq_clear		= pdc_irq_clear,
+
+	.scr_read		= pdc_sata_scr_read,
+	.scr_write		= pdc_sata_scr_write,
+	.port_start		= pdc_port_start,
+	.port_stop		= pdc_port_stop,
+	.host_stop		= pdc_host_stop,
+};
+
+/* First-generation chips need a more restrictive ->check_atapi_dma op */
+static const struct ata_port_operations pdc_old_sata_ops = {
+	.port_disable		= ata_port_disable,
+	.tf_load		= pdc_tf_load_mmio,
+	.tf_read		= ata_tf_read,
+	.check_status		= ata_check_status,
+	.exec_command		= pdc_exec_command_mmio,
+	.dev_select		= ata_std_dev_select,
+	.check_atapi_dma	= pdc_old_check_atapi_dma,
 
 	.qc_prep		= pdc_qc_prep,
 	.qc_issue		= pdc_qc_issue_prot,
@@ -165,6 +214,7 @@
 	.check_status		= ata_check_status,
 	.exec_command		= pdc_exec_command_mmio,
 	.dev_select		= ata_std_dev_select,
+	.check_atapi_dma	= pdc_check_atapi_dma,
 
 	.phy_reset		= pdc_pata_phy_reset,
 
@@ -188,7 +238,7 @@
 		.pio_mask	= 0x1f, /* pio0-4 */
 		.mwdma_mask	= 0x07, /* mwdma0-2 */
 		.udma_mask	= 0x7f, /* udma0-6 ; FIXME */
-		.port_ops	= &pdc_sata_ops,
+		.port_ops	= &pdc_old_sata_ops,
 	},
 
 	/* board_20319 */
@@ -198,7 +248,7 @@
 		.pio_mask	= 0x1f, /* pio0-4 */
 		.mwdma_mask	= 0x07, /* mwdma0-2 */
 		.udma_mask	= 0x7f, /* udma0-6 ; FIXME */
-		.port_ops	= &pdc_sata_ops,
+		.port_ops	= &pdc_old_sata_ops,
 	},
 
 	/* board_20619 */
@@ -397,6 +447,30 @@
 	writel(val, (void __iomem *) ap->ioaddr.scr_addr + (sc_reg * 4));
 }
 
+static void pdc_atapi_dma_pkt(struct ata_taskfile *tf,
+			      dma_addr_t sg_table,
+			      unsigned int cdb_len, u8 *cdb,
+			      u8 *buf)
+{
+	u32 *buf32 = (u32 *) buf;
+
+	/* set control bits (byte 0), zero delay seq id (byte 3),
+	 * and seq id (byte 2)
+	 */
+	if (!(tf->flags & ATA_TFLAG_WRITE))
+		buf32[0] = cpu_to_le32(PDC_PKT_READ);
+	else
+		buf32[0] = 0;
+	buf32[1] = cpu_to_le32(sg_table);	/* S/G table addr */
+	buf32[2] = 0;				/* no next-packet */
+
+	/* we can represent cdb lengths 2/4/6/8/10/12/14/16 */
+	BUG_ON(cdb_len & ~0x1E);
+
+	buf[12] = (((cdb_len >> 1) & 7) << 5) | ATA_REG_DATA | PDC_LAST_REG;
+	memcpy(buf+13, cdb, cdb_len);
+}
+
 static void pdc_qc_prep(struct ata_queued_cmd *qc)
 {
 	struct pdc_port_priv *pp = qc->ap->private_data;
@@ -421,6 +495,16 @@
 		pdc_pkt_footer(&qc->tf, pp->pkt, i);
 		break;
 
+	case ATA_PROT_ATAPI:
+	case ATA_PROT_ATAPI_NODATA:
+		ata_qc_prep(qc);
+		break;
+
+	case ATA_PROT_ATAPI_DMA:
+		ata_qc_prep(qc);
+		pdc_atapi_dma_pkt(&qc->tf, qc->ap->prd_dma, qc->dev->cdb_len, qc->cdb, pp->pkt);
+		break;
+
 	default:
 		break;
 	}
@@ -534,6 +618,7 @@
 	switch (qc->tf.protocol) {
 	case ATA_PROT_DMA:
 	case ATA_PROT_NODATA:
+	case ATA_PROT_ATAPI_DMA:
 		qc->err_mask |= ac_err_mask(ata_wait_idle(ap));
 		ata_qc_complete(qc);
 		handled = 1;
@@ -630,18 +715,105 @@
 	readl((void __iomem *) ap->ioaddr.cmd_addr + PDC_PKT_SUBMIT); /* flush */
 }
 
+static unsigned int pdc_wait_for_drq(struct ata_port *ap)
+{ 
+	void __iomem *port_mmio = (void __iomem *) ap->ioaddr.cmd_addr;
+	unsigned int i;
+	unsigned int status;
+
+	/* Following pdc-ultra's WaitForDrq() we loop here until BSY
+	 * is clear and DRQ is set in altstatus. We could possibly call
+	 * ata_busy_wait() and loop until DRQ is set, but since we don't
+	 * know how much time a call to ata_busy_wait() took, we don't
+	 * know when to time out the outer loop.
+	 */
+	for(i = 0; i < 1000; ++i) {
+		status = readb(port_mmio + 0x38); /* altstatus */
+		if (status == 0xFF)
+			break;
+		if (status & ATA_BUSY)
+			;
+		else if (status & (ATA_DRQ | ATA_ERR))
+			break;
+		mdelay(1);
+	}
+	if (i >= 1000)
+		ata_port_printk(ap, KERN_WARNING, "%s timed out", __FUNCTION__);
+	return status;
+}
+
+static void pdc_issue_atapi_pkt_cmd(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	void __iomem *port_mmio = (void __iomem *) ap->ioaddr.cmd_addr;
+	void __iomem *host_mmio = ap->host->mmio_base;
+	unsigned int nbytes;
+	unsigned int tmp;
+
+	writeb(0x00, port_mmio + PDC_CTLSTAT); /* route drive INT to SEQ 0 */
+	writeb(PDC_SEQCNTRL_INT_MASK, host_mmio + 0); /* but mask SEQ 0 INT */
+
+	/* select drive */
+	if (sata_scr_valid(ap)) {
+		tmp = PDC_DEVICE_SATA;
+	} else {
+		tmp = ATA_DEVICE_OBS;
+		if (qc->dev->devno != 0)
+			tmp |= ATA_DEV1;
+	}
+	writeb(tmp, port_mmio + PDC_DEVICE);
+	ata_busy_wait(ap, ATA_BUSY, 1000);
+
+	writeb(0x00, port_mmio + PDC_SECTOR_COUNT);
+	writeb(0x00, port_mmio + PDC_SECTOR_NUMBER);
+
+	/* set feature and byte counter registers */
+	if (qc->tf.protocol != ATA_PROT_ATAPI_DMA) {
+		tmp = PDC_FEATURE_ATAPI_PIO;
+		/* set byte counter register to real transfer byte count */
+		nbytes = qc->nbytes;
+		if (!nbytes)
+			nbytes = qc->nsect << 9;
+		if (nbytes > 0xffff)
+			nbytes = 0xffff;
+	} else {
+		tmp = PDC_FEATURE_ATAPI_DMA;
+		/* set byte counter register to 0 */
+		nbytes = 0;
+	}
+	writeb(tmp, port_mmio + PDC_FEATURE);
+	writeb(nbytes & 0xFF, port_mmio + PDC_CYLINDER_LOW);
+	writeb((nbytes >> 8) & 0xFF, port_mmio + PDC_CYLINDER_HIGH);
+
+	/* send ATAPI packet command 0xA0 */
+	writeb(ATA_CMD_PACKET, port_mmio + PDC_COMMAND);
+
+	/*
+	 * At this point in the issuing of a packet command, the Promise
+	 * driver busy-waits for INT (CTLSTAT bit 27) if it detected
+	 * (at port init time) that the device interrupts with assertion
+	 * of DRQ after receiving a packet command.
+	 *
+	 * XXX: Do we need to handle this case as well? Does libata detect
+	 * this case for us, or do we have to do our own per-port init?
+	 */
+
+	pdc_wait_for_drq(ap);
+
+	/* now the device only waits for the CDB */
+}
+
 static unsigned int pdc_qc_issue_prot(struct ata_queued_cmd *qc)
 {
 	switch (qc->tf.protocol) {
+	case ATA_PROT_ATAPI_DMA:
+		pdc_issue_atapi_pkt_cmd(qc);
+		/*FALLTHROUGH*/
 	case ATA_PROT_DMA:
 	case ATA_PROT_NODATA:
 		pdc_packet_start(qc);
 		return 0;
 
-	case ATA_PROT_ATAPI_DMA:
-		BUG();
-		break;
-
 	default:
 		break;
 	}
@@ -664,6 +836,42 @@
 	ata_exec_command(ap, tf);
 }
 
+static int pdc_check_atapi_dma(struct ata_queued_cmd *qc)
+{
+	u8 *scsicmd = qc->scsicmd->cmnd;
+	int pio = 1; /* atapi dma off by default */
+
+	/* Whitelist commands that may use DMA. */
+	switch (scsicmd[0]) {
+	case WRITE_12:
+	case WRITE_10:
+	case WRITE_6:
+	case READ_12:
+	case READ_10:
+	case READ_6:
+	case 0xad: /* READ_DVD_STRUCTURE */
+	case 0xbe: /* READ_CD */
+		pio = 0;
+	}
+	/* -45150 (FFFF4FA2) to -1 (FFFFFFFF) shall use PIO mode */
+	if (scsicmd[0] == WRITE_10) {
+		unsigned int lba;
+		lba = (scsicmd[2] << 24) | (scsicmd[3] << 16) | (scsicmd[4] << 8) | scsicmd[5];
+		if (lba >= 0xFFFF4FA2)
+			pio = 1;
+	}
+	return pio;
+}
+
+static int pdc_old_check_atapi_dma(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+
+	/* First generation chips cannot use ATAPI DMA on SATA ports */
+	if (sata_scr_valid(ap))
+		return 1;
+	return pdc_check_atapi_dma(qc);
+}
 
 static void pdc_ata_setup_port(struct ata_ioports *port, unsigned long base)
 {