i915: Add support for MSI and interrupt mitigation.

Previous attempts at interrupt mitigation had been foiled by i915_wait_irq's
failure to update the sarea seqno value when the status page indicated that
the seqno had already been passed.  MSI support has been seen to cut CPU
costs by up to 40% in some workloads by avoiding other expensive interrupt
handlers for frequent graphics interrupts.

Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 53f0e5a..61ed515 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -63,7 +63,7 @@
 	    p->devnum != PCI_SLOT(dev->pdev->devfn) || p->funcnum != PCI_FUNC(dev->pdev->devfn))
 		return -EINVAL;
 
-	p->irq = dev->irq;
+	p->irq = dev->pdev->irq;
 
 	DRM_DEBUG("%d:%d:%d => IRQ %d\n", p->busnum, p->devnum, p->funcnum,
 		  p->irq);
@@ -89,7 +89,7 @@
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 		return -EINVAL;
 
-	if (dev->irq == 0)
+	if (dev->pdev->irq == 0)
 		return -EINVAL;
 
 	mutex_lock(&dev->struct_mutex);
@@ -107,7 +107,7 @@
 	dev->irq_enabled = 1;
 	mutex_unlock(&dev->struct_mutex);
 
-	DRM_DEBUG("irq=%d\n", dev->irq);
+	DRM_DEBUG("irq=%d\n", dev->pdev->irq);
 
 	if (drm_core_check_feature(dev, DRIVER_IRQ_VBL)) {
 		init_waitqueue_head(&dev->vbl_queue);
@@ -127,8 +127,12 @@
 	if (drm_core_check_feature(dev, DRIVER_IRQ_SHARED))
 		sh_flags = IRQF_SHARED;
 
-	ret = request_irq(dev->irq, dev->driver->irq_handler,
+	ret = request_irq(dev->pdev->irq, dev->driver->irq_handler,
 			  sh_flags, dev->devname, dev);
+	/* Expose the device irq number to drivers that want to export it for
+	 * whatever reason.
+	 */
+	dev->irq = dev->pdev->irq;
 	if (ret < 0) {
 		mutex_lock(&dev->struct_mutex);
 		dev->irq_enabled = 0;
@@ -164,11 +168,11 @@
 	if (!irq_enabled)
 		return -EINVAL;
 
-	DRM_DEBUG("irq=%d\n", dev->irq);
+	DRM_DEBUG("irq=%d\n", dev->pdev->irq);
 
 	dev->driver->irq_uninstall(dev);
 
-	free_irq(dev->irq, dev);
+	free_irq(dev->pdev->irq, dev);
 
 	dev->locked_tasklet_func = NULL;
 
@@ -201,7 +205,7 @@
 		if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 			return 0;
 		if (dev->if_version < DRM_IF_VERSION(1, 2) &&
-		    ctl->irq != dev->irq)
+		    ctl->irq != dev->pdev->irq)
 			return -EINVAL;
 		return drm_irq_install(dev);
 	case DRM_UNINST_HANDLER:
@@ -239,7 +243,7 @@
 	int ret = 0;
 	unsigned int flags, seq;
 
-	if ((!dev->irq) || (!dev->irq_enabled))
+	if ((!dev->pdev->irq) || (!dev->irq_enabled))
 		return -EINVAL;
 
 	if (vblwait->request.type &
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 4d56dfd..27a1f78 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -84,7 +84,7 @@
 	 * may not have been called from userspace and after dev_private
 	 * is freed, it's too late.
 	 */
-	if (dev->irq)
+	if (dev->irq_enabled)
 		drm_irq_uninstall(dev);
 
 	if (dev_priv->ring.virtual_start) {
@@ -644,7 +644,7 @@
 
 	switch (param->param) {
 	case I915_PARAM_IRQ_ACTIVE:
-		value = dev->irq ? 1 : 0;
+		value = dev->irq_enabled;
 		break;
 	case I915_PARAM_ALLOW_BATCHBUFFER:
 		value = dev_priv->allow_batchbuffer ? 1 : 0;
@@ -763,6 +763,20 @@
 	ret = drm_addmap(dev, base, size, _DRM_REGISTERS,
 			 _DRM_KERNEL | _DRM_DRIVER,
 			 &dev_priv->mmio_map);
+
+
+	/* On the 945G/GM, the chipset reports the MSI capability on the
+	 * integrated graphics even though the support isn't actually there
+	 * according to the published specs.  It doesn't appear to function
+	 * correctly in testing on 945G.
+	 * This may be a side effect of MSI having been made available for PEG
+	 * and the registers being closely associated.
+	 */
+	if (!IS_I945G(dev) && !IS_I945GM(dev))
+		pci_enable_msi(dev->pdev);
+
+	spin_lock_init(&dev_priv->user_irq_lock);
+
 	return ret;
 }
 
@@ -770,6 +784,9 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	if (dev->pdev->msi_enabled)
+		pci_disable_msi(dev->pdev);
+
 	if (dev_priv->mmio_map)
 		drm_rmmap(dev, dev_priv->mmio_map);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index afb51a3..8daf0d84 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -105,6 +105,12 @@
 	wait_queue_head_t irq_queue;
 	atomic_t irq_received;
 	atomic_t irq_emitted;
+	/** Protects user_irq_refcount and irq_mask_reg */
+	spinlock_t user_irq_lock;
+	/** Refcount for i915_user_irq_get() versus i915_user_irq_put(). */
+	int user_irq_refcount;
+	/** Cached value of IMR to avoid reads in updating the bitfield */
+	u32 irq_mask_reg;
 
 	int tex_lru_log_granularity;
 	int allow_batchbuffer;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 4a2de78..24d11ed 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -33,6 +33,31 @@
 
 #define MAX_NOPID ((u32)~0)
 
+/** These are the interrupts used by the driver */
+#define I915_INTERRUPT_ENABLE_MASK (I915_USER_INTERRUPT |		\
+				    I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT | \
+				    I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT)
+
+static inline void
+i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask)
+{
+	if ((dev_priv->irq_mask_reg & mask) != 0) {
+		dev_priv->irq_mask_reg &= ~mask;
+		I915_WRITE(IMR, dev_priv->irq_mask_reg);
+		(void) I915_READ(IMR);
+	}
+}
+
+static inline void
+i915_disable_irq(drm_i915_private_t *dev_priv, u32 mask)
+{
+	if ((dev_priv->irq_mask_reg & mask) != mask) {
+		dev_priv->irq_mask_reg |= mask;
+		I915_WRITE(IMR, dev_priv->irq_mask_reg);
+		(void) I915_READ(IMR);
+	}
+}
+
 /**
  * Emit blits for scheduled buffer swaps.
  *
@@ -229,46 +254,50 @@
 {
 	struct drm_device *dev = (struct drm_device *) arg;
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	u16 temp;
 	u32 pipea_stats, pipeb_stats;
+	u32 iir;
 
 	pipea_stats = I915_READ(PIPEASTAT);
 	pipeb_stats = I915_READ(PIPEBSTAT);
 
-	temp = I915_READ16(IIR);
+	if (dev->pdev->msi_enabled)
+		I915_WRITE(IMR, ~0);
+	iir = I915_READ(IIR);
 
-	temp &= (I915_USER_INTERRUPT |
-		 I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT |
-		 I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT);
+	DRM_DEBUG("iir=%08x\n", iir);
 
-	DRM_DEBUG("%s flag=%08x\n", __FUNCTION__, temp);
-
-	if (temp == 0)
+	if (iir == 0) {
+		if (dev->pdev->msi_enabled) {
+			I915_WRITE(IMR, dev_priv->irq_mask_reg);
+			(void) I915_READ(IMR);
+		}
 		return IRQ_NONE;
+	}
 
-	I915_WRITE16(IIR, temp);
-	(void) I915_READ16(IIR);
-	DRM_READMEMORYBARRIER();
+	I915_WRITE(IIR, iir);
+	if (dev->pdev->msi_enabled)
+		I915_WRITE(IMR, dev_priv->irq_mask_reg);
+	(void) I915_READ(IIR); /* Flush posted writes */
 
 	dev_priv->sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv);
 
-	if (temp & I915_USER_INTERRUPT)
+	if (iir & I915_USER_INTERRUPT)
 		DRM_WAKEUP(&dev_priv->irq_queue);
 
-	if (temp & (I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT |
-		    I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT)) {
+	if (iir & (I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT |
+		   I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT)) {
 		int vblank_pipe = dev_priv->vblank_pipe;
 
 		if ((vblank_pipe &
 		     (DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B))
 		    == (DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B)) {
-			if (temp & I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT)
+			if (iir & I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT)
 				atomic_inc(&dev->vbl_received);
-			if (temp & I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT)
+			if (iir & I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT)
 				atomic_inc(&dev->vbl_received2);
-		} else if (((temp & I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT) &&
+		} else if (((iir & I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT) &&
 			    (vblank_pipe & DRM_I915_VBLANK_PIPE_A)) ||
-			   ((temp & I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT) &&
+			   ((iir & I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT) &&
 			    (vblank_pipe & DRM_I915_VBLANK_PIPE_B)))
 			atomic_inc(&dev->vbl_received);
 
@@ -314,6 +343,27 @@
 	return dev_priv->counter;
 }
 
+static void i915_user_irq_get(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+
+	spin_lock(&dev_priv->user_irq_lock);
+	if (dev->irq_enabled && (++dev_priv->user_irq_refcount == 1))
+		i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
+	spin_unlock(&dev_priv->user_irq_lock);
+}
+
+static void i915_user_irq_put(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+
+	spin_lock(&dev_priv->user_irq_lock);
+	BUG_ON(dev->irq_enabled && dev_priv->user_irq_refcount <= 0);
+	if (dev->irq_enabled && (--dev_priv->user_irq_refcount == 0))
+		i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
+	spin_unlock(&dev_priv->user_irq_lock);
+}
+
 static int i915_wait_irq(struct drm_device * dev, int irq_nr)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
@@ -322,13 +372,17 @@
 	DRM_DEBUG("irq_nr=%d breadcrumb=%d\n", irq_nr,
 		  READ_BREADCRUMB(dev_priv));
 
-	if (READ_BREADCRUMB(dev_priv) >= irq_nr)
+	if (READ_BREADCRUMB(dev_priv) >= irq_nr) {
+		dev_priv->sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv);
 		return 0;
+	}
 
 	dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
 
+	i915_user_irq_get(dev);
 	DRM_WAIT_ON(ret, dev_priv->irq_queue, 3 * DRM_HZ,
 		    READ_BREADCRUMB(dev_priv) >= irq_nr);
+	i915_user_irq_put(dev);
 
 	if (ret == -EBUSY) {
 		DRM_ERROR("EBUSY -- rec: %d emitted: %d\n",
@@ -413,20 +467,6 @@
 	return i915_wait_irq(dev, irqwait->irq_seq);
 }
 
-static void i915_enable_interrupt (struct drm_device *dev)
-{
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	u16 flag;
-
-	flag = 0;
-	if (dev_priv->vblank_pipe & DRM_I915_VBLANK_PIPE_A)
-		flag |= I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT;
-	if (dev_priv->vblank_pipe & DRM_I915_VBLANK_PIPE_B)
-		flag |= I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT;
-
-	I915_WRITE16(IER, I915_USER_INTERRUPT | flag);
-}
-
 /* Set the vblank monitor pipe
  */
 int i915_vblank_pipe_set(struct drm_device *dev, void *data,
@@ -434,6 +474,7 @@
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	drm_i915_vblank_pipe_t *pipe = data;
+	u32 enable_mask = 0, disable_mask = 0;
 
 	if (!dev_priv) {
 		DRM_ERROR("called with no initialization\n");
@@ -445,9 +486,20 @@
 		return -EINVAL;
 	}
 
-	dev_priv->vblank_pipe = pipe->pipe;
+	if (pipe->pipe & DRM_I915_VBLANK_PIPE_A)
+		enable_mask |= I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT;
+	else
+		disable_mask |= I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT;
 
-	i915_enable_interrupt (dev);
+	if (pipe->pipe & DRM_I915_VBLANK_PIPE_B)
+		enable_mask |= I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT;
+	else
+		disable_mask |= I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT;
+
+	i915_enable_irq(dev_priv, enable_mask);
+	i915_disable_irq(dev_priv, disable_mask);
+
+	dev_priv->vblank_pipe = pipe->pipe;
 
 	return 0;
 }
@@ -464,7 +516,7 @@
 		return -EINVAL;
 	}
 
-	flag = I915_READ(IER);
+	flag = I915_READ(IMR);
 	pipe->pipe = 0;
 	if (flag & I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT)
 		pipe->pipe |= DRM_I915_VBLANK_PIPE_A;
@@ -586,9 +638,9 @@
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 
-	I915_WRITE16(HWSTAM, 0xfffe);
-	I915_WRITE16(IMR, 0x0);
-	I915_WRITE16(IER, 0x0);
+	I915_WRITE(HWSTAM, 0xfffe);
+	I915_WRITE(IMR, 0x0);
+	I915_WRITE(IER, 0x0);
 }
 
 void i915_driver_irq_postinstall(struct drm_device * dev)
@@ -601,7 +653,18 @@
 
 	if (!dev_priv->vblank_pipe)
 		dev_priv->vblank_pipe = DRM_I915_VBLANK_PIPE_A;
-	i915_enable_interrupt(dev);
+
+	/* Set initial unmasked IRQs to just the selected vblank pipes. */
+	dev_priv->irq_mask_reg = ~0;
+	if (dev_priv->vblank_pipe & DRM_I915_VBLANK_PIPE_A)
+		dev_priv->irq_mask_reg &= ~I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT;
+	if (dev_priv->vblank_pipe & DRM_I915_VBLANK_PIPE_B)
+		dev_priv->irq_mask_reg &= ~I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT;
+
+	I915_WRITE(IMR, dev_priv->irq_mask_reg);
+	I915_WRITE(IER, I915_INTERRUPT_ENABLE_MASK);
+	(void) I915_READ(IER);
+
 	DRM_INIT_WAITQUEUE(&dev_priv->irq_queue);
 }
 
@@ -613,10 +676,10 @@
 	if (!dev_priv)
 		return;
 
-	I915_WRITE16(HWSTAM, 0xffff);
-	I915_WRITE16(IMR, 0xffff);
-	I915_WRITE16(IER, 0x0);
+	I915_WRITE(HWSTAM, 0xffff);
+	I915_WRITE(IMR, 0xffff);
+	I915_WRITE(IER, 0x0);
 
-	temp = I915_READ16(IIR);
-	I915_WRITE16(IIR, temp);
+	temp = I915_READ(IIR);
+	I915_WRITE(IIR, temp);
 }