drm/i915: Make dev_priv->mm.wedged an atomic_t

There is a very real possibility that multiple CPUs will notice that the
GPU is wedged. This introduces all sorts of potential race conditions.
Make the wedged flag atomic to mitigate this risk.

Signed-off-by: Ben Gamari <bgamari.foss@gmail.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 42142f2..bcc1be2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -450,7 +450,7 @@
 		 * It prevents command submission from occuring and makes
 		 * every pending request fail
 		 */
-		int wedged;
+		atomic_t wedged;
 
 		/** Bit 6 swizzling required for X tiling */
 		uint32_t bit_6_swizzle_x;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 579b3b04..f0f6f668 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1712,7 +1712,7 @@
 		retiring_seqno = request->seqno;
 
 		if (i915_seqno_passed(seqno, retiring_seqno) ||
-		    dev_priv->mm.wedged) {
+		    atomic_read(&dev_priv->mm.wedged)) {
 			i915_gem_retire_request(dev, request);
 
 			list_del(&request->list);
@@ -1754,7 +1754,7 @@
 
 	BUG_ON(seqno == 0);
 
-	if (dev_priv->mm.wedged)
+	if (atomic_read(&dev_priv->mm.wedged))
 		return -EIO;
 
 	if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
@@ -1774,11 +1774,11 @@
 		ret = wait_event_interruptible(dev_priv->irq_queue,
 					       i915_seqno_passed(i915_get_gem_seqno(dev),
 								 seqno) ||
-					       dev_priv->mm.wedged);
+					       atomic_read(&dev_priv->mm.wedged));
 		i915_user_irq_put(dev);
 		dev_priv->mm.waiting_gem_seqno = 0;
 	}
-	if (dev_priv->mm.wedged)
+	if (atomic_read(&dev_priv->mm.wedged))
 		ret = -EIO;
 
 	if (ret && ret != -ERESTARTSYS)
@@ -3359,7 +3359,7 @@
 
 	i915_verify_inactive(dev, __FILE__, __LINE__);
 
-	if (dev_priv->mm.wedged) {
+	if (atomic_read(&dev_priv->mm.wedged)) {
 		DRM_ERROR("Execbuf while wedged\n");
 		mutex_unlock(&dev->struct_mutex);
 		ret = -EIO;
@@ -3929,7 +3929,7 @@
 		if (last_seqno == cur_seqno) {
 			if (stuck++ > 100) {
 				DRM_ERROR("hardware wedged\n");
-				dev_priv->mm.wedged = 1;
+				atomic_set(&dev_priv->mm.wedged, 1);
 				DRM_WAKEUP(&dev_priv->irq_queue);
 				break;
 			}
@@ -3942,7 +3942,7 @@
 	i915_gem_retire_requests(dev);
 
 	spin_lock(&dev_priv->mm.active_list_lock);
-	if (!dev_priv->mm.wedged) {
+	if (!atomic_read(&dev_priv->mm.wedged)) {
 		/* Active and flushing should now be empty as we've
 		 * waited for a sequence higher than any pending execbuffer
 		 */
@@ -4204,9 +4204,9 @@
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		return 0;
 
-	if (dev_priv->mm.wedged) {
+	if (atomic_read(&dev_priv->mm.wedged)) {
 		DRM_ERROR("Reenabling wedged hardware, good luck\n");
-		dev_priv->mm.wedged = 0;
+		atomic_set(&dev_priv->mm.wedged, 0);
 	}
 
 	mutex_lock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 8f52766..13e664d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -309,12 +309,12 @@
 	DRM_DEBUG("generating error event\n");
 	kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event);
 
-	if (dev_priv->mm.wedged) {
+	if (atomic_read(&dev_priv->mm.wedged)) {
 		if (IS_I965G(dev)) {
 			DRM_DEBUG("resetting chip\n");
 			kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event);
 			if (!i965_reset(dev, GDRST_RENDER)) {
-				dev_priv->mm.wedged = 0;
+				atomic_set(&dev_priv->mm.wedged, 0);
 				kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event);
 			}
 		} else {
@@ -385,7 +385,7 @@
  * so userspace knows something bad happened (should trigger collection
  * of a ring dump etc.).
  */
-static void i915_handle_error(struct drm_device *dev)
+static void i915_handle_error(struct drm_device *dev, bool wedged)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 eir = I915_READ(EIR);
@@ -495,7 +495,9 @@
 		I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
 	}
 
-	if (dev_priv->mm.wedged) {
+	if (wedged) {
+		atomic_set(&dev_priv->mm.wedged, 1);
+
 		/*
 		 * Wakeup waiting processes so they don't hang
 		 */
@@ -548,7 +550,7 @@
 		pipeb_stats = I915_READ(PIPEBSTAT);
 
 		if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
-			i915_handle_error(dev);
+			i915_handle_error(dev, false);
 
 		/*
 		 * Clear the PIPE(A|B)STAT regs before the IIR
@@ -934,8 +936,7 @@
 
 	if (dev_priv->last_acthd == acthd && dev_priv->hangcheck_count > 0) {
 		DRM_ERROR("Hangcheck timer elapsed... GPU hung\n");
-		dev_priv->mm.wedged = true; /* Hopefully this is atomic */
-		i915_handle_error(dev);
+		i915_handle_error(dev, true);
 		return;
 	}