drm/i915: Remove nested work in gpu error handling Now when we declare gpu errors only through our own dedicated hangcheck workqueue there is no need to have a separate workqueue for handling the resetting and waking up the clients as the deadlock concerns are no more. The only exception is i915_debugfs::i915_set_wedged, which triggers error handling through process context. However as this is only used through test harness it is responsibility for test harness not to introduce hangs through both debug interface and through hangcheck mechanism at the same time. Remove gpu_error.work and let the hangcheck work do the tasks it used to. v2: Add a big warning sign into i915_debugfs::i915_set_wedged (Chris) Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>

commit: b8d24a06568368076ebd5a858a011699a97bfa42 [log] [tgz]
author: Mika Kuoppala <mika.kuoppala@linux.intel.com> Wed Jan 28 17:03:14 2015 +0200
committer: Daniel Vetter <daniel.vetter@ffwll.ch> Thu Jan 29 18:03:07 2015 +0100
tree: cb627b2237cc410389311838260072429541337c
parent: 397f6fa6b1d5150add9043d7ac60e32307eb7c6b [diff] [blame]
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 23bfe22..53c5f9e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c

@@ -2421,19 +2421,15 @@
 }
 
 /**
- * i915_error_work_func - do process context error handling work
- * @work: work struct
+ * i915_reset_and_wakeup - do process context error handling work
  *
  * Fire an error uevent so userspace can see that a hang or error
  * was detected.
  */
-static void i915_error_work_func(struct work_struct *work)
+static void i915_reset_and_wakeup(struct drm_device *dev)
 {
-	struct i915_gpu_error *error = container_of(work, struct i915_gpu_error,
-						    work);
-	struct drm_i915_private *dev_priv =
-		container_of(error, struct drm_i915_private, gpu_error);
-	struct drm_device *dev = dev_priv->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_gpu_error *error = &dev_priv->gpu_error;
 	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
 	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
 	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
@@ -2600,10 +2596,10 @@
 }
 
 /**
- * i915_handle_error - handle an error interrupt
+ * i915_handle_error - handle a gpu error
  * @dev: drm device
  *
- * Do some basic checking of regsiter state at error interrupt time and
+ * Do some basic checking of regsiter state at error time and
  * dump it to the syslog.  Also call i915_capture_error_state() to make
  * sure we get a record and make it available in debugfs.  Fire a uevent
  * so userspace knows something bad happened (should trigger collection
@@ -2616,6 +2612,9 @@
 	va_list args;
 	char error_msg[80];
 
+	if (WARN_ON(mutex_is_locked(&dev_priv->dev->struct_mutex)))
+		return;
+
 	va_start(args, fmt);
 	vscnprintf(error_msg, sizeof(error_msg), fmt, args);
 	va_end(args);
@@ -2628,9 +2627,9 @@
 				&dev_priv->gpu_error.reset_counter);
 
 		/*
-		 * Wakeup waiting processes so that the reset work function
-		 * i915_error_work_func doesn't deadlock trying to grab various
-		 * locks. By bumping the reset counter first, the woken
+		 * Wakeup waiting processes so that the reset function
+		 * i915_reset_and_wakeup doesn't deadlock trying to grab
+		 * various locks. By bumping the reset counter first, the woken
 		 * processes will see a reset in progress and back off,
 		 * releasing their locks and then wait for the reset completion.
 		 * We must do this for _all_ gpu waiters that might hold locks
@@ -2643,13 +2642,7 @@
 		i915_error_wake_up(dev_priv, false);
 	}
 
-	/*
-	 * Our reset work can grab modeset locks (since it needs to reset the
-	 * state of outstanding pagelips). Hence it must not be run on our own
-	 * dev-priv->wq work queue for otherwise the flush_work in the pageflip
-	 * code will deadlock.
-	 */
-	schedule_work(&dev_priv->gpu_error.work);
+	i915_reset_and_wakeup(dev);
 }
 
 /* Called from drm generic code, passed 'crtc' which
@@ -4345,7 +4338,6 @@
 
 	INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func);
 	INIT_WORK(&dev_priv->dig_port_work, i915_digport_work_func);
-	INIT_WORK(&dev_priv->gpu_error.work, i915_error_work_func);
 	INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
 	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
commit	b8d24a06568368076ebd5a858a011699a97bfa42	[log] [tgz]
author	Mika Kuoppala <mika.kuoppala@linux.intel.com>	Wed Jan 28 17:03:14 2015 +0200
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	Thu Jan 29 18:03:07 2015 +0100
tree	cb627b2237cc410389311838260072429541337c
parent	397f6fa6b1d5150add9043d7ac60e32307eb7c6b [diff] [blame]