drm/i915: Only apply one barrier after a breadcrumb interrupt is posted If we flag the seqno as potentially stale upon receiving an interrupt, we can use that information to reduce the frequency that we apply the heavyweight coherent seqno read (i.e. if we wake up a chain of waiters). v2: Use cmpxchg to replace READ_ONCE/WRITE_ONCE for more explicit control of the ordering wrt to interrupt generation and interrupt checking in the bottom-half. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-14-git-send-email-chris@chris-wilson.co.uk

commit: 3d5564e91025bd17d93d0a23ebf8e22309652591 [log] [tgz]
author: Chris Wilson <chris@chris-wilson.co.uk> Fri Jul 01 17:23:23 2016 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> Fri Jul 01 21:00:54 2016 +0100
tree: cc05b709edc9aa5dee158b2a28631de5f8c35ae2
parent: 7ec2c73b1dbe1cd83c52e4a386b2070331c5414c [diff]
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ee04bd4..21181a6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h

@@ -4005,7 +4005,20 @@
 	 * but it is easier and safer to do it every time the waiter
 	 * is woken.
 	 */
-	if (engine->irq_seqno_barrier) {
+	if (engine->irq_seqno_barrier &&
+	    cmpxchg_relaxed(&engine->irq_posted, 1, 0)) {
+		/* The ordering of irq_posted versus applying the barrier
+		 * is crucial. The clearing of the current irq_posted must
+		 * be visible before we perform the barrier operation,
+		 * such that if a subsequent interrupt arrives, irq_posted
+		 * is reasserted and our task rewoken (which causes us to
+		 * do another __i915_request_irq_complete() immediately
+		 * and reapply the barrier). Conversely, if the clear
+		 * occurs after the barrier, then an interrupt that arrived
+		 * whilst we waited on the barrier would not trigger a
+		 * barrier on the next pass, and the read may not see the
+		 * seqno update.
+		 */
 		engine->irq_seqno_barrier(engine);
 		if (i915_gem_request_completed(req))
 			return true;

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index be7f0b9..7724bae 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c

@@ -976,6 +976,7 @@
 
 static void notify_ring(struct intel_engine_cs *engine)
 {
+	smp_store_mb(engine->irq_posted, true);
 	if (intel_engine_wakeup(engine)) {
 		trace_i915_gem_request_notify(engine);
 		engine->user_interrupts++;

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 7cdb02d..85ef6c0 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c

@@ -43,12 +43,18 @@
 
 static void irq_enable(struct intel_engine_cs *engine)
 {
+	/* Enabling the IRQ may miss the generation of the interrupt, but
+	 * we still need to force the barrier before reading the seqno,
+	 * just in case.
+	 */
+	engine->irq_posted = true;
 	WARN_ON(!engine->irq_get(engine));
 }
 
 static void irq_disable(struct intel_engine_cs *engine)
 {
 	engine->irq_put(engine);
+	engine->irq_posted = false;
 }
 
 static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
@@ -56,7 +62,6 @@
 	struct intel_engine_cs *engine =
 		container_of(b, struct intel_engine_cs, breadcrumbs);
 	struct drm_i915_private *i915 = engine->i915;
-	bool irq_posted = false;
 
 	assert_spin_locked(&b->lock);
 	if (b->rpm_wakelock)
@@ -72,10 +77,8 @@
 
 	/* No interrupts? Kick the waiter every jiffie! */
 	if (intel_irqs_enabled(i915)) {
-		if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) {
+		if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
 			irq_enable(engine);
-			irq_posted = true;
-		}
 		b->irq_enabled = true;
 	}
 
@@ -83,7 +86,7 @@
 	    test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
 		mod_timer(&b->fake_irq, jiffies + 1);
 
-	return irq_posted;
+	return engine->irq_posted;
 }
 
 static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
@@ -207,7 +210,8 @@
 			 * in case the seqno passed.
 			 */
 			__intel_breadcrumbs_enable_irq(b);
-			wake_up_process(to_wait(next)->tsk);
+			if (READ_ONCE(engine->irq_posted))
+				wake_up_process(to_wait(next)->tsk);
 		}
 
 		do {

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ad86686..fcc34837 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h

@@ -185,6 +185,7 @@
 	struct i915_ctx_workarounds wa_ctx;
 
 	unsigned irq_refcount; /* protected by dev_priv->irq_lock */
+	bool		irq_posted;
 	u32		irq_enable_mask;	/* bitmask to enable ring interrupt */
 	struct drm_i915_gem_request *trace_irq_req;
 	bool __must_check (*irq_get)(struct intel_engine_cs *ring);
commit	3d5564e91025bd17d93d0a23ebf8e22309652591	[log] [tgz]
author	Chris Wilson <chris@chris-wilson.co.uk>	Fri Jul 01 17:23:23 2016 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	Fri Jul 01 21:00:54 2016 +0100
tree	cc05b709edc9aa5dee158b2a28631de5f8c35ae2
parent	7ec2c73b1dbe1cd83c52e4a386b2070331c5414c [diff]