drm/i915: Remove obsolete engine->gpu_caches_dirty

Space for flushing the GPU cache prior to completing the request is
preallocated and so cannot fail - the GPU caches will always be flushed
along with the completed request. This means we no longer have to track
whether the GPU cache is dirty between batches like we had to with the
outstanding_lazy_seqno.

With the removal of the duplication in the per-backend entry points for
emitting the obsolete lazy flush, we can then further unify the
engine->emit_flush.

v2: Expand a bit on the legacy of gpu_caches_dirty

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-18-git-send-email-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-7-git-send-email-chris@chris-wilson.co.uk
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 3336a5f..beece8f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -568,7 +568,7 @@
 	 * itlb_before_ctx_switch.
 	 */
 	if (IS_GEN6(dev_priv)) {
-		ret = engine->flush(req, I915_GEM_GPU_DOMAINS, 0);
+		ret = engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d0ef675..35c4c595 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -998,10 +998,8 @@
 	if (flush_domains & I915_GEM_DOMAIN_GTT)
 		wmb();
 
-	/* Unconditionally invalidate gpu caches and ensure that we do flush
-	 * any residual writes from the previous batch.
-	 */
-	return intel_engine_invalidate_all_caches(req);
+	/* Unconditionally invalidate GPU caches and TLBs. */
+	return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
 }
 
 static bool
@@ -1163,9 +1161,6 @@
 static void
 i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
 {
-	/* Unconditionally force add_request to emit a full flush. */
-	params->engine->gpu_caches_dirty = true;
-
 	/* Add a breadcrumb for the completion of the batch buffer */
 	__i915_add_request(params->request, params->batch_obj, true);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ebfa040..39fa9eb 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1666,7 +1666,8 @@
 	int ret;
 
 	/* NB: TLBs must be flushed and invalidated before a switch */
-	ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	ret = engine->emit_flush(req,
+				 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
@@ -1693,7 +1694,8 @@
 	int ret;
 
 	/* NB: TLBs must be flushed and invalidated before a switch */
-	ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	ret = engine->emit_flush(req,
+				 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
@@ -1711,8 +1713,9 @@
 
 	/* XXX: RCS is the only one to auto invalidate the TLBs? */
 	if (engine->id != RCS) {
-		ret = engine->flush(req,
-				    I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+		ret = engine->emit_flush(req,
+					 I915_GEM_GPU_DOMAINS,
+					 I915_GEM_GPU_DOMAINS);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 942b5b1..7e32060 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -451,12 +451,10 @@
 	 * what.
 	 */
 	if (flush_caches) {
-		if (i915.enable_execlists)
-			ret = logical_ring_flush_all_caches(request);
-		else
-			ret = intel_engine_flush_all_caches(request);
+		ret = engine->emit_flush(request, 0, I915_GEM_GPU_DOMAINS);
+
 		/* Not allowed to fail! */
-		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
+		WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
 	}
 
 	trace_i915_gem_request_add(request);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 86b8f41..e8d971e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -642,24 +642,6 @@
 	spin_unlock_bh(&engine->execlist_lock);
 }
 
-static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
-{
-	struct intel_engine_cs *engine = req->engine;
-	uint32_t flush_domains;
-	int ret;
-
-	flush_domains = 0;
-	if (engine->gpu_caches_dirty)
-		flush_domains = I915_GEM_GPU_DOMAINS;
-
-	ret = engine->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
-	if (ret)
-		return ret;
-
-	engine->gpu_caches_dirty = false;
-	return 0;
-}
-
 static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
 				 struct list_head *vmas)
 {
@@ -690,7 +672,7 @@
 	/* Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
 	 */
-	return logical_ring_invalidate_all_caches(req);
+	return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
 }
 
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
@@ -930,22 +912,6 @@
 	I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
-int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
-{
-	struct intel_engine_cs *engine = req->engine;
-	int ret;
-
-	if (!engine->gpu_caches_dirty)
-		return 0;
-
-	ret = engine->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);
-	if (ret)
-		return ret;
-
-	engine->gpu_caches_dirty = false;
-	return 0;
-}
-
 static int intel_lr_context_pin(struct i915_gem_context *ctx,
 				struct intel_engine_cs *engine)
 {
@@ -1026,15 +992,15 @@
 static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
 {
 	int ret, i;
-	struct intel_engine_cs *engine = req->engine;
 	struct intel_ring *ring = req->ring;
 	struct i915_workarounds *w = &req->i915->workarounds;
 
 	if (w->count == 0)
 		return 0;
 
-	engine->gpu_caches_dirty = true;
-	ret = logical_ring_flush_all_caches(req);
+	ret = req->engine->emit_flush(req,
+				      I915_GEM_GPU_DOMAINS,
+				      I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
@@ -1051,8 +1017,9 @@
 
 	intel_ring_advance(ring);
 
-	engine->gpu_caches_dirty = true;
-	ret = logical_ring_flush_all_caches(req);
+	ret = req->engine->emit_flush(req,
+				      I915_GEM_GPU_DOMAINS,
+				      I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index d26fb44..33e0193 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -72,8 +72,6 @@
 
 int intel_engines_init(struct drm_device *dev);
 
-int logical_ring_flush_all_caches(struct drm_i915_gem_request *req);
-
 /* Logical Ring Contexts */
 
 /* One extra page is added before LRC for GuC as shared data */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e7a7f67..9e4b496 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -688,8 +688,9 @@
 	if (w->count == 0)
 		return 0;
 
-	req->engine->gpu_caches_dirty = true;
-	ret = intel_engine_flush_all_caches(req);
+	ret = req->engine->emit_flush(req,
+				      I915_GEM_GPU_DOMAINS,
+				      I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
@@ -706,8 +707,9 @@
 
 	intel_ring_advance(ring);
 
-	req->engine->gpu_caches_dirty = true;
-	ret = intel_engine_flush_all_caches(req);
+	ret = req->engine->emit_flush(req,
+				      I915_GEM_GPU_DOMAINS,
+				      I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
@@ -2860,21 +2862,21 @@
 	if (INTEL_GEN(dev_priv) >= 8) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->add_request = gen8_render_add_request;
-		engine->flush = gen8_render_ring_flush;
+		engine->emit_flush = gen8_render_ring_flush;
 		if (i915.semaphores)
 			engine->semaphore.signal = gen8_rcs_signal;
 	} else if (INTEL_GEN(dev_priv) >= 6) {
 		engine->init_context = intel_rcs_ctx_init;
-		engine->flush = gen7_render_ring_flush;
+		engine->emit_flush = gen7_render_ring_flush;
 		if (IS_GEN6(dev_priv))
-			engine->flush = gen6_render_ring_flush;
+			engine->emit_flush = gen6_render_ring_flush;
 	} else if (IS_GEN5(dev_priv)) {
-		engine->flush = gen4_render_ring_flush;
+		engine->emit_flush = gen4_render_ring_flush;
 	} else {
 		if (INTEL_GEN(dev_priv) < 4)
-			engine->flush = gen2_render_ring_flush;
+			engine->emit_flush = gen2_render_ring_flush;
 		else
-			engine->flush = gen4_render_ring_flush;
+			engine->emit_flush = gen4_render_ring_flush;
 		engine->irq_enable_mask = I915_USER_INTERRUPT;
 	}
 
@@ -2911,12 +2913,12 @@
 		/* gen6 bsd needs a special wa for tail updates */
 		if (IS_GEN6(dev_priv))
 			engine->write_tail = gen6_bsd_ring_write_tail;
-		engine->flush = gen6_bsd_ring_flush;
+		engine->emit_flush = gen6_bsd_ring_flush;
 		if (INTEL_GEN(dev_priv) < 8)
 			engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 	} else {
 		engine->mmio_base = BSD_RING_BASE;
-		engine->flush = bsd_ring_flush;
+		engine->emit_flush = bsd_ring_flush;
 		if (IS_GEN5(dev_priv))
 			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
 		else
@@ -2935,7 +2937,7 @@
 
 	intel_ring_default_vfuncs(dev_priv, engine);
 
-	engine->flush = gen6_bsd_ring_flush;
+	engine->emit_flush = gen6_bsd_ring_flush;
 
 	return intel_init_ring_buffer(engine);
 }
@@ -2946,7 +2948,7 @@
 
 	intel_ring_default_vfuncs(dev_priv, engine);
 
-	engine->flush = gen6_ring_flush;
+	engine->emit_flush = gen6_ring_flush;
 	if (INTEL_GEN(dev_priv) < 8)
 		engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 
@@ -2959,7 +2961,7 @@
 
 	intel_ring_default_vfuncs(dev_priv, engine);
 
-	engine->flush = gen6_ring_flush;
+	engine->emit_flush = gen6_ring_flush;
 
 	if (INTEL_GEN(dev_priv) < 8) {
 		engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
@@ -2970,46 +2972,6 @@
 	return intel_init_ring_buffer(engine);
 }
 
-int
-intel_engine_flush_all_caches(struct drm_i915_gem_request *req)
-{
-	struct intel_engine_cs *engine = req->engine;
-	int ret;
-
-	if (!engine->gpu_caches_dirty)
-		return 0;
-
-	ret = engine->flush(req, 0, I915_GEM_GPU_DOMAINS);
-	if (ret)
-		return ret;
-
-	trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
-
-	engine->gpu_caches_dirty = false;
-	return 0;
-}
-
-int
-intel_engine_invalidate_all_caches(struct drm_i915_gem_request *req)
-{
-	struct intel_engine_cs *engine = req->engine;
-	uint32_t flush_domains;
-	int ret;
-
-	flush_domains = 0;
-	if (engine->gpu_caches_dirty)
-		flush_domains = I915_GEM_GPU_DOMAINS;
-
-	ret = engine->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
-	if (ret)
-		return ret;
-
-	trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
-
-	engine->gpu_caches_dirty = false;
-	return 0;
-}
-
 void intel_engine_stop(struct intel_engine_cs *engine)
 {
 	int ret;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ba54ffc..00723401 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -206,9 +206,6 @@
 
 	void		(*write_tail)(struct intel_engine_cs *engine,
 				      u32 value);
-	int __must_check (*flush)(struct drm_i915_gem_request *req,
-				  u32	invalidate_domains,
-				  u32	flush_domains);
 	int		(*add_request)(struct drm_i915_gem_request *req);
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
@@ -325,8 +322,6 @@
 	 */
 	u32 last_submitted_seqno;
 
-	bool gpu_caches_dirty;
-
 	struct i915_gem_context *last_context;
 
 	struct intel_engine_hangcheck hangcheck;
@@ -474,8 +469,6 @@
 
 int __must_check intel_engine_idle(struct intel_engine_cs *engine);
 void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
-int intel_engine_flush_all_caches(struct drm_i915_gem_request *req);
-int intel_engine_invalidate_all_caches(struct drm_i915_gem_request *req);
 
 int intel_init_pipe_control(struct intel_engine_cs *engine, int size);
 void intel_fini_pipe_control(struct intel_engine_cs *engine);