drm/i915/guc: use for_each_engine_id() where appropriate Now that host structures are indexed by host engine-id rather than guc_id, we can usefully convert some for_each_engine() loops to use for_each_engine_id() and avoid multiple dereferences of engine->id. Also a few related tweaks to cache structure members locally wherever they're used more than once or twice, hopefully eliminating memory references. Signed-off-by: Dave Gordon <david.s.gordon@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

commit: c18468c4b2dbf08854dba4f041c8c1845bf0cc9c [log] [tgz]
author: Dave Gordon <david.s.gordon@intel.com> Tue Aug 09 15:19:22 2016 +0100
committer: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Wed Aug 10 10:40:05 2016 +0100
tree: 5702381f0e41cb4af0e8c88bae29ea2c5a8ff5d3
parent: e02757d91f7481093742bb32385567499a59354c [diff]
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0627e17..b84fa93 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c

@@ -2544,6 +2544,7 @@
 				 struct i915_guc_client *client)
 {
 	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	uint64_t tot = 0;
 
 	seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
@@ -2558,11 +2559,11 @@
 	seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
 	seq_printf(m, "\tLast submission result: %d\n", client->retcode);
 
-	for_each_engine(engine, dev_priv) {
+	for_each_engine_id(engine, dev_priv, id) {
+		u64 submissions = client->submissions[id];
+		tot += submissions;
 		seq_printf(m, "\tSubmissions: %llu %s\n",
-				client->submissions[engine->id],
-				engine->name);
-		tot += client->submissions[engine->id];
+				submissions, engine->name);
 	}
 	seq_printf(m, "\tTotal: %llu\n", tot);
 }
@@ -2575,6 +2576,7 @@
 	struct intel_guc guc;
 	struct i915_guc_client client = {};
 	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	u64 total = 0;
 
 	if (!HAS_GUC_SCHED(dev_priv))
@@ -2601,11 +2603,11 @@
 	seq_printf(m, "GuC last action error code: %d\n", guc.action_err);
 
 	seq_printf(m, "\nGuC submissions:\n");
-	for_each_engine(engine, dev_priv) {
+	for_each_engine_id(engine, dev_priv, id) {
+		u64 submissions = guc.submissions[id];
+		total += submissions;
 		seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
-			engine->name, guc.submissions[engine->id],
-			guc.last_seqno[engine->id]);
-		total += guc.submissions[engine->id];
+			engine->name, submissions, guc.last_seqno[id]);
 	}
 	seq_printf(m, "\t%s: %llu\n", "Total", total);
 

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 405e445..6831321 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c

@@ -342,7 +342,8 @@
 
 	for_each_engine_masked(engine, dev_priv, client->engines) {
 		struct intel_context *ce = &ctx->engine[engine->id];
-		struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id];
+		uint32_t guc_engine_id = engine->guc_id;
+		struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
 		struct drm_i915_gem_object *obj;
 
 		/* TODO: We have a design issue to be solved here. Only when we
@@ -361,7 +362,7 @@
 		gfx_addr = i915_gem_obj_ggtt_offset(ce->state);
 		lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
 		lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
-				(engine->guc_id << GUC_ELC_ENGINE_OFFSET);
+				(guc_engine_id << GUC_ELC_ENGINE_OFFSET);
 
 		obj = ce->ring->obj;
 		gfx_addr = i915_gem_obj_ggtt_offset(obj);
@@ -371,7 +372,7 @@
 		lrc->ring_next_free_location = gfx_addr;
 		lrc->ring_current_tail_pointer_value = 0;
 
-		desc.engines_used |= (1 << engine->guc_id);
+		desc.engines_used |= (1 << guc_engine_id);
 	}
 
 	DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
@@ -459,6 +460,7 @@
 	/* wqi_len is in DWords, and does not include the one-word header */
 	const size_t wqi_size = sizeof(struct guc_wq_item);
 	const u32 wqi_len = wqi_size/sizeof(u32) - 1;
+	struct intel_engine_cs *engine = rq->engine;
 	struct guc_process_desc *desc;
 	struct guc_wq_item *wqi;
 	void *base;
@@ -500,12 +502,11 @@
 	/* Now fill in the 4-word work queue item */
 	wqi->header = WQ_TYPE_INORDER |
 			(wqi_len << WQ_LEN_SHIFT) |
-			(rq->engine->guc_id << WQ_TARGET_SHIFT) |
+			(engine->guc_id << WQ_TARGET_SHIFT) |
 			WQ_NO_WCFLUSH_WAIT;
 
 	/* The GuC wants only the low-order word of the context descriptor */
-	wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx,
-							     rq->engine);
+	wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
 
 	wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
 	wqi->fence_id = rq->fence.seqno;
commit	c18468c4b2dbf08854dba4f041c8c1845bf0cc9c	[log] [tgz]
author	Dave Gordon <david.s.gordon@intel.com>	Tue Aug 09 15:19:22 2016 +0100
committer	Tvrtko Ursulin <tvrtko.ursulin@intel.com>	Wed Aug 10 10:40:05 2016 +0100
tree	5702381f0e41cb4af0e8c88bae29ea2c5a8ff5d3
parent	e02757d91f7481093742bb32385567499a59354c [diff]