drm/i915: Make for_each_engine_masked() more compact and quicker
Rather than walk the full array of engines checking whether each is in
the mask in turn, we can use the mask to jump to the right engines. This
should quicker for a sparse array of engines or mask, whilst generating
smaller code:
text data bss dec hex filename
1251010 4579 800 1256389 132bc5 drivers/gpu/drm/i915/i915.ko
1250530 4579 800 1255909 1329e5 drivers/gpu/drm/i915/i915.ko
The downside is that we have to pass in a temporary, alas no C99
iterators yet.
[P.S. Joonas doesn't like having to pass extra temporaries into the
macro, and even less that I called them tmp. As yet, we haven't found a
macro that avoids passing in a temporary that is smaller. We probably
will get C99 iterators first!]
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160827075401.16470-2-chris@chris-wilson.co.uk
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9fdaf23..77a5478 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2093,13 +2093,16 @@
for_each_if (((id__) = (engine__)->id, \
intel_engine_initialized(engine__)))
+#define __mask_next_bit(mask) ({ \
+ int __idx = ffs(mask) - 1; \
+ mask &= ~BIT(__idx); \
+ __idx; \
+})
+
/* Iterator over subset of engines selected by mask */
-#define for_each_engine_masked(engine__, dev_priv__, mask__) \
- for ((engine__) = &(dev_priv__)->engine[0]; \
- (engine__) < &(dev_priv__)->engine[I915_NUM_ENGINES]; \
- (engine__)++) \
- for_each_if (((mask__) & intel_engine_flag(engine__)) && \
- intel_engine_initialized(engine__))
+#define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \
+ for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask; \
+ tmp__ ? (engine__ = &(dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : 0; )
enum hdmi_force_audio {
HDMI_AUDIO_OFF_DVI = -2, /* no aux data for HDMI-DVI converter */
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5e55270..9cc08a1 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -766,6 +766,7 @@
void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
+ unsigned int tmp;
lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -774,7 +775,7 @@
GEM_BUG_ON(!dev_priv->gt.awake);
- for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines)
+ for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines, tmp)
if (engine_retire_requests(engine))
dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index e436941..32699a7 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -330,6 +330,7 @@
struct i915_gem_context *ctx = client->owner;
struct guc_context_desc desc;
struct sg_table *sg;
+ unsigned int tmp;
u32 gfx_addr;
memset(&desc, 0, sizeof(desc));
@@ -339,7 +340,7 @@
desc.priority = client->priority;
desc.db_id = client->doorbell_id;
- for_each_engine_masked(engine, dev_priv, client->engines) {
+ for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
struct intel_context *ce = &ctx->engine[engine->id];
uint32_t guc_engine_id = engine->guc_id;
struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 7610eca..82358d4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3169,6 +3169,7 @@
if (hung) {
char msg[80];
+ unsigned int tmp;
int len;
/* If some rings hung but others were still busy, only
@@ -3178,7 +3179,7 @@
hung &= ~stuck;
len = scnprintf(msg, sizeof(msg),
"%s on ", stuck == hung ? "No progress" : "Hang");
- for_each_engine_masked(engine, dev_priv, hung)
+ for_each_engine_masked(engine, dev_priv, hung, tmp)
len += scnprintf(msg + len, sizeof(msg) - len,
"%s, ", engine->name);
msg[len-2] = '\0';
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 43f8339..e9f68cd 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1597,8 +1597,10 @@
if (engine_mask == ALL_ENGINES) {
hw_mask = GEN6_GRDOM_FULL;
} else {
+ unsigned int tmp;
+
hw_mask = 0;
- for_each_engine_masked(engine, dev_priv, engine_mask)
+ for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
hw_mask |= hw_engine_mask[engine->id];
}
@@ -1714,15 +1716,16 @@
unsigned engine_mask)
{
struct intel_engine_cs *engine;
+ unsigned int tmp;
- for_each_engine_masked(engine, dev_priv, engine_mask)
+ for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
if (gen8_request_engine_reset(engine))
goto not_ready;
return gen6_reset_engines(dev_priv, engine_mask);
not_ready:
- for_each_engine_masked(engine, dev_priv, engine_mask)
+ for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
gen8_unrequest_engine_reset(engine);
return -EIO;