drm/i915: Rearrange switch_context to load the aliasing ppgtt on first use
The code to switch_mm() is already handled by i915_switch_context(), the
only difference required to setup the aliasing ppgtt is that we need to
emit te switch_mm() on the first context, i.e. when transitioning from
engine->last_context == NULL. This allows us to defer the
initialisation of the GPU from early device initialisation to first use,
which should marginally speed up both. The caveat is that we then defer
the context initialisation until first use - i.e. we cannot assume that
the GPU engines are initialised. For example, this means that power
contexts for rc6 (Ironlake) need to explicitly loaded, as they are.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1461833819-3991-11-git-send-email-chris@chris-wilson.co.uk
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a403239..444a8ea 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3296,7 +3296,6 @@
void i915_gem_context_fini(struct drm_device *dev);
void i915_gem_context_reset(struct drm_device *dev);
int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
-int i915_gem_context_enable(struct drm_i915_gem_request *req);
void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
int i915_switch_context(struct drm_i915_gem_request *req);
struct intel_context *
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 762ddd2..2dfdfe5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4911,36 +4911,6 @@
* on re-initialisation
*/
ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
- if (ret)
- goto out;
-
- /* Now it is safe to go back round and do everything else: */
- for_each_engine(engine, dev_priv) {
- struct drm_i915_gem_request *req;
-
- req = i915_gem_request_alloc(engine, NULL);
- if (IS_ERR(req)) {
- ret = PTR_ERR(req);
- break;
- }
-
- ret = i915_ppgtt_init_ring(req);
- if (ret)
- goto err_request;
-
- ret = i915_gem_context_enable(req);
- if (ret)
- goto err_request;
-
-err_request:
- i915_add_request_no_flush(req);
- if (ret) {
- DRM_ERROR("Failed to enable %s, error=%d\n",
- engine->name, ret);
- i915_gem_cleanup_engines(dev);
- break;
- }
- }
out:
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index fc3b634..2fea74b 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -431,27 +431,6 @@
dev_priv->kernel_context = NULL;
}
-int i915_gem_context_enable(struct drm_i915_gem_request *req)
-{
- struct intel_engine_cs *engine = req->engine;
- int ret;
-
- if (i915.enable_execlists) {
- if (engine->init_context == NULL)
- return 0;
-
- ret = engine->init_context(req);
- } else
- ret = i915_switch_context(req);
-
- if (ret) {
- DRM_ERROR("ring init context: %d\n", ret);
- return ret;
- }
-
- return 0;
-}
-
static int context_idr_cleanup(int id, void *p, void *data)
{
struct intel_context *ctx = p;
@@ -630,7 +609,8 @@
return 0;
}
-static inline bool skip_rcs_switch(struct intel_engine_cs *engine,
+static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
+ struct intel_engine_cs *engine,
struct intel_context *to)
{
if (to->remap_slice)
@@ -639,21 +619,27 @@
if (!to->legacy_hw_ctx.initialized)
return false;
- if (to->ppgtt &&
- !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings))
+ if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
return false;
return to == engine->last_context;
}
static bool
-needs_pd_load_pre(struct intel_engine_cs *engine, struct intel_context *to)
+needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt,
+ struct intel_engine_cs *engine,
+ struct intel_context *to)
{
- if (!to->ppgtt)
+ if (!ppgtt)
return false;
+ /* Always load the ppgtt on first use */
+ if (!engine->last_context)
+ return true;
+
+ /* Same context without new entries, skip */
if (engine->last_context == to &&
- !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings))
+ !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
return false;
if (engine->id != RCS)
@@ -666,9 +652,11 @@
}
static bool
-needs_pd_load_post(struct intel_context *to, u32 hw_flags)
+needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
+ struct intel_context *to,
+ u32 hw_flags)
{
- if (!to->ppgtt)
+ if (!ppgtt)
return false;
if (!IS_GEN8(to->i915))
@@ -684,11 +672,12 @@
{
struct intel_context *to = req->ctx;
struct intel_engine_cs *engine = req->engine;
+ struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
struct intel_context *from;
u32 hw_flags;
int ret, i;
- if (skip_rcs_switch(engine, to))
+ if (skip_rcs_switch(ppgtt, engine, to))
return 0;
/* Trying to pin first makes error handling easier. */
@@ -719,13 +708,13 @@
if (ret)
goto unpin_out;
- if (needs_pd_load_pre(engine, to)) {
+ if (needs_pd_load_pre(ppgtt, engine, to)) {
/* Older GENs and non render rings still want the load first,
* "PP_DCLV followed by PP_DIR_BASE register through Load
* Register Immediate commands in Ring Buffer before submitting
* a context."*/
trace_switch_mm(engine, to);
- ret = to->ppgtt->switch_mm(to->ppgtt, req);
+ ret = ppgtt->switch_mm(ppgtt, req);
if (ret)
goto unpin_out;
}
@@ -736,16 +725,11 @@
* space. This means we must enforce that a page table load
* occur when this occurs. */
hw_flags = MI_RESTORE_INHIBIT;
- else if (to->ppgtt &&
- intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings)
+ else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
hw_flags = MI_FORCE_RESTORE;
else
hw_flags = 0;
- /* We should never emit switch_mm more than once */
- WARN_ON(needs_pd_load_pre(engine, to) &&
- needs_pd_load_post(to, hw_flags));
-
if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
ret = mi_set_context(req, hw_flags);
if (ret)
@@ -780,9 +764,9 @@
/* GEN8 does *not* require an explicit reload if the PDPs have been
* setup, and we do not wish to move them.
*/
- if (needs_pd_load_post(to, hw_flags)) {
+ if (needs_pd_load_post(ppgtt, to, hw_flags)) {
trace_switch_mm(engine, to);
- ret = to->ppgtt->switch_mm(to->ppgtt, req);
+ ret = ppgtt->switch_mm(ppgtt, req);
/* The hardware context switch is emitted, but we haven't
* actually changed the state - so it's probably safe to bail
* here. Still, let the user know something dangerous has
@@ -792,8 +776,8 @@
return ret;
}
- if (to->ppgtt)
- to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
+ if (ppgtt)
+ ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
for (i = 0; i < MAX_L3_SLICES; i++) {
if (!(to->remap_slice & (1<<i)))
@@ -846,17 +830,18 @@
if (engine->id != RCS ||
req->ctx->legacy_hw_ctx.rcs_state == NULL) {
struct intel_context *to = req->ctx;
+ struct i915_hw_ppgtt *ppgtt =
+ to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
- if (needs_pd_load_pre(engine, to)) {
+ if (needs_pd_load_pre(ppgtt, engine, to)) {
int ret;
trace_switch_mm(engine, to);
- ret = to->ppgtt->switch_mm(to->ppgtt, req);
+ ret = ppgtt->switch_mm(ppgtt, req);
if (ret)
return ret;
- /* Doing a PD load always reloads the page dirs */
- to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
+ ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
}
if (to != engine->last_context) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8702343..59a78f7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2193,20 +2193,6 @@
return 0;
}
-int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
-{
- struct drm_i915_private *dev_priv = req->i915;
- struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-
- if (i915.enable_execlists)
- return 0;
-
- if (!ppgtt)
- return 0;
-
- return ppgtt->switch_mm(ppgtt, req);
-}
-
struct i915_hw_ppgtt *
i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
{
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index b0ae663..1148503 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -522,7 +522,6 @@
int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt);
int i915_ppgtt_init_hw(struct drm_device *dev);
-int i915_ppgtt_init_ring(struct drm_i915_gem_request *req);
void i915_ppgtt_release(struct kref *kref);
struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev,
struct drm_i915_file_private *fpriv);