drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand

As with the context, keeping the ringbuffer pinned to the GGTT at all
times is harmful (it badly fragments the GGTT and can even exhaust it).

Unfortunately, this case is also more complex than the previous one,
because we need to map and access the ringbuffer in several places
along the execbuffer path (and we cannot simply leave the default
ringbuffer pinned, as before). Also, the context image itself contains
the ringbuffer's start address, which we have to keep updated if we
are going to allow the ringbuffer to move around.
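
A minimal sketch of that update (it mirrors the
execlists_update_context() change in the diff below; the helper name
is only illustrative): on every submission, the ring buffer's current
GGTT offset is rewritten into the context image alongside the tail.

  static int update_ring_start(struct drm_i915_gem_object *ctx_obj,
                               struct drm_i915_gem_object *ring_obj,
                               u32 tail)
  {
          struct page *page;
          uint32_t *reg_state;

          /* The register state lives in the second page of the context image */
          page = i915_gem_object_get_page(ctx_obj, 1);
          reg_state = kmap_atomic(page);

          /* Refresh the tail and the (possibly relocated) buffer start */
          reg_state[CTX_RING_TAIL+1] = tail;
          reg_state[CTX_RING_BUFFER_START+1] =
                          i915_gem_obj_ggtt_offset(ring_obj);

          kunmap_atomic(reg_state);

          return 0;
  }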

v2: As with the context pinning, we cannot really do it in interrupt
context. Also, pin the default ringbuffer objects regardless (it makes
error capture a lot easier).

v3: Rebased.  Take a pin reference on the ringbuffer for each item in
the execlist request queue, because the hardware may still be using
the ringbuffer after the MI_USER_INTERRUPT that notifies the seqno
update has executed.  The ringbuffer must remain pinned until the
context save is complete.  No longer pin and unpin the ringbuffer in
populate_lr_context() - this transient address is meaningless and the
pinning can cause a sleep while atomic.
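
Roughly, the pin lifetime pairs up as follows (a sketch built from the
lines added in the diff below; the surrounding queue/retire code is
elided):

  /* Submission side: every request added to the execlist queue takes
   * a pin reference, which via intel_lr_context_pin() also pins and
   * maps the ringbuffer. */
  if (to != ring->default_context)
          intel_lr_context_pin(ring, to);

  /* Retirement side (processing execlist_retired_req_list): the
   * reference is dropped only after the request has retired, i.e.
   * once the hardware has completed the context save. */
  if (ctx_obj && (ctx != ring->default_context))
          intel_lr_context_unpin(ring, ctx);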

v4: Moved ringbuffer pin and unpin into the lr_context_pin functions.
Downgraded pinning check BUG_ONs to WARN_ONs.
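
For reference, the pin helper is gated on a per-context, per-engine
pin count, so only the first reference actually pins; the guard itself
sits outside the quoted hunks and is assumed to look roughly like
this:

  if (ctx->engine[ring->id].unpin_count++ == 0) {
          ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
          if (ret)
                  goto reset_unpin_count;

          /* Map the ringbuffer only while the context holds its pin */
          ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
          if (ret)
                  goto unpin_ctx_obj;
  }

intel_lr_context_unpin() mirrors this: the ringbuffer is unmapped and
the context object unpinned only when the count drops back to zero.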

v5: Reinstated WARN_ONs for unexpected execlist states.  Removed unused
variable.

Issue: VIZ-4277
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
Reviewed-by: Akash Goel <akash.goels@gmail.com>
Reviewed-by: Deepak S <deepak.s@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9560e63..e588376 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -203,6 +203,9 @@
 };
 #define GEN8_CTX_ID_SHIFT 32
 
+static int intel_lr_context_pin(struct intel_engine_cs *ring,
+		struct intel_context *ctx);
+
 /**
  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
  * @dev: DRM device.
@@ -354,7 +357,9 @@
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
 }
 
-static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail)
+static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
+				    struct drm_i915_gem_object *ring_obj,
+				    u32 tail)
 {
 	struct page *page;
 	uint32_t *reg_state;
@@ -363,6 +368,7 @@
 	reg_state = kmap_atomic(page);
 
 	reg_state[CTX_RING_TAIL+1] = tail;
+	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
 
 	kunmap_atomic(reg_state);
 
@@ -373,21 +379,25 @@
 				      struct intel_context *to0, u32 tail0,
 				      struct intel_context *to1, u32 tail1)
 {
-	struct drm_i915_gem_object *ctx_obj0;
+	struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state;
+	struct intel_ringbuffer *ringbuf0 = to0->engine[ring->id].ringbuf;
 	struct drm_i915_gem_object *ctx_obj1 = NULL;
+	struct intel_ringbuffer *ringbuf1 = NULL;
 
-	ctx_obj0 = to0->engine[ring->id].state;
 	BUG_ON(!ctx_obj0);
 	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
+	WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj));
 
-	execlists_ctx_write_tail(ctx_obj0, tail0);
+	execlists_update_context(ctx_obj0, ringbuf0->obj, tail0);
 
 	if (to1) {
+		ringbuf1 = to1->engine[ring->id].ringbuf;
 		ctx_obj1 = to1->engine[ring->id].state;
 		BUG_ON(!ctx_obj1);
 		WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
+		WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj));
 
-		execlists_ctx_write_tail(ctx_obj1, tail1);
+		execlists_update_context(ctx_obj1, ringbuf1->obj, tail1);
 	}
 
 	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
@@ -537,6 +547,10 @@
 		return -ENOMEM;
 	req->ctx = to;
 	i915_gem_context_reference(req->ctx);
+
+	if (to != ring->default_context)
+		intel_lr_context_pin(ring, to);
+
 	req->ring = ring;
 	req->tail = tail;
 
@@ -557,7 +571,7 @@
 
 		if (to == tail_req->ctx) {
 			WARN(tail_req->elsp_submitted != 0,
-			     "More than 2 already-submitted reqs queued\n");
+				"More than 2 already-submitted reqs queued\n");
 			list_del(&tail_req->execlist_link);
 			list_add_tail(&tail_req->execlist_link,
 				&ring->execlist_retired_req_list);
@@ -745,6 +759,12 @@
 	spin_unlock_irqrestore(&ring->execlist_lock, flags);
 
 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
+		struct intel_context *ctx = req->ctx;
+		struct drm_i915_gem_object *ctx_obj =
+				ctx->engine[ring->id].state;
+
+		if (ctx_obj && (ctx != ring->default_context))
+			intel_lr_context_unpin(ring, ctx);
 		intel_runtime_pm_put(dev_priv);
 		i915_gem_context_unreference(req->ctx);
 		list_del(&req->execlist_link);
@@ -816,6 +836,7 @@
 		struct intel_context *ctx)
 {
 	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
 	int ret = 0;
 
 	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
@@ -823,21 +844,35 @@
 		ret = i915_gem_obj_ggtt_pin(ctx_obj,
 				GEN8_LR_CONTEXT_ALIGN, 0);
 		if (ret)
-			ctx->engine[ring->id].unpin_count = 0;
+			goto reset_unpin_count;
+
+		ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
+		if (ret)
+			goto unpin_ctx_obj;
 	}
 
 	return ret;
+
+unpin_ctx_obj:
+	i915_gem_object_ggtt_unpin(ctx_obj);
+reset_unpin_count:
+	ctx->engine[ring->id].unpin_count = 0;
+
+	return ret;
 }
 
 void intel_lr_context_unpin(struct intel_engine_cs *ring,
 		struct intel_context *ctx)
 {
 	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
 
 	if (ctx_obj) {
 		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-		if (--ctx->engine[ring->id].unpin_count == 0)
+		if (--ctx->engine[ring->id].unpin_count == 0) {
+			intel_unpin_ringbuffer_obj(ringbuf);
 			i915_gem_object_ggtt_unpin(ctx_obj);
+		}
 	}
 }
 
@@ -1595,7 +1630,6 @@
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *ring_obj = ringbuf->obj;
 	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
 	struct page *page;
 	uint32_t *reg_state;
@@ -1641,7 +1675,9 @@
 	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
 	reg_state[CTX_RING_TAIL+1] = 0;
 	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
-	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
+	/* Ring buffer start address is not known until the buffer is pinned.
+	 * It is written to the context image in execlists_update_context()
+	 */
 	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
 	reg_state[CTX_RING_BUFFER_CONTROL+1] =
 			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
@@ -1723,10 +1759,12 @@
 					ctx->engine[i].ringbuf;
 			struct intel_engine_cs *ring = ringbuf->ring;
 
+			if (ctx == ring->default_context) {
+				intel_unpin_ringbuffer_obj(ringbuf);
+				i915_gem_object_ggtt_unpin(ctx_obj);
+			}
 			intel_destroy_ringbuffer_obj(ringbuf);
 			kfree(ringbuf);
-			if (ctx == ring->default_context)
-				i915_gem_object_ggtt_unpin(ctx_obj);
 			drm_gem_object_unreference(&ctx_obj->base);
 		}
 	}
@@ -1823,11 +1861,8 @@
 	if (!ringbuf) {
 		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
 				ring->name);
-		if (is_global_default_ctx)
-			i915_gem_object_ggtt_unpin(ctx_obj);
-		drm_gem_object_unreference(&ctx_obj->base);
 		ret = -ENOMEM;
-		return ret;
+		goto error_unpin_ctx;
 	}
 
 	ringbuf->ring = ring;
@@ -1840,22 +1875,30 @@
 	ringbuf->space = ringbuf->size;
 	ringbuf->last_retired_head = -1;
 
-	/* TODO: For now we put this in the mappable region so that we can reuse
-	 * the existing ringbuffer code which ioremaps it. When we start
-	 * creating many contexts, this will no longer work and we must switch
-	 * to a kmapish interface.
-	 */
-	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n",
+	if (ringbuf->obj == NULL) {
+		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
+		if (ret) {
+			DRM_DEBUG_DRIVER(
+				"Failed to allocate ringbuffer obj %s: %d\n",
 				ring->name, ret);
-		goto error;
+			goto error_free_rbuf;
+		}
+
+		if (is_global_default_ctx) {
+			ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
+			if (ret) {
+				DRM_ERROR(
+					"Failed to pin and map ringbuffer %s: %d\n",
+					ring->name, ret);
+				goto error_destroy_rbuf;
+			}
+		}
+
 	}
 
 	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
-		intel_destroy_ringbuffer_obj(ringbuf);
 		goto error;
 	}
 
@@ -1877,7 +1920,6 @@
 			DRM_ERROR("Init render state failed: %d\n", ret);
 			ctx->engine[ring->id].ringbuf = NULL;
 			ctx->engine[ring->id].state = NULL;
-			intel_destroy_ringbuffer_obj(ringbuf);
 			goto error;
 		}
 		ctx->rcs_initialized = true;
@@ -1886,7 +1928,13 @@
 	return 0;
 
 error:
+	if (is_global_default_ctx)
+		intel_unpin_ringbuffer_obj(ringbuf);
+error_destroy_rbuf:
+	intel_destroy_ringbuffer_obj(ringbuf);
+error_free_rbuf:
 	kfree(ringbuf);
+error_unpin_ctx:
 	if (is_global_default_ctx)
 		i915_gem_object_ggtt_unpin(ctx_obj);
 	drm_gem_object_unreference(&ctx_obj->base);