drm/i915: Integrate GuC-based command submission

GuC-based submission is mostly the same as execlist mode, up to
intel_logical_ring_advance_and_submit(), where the context being
dispatched would be added to the execlist queue; at this point
we submit the context to the GuC backend instead.

There are, however, a few other changes also required, notably:
1.  Contexts must be pinned at GGTT addresses accessible by the GuC
    i.e. NOT in the range [0..WOPCM_SIZE), so we have to add the
    PIN_OFFSET_BIAS flag to the relevant GGTT-pinning calls.

2.  The GuC's TLB must be invalidated after a context is pinned at
    a new GGTT address.

3.  GuC firmware uses the one page before Ring Context as shared data.
    Therefore, whenever driver wants to get base address of LRC, we
    will offset one page for it. LRC_PPHWSP_PN is defined as the page
    number of LRCA.

4.  In the work queue used to pass requests to the GuC, the GuC
    firmware requires the ring-tail-offset to be represented as an
    11-bit value, expressed in QWords. Therefore, the ringbuffer
    size must be reduced to the representable range (4 pages).

v2:
    Defer adding #defines until needed [Chris Wilson]
    Rationalise type declarations [Chris Wilson]

v4:
    Squashed kerneldoc patch into here [Daniel Vetter]

v5:
    Update request->tail in code common to both GuC and execlist modes.
    Add a private version of lr_context_update(), as sharing the
        execlist version leads to race conditions when the CPU and
        the GuC both update TAIL in the context image.
    Conversion of error-captured HWS page to string must account
        for offset from start of object to actual HWS (LRC_PPHWSP_PN).

Issue: VIZ-4884
Signed-off-by: Alex Dai <yu.dai@intel.com>
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Reviewed-by: Tom O'Rourke <Tom.O'Rourke@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 3352b85..ec70393 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -364,18 +364,58 @@
 static void guc_init_ctx_desc(struct intel_guc *guc,
 			      struct i915_guc_client *client)
 {
+	struct intel_context *ctx = client->owner;
 	struct guc_context_desc desc;
 	struct sg_table *sg;
+	int i;
 
 	memset(&desc, 0, sizeof(desc));
 
 	desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL;
 	desc.context_id = client->ctx_index;
 	desc.priority = client->priority;
-	desc.engines_used = (1 << RCS) | (1 << VCS) | (1 << BCS) |
-			    (1 << VECS) | (1 << VCS2); /* all engines */
 	desc.db_id = client->doorbell_id;
 
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct guc_execlist_context *lrc = &desc.lrc[i];
+		struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
+		struct intel_engine_cs *ring;
+		struct drm_i915_gem_object *obj;
+		uint64_t ctx_desc;
+
+		/* TODO: We have a design issue to be solved here. Only when we
+		 * receive the first batch, we know which engine is used by the
+		 * user. But here GuC expects the lrc and ring to be pinned. It
+		 * is not an issue for default context, which is the only one
+		 * for now who owns a GuC client. But for future owner of GuC
+		 * client, need to make sure lrc is pinned prior to enter here.
+		 */
+		obj = ctx->engine[i].state;
+		if (!obj)
+			break;	/* XXX: continue? */
+
+		ring = ringbuf->ring;
+		ctx_desc = intel_lr_context_descriptor(ctx, ring);
+		lrc->context_desc = (u32)ctx_desc;
+
+		/* The state page is after PPHWSP */
+		lrc->ring_lcra = i915_gem_obj_ggtt_offset(obj) +
+				LRC_STATE_PN * PAGE_SIZE;
+		lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
+				(ring->id << GUC_ELC_ENGINE_OFFSET);
+
+		obj = ringbuf->obj;
+
+		lrc->ring_begin = i915_gem_obj_ggtt_offset(obj);
+		lrc->ring_end = lrc->ring_begin + obj->base.size - 1;
+		lrc->ring_next_free_location = lrc->ring_begin;
+		lrc->ring_current_tail_pointer_value = 0;
+
+		desc.engines_used |= (1 << ring->id);
+	}
+
+	WARN_ON(desc.engines_used == 0);
+
 	/*
 	 * The CPU address is only needed at certain points, so kmap_atomic on
 	 * demand instead of storing it in the ctx descriptor.
@@ -501,6 +541,29 @@
 	return 0;
 }
 
+#define CTX_RING_BUFFER_START		0x08
+
+/* Update the ringbuffer pointer in a saved context image */
+static void lr_context_update(struct drm_i915_gem_request *rq)
+{
+	enum intel_ring_id ring_id = rq->ring->id;
+	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring_id].state;
+	struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj;
+	struct page *page;
+	uint32_t *reg_state;
+
+	BUG_ON(!ctx_obj);
+	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
+	WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
+
+	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
+	reg_state = kmap_atomic(page);
+
+	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
+
+	kunmap_atomic(reg_state);
+}
+
 /**
  * i915_guc_submit() - Submit commands through GuC
  * @client:	the guc client where commands will go through
@@ -517,6 +580,10 @@
 	unsigned long flags;
 	int q_ret, b_ret;
 
+	/* Need this because of the deferred pin ctx and ring */
+	/* Shall we move this right after ring is pinned? */
+	lr_context_update(rq);
+
 	spin_lock_irqsave(&client->wq_lock, flags);
 
 	q_ret = guc_add_workqueue_item(client, rq);
@@ -643,11 +710,13 @@
  * 		The kernel client to replace ExecList submission is created with
  * 		NORMAL priority. Priority of a client for scheduler can be HIGH,
  * 		while a preemption context can use CRITICAL.
+ * @ctx		the context to own the client (we use the default render context)
  *
  * Return:	An i915_guc_client object if success.
  */
 static struct i915_guc_client *guc_client_alloc(struct drm_device *dev,
-						uint32_t priority)
+						uint32_t priority,
+						struct intel_context *ctx)
 {
 	struct i915_guc_client *client;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -660,6 +729,7 @@
 
 	client->doorbell_id = GUC_INVALID_DOORBELL_ID;
 	client->priority = priority;
+	client->owner = ctx;
 	client->guc = guc;
 
 	client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0,
@@ -793,10 +863,11 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_guc *guc = &dev_priv->guc;
+	struct intel_context *ctx = dev_priv->ring[RCS].default_context;
 	struct i915_guc_client *client;
 
 	/* client for execbuf submission */
-	client = guc_client_alloc(dev, GUC_CTX_PRIORITY_KMD_NORMAL);
+	client = guc_client_alloc(dev, GUC_CTX_PRIORITY_KMD_NORMAL, ctx);
 	if (!client) {
 		DRM_ERROR("Failed to create execbuf guc_client\n");
 		return -ENOMEM;