drm/i915: Request full SSEU enablement on Gen9

On Gen9 the render power gating can leave slice/subslice/EU in
a partially enabled state. We must make an explicit request for
full SSEU enablement through the Render Power Clock State
register when resuming render work. This register is save/
restored in the logical ring context image for execlist
submission mode. Initialize its value in each LRC image to
request full enablement according to the device SSEU config.

Thanks to Sharma Ankitprasad and Akash Goel for highlighting the
issue and proposing the initial fix on which this patch is based.

v2: Adjusted the names of the power gating support flags to fit
    update of an earlier patch.

Signed-off-by: Jeff McGee <jeff.mcgee@intel.com>
Reviewed-by: "Akash Goel <akash.goel@intel.com>"
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b355da4ca..98c87f6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1636,6 +1636,49 @@
 	return ret;
 }
 
+static u32
+make_rpcs(struct drm_device *dev)
+{
+	u32 rpcs = 0;
+
+	/*
+	 * No explicit RPCS request is needed to ensure full
+	 * slice/subslice/EU enablement prior to Gen9.
+	*/
+	if (INTEL_INFO(dev)->gen < 9)
+		return 0;
+
+	/*
+	 * Starting in Gen9, render power gating can leave
+	 * slice/subslice/EU in a partially enabled state. We
+	 * must make an explicit request through RPCS for full
+	 * enablement.
+	*/
+	if (INTEL_INFO(dev)->has_slice_pg) {
+		rpcs |= GEN8_RPCS_S_CNT_ENABLE;
+		rpcs |= INTEL_INFO(dev)->slice_total <<
+			GEN8_RPCS_S_CNT_SHIFT;
+		rpcs |= GEN8_RPCS_ENABLE;
+	}
+
+	if (INTEL_INFO(dev)->has_subslice_pg) {
+		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
+		rpcs |= INTEL_INFO(dev)->subslice_per_slice <<
+			GEN8_RPCS_SS_CNT_SHIFT;
+		rpcs |= GEN8_RPCS_ENABLE;
+	}
+
+	if (INTEL_INFO(dev)->has_eu_pg) {
+		rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
+			GEN8_RPCS_EU_MIN_SHIFT;
+		rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
+			GEN8_RPCS_EU_MAX_SHIFT;
+		rpcs |= GEN8_RPCS_ENABLE;
+	}
+
+	return rpcs;
+}
+
 static int
 populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
 		    struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf)
@@ -1739,8 +1782,8 @@
 	reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[0]);
 	if (ring->id == RCS) {
 		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
-		reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8;
-		reg_state[CTX_R_PWR_CLK_STATE+1] = 0;
+		reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
+		reg_state[CTX_R_PWR_CLK_STATE+1] = make_rpcs(dev);
 	}
 
 	kunmap_atomic(reg_state);