drm/i915/gen8: Add WaClearSlmSpaceAtContextSwitch workaround

In Indirect context w/a batch buffer,
WaClearSlmSpaceAtContextSwitch

This WA performs writes to scratch page so it must be valid, this check
is performed before initializing the batch with this WA.

v2: s/PIPE_CONTROL_FLUSH_RO_CACHES/PIPE_CONTROL_FLUSH_L3 (Ville)

v3: GTT bit in scratch address should be mbz (Chris)

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Gordon <david.s.gordon@intel.com>
Signed-off-by: Rafael Barbalho <rafael.barbalho@intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 500ae51..90a02dc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1147,6 +1147,7 @@
 				    uint32_t *const batch,
 				    uint32_t *offset)
 {
+	uint32_t scratch_addr;
 	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
 
 	/* WaDisableCtxRestoreArbitration:bdw,chv */
@@ -1175,6 +1176,20 @@
 		wa_ctx_emit(batch, l3sqc4_flush & ~GEN8_LQSC_FLUSH_COHERENT_LINES);
 	}
 
+	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
+	/* Actual scratch location is at 128 bytes offset */
+	scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
+
+	wa_ctx_emit(batch, GFX_OP_PIPE_CONTROL(6));
+	wa_ctx_emit(batch, (PIPE_CONTROL_FLUSH_L3 |
+			    PIPE_CONTROL_GLOBAL_GTT_IVB |
+			    PIPE_CONTROL_CS_STALL |
+			    PIPE_CONTROL_QW_WRITE));
+	wa_ctx_emit(batch, scratch_addr);
+	wa_ctx_emit(batch, 0);
+	wa_ctx_emit(batch, 0);
+	wa_ctx_emit(batch, 0);
+
 	/* Pad to end of cacheline */
 	while (index % CACHELINE_DWORDS)
 		wa_ctx_emit(batch, MI_NOOP);