drm/i915: implement WaClearTdlStateAckDirtyBits

This is to fix a GPU hang seen with mid thread pre-emption
and pooled EUs.

v2. Use IS_BXT_REVID instead of IS_BROXTON and INTEL_REVID

v3. And use correct type for register addresses

Signed-off-by: Tim Gore <tim.gore@intel.com>
Reviewed-by: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1458571049-854-1-git-send-email-tim.gore@intel.com
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 0744759..cea5a39 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1790,6 +1790,18 @@
 #define   GEN9_IZ_HASHING_MASK(slice)			(0x3 << ((slice) * 2))
 #define   GEN9_IZ_HASHING(slice, val)			((val) << ((slice) * 2))
 
+/* WaClearTdlStateAckDirtyBits */
+#define GEN8_STATE_ACK		_MMIO(0x20F0)
+#define GEN9_STATE_ACK_SLICE1	_MMIO(0x20F8)
+#define GEN9_STATE_ACK_SLICE2	_MMIO(0x2100)
+#define   GEN9_STATE_ACK_TDL0 (1 << 12)
+#define   GEN9_STATE_ACK_TDL1 (1 << 13)
+#define   GEN9_STATE_ACK_TDL2 (1 << 14)
+#define   GEN9_STATE_ACK_TDL3 (1 << 15)
+#define   GEN9_SUBSLICE_TDL_ACK_BITS \
+	(GEN9_STATE_ACK_TDL3 | GEN9_STATE_ACK_TDL2 | \
+	 GEN9_STATE_ACK_TDL1 | GEN9_STATE_ACK_TDL0)
+
 #define GFX_MODE	_MMIO(0x2520)
 #define GFX_MODE_GEN7	_MMIO(0x229c)
 #define RING_MODE_GEN7(ring)	_MMIO((ring)->mmio_base+0x29c)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f209ecf..0d6dc5e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1448,6 +1448,25 @@
 		wa_ctx_emit(batch, index, MI_NOOP);
 	}
 
+	/* WaClearTdlStateAckDirtyBits:bxt */
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_B0)) {
+		wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(4));
+
+		wa_ctx_emit_reg(batch, index, GEN8_STATE_ACK);
+		wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS));
+
+		wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE1);
+		wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS));
+
+		wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE2);
+		wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS));
+
+		wa_ctx_emit_reg(batch, index, GEN7_ROW_CHICKEN2);
+		/* dummy write to CS, mask bits are 0 to ensure the register is not modified */
+		wa_ctx_emit(batch, index, 0x0);
+		wa_ctx_emit(batch, index, MI_NOOP);
+	}
+
 	/* WaDisableCtxRestoreArbitration:skl,bxt */
 	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
 	    IS_BXT_REVID(dev, 0, BXT_REVID_A1))