#define   GEN9_IZ_HASHING_MASK(slice)                  (0x3 << ((slice) * 2))
 #define   GEN9_IZ_HASHING(slice, val)                  ((val) << ((slice) * 2))
 
+/*
+ * WaClearTdlStateAckDirtyBits
+ *
+ * State-ack register offsets and the per-TDL bits (bits 12-15) used by this
+ * workaround. Two of the offsets are named for GEN9 slice 1 and slice 2;
+ * NOTE(review): presumably GEN8_STATE_ACK (0x20F0) is the slice 0 instance
+ * on GEN9 - confirm against the hardware documentation.
+ *
+ * GEN9_SUBSLICE_TDL_ACK_BITS is the OR of all four TDL bits, so one value
+ * covers TDL0..TDL3. The workaround batch passes it through
+ * _MASKED_BIT_DISABLE() when writing each of the three registers.
+ */
+#define GEN8_STATE_ACK         _MMIO(0x20F0)
+#define GEN9_STATE_ACK_SLICE1  _MMIO(0x20F8)
+#define GEN9_STATE_ACK_SLICE2  _MMIO(0x2100)
+#define   GEN9_STATE_ACK_TDL0 (1 << 12)
+#define   GEN9_STATE_ACK_TDL1 (1 << 13)
+#define   GEN9_STATE_ACK_TDL2 (1 << 14)
+#define   GEN9_STATE_ACK_TDL3 (1 << 15)
+#define   GEN9_SUBSLICE_TDL_ACK_BITS \
+       (GEN9_STATE_ACK_TDL3 | GEN9_STATE_ACK_TDL2 | \
+        GEN9_STATE_ACK_TDL1 | GEN9_STATE_ACK_TDL0)
+
 #define GFX_MODE       _MMIO(0x2520)
 #define GFX_MODE_GEN7  _MMIO(0x229c)
 #define RING_MODE_GEN7(ring)   _MMIO((ring)->mmio_base+0x29c)
 
                wa_ctx_emit(batch, index, MI_NOOP);
        }
 
+       /* WaClearTdlStateAckDirtyBits:bxt */
+       if (IS_BXT_REVID(dev, 0, BXT_REVID_B0)) {
+               wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(4));
+
+               wa_ctx_emit_reg(batch, index, GEN8_STATE_ACK);
+               wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS));
+
+               wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE1);
+               wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS));
+
+               wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE2);
+               wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS));
+
+               wa_ctx_emit_reg(batch, index, GEN7_ROW_CHICKEN2);
+               /* dummy write to CS, mask bits are 0 to ensure the register is not modified */
+               wa_ctx_emit(batch, index, 0x0);
+               wa_ctx_emit(batch, index, MI_NOOP);
+       }
+
        /* WaDisableCtxRestoreArbitration:skl,bxt */
        if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
            IS_BXT_REVID(dev, 0, BXT_REVID_A1))