mutex_lock(>->tlb_invalidate_lock);
        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
 
+       spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+
+       for_each_engine(engine, gt, id) {
+               struct reg_and_bit rb;
+
+               rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+               if (!i915_mmio_reg_offset(rb.reg))
+                       continue;
+
+               intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+       }
+
+       spin_unlock_irq(&uncore->lock);
+
        for_each_engine(engine, gt, id) {
                /*
                 * HW architecture suggest typical invalidation time at 40us,
                if (!i915_mmio_reg_offset(rb.reg))
                        continue;
 
-               intel_uncore_write_fw(uncore, rb.reg, rb.bit);
                if (__intel_wait_for_register_fw(uncore,
                                                 rb.reg, rb.bit, 0,
                                                 timeout_us, timeout_ms,