u32 hw_flags)
 {
        u32 flags = hw_flags | MI_MM_SPACE_GTT;
-       int ret;
+       const int num_rings =
+               /* Use an extended w/a on ivb+ if signalling from other rings */
+               i915_semaphore_is_enabled(ring->dev) ?
+               hweight32(INTEL_INFO(ring->dev)->ring_mask) - 1 :
+               0;
+       int len, i, ret;
 
        /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB
         * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value
        if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
                flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
 
-       ret = intel_ring_begin(ring, 6);
+
+       len = 4;
+       if (INTEL_INFO(ring->dev)->gen >= 7)
+               len += 2 + (num_rings ? 4*num_rings + 2 : 0);
+
+       ret = intel_ring_begin(ring, len);
        if (ret)
                return ret;
 
        /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
-       if (INTEL_INFO(ring->dev)->gen >= 7)
+       if (INTEL_INFO(ring->dev)->gen >= 7) {
                intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
-       else
-               intel_ring_emit(ring, MI_NOOP);
+               if (num_rings) {
+                       struct intel_engine_cs *signaller;
+
+                       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
+                       for_each_ring(signaller, to_i915(ring->dev), i) {
+                               if (signaller == ring)
+                                       continue;
+
+                               intel_ring_emit(ring, RING_PSMI_CTL(signaller->mmio_base));
+                               intel_ring_emit(ring, _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
+                       }
+               }
+       }
 
        intel_ring_emit(ring, MI_NOOP);
        intel_ring_emit(ring, MI_SET_CONTEXT);
         */
        intel_ring_emit(ring, MI_NOOP);
 
-       if (INTEL_INFO(ring->dev)->gen >= 7)
+       if (INTEL_INFO(ring->dev)->gen >= 7) {
+               if (num_rings) {
+                       struct intel_engine_cs *signaller;
+
+                       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
+                       for_each_ring(signaller, to_i915(ring->dev), i) {
+                               if (signaller == ring)
+                                       continue;
+
+                               intel_ring_emit(ring, RING_PSMI_CTL(signaller->mmio_base));
+                               intel_ring_emit(ring, _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
+                       }
+               }
                intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
-       else
-               intel_ring_emit(ring, MI_NOOP);
+       }
 
        intel_ring_advance(ring);
 
 
 #define GEN6_VERSYNC   (RING_SYNC_1(VEBOX_RING_BASE))
 #define GEN6_VEVSYNC   (RING_SYNC_2(VEBOX_RING_BASE))
 #define GEN6_NOSYNC 0
+#define RING_PSMI_CTL(base)    ((base)+0x50)
 #define RING_MAX_IDLE(base)    ((base)+0x54)
 #define RING_HWS_PGA(base)     ((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)        ((base)+0x2080)
 #define   GEN6_BLITTER_FBC_NOTIFY                      (1<<3)
 
 #define GEN6_RC_SLEEP_PSMI_CONTROL     0x2050
+#define   GEN6_PSMI_SLEEP_MSG_DISABLE  (1 << 0)
 #define   GEN8_RC_SEMA_IDLE_MSG_DISABLE        (1 << 12)
 #define   GEN8_FF_DOP_CLOCK_GATE_DISABLE       (1<<10)