return 0;
 }
 
+/*
+ * GFX9 helper for wave launch stall requirements on debug trap setting.
+ *
+ * Performs a read-modify-write of SPI_GDBG_WAVE_CNTL and, when stalling,
+ * follows the write with dummy reads of the same register as a delay.
+ *
+ * adev:
+ *   Device whose GC IP version selects which stall field is programmed.
+ *
+ * vmid:
+ *   Target VMID to stall/unstall.  Only used on GC 9.4.1, where it selects
+ *   the bit set in STALL_VMID; other versions program STALL_RA and ignore it.
+ *
+ * stall:
+ *   0-unstall wave launch (enable), 1-stall wave launch (disable).
+ *   After wavefront launch has been stalled, allocated waves must drain from
+ *   SPI in order for debug trap settings to take effect on those waves.
+ *   This is roughly a 96 clock cycle wait on SPI, where a read on
+ *   SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
+ *   KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
+ *
+ *   NOTE: We can afford to clear the entire STALL_VMID field on unstall
+ *   because GFX9.4.1 cannot support multi-process debugging due to trap
+ *   configuration and masking being limited to global scope.  Always assume
+ *   single process conditions.
+ */
+#define KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY       3
+void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
+                                       uint32_t vmid,
+                                       bool stall)
+{
+       int i;
+       uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+       /*
+        * GC 9.4.1 stalls through a per-VMID bit in STALL_VMID (whole field
+        * cleared on unstall); every other version sets STALL_RA to 1 or 0.
+        * NOTE(review): 1 << vmid is a signed int shift, so this assumes vmid
+        * is small enough to fit the STALL_VMID field - confirm caller range.
+        */
+       if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
+               data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
+                                                       stall ? 1 << vmid : 0);
+       else
+               data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA,
+                                                       stall ? 1 : 0);
+
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+       /* Only the stall path waits for the SPI drain. */
+       if (!stall)
+               return;
+
+       /* Dummy reads, values discarded: used purely as the drain delay. */
+       for (i = 0; i < KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
+               RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+}
+
+/*
+ * Enable the debug trap for a VMID: with wave launch stalled, write 0 to
+ * SPI_GDBG_TRAP_MASK, then resume wave launch.  The whole sequence runs
+ * under adev->grbm_idx_mutex.  Always returns 0.
+ *
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
+                               bool restore_dbg_registers,
+                               uint32_t vmid)
+{
+       mutex_lock(&adev->grbm_idx_mutex);
+
+       /* Stall wave launch (includes the SPI drain delay) before the update. */
+       kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+       /* Clear the trap mask while wave launch is stalled. */
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+       /* Resume wave launch. */
+       kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+       mutex_unlock(&adev->grbm_idx_mutex);
+
+       return 0;
+}
+
+/*
+ * Disable the debug trap for a VMID: with wave launch stalled, write 0 to
+ * SPI_GDBG_TRAP_MASK, then resume wave launch.  The whole sequence runs
+ * under adev->grbm_idx_mutex.  Always returns 0.
+ *
+ * NOTE(review): this is the same register sequence that
+ * kgd_gfx_v9_enable_debug_trap performs - confirm that is intended.
+ *
+ * keep_trap_enabled is ignored here but is a general interface requirement
+ * for devices that support multi-process debugging where the performance
+ * overhead from trap temporary setup needs to be bypassed when the debug
+ * session has ended.
+ */
+uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
+                                       bool keep_trap_enabled,
+                                       uint32_t vmid)
+{
+       mutex_lock(&adev->grbm_idx_mutex);
+
+       /* Stall wave launch (includes the SPI drain delay) before the update. */
+       kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+       /* Clear the trap mask while wave launch is stalled. */
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+       /* Resume wave launch. */
+       kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+       mutex_unlock(&adev->grbm_idx_mutex);
+
+       return 0;
+}
+
 void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
                        uint32_t vmid, uint64_t page_table_base)
 {
        .get_atc_vmid_pasid_mapping_info =
                        kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
        .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
+       .enable_debug_trap = kgd_gfx_v9_enable_debug_trap,
+       .disable_debug_trap = kgd_gfx_v9_disable_debug_trap,
        .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
        .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
 };