 #include "xe_wait_user_fence.h"
 
 #ifdef CONFIG_LOCKDEP
-static struct lockdep_map xe_device_mem_access_lockdep_map = {
+struct lockdep_map xe_device_mem_access_lockdep_map = {
        .name = "xe_device_mem_access_lockdep_map"
 };
 #endif
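
Side note: dropping static here only makes sense if another translation unit now references the map; presumably a matching extern declaration is added to a shared header so that xe_device.c and xe_pm.c annotate the same map. A minimal sketch of that declaration, assuming a hypothetical header (the header name is not taken from this patch):

    /* hypothetical shared header, e.g. xe_device.h */
    #ifdef CONFIG_LOCKDEP
    extern struct lockdep_map xe_device_mem_access_lockdep_map;
    #endif
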
         * runtime_resume callback, lockdep should give us a nice splat.
         */
        lock_map_acquire(&xe_device_mem_access_lockdep_map);
+       lock_map_release(&xe_device_mem_access_lockdep_map);
 
        xe_pm_runtime_get(xe);
        ref = atomic_inc_return(&xe->mem_access.ref);
 
        XE_WARN_ON(ref == S32_MAX);
 
-       lock_map_release(&xe_device_mem_access_lockdep_map);
 }
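
For readers unfamiliar with the pattern: lock_map_acquire()/lock_map_release() on a bare lockdep_map take no real lock; they only make lockdep record ordering between the map and any real locks held around, or taken inside, the pair. A minimal, self-contained sketch of the idea, with hypothetical demo_* names (everything below except the lockdep and mutex APIs is an assumption, not code from this patch):

    #include <linux/lockdep.h>
    #include <linux/mutex.h>

    #ifdef CONFIG_LOCKDEP
    /* Dummy map; protects nothing and compiles out without lockdep. */
    static struct lockdep_map demo_lockdep_map = {
            .name = "demo_lockdep_map"
    };
    #endif

    static DEFINE_MUTEX(demo_lock);

    static void demo_get(void)
    {
            /*
             * Caller side: any lock already held here is recorded as
             * taken outside (before) demo_lockdep_map.
             */
            lock_map_acquire(&demo_lockdep_map);
            lock_map_release(&demo_lockdep_map);
    }

    static void demo_async_callback(void)
    {
            /*
             * Callback side: holding the map across the body records
             * demo_lock as nesting inside demo_lockdep_map, so a caller
             * of demo_get() that holds demo_lock produces an inversion
             * report even if the two paths never race at runtime.
             */
            lock_map_acquire(&demo_lockdep_map);
            mutex_lock(&demo_lock);
            mutex_unlock(&demo_lock);
            lock_map_release(&demo_lockdep_map);
    }

The acquire immediately followed by release in xe_device_mem_access_get() is enough: lockdep only needs to observe the ordering once, not an actual overlap in time.
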
 
 void xe_device_mem_access_put(struct xe_device *xe)
 
        /* Disable access_ongoing asserts and prevent recursive pm calls */
        xe_pm_write_callback_task(xe, current);
 
+       /*
+        * The actual xe_device_mem_access_put() is always async underneath, so
+        * exactly where that is called should make no difference to us. However
+        * we still need to be very careful with the locks that this callback
+        * acquires and the locks that are acquired and held by any callers of
+        * xe_device_mem_access_get(). We already have the matching annotation
+        * on that side, but we also need it here. For example lockdep should be
+        * able to tell us if the following scenario is in theory possible:
+        *
+        * CPU0                          | CPU1 (kworker)
+        * lock(A)                       |
+        *                               | xe_pm_runtime_suspend()
+        *                               |      lock(A)
+        * xe_device_mem_access_get()    |
+        *
+        * This will clearly deadlock, since the rpm core needs to wait for
+        * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
+        * on CPU0, which prevents CPU1 from making forward progress. With the
+        * annotation here and in xe_device_mem_access_get() lockdep will see
+        * the potential lock inversion and give us a nice splat.
+        */
+       lock_map_acquire(&xe_device_mem_access_lockdep_map);
+
        if (xe->d3cold.allowed) {
                err = xe_bo_evict_all(xe);
                if (err)
 
        xe_irq_suspend(xe);
 out:
+       lock_map_release(&xe_device_mem_access_lockdep_map);
        xe_pm_write_callback_task(xe, NULL);
        return err;
 }
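
To make the CPU0/CPU1 table in the comment concrete, the scenario below is what the annotation lets lockdep catch. demo_lock and the two demo functions are hypothetical stand-ins, not driver code; the real suspend callback holds xe_device_mem_access_lockdep_map across its body, as in the hunk above.

    static DEFINE_MUTEX(demo_lock);

    /* CPU0: an ioctl-like path that wakes the device under a lock. */
    static void demo_ioctl(struct xe_device *xe)
    {
            mutex_lock(&demo_lock);
            /* Records demo_lock -> xe_device_mem_access_lockdep_map. */
            xe_device_mem_access_get(xe);
            xe_device_mem_access_put(xe);
            mutex_unlock(&demo_lock);
    }

    /*
     * CPU1: if something like this ran inside the annotated region of
     * xe_pm_runtime_suspend(), the recorded order would be
     * xe_device_mem_access_lockdep_map -> demo_lock, the inverse.
     */
    static void demo_suspend_body(void)
    {
            mutex_lock(&demo_lock);
            mutex_unlock(&demo_lock);
    }

With both annotations in place, lockdep sees the two opposite orderings and reports the potential deadlock without the race ever having to happen.
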
        /* Disable access_ongoing asserts and prevent recursive pm calls */
        xe_pm_write_callback_task(xe, current);
 
+       lock_map_acquire(&xe_device_mem_access_lockdep_map);
+
        /*
         * It can be possible that xe has allowed d3cold but other pcie devices
         * in gfx card soc would have blocked d3cold, therefore card has not
                        goto out;
        }
 out:
+       lock_map_release(&xe_device_mem_access_lockdep_map);
        xe_pm_write_callback_task(xe, NULL);
        return err;
 }