RDMA/mlx5: Fix unsafe xarray access in implicit ODP handling
author     Or Har-Toov <ohartoov@nvidia.com>
           Mon, 16 Jun 2025 08:17:01 +0000 (11:17 +0300)
committer  Jason Gunthorpe <jgg@nvidia.com>
           Tue, 17 Jun 2025 17:13:10 +0000 (14:13 -0300)
__xa_store() and __xa_erase() were used on dev->odp_mkeys without holding
that xarray's lock, which triggered a lockdep warning about unsafe RCU
usage.  Replace them with xa_store() and xa_erase(), which take the
xa_lock internally.
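
As background, a minimal, hypothetical sketch of the two xarray locking
contracts involved (a throwaway xarray "xa" is assumed; this is not code
from the patch): the __xa_*() helpers expect the caller to already hold
that xarray's xa_lock, and lockdep complains otherwise, while the plain
xa_*() helpers take and release the lock themselves.

  #include <linux/gfp.h>
  #include <linux/xarray.h>

  static DEFINE_XARRAY(xa);

  static void store_caller_locked(unsigned long idx, void *entry)
  {
          /* __xa_store() expects xa's xa_lock to be held by the caller. */
          xa_lock(&xa);
          __xa_store(&xa, idx, entry, GFP_ATOMIC);
          xa_unlock(&xa);
  }

  static void store_internally_locked(unsigned long idx, void *entry)
  {
          /* xa_store() takes and drops xa's xa_lock internally. */
          xa_store(&xa, idx, entry, GFP_KERNEL);
  }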

  =============================
  WARNING: suspicious RCU usage
  6.14.0-rc7_for_upstream_debug_2025_03_18_15_01 #1 Not tainted
  -----------------------------
  ./include/linux/xarray.h:1211 suspicious rcu_dereference_protected() usage!

  other info that might help us debug this:

  rcu_scheduler_active = 2, debug_locks = 1
  3 locks held by kworker/u136:0/219:
      at: process_one_work+0xbe4/0x15f0
      process_one_work+0x75c/0x15f0
      pagefault_mr+0x9a5/0x1390 [mlx5_ib]

  stack backtrace:
  CPU: 14 UID: 0 PID: 219 Comm: kworker/u136:0 Not tainted
  6.14.0-rc7_for_upstream_debug_2025_03_18_15_01 #1
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
  rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
  Workqueue: mlx5_ib_page_fault mlx5_ib_eqe_pf_action [mlx5_ib]
  Call Trace:
   dump_stack_lvl+0xa8/0xc0
   lockdep_rcu_suspicious+0x1e6/0x260
   xas_create+0xb8a/0xee0
   xas_store+0x73/0x14c0
   __xa_store+0x13c/0x220
   ? xa_store_range+0x390/0x390
   ? spin_bug+0x1d0/0x1d0
   pagefault_mr+0xcb5/0x1390 [mlx5_ib]
   ? _raw_spin_unlock+0x1f/0x30
   mlx5_ib_eqe_pf_action+0x3be/0x2620 [mlx5_ib]
   ? lockdep_hardirqs_on_prepare+0x400/0x400
   ? mlx5_ib_invalidate_range+0xcb0/0xcb0 [mlx5_ib]
   process_one_work+0x7db/0x15f0
   ? pwq_dec_nr_in_flight+0xda0/0xda0
   ? assign_work+0x168/0x240
   worker_thread+0x57d/0xcd0
   ? rescuer_thread+0xc40/0xc40
   kthread+0x3b3/0x800
   ? kthread_is_per_cpu+0xb0/0xb0
   ? lock_downgrade+0x680/0x680
   ? do_raw_spin_lock+0x12d/0x270
   ? spin_bug+0x1d0/0x1d0
   ? finish_task_switch.isra.0+0x284/0x9e0
   ? lockdep_hardirqs_on_prepare+0x284/0x400
   ? kthread_is_per_cpu+0xb0/0xb0
   ret_from_fork+0x2d/0x70
   ? kthread_is_per_cpu+0xb0/0xb0
   ret_from_fork_asm+0x11/0x20

Fixes: d3d930411ce3 ("RDMA/mlx5: Fix implicit ODP use after free")
Link: https://patch.msgid.link/r/a85ddd16f45c8cb2bc0a188c2b0fcedfce975eb8.1750061791.git.leon@kernel.org
Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
Reviewed-by: Patrisious Haddad <phaddad@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/hw/mlx5/odp.c

index eaa2f9f5f3a9c79c0cc0e82306a0bf5dbf2a63d6..f6abd64f07f7e4cee6c244b0606a29f1319b0ddb 100644
@@ -259,8 +259,8 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
        }
 
        if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault))
-               __xa_erase(&mr_to_mdev(mr)->odp_mkeys,
-                          mlx5_base_mkey(mr->mmkey.key));
+               xa_erase(&mr_to_mdev(mr)->odp_mkeys,
+                        mlx5_base_mkey(mr->mmkey.key));
        xa_unlock(&imr->implicit_children);
 
        /* Freeing a MR is a sleeping operation, so bounce to a work queue */
@@ -532,8 +532,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
        }
 
        if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
-               ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
-                                &mr->mmkey, GFP_KERNEL);
+               ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
+                              &mr->mmkey, GFP_KERNEL);
                if (xa_is_err(ret)) {
                        ret = ERR_PTR(xa_err(ret));
                        __xa_erase(&imr->implicit_children, idx);
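
For comparison, an equivalent but more verbose way to silence the same
lockdep splat (a sketch only, not part of this patch) would be to take
the odp_mkeys xarray's own lock explicitly around the __xa_*() call; the
lock held in this path belongs to a different xarray
(imr->implicit_children), so it does not cover odp_mkeys:

        if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault)) {
                xa_lock(&mr_to_mdev(mr)->odp_mkeys);
                __xa_erase(&mr_to_mdev(mr)->odp_mkeys,
                           mlx5_base_mkey(mr->mmkey.key));
                xa_unlock(&mr_to_mdev(mr)->odp_mkeys);
        }

The internally-locked xa_erase()/xa_store() used by the fix achieve the
same result with less code.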