RDMA/addr: Fix race with netevent_callback()/rdma_addr_cancel()
author		Jason Gunthorpe <jgg@nvidia.com>
		Wed, 30 Sep 2020 07:20:07 +0000 (10:20 +0300)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
		Sun, 1 Nov 2020 11:01:05 +0000 (12:01 +0100)
commit 2ee9bf346fbfd1dad0933b9eb3a4c2c0979b633e upstream.

This three thread race can result in the work being run once the callback
becomes NULL:

       CPU1                 CPU2                   CPU3
 netevent_callback()
                     process_one_req()       rdma_addr_cancel()
                      [..]
    spin_lock_bh()
     set_timeout()
    spin_unlock_bh()

                                             spin_lock_bh()
                                             list_del_init(&req->list);
                                             spin_unlock_bh()

                     req->callback = NULL
                     spin_lock_bh()
                      if (!list_empty(&req->list))
                              // Skipped!
                       // cancel_delayed_work(&req->work);
                     spin_unlock_bh()

                     process_one_req() // again
                      req->callback()  // BOOM
                                             cancel_delayed_work_sync()

The solution is to always cancel the work once the request is completed, so
that any set_timeout() that races in between does not result in the work
running again.
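
For reference, the re-arm path that makes this race possible looks roughly
like the following. This is a paraphrased, simplified sketch of set_timeout()
and netevent_callback() as they appear in drivers/infiniband/core/addr.c of
this era, not the exact functions; the real netevent_callback() also filters
on NUD_VALID. The key point is that set_timeout() uses mod_delayed_work(), so
a neighbour event can re-queue a req whose handler has already run its
callback:

  /*
   * Simplified sketch of the re-arm path. addr_wq, lock and req_list
   * are the file-local workqueue, spinlock and request list in addr.c.
   */
  static void set_timeout(struct addr_req *req, unsigned long time)
  {
          unsigned long delay;

          delay = time - jiffies;
          if ((long)delay < 0)
                  delay = 0;

          /* re-queues the work even if the handler is mid-flight */
          mod_delayed_work(addr_wq, &req->work, delay);
  }

  static int netevent_callback(struct notifier_block *self,
                               unsigned long event, void *ctx)
  {
          struct addr_req *req;

          if (event == NETEVENT_NEIGH_UPDATE) {
                  /* simplified: the real code also checks NUD_VALID */
                  spin_lock_bh(&lock);
                  list_for_each_entry(req, &req_list, list)
                          set_timeout(req, jiffies);
                  spin_unlock_bh(&lock);
          }
          return 0;
  }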

Cc: stable@vger.kernel.org
Fixes: 44e75052bc2a ("RDMA/rdma_cm: Make rdma_addr_cancel into a fence")
Link: https://lore.kernel.org/r/20200930072007.1009692-1-leon@kernel.org
Reported-by: Dan Aloni <dan@kernelim.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/infiniband/core/addr.c

index 1753a9801b704e6c34f20c99a08e2e799c26285f..8beed4197e73e8e729bf6a2f4e4bd42a8bfc589f 100644 (file)
@@ -645,13 +645,12 @@ static void process_one_req(struct work_struct *_work)
        req->callback = NULL;
 
        spin_lock_bh(&lock);
+       /*
+        * Although the work will normally have been canceled by the workqueue,
+        * it can still be requeued as long as it is on the req_list.
+        */
+       cancel_delayed_work(&req->work);
        if (!list_empty(&req->list)) {
-               /*
-                * Although the work will normally have been canceled by the
-                * workqueue, it can still be requeued as long as it is on the
-                * req_list.
-                */
-               cancel_delayed_work(&req->work);
                list_del_init(&req->list);
                kfree(req);
        }
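
With cancel_delayed_work() hoisted out of the list_empty() check, it now runs
unconditionally under &lock, so a set_timeout() that raced in after the
callback finished is undone before the request's fate is decided.
rdma_addr_cancel(), which removes the req from req_list under the same lock
before calling cancel_delayed_work_sync(), can then rely on the work never
being re-queued once the list entry is gone.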