]> www.infradead.org Git - users/dwmw2/linux.git/commitdiff
libceph: fix race between delayed_work() and ceph_monc_stop()
authorIlya Dryomov <idryomov@gmail.com>
Mon, 8 Jul 2024 20:37:29 +0000 (22:37 +0200)
committerIlya Dryomov <idryomov@gmail.com>
Wed, 10 Jul 2024 08:11:55 +0000 (10:11 +0200)
The way the delayed work is handled in ceph_monc_stop() is prone to
races with mon_fault() and possibly also finish_hunting().  Both of
these can requeue the delayed work which wouldn't be canceled by any of
the following code in case that happens after cancel_delayed_work_sync()
runs -- __close_session() doesn't mess with the delayed work in order
to avoid interfering with the hunting interval logic.  This part was
missed in commit b5d91704f53e ("libceph: behave in mon_fault() if
cur_mon < 0") and use-after-free can still ensue on monc and objects
that hang off of it, with monc->auth and monc->monmap being
particularly susceptible to quickly being reused.

To fix this:

- clear monc->cur_mon and monc->hunting as part of closing the session
  in ceph_monc_stop()
- bail from delayed_work() if monc->cur_mon is cleared, similar to how
  it's done in mon_fault() and finish_hunting() (based on monc->hunting)
- call cancel_delayed_work_sync() after the session is closed

Cc: stable@vger.kernel.org
Link: https://tracker.ceph.com/issues/66857
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
net/ceph/mon_client.c

index f263f7e91a219bbfa917c782ea1c8d9c4e236d9e..ab66b599ac4792d600f16eb8f22b128848d6105f 100644 (file)
@@ -1085,13 +1085,19 @@ static void delayed_work(struct work_struct *work)
        struct ceph_mon_client *monc =
                container_of(work, struct ceph_mon_client, delayed_work.work);
 
-       dout("monc delayed_work\n");
        mutex_lock(&monc->mutex);
+       dout("%s mon%d\n", __func__, monc->cur_mon);
+       if (monc->cur_mon < 0) {
+               goto out;
+       }
+
        if (monc->hunting) {
                dout("%s continuing hunt\n", __func__);
                reopen_session(monc);
        } else {
                int is_auth = ceph_auth_is_authenticated(monc->auth);
+
+               dout("%s is_authed %d\n", __func__, is_auth);
                if (ceph_con_keepalive_expired(&monc->con,
                                               CEPH_MONC_PING_TIMEOUT)) {
                        dout("monc keepalive timeout\n");
@@ -1116,6 +1122,8 @@ static void delayed_work(struct work_struct *work)
                }
        }
        __schedule_delayed(monc);
+
+out:
        mutex_unlock(&monc->mutex);
 }
 
@@ -1232,13 +1240,15 @@ EXPORT_SYMBOL(ceph_monc_init);
 void ceph_monc_stop(struct ceph_mon_client *monc)
 {
        dout("stop\n");
-       cancel_delayed_work_sync(&monc->delayed_work);
 
        mutex_lock(&monc->mutex);
        __close_session(monc);
+       monc->hunting = false;
        monc->cur_mon = -1;
        mutex_unlock(&monc->mutex);
 
+       cancel_delayed_work_sync(&monc->delayed_work);
+
        /*
         * flush msgr queue before we destroy ourselves to ensure that:
         *  - any work that references our embedded con is finished.