We got the following issue in a fuzz test of randomly issuing the restore
command:
==================================================================
BUG: KASAN: slab-use-after-free in cachefiles_ondemand_daemon_read+0x609/0xab0
Write of size 4 at addr 
ffff888109164a80 by task ondemand-04-dae/4962
CPU: 11 PID: 4962 Comm: ondemand-04-dae Not tainted 6.8.0-rc7-dirty #542
Call Trace:
 kasan_report+0x94/0xc0
 cachefiles_ondemand_daemon_read+0x609/0xab0
 vfs_read+0x169/0xb50
 ksys_read+0xf5/0x1e0
Allocated by task 626:
 __kmalloc+0x1df/0x4b0
 cachefiles_ondemand_send_req+0x24d/0x690
 cachefiles_create_tmpfile+0x249/0xb30
 cachefiles_create_file+0x6f/0x140
 cachefiles_look_up_object+0x29c/0xa60
 cachefiles_lookup_cookie+0x37d/0xca0
 fscache_cookie_state_machine+0x43c/0x1230
 [...]
Freed by task 626:
 kfree+0xf1/0x2c0
 cachefiles_ondemand_send_req+0x568/0x690
 cachefiles_create_tmpfile+0x249/0xb30
 cachefiles_create_file+0x6f/0x140
 cachefiles_look_up_object+0x29c/0xa60
 cachefiles_lookup_cookie+0x37d/0xca0
 fscache_cookie_state_machine+0x43c/0x1230
 [...]
==================================================================
Following is the process that triggers the issue:
     mount  |   daemon_thread1    |    daemon_thread2
------------------------------------------------------------
 cachefiles_ondemand_init_object
  cachefiles_ondemand_send_req
   REQ_A = kzalloc(sizeof(*req) + data_len)
   wait_for_completion(&REQ_A->done)
            cachefiles_daemon_read
             cachefiles_ondemand_daemon_read
              REQ_A = cachefiles_ondemand_select_req
              cachefiles_ondemand_get_fd
              copy_to_user(_buffer, msg, n)
            process_open_req(REQ_A)
                                  ------ restore ------
                                  cachefiles_ondemand_restore
                                  xas_for_each(&xas, req, ULONG_MAX)
                                   xas_set_mark(&xas, CACHEFILES_REQ_NEW);
                                  cachefiles_daemon_read
                                   cachefiles_ondemand_daemon_read
                                    REQ_A = cachefiles_ondemand_select_req
             write(devfd, ("copen %u,%llu", msg->msg_id, size));
             cachefiles_ondemand_copen
              xa_erase(&cache->reqs, id)
              complete(&REQ_A->done)
   kfree(REQ_A)
                                    cachefiles_ondemand_get_fd(REQ_A)
                                     fd = get_unused_fd_flags
                                     file = anon_inode_getfile
                                     fd_install(fd, file)
                                     load = (void *)REQ_A->msg.data;
                                     load->fd = fd;
                                     // load UAF !!!
This issue is caused by issuing a restore command when the daemon is still
alive, which results in a request being processed multiple times thus
triggering a UAF. So to avoid this problem, add an additional reference
count to cachefiles_req, which is held while waiting and reading, and then
released when the waiting and reading is over.
Note that since there is only one reference count for waiting, we need to
avoid the same request being completed multiple times, so we can only
complete the request if it is successfully removed from the xarray.
Fixes: e73fa11a356c ("cachefiles: add restore command to recover inflight ondemand read requests")
Suggested-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Link: https://lore.kernel.org/r/20240522114308.2402121-4-libaokun@huaweicloud.com
Acked-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jia Zhu <zhujia.zj@bytedance.com>
Reviewed-by: Jingbo Xu <jefflexu@linux.alibaba.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
 struct cachefiles_req {
        struct cachefiles_object *object;
        struct completion done;
+       refcount_t ref;
        int error;
        struct cachefiles_msg msg;
 };
 
 #include <linux/uio.h>
 #include "internal.h"
 
+static inline void cachefiles_req_put(struct cachefiles_req *req)
+{
+       if (refcount_dec_and_test(&req->ref))
+               kfree(req);
+}
+
 static int cachefiles_ondemand_fd_release(struct inode *inode,
                                          struct file *file)
 {
 
        xas_clear_mark(&xas, CACHEFILES_REQ_NEW);
        cache->req_id_next = xas.xa_index + 1;
+       refcount_inc(&req->ref);
        xa_unlock(&cache->reqs);
 
        id = xas.xa_index;
                complete(&req->done);
        }
 
+       cachefiles_req_put(req);
        return n;
 
 err_put_fd:
        if (msg->opcode == CACHEFILES_OP_OPEN)
                close_fd(((struct cachefiles_open *)msg->data)->fd);
 error:
-       xa_erase(&cache->reqs, id);
-       req->error = ret;
-       complete(&req->done);
+       xas_reset(&xas);
+       xas_lock(&xas);
+       if (xas_load(&xas) == req) {
+               req->error = ret;
+               complete(&req->done);
+               xas_store(&xas, NULL);
+       }
+       xas_unlock(&xas);
+       cachefiles_req_put(req);
        return ret;
 }
 
                goto out;
        }
 
+       refcount_set(&req->ref, 1);
        req->object = object;
        init_completion(&req->done);
        req->msg.opcode = opcode;
        wake_up_all(&cache->daemon_pollwq);
        wait_for_completion(&req->done);
        ret = req->error;
-       kfree(req);
+       cachefiles_req_put(req);
        return ret;
 out:
        /* Reset the object to close state in error handling path.