www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
rpcrdma: Implement generic device removal
author Chuck Lever <chuck.lever@oracle.com>
Tue, 4 Jun 2024 19:45:24 +0000 (15:45 -0400)
committer Anna Schumaker <Anna.Schumaker@Netapp.com>
Mon, 8 Jul 2024 17:47:24 +0000 (13:47 -0400)
Commit e87a911fed07 ("nvme-rdma: use ib_client API to detect device
removal") explains the benefits of handling device removal outside
of the CM event handler.

Sketch in an IB device removal notification mechanism that can be
used by both the client and server side RPC-over-RDMA transport
implementations.

Suggested-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
include/linux/sunrpc/rdma_rn.h [new file with mode: 0644]
include/trace/events/rpcrdma.h
net/sunrpc/xprtrdma/Makefile
net/sunrpc/xprtrdma/ib_client.c [new file with mode: 0644]
net/sunrpc/xprtrdma/module.c

diff --git a/include/linux/sunrpc/rdma_rn.h b/include/linux/sunrpc/rdma_rn.h
new file mode 100644 (file)
index 0000000..7d032ca
--- /dev/null
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates.
+ */
+
+#ifndef _LINUX_SUNRPC_RDMA_RN_H
+#define _LINUX_SUNRPC_RDMA_RN_H
+
+#include <rdma/ib_verbs.h>
+
+/**
+ * struct rpcrdma_notification - request removal notification
+ * @rn_done: callback invoked when the monitored device is removed;
+ *           set by rpcrdma_rn_register()
+ * @rn_index: index assigned to this notification object by
+ *            rpcrdma_rn_register(); used by rpcrdma_rn_unregister()
+ *            to find and erase the registration
+ */
+struct rpcrdma_notification {
+       void                    (*rn_done)(struct rpcrdma_notification *rn);
+       u32                     rn_index;
+};
+
+/* Register or unregister @rn for removal notifications on @device */
+int rpcrdma_rn_register(struct ib_device *device,
+                       struct rpcrdma_notification *rn,
+                       void (*done)(struct rpcrdma_notification *rn));
+void rpcrdma_rn_unregister(struct ib_device *device,
+                          struct rpcrdma_notification *rn);
+/* Register or unregister the rpcrdma ib_client itself */
+int rpcrdma_ib_client_register(void);
+void rpcrdma_ib_client_unregister(void);
+
+#endif /* _LINUX_SUNRPC_RDMA_RN_H */
index 14392652273ad5f23c55a06c6896cf8ae2bb59f1..ecdaf088219daf5f1c6f887d536ab80fc3642be9 100644 (file)
@@ -2220,6 +2220,40 @@ TRACE_EVENT(svcrdma_sq_post_err,
        )
 );
 
+/*
+ * Event class for rpcrdma ib_client events. Each event takes the
+ * ib_device it concerns and records only that device's name.
+ */
+DECLARE_EVENT_CLASS(rpcrdma_client_device_class,
+       TP_PROTO(
+               const struct ib_device *device
+       ),
+
+       TP_ARGS(device),
+
+       TP_STRUCT__entry(
+               __string(name, device->name)
+       ),
+
+       TP_fast_assign(
+               __assign_str(name);
+       ),
+
+       TP_printk("device=%s",
+               __get_str(name)
+       )
+);
+
+/* Define one event of class rpcrdma_client_device_class named @name */
+#define DEFINE_CLIENT_DEVICE_EVENT(name)                               \
+       DEFINE_EVENT(rpcrdma_client_device_class, name,                 \
+               TP_PROTO(                                               \
+                       const struct ib_device *device                  \
+               ),                                                      \
+               TP_ARGS(device)                                         \
+       )
+
+/* Emitted from the ib_client callbacks and helpers in ib_client.c */
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_completion);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_add_one);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_wait_on);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one_done);
+
 #endif /* _TRACE_RPCRDMA_H */
 
 #include <trace/define_trace.h>
index 55b21bae866db88e3f9936b8f375b24db8a776c8..3232aa23cdb49612b34f40d67b61f5a797634685 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
 
-rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \
+rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o ib_client.o \
        svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
        svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
        svc_rdma_pcl.o module.o
diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c
new file mode 100644 (file)
index 0000000..a938c19
--- /dev/null
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (c) 2024 Oracle.  All rights reserved.
+ */
+
+/* #include <linux/module.h>
+#include <linux/slab.h> */
+#include <linux/xarray.h>
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/completion.h>
+
+#include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/rdma_rn.h>
+
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
+
+/*
+ * Per-ib_device private data for rpcrdma
+ *
+ * rd_kref:   initialized in rpcrdma_add_one(); rpcrdma_rn_register()
+ *            takes an additional reference for each registration
+ * rd_flags:  bit flags; see RPCRDMA_RD_F_REMOVING
+ * rd_device: the ib_device this object was allocated for
+ * rd_xa:     registered rpcrdma_notification objects, keyed by rn_index
+ * rd_done:   completed by the kref release callback; awaited by
+ *            rpcrdma_remove_one()
+ */
+struct rpcrdma_device {
+       struct kref             rd_kref;
+       unsigned long           rd_flags;
+       struct ib_device        *rd_device;
+       struct xarray           rd_xa;
+       struct completion       rd_done;
+};
+
+/* Bit number in rd_flags: set by rpcrdma_remove_one() when removal begins */
+#define RPCRDMA_RD_F_REMOVING  (0)
+
+static struct ib_client rpcrdma_ib_client;
+
+/*
+ * ib_get_client_data() does not check whether @device is NULL, and
+ * listeners have no associated device (they are never registered).
+ * Filter out that case here and return NULL for it.
+ */
+static struct rpcrdma_device *rpcrdma_get_client_data(struct ib_device *device)
+{
+       return device ? ib_get_client_data(device, &rpcrdma_ib_client) : NULL;
+}
+
+static void rpcrdma_rn_release(struct kref *kref);
+
+/**
+ * rpcrdma_rn_register - register to get device removal notifications
+ * @device: device to monitor
+ * @rn: notification object that wishes to be notified
+ * @done: callback to notify caller of device removal
+ *
+ * Returns zero on success. The callback in rn_done is guaranteed
+ * to be invoked when the device is removed, unless this notification
+ * is unregistered first.
+ *
+ * On failure, a negative errno is returned and no reference on the
+ * device's private data is retained: -ENETUNREACH if @device has no
+ * rpcrdma private data or is being removed, -ENOMEM if the
+ * notification could not be recorded.
+ */
+int rpcrdma_rn_register(struct ib_device *device,
+                       struct rpcrdma_notification *rn,
+                       void (*done)(struct rpcrdma_notification *rn))
+{
+       struct rpcrdma_device *rd = rpcrdma_get_client_data(device);
+
+       if (!rd || test_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags))
+               return -ENETUNREACH;
+
+       /*
+        * Set the callback before @rn becomes visible in rd_xa:
+        * rpcrdma_remove_one() invokes rn_done on every entry it
+        * finds there.
+        */
+       rn->rn_done = done;
+
+       kref_get(&rd->rd_kref);
+       if (xa_alloc(&rd->rd_xa, &rn->rn_index, rn, xa_limit_32b,
+                    GFP_KERNEL) < 0) {
+               /*
+                * Drop the reference taken above. Leaking it would
+                * make rpcrdma_remove_one() wait forever for a
+                * registrant that does not exist.
+                */
+               kref_put(&rd->rd_kref, rpcrdma_rn_release);
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+/*
+ * kref release callback for rd_kref: signal rd_done so that a waiter
+ * in rpcrdma_remove_one() can proceed.
+ */
+static void rpcrdma_rn_release(struct kref *kref)
+{
+       struct rpcrdma_device *rd;
+
+       rd = container_of(kref, struct rpcrdma_device, rd_kref);
+       trace_rpcrdma_client_completion(rd->rd_device);
+       complete(&rd->rd_done);
+}
+
+/**
+ * rpcrdma_rn_unregister - stop device removal notifications
+ * @device: monitored device
+ * @rn: notification object that no longer wishes to be notified
+ *
+ * Does nothing when @device has no rpcrdma private data. Otherwise
+ * @rn is erased from the device's notification set and the
+ * reference taken at registration is dropped.
+ */
+void rpcrdma_rn_unregister(struct ib_device *device,
+                          struct rpcrdma_notification *rn)
+{
+       struct rpcrdma_device *rd;
+
+       rd = rpcrdma_get_client_data(device);
+       if (rd) {
+               xa_erase(&rd->rd_xa, rn->rn_index);
+               kref_put(&rd->rd_kref, rpcrdma_rn_release);
+       }
+}
+
+/**
+ * rpcrdma_add_one - ib_client device insertion callback
+ * @device: device about to be inserted
+ *
+ * Allocates and initializes the rpcrdma private data for @device and
+ * attaches it as this client's data.
+ *
+ * Returns zero on success, or -ENOMEM if the private data could not
+ * be allocated.
+ */
+static int rpcrdma_add_one(struct ib_device *device)
+{
+       struct rpcrdma_device *rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+
+       if (!rd)
+               return -ENOMEM;
+
+       rd->rd_device = device;
+       kref_init(&rd->rd_kref);
+       init_completion(&rd->rd_done);
+       xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC1);
+
+       /* Attach only once every field has been initialized */
+       ib_set_client_data(device, &rpcrdma_ib_client, rd);
+
+       trace_rpcrdma_client_add_one(device);
+       return 0;
+}
+
+/**
+ * rpcrdma_remove_one - ib_client device removal callback
+ * @device: device about to be removed
+ * @client_data: this module's private per-device data
+ *
+ * Marks the device as being removed, invokes the rn_done callback of
+ * every registered notification, then waits until all registrants
+ * have unregistered before freeing the private data.
+ *
+ * Upon return, all transports associated with @device have divested
+ * themselves from IB hardware resources.
+ */
+static void rpcrdma_remove_one(struct ib_device *device,
+                              void *client_data)
+{
+       struct rpcrdma_device *rd = client_data;
+       struct rpcrdma_notification *rn;
+       unsigned long index;
+
+       trace_rpcrdma_client_remove_one(device);
+
+       /* From here on rpcrdma_rn_register() refuses new registrations */
+       set_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags);
+       xa_for_each(&rd->rd_xa, index, rn)
+               rn->rn_done(rn);
+
+       /*
+        * Wait only if there are still outstanding notification
+        * registrants for this device.
+        */
+       if (!refcount_dec_and_test(&rd->rd_kref.refcount)) {
+               trace_rpcrdma_client_wait_on(device);
+               wait_for_completion(&rd->rd_done);
+       }
+
+       trace_rpcrdma_client_remove_one_done(device);
+
+       /*
+        * Pairs with xa_init_flags() in rpcrdma_add_one(): release
+        * any memory the xarray still holds before freeing the
+        * structure that embeds it.
+        */
+       xa_destroy(&rd->rd_xa);
+       kfree(rd);
+}
+
+/* ib_client through which rpcrdma is told of device insertion and removal */
+static struct ib_client rpcrdma_ib_client = {
+       .name           = "rpcrdma",
+       .add            = rpcrdma_add_one,
+       .remove         = rpcrdma_remove_one,
+};
+
+/**
+ * rpcrdma_ib_client_unregister - unregister ib_client for xprtrdma
+ *
+ * Passes rpcrdma_ib_client to ib_unregister_client().
+ *
+ * cel: watch for orphaned rpcrdma_device objects on module unload
+ */
+void rpcrdma_ib_client_unregister(void)
+{
+       ib_unregister_client(&rpcrdma_ib_client);
+}
+
+/**
+ * rpcrdma_ib_client_register - register ib_client for rpcrdma
+ *
+ * Passes rpcrdma_ib_client to ib_register_client() and returns its
+ * result unchanged.
+ *
+ * Returns zero on success, or a negative errno.
+ */
+int rpcrdma_ib_client_register(void)
+{
+       return ib_register_client(&rpcrdma_ib_client);
+}
index 45c5b41ac8dc90e0043d4f795c23a42afe4ac6ad..697f571d4c0189198ff995243185ee1af628afa3 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/rdma_rn.h>
 
 #include <asm/swab.h>
 
@@ -30,21 +31,32 @@ static void __exit rpc_rdma_cleanup(void)
 {
        xprt_rdma_cleanup();
        svc_rdma_cleanup();
+       rpcrdma_ib_client_unregister();
 }
 
 static int __init rpc_rdma_init(void)
 {
        int rc;
 
+       rc = rpcrdma_ib_client_register();
+       if (rc)
+               goto out_rc;
+
        rc = svc_rdma_init();
        if (rc)
-               goto out;
+               goto out_ib_client;
 
        rc = xprt_rdma_init();
        if (rc)
-               svc_rdma_cleanup();
+               goto out_svc_rdma;
 
-out:
+       return 0;
+
+out_svc_rdma:
+       svc_rdma_cleanup();
+out_ib_client:
+       rpcrdma_ib_client_unregister();
+out_rc:
        return rc;
 }