]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: double free rdma_cm_id
author: Bang Nguyen <bang.nguyen@oracle.com>
Tue, 20 Aug 2013 14:27:21 +0000 (07:27 -0700)
committer: Mukesh Kacker <mukesh.kacker@oracle.com>
Wed, 8 Jul 2015 20:59:45 +0000 (13:59 -0700)
RDS currently offloads rdma_destroy_id() to an aux thread as part of the
connection shutdown. This was to work around a bug in which rdma_destroy_id()
could block and cause RDS reconnect to hang. By queuing the rdma_destroy_id()
work, we unfortunately open up a timing window in which the pending
CMA_ADDR_QUERY request might not get canceled right away and can race with
rdma_destroy_id().

In this case, rdma_destroy_id() gets called and frees the cm id. Then,
CMA_ADDR_QUERY completes and calls the RDS event handler, which calls
rds_resolve_route on the destroyed cm id. The event handler returns failure,
which causes RDMA CM to call rdma_destroy_id() again on the same cm id!
Hence the problem.

Since the rdma_destroy_id() bug has been fixed by MLX to offload the blocking
operation to the worker thread, RDS no longer needs to queue up
rdma_destroy_id(). This closes up the window above and fixes the problem.

Orabug: 17192816

Signed-off-by: Richard Frank <richard.frank@oracle.com>
(cherry picked from commit 3fec98717bf926d869d049e17baad849d1ba7d78)

net/rds/ib.h
net/rds/ib_cm.c

index e3a6c422cbc9121434b738b888a8950d566cd10b..823ec806b48f2757d7860462dbdc29e31b175a5c 100644 (file)
@@ -137,11 +137,6 @@ struct rds_ib_path {
        union ib_gid    p_dgid;
 };
 
-struct rds_ib_destroy_id_work {
-       struct delayed_work             work;
-       struct rdma_cm_id               *cm_id;
-};
-
 struct rds_ib_migrate_work {
        struct delayed_work             work;
        struct rds_ib_connection        *ic;
index 11e2a5f831fa18a2bcc46613d384ef0a1fdac743..8be0141b4b56220708232349ef5cf3a43dda0848 100644 (file)
@@ -1002,22 +1002,10 @@ void rds_ib_check_migration(struct rds_connection *conn,
 }
 #endif
 
-static void rds_ib_destroy_id(struct work_struct *_work)
-{
-       struct rds_ib_destroy_id_work *work =
-               container_of(_work, struct rds_ib_destroy_id_work, work.work);
-       struct rdma_cm_id        *cm_id = work->cm_id;
-
-       rdma_destroy_id(cm_id);
-
-       kfree(work);
-}
-
 int rds_ib_conn_connect(struct rds_connection *conn)
 {
        struct rds_ib_connection *ic = conn->c_transport_data;
        struct sockaddr_in src, dest;
-       struct rds_ib_destroy_id_work *work;
        int ret;
 
        /* XXX I wonder what affect the port space has */
@@ -1047,13 +1035,7 @@ int rds_ib_conn_connect(struct rds_connection *conn)
        if (ret) {
                rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id,
                         ret);
-               work = kzalloc(sizeof *work, GFP_KERNEL);
-               if (work) {
-                       work->cm_id = ic->i_cm_id;
-                       INIT_DELAYED_WORK(&work->work, rds_ib_destroy_id);
-                       queue_delayed_work(rds_aux_wq, &work->work, 0);
-               } else
-                       rdma_destroy_id(ic->i_cm_id);
+               rdma_destroy_id(ic->i_cm_id);
 
                ic->i_cm_id = NULL;
        }
@@ -1070,7 +1052,6 @@ out:
 void rds_ib_conn_shutdown(struct rds_connection *conn)
 {
        struct rds_ib_connection *ic = conn->c_transport_data;
-       struct rds_ib_destroy_id_work *work;
        int err = 0;
 
        rdsdebug("cm %p pd %p cq %p qp %p\n", ic->i_cm_id,
@@ -1149,17 +1130,7 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
                if (ic->i_recvs)
                        rds_ib_recv_clear_ring(ic);
 
-               /*
-                * rdma_destroy_id may block so offload it to the aux
-                * thread for processing.
-                */
-               work = kzalloc(sizeof *work, GFP_KERNEL);
-               if (work) {
-                       work->cm_id = ic->i_cm_id;
-                       INIT_DELAYED_WORK(&work->work, rds_ib_destroy_id);
-                       queue_delayed_work(rds_aux_wq, &work->work, 0);
-               } else
-                       rdma_destroy_id(ic->i_cm_id);
+               rdma_destroy_id(ic->i_cm_id);
 
                /*
                 * Move connection back to the nodev list.