www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: fix races and other problems with rmmod and device removal
author: Zach Brown <zach.brown@oracle.com>
Fri, 3 Feb 2012 16:07:18 +0000 (11:07 -0500)
committer: Mukesh Kacker <mukesh.kacker@oracle.com>
Tue, 7 Jul 2015 23:41:24 +0000 (16:41 -0700)
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Signed-off-by: Bang Nguyen <bang.nguyen@oracle.com>
net/rds/ib.c
net/rds/ib.h
net/rds/ib_rdma.c
net/rds/ib_recv.c
net/rds/threads.c

index 0649e72f231e8e7e4c8d2bc08981609dadf4bd17..b3aabfa3ed7a66102c1e749915d2b95afb6c4a93 100644 (file)
@@ -350,6 +350,7 @@ void rds_ib_exit(void)
        rds_ib_sysctl_exit();
        rds_ib_recv_exit();
        rds_trans_unregister(&rds_ib_transport);
+       rds_ib_fmr_exit();
 }
 
 struct rds_transport rds_ib_transport = {
@@ -385,15 +386,21 @@ int __init rds_ib_init(void)
 
        INIT_LIST_HEAD(&rds_ib_devices);
 
-       ret = ib_register_client(&rds_ib_client);
+       ret = rds_ib_fmr_init();
        if (ret)
                goto out;
 
+       ret = ib_register_client(&rds_ib_client);
+       if (ret)
+               goto out_fmr_exit;
+
        ret = rds_ib_sysctl_init();
        if (ret)
                goto out_ibreg;
 
-       rds_ib_recv_init();
+       ret = rds_ib_recv_init();
+       if (ret)
+               goto out_sysctl;
 
        ret = rds_trans_register(&rds_ib_transport);
        if (ret)
@@ -405,9 +412,12 @@ int __init rds_ib_init(void)
 
 out_recv:
        rds_ib_recv_exit();
+out_sysctl:
        rds_ib_sysctl_exit();
 out_ibreg:
        rds_ib_unregister_client();
+out_fmr_exit:
+       rds_ib_fmr_exit();
 out:
        return ret;
 }
index 91fc430cd328f73cfa85a2d988ef724b01d5f739..8963a6395e0e6965303c12735534be3a649b6542 100644 (file)
@@ -325,9 +325,11 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 void rds_ib_sync_mr(void *trans_private, int dir);
 void rds_ib_free_mr(void *trans_private, int invalidate);
 void rds_ib_flush_mrs(void);
+int __init rds_ib_fmr_init(void);
+void __exit rds_ib_fmr_exit(void);
 
 /* ib_recv.c */
-void __init rds_ib_recv_init(void);
+int __init rds_ib_recv_init(void);
 void rds_ib_recv_exit(void);
 int rds_ib_recv(struct rds_connection *conn);
 int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
index d4303a730c4143a2c9e8a285d72e35a126c6d43b..c835c07de4b3d4a8c316c93862eee5049af9bd0d 100644 (file)
@@ -689,6 +689,26 @@ out_nolock:
        return ret;
 }
 
+struct workqueue_struct *rds_ib_fmr_wq;
+
+int __init rds_ib_fmr_init(void)
+{
+       rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
+       if (!rds_ib_fmr_wq)
+               return -ENOMEM;
+       return 0;
+}
+
+/*
+ * By the time this is called all the IB devices should have been torn down and
+ * had their pools freed.  As each pool is freed its work struct is waited on,
+ * so the pool flushing work queue should be idle by the time we get here.
+ */
+void __exit rds_ib_fmr_exit(void)
+{
+       destroy_workqueue(rds_ib_fmr_wq);
+}
+
 static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
 {
        struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
@@ -716,7 +736,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
        /* If we've pinned too many pages, request a flush */
        if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned
         || atomic_read(&pool->dirty_count) >= pool->max_items / 10)
-               queue_delayed_work(rds_wq, &pool->flush_worker, 10);
+               queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
 
        if (invalidate) {
                if (likely(!in_interrupt())) {
@@ -724,7 +744,8 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
                } else {
                        /* We get here if the user created a MR marked
                         * as use_once and invalidate at the same time. */
-                       queue_delayed_work(rds_wq, &pool->flush_worker, 10);
+                       queue_delayed_work(rds_ib_fmr_wq,
+                                          &pool->flush_worker, 10);
                }
        }
 
index 5131796dab576282aa7352a300c73c4e60802ebb..a9a02e9b4505a219bc60e311e50d045e4a975725 100644 (file)
@@ -939,23 +939,22 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
 
        ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
 
-       /*
-        * Also process recvs in connecting state because it is possible
-        * to get a recv completion _before_ the rdmacm ESTABLISHED
-        * event is processed.
-        */
-       if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
+       if (wc->status == IB_WC_SUCCESS) {
+               rds_ib_process_recv(conn, recv, wc->byte_len, state);
+       } else {
                /* We expect errors as the qp is drained during shutdown */
-               if (wc->status == IB_WC_SUCCESS) {
-                       rds_ib_process_recv(conn, recv, wc->byte_len, state);
-               } else {
+               if (rds_conn_up(conn) || rds_conn_connecting(conn))
                        rds_ib_conn_error(conn, "recv completion on "
                                          "%pI4 had status %u, disconnecting and "
                                          "reconnecting\n", &conn->c_faddr,
                                          wc->status);
-               }
        }
 
+       /*
+        * It's very important that we only free this ring entry if we've truly
+        * freed the resources allocated to the entry.  The refilling path can
+        * leak if we don't.
+        */
        rds_ib_ring_free(&ic->i_recv_ring, 1);
 
        /* If we ever end up with a really empty receive ring, we're
@@ -980,7 +979,7 @@ int rds_ib_recv(struct rds_connection *conn)
        return ret;
 }
 
-void __init rds_ib_recv_init(void)
+int __init rds_ib_recv_init(void)
 {
        struct sysinfo si;
 
@@ -990,10 +989,19 @@ void __init rds_ib_recv_init(void)
 
        rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
                                        sizeof(struct rds_ib_incoming),
-                                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, NULL);
+                                       SLAB_HWCACHE_ALIGN, 0, NULL);
+       if (!rds_ib_incoming_slab)
+               return -ENOMEM;
+
        rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
                                        sizeof(struct rds_page_frag),
-                                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, NULL);
+                                       SLAB_HWCACHE_ALIGN, 0, NULL);
+       if (!rds_ib_frag_slab) {
+               kmem_cache_destroy(rds_ib_incoming_slab);
+               rds_ib_incoming_slab = NULL;
+               return -ENOMEM;
+       }
+       return 0;
 }
 
 void rds_ib_recv_exit(void)
index 2bab9bf07b91e48536c9202ae98c8b0e225996f2..c08c220efac5cf36023c51ffd0c774dc266b1502 100644 (file)
@@ -214,7 +214,7 @@ void rds_threads_exit(void)
 
 int __init rds_threads_init(void)
 {
-       rds_wq = create_workqueue("krdsd");
+       rds_wq = create_singlethread_workqueue("krdsd");
        if (!rds_wq)
                return -ENOMEM;