www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: fix races and other problems with rmmod and device removal
author: Zach Brown <zach.brown@oracle.com>
Fri, 3 Feb 2012 16:07:18 +0000 (11:07 -0500)
committer: Mukesh Kacker <mukesh.kacker@oracle.com>
Tue, 7 Jul 2015 23:41:24 +0000 (16:41 -0700)
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Signed-off-by: Bang Nguyen <bang.nguyen@oracle.com>
net/rds/ib.c
net/rds/ib.h
net/rds/ib_rdma.c
net/rds/ib_recv.c
net/rds/threads.c

index 0649e72f231e8e7e4c8d2bc08981609dadf4bd17..b3aabfa3ed7a66102c1e749915d2b95afb6c4a93 100644 (file)
@@ -350,6 +350,7 @@ void rds_ib_exit(void)
        rds_ib_sysctl_exit();
        rds_ib_recv_exit();
        rds_trans_unregister(&rds_ib_transport);
+       rds_ib_fmr_exit();
 }
 
 struct rds_transport rds_ib_transport = {
@@ -385,15 +386,21 @@ int __init rds_ib_init(void)
 
        INIT_LIST_HEAD(&rds_ib_devices);
 
-       ret = ib_register_client(&rds_ib_client);
+       ret = rds_ib_fmr_init();
        if (ret)
                goto out;
 
+       ret = ib_register_client(&rds_ib_client);
+       if (ret)
+               goto out_fmr_exit;
+
        ret = rds_ib_sysctl_init();
        if (ret)
                goto out_ibreg;
 
-       rds_ib_recv_init();
+       ret = rds_ib_recv_init();
+       if (ret)
+               goto out_sysctl;
 
        ret = rds_trans_register(&rds_ib_transport);
        if (ret)
@@ -405,9 +412,12 @@ int __init rds_ib_init(void)
 
 out_recv:
        rds_ib_recv_exit();
+out_sysctl:
        rds_ib_sysctl_exit();
 out_ibreg:
        rds_ib_unregister_client();
+out_fmr_exit:
+       rds_ib_fmr_exit();
 out:
        return ret;
 }
index 91fc430cd328f73cfa85a2d988ef724b01d5f739..8963a6395e0e6965303c12735534be3a649b6542 100644 (file)
@@ -325,9 +325,11 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 void rds_ib_sync_mr(void *trans_private, int dir);
 void rds_ib_free_mr(void *trans_private, int invalidate);
 void rds_ib_flush_mrs(void);
+int __init rds_ib_fmr_init(void);
+void __exit rds_ib_fmr_exit(void);
 
 /* ib_recv.c */
-void __init rds_ib_recv_init(void);
+int __init rds_ib_recv_init(void);
 void rds_ib_recv_exit(void);
 int rds_ib_recv(struct rds_connection *conn);
 int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
index d4303a730c4143a2c9e8a285d72e35a126c6d43b..c835c07de4b3d4a8c316c93862eee5049af9bd0d 100644 (file)
@@ -689,6 +689,26 @@ out_nolock:
        return ret;
 }
 
+struct workqueue_struct *rds_ib_fmr_wq;
+
+int __init rds_ib_fmr_init(void)
+{
+       rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
+       if (!rds_ib_fmr_wq)
+               return -ENOMEM;
+       return 0;
+}
+
+/*
+ * By the time this is called all the IB devices should have been torn down and
+ * had their pools freed.  As each pool is freed its work struct is waited on,
+ * so the pool flushing work queue should be idle by the time we get here.
+ */
+void __exit rds_ib_fmr_exit(void)
+{
+       destroy_workqueue(rds_ib_fmr_wq);
+}
+
 static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
 {
        struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
@@ -716,7 +736,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
        /* If we've pinned too many pages, request a flush */
        if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned
         || atomic_read(&pool->dirty_count) >= pool->max_items / 10)
-               queue_delayed_work(rds_wq, &pool->flush_worker, 10);
+               queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
 
        if (invalidate) {
                if (likely(!in_interrupt())) {
@@ -724,7 +744,8 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
                } else {
                        /* We get here if the user created a MR marked
                         * as use_once and invalidate at the same time. */
-                       queue_delayed_work(rds_wq, &pool->flush_worker, 10);
+                       queue_delayed_work(rds_ib_fmr_wq,
+                                          &pool->flush_worker, 10);
                }
        }
 
index 5131796dab576282aa7352a300c73c4e60802ebb..a9a02e9b4505a219bc60e311e50d045e4a975725 100644 (file)
@@ -939,23 +939,22 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
 
        ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
 
-       /*
-        * Also process recvs in connecting state because it is possible
-        * to get a recv completion _before_ the rdmacm ESTABLISHED
-        * event is processed.
-        */
-       if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
+       if (wc->status == IB_WC_SUCCESS) {
+               rds_ib_process_recv(conn, recv, wc->byte_len, state);
+       } else {
                /* We expect errors as the qp is drained during shutdown */
-               if (wc->status == IB_WC_SUCCESS) {
-                       rds_ib_process_recv(conn, recv, wc->byte_len, state);
-               } else {
+               if (rds_conn_up(conn) || rds_conn_connecting(conn))
                        rds_ib_conn_error(conn, "recv completion on "
                                          "%pI4 had status %u, disconnecting and "
                                          "reconnecting\n", &conn->c_faddr,
                                          wc->status);
-               }
        }
 
+       /*
+        * It's very important that we only free this ring entry if we've truly
+        * freed the resources allocated to the entry.  The refilling path can
+        * leak if we don't.
+        */
        rds_ib_ring_free(&ic->i_recv_ring, 1);
 
        /* If we ever end up with a really empty receive ring, we're
@@ -980,7 +979,7 @@ int rds_ib_recv(struct rds_connection *conn)
        return ret;
 }
 
-void __init rds_ib_recv_init(void)
+int __init rds_ib_recv_init(void)
 {
        struct sysinfo si;
 
@@ -990,10 +989,19 @@ void __init rds_ib_recv_init(void)
 
        rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
                                        sizeof(struct rds_ib_incoming),
-                                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, NULL);
+                                       SLAB_HWCACHE_ALIGN, 0, NULL);
+       if (!rds_ib_incoming_slab)
+               return -ENOMEM;
+
        rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
                                        sizeof(struct rds_page_frag),
-                                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, NULL);
+                                       SLAB_HWCACHE_ALIGN, 0, NULL);
+       if (!rds_ib_frag_slab) {
+               kmem_cache_destroy(rds_ib_incoming_slab);
+               rds_ib_incoming_slab = NULL;
+               return -ENOMEM;
+       }
+       return 0;
 }
 
 void rds_ib_recv_exit(void)
index 2bab9bf07b91e48536c9202ae98c8b0e225996f2..c08c220efac5cf36023c51ffd0c774dc266b1502 100644 (file)
@@ -214,7 +214,7 @@ void rds_threads_exit(void)
 
 int __init rds_threads_init(void)
 {
-       rds_wq = create_workqueue("krdsd");
+       rds_wq = create_singlethread_workqueue("krdsd");
        if (!rds_wq)
                return -ENOMEM;