From: Zach Brown
Date: Fri, 3 Feb 2012 16:07:18 +0000 (-0500)
Subject: RDS: fix races and other problems with rmmod and device removal
X-Git-Tag: v4.1.12-92~319^2^2~2^2~78
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=206b5d73954cb03e69a7c052aa5655b62d563e8f;p=users%2Fjedix%2Flinux-maple.git

RDS: fix races and other problems with rmmod and device removal

Signed-off-by: Chris Mason
Signed-off-by: Bang Nguyen
---
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 0649e72f231e..b3aabfa3ed7a 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -350,6 +350,7 @@ void rds_ib_exit(void)
 	rds_ib_sysctl_exit();
 	rds_ib_recv_exit();
 	rds_trans_unregister(&rds_ib_transport);
+	rds_ib_fmr_exit();
 }
 
 struct rds_transport rds_ib_transport = {
@@ -385,15 +386,21 @@ int __init rds_ib_init(void)
 
 	INIT_LIST_HEAD(&rds_ib_devices);
 
-	ret = ib_register_client(&rds_ib_client);
+	ret = rds_ib_fmr_init();
 	if (ret)
 		goto out;
 
+	ret = ib_register_client(&rds_ib_client);
+	if (ret)
+		goto out_fmr_exit;
+
 	ret = rds_ib_sysctl_init();
 	if (ret)
 		goto out_ibreg;
 
-	rds_ib_recv_init();
+	ret = rds_ib_recv_init();
+	if (ret)
+		goto out_sysctl;
 
 	ret = rds_trans_register(&rds_ib_transport);
 	if (ret)
@@ -405,9 +412,12 @@ int __init rds_ib_init(void)
 
 out_recv:
 	rds_ib_recv_exit();
+out_sysctl:
 	rds_ib_sysctl_exit();
 out_ibreg:
 	rds_ib_unregister_client();
+out_fmr_exit:
+	rds_ib_fmr_exit();
 out:
 	return ret;
 }
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 91fc430cd328..8963a6395e0e 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -325,9 +325,11 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 void rds_ib_sync_mr(void *trans_private, int dir);
 void rds_ib_free_mr(void *trans_private, int invalidate);
 void rds_ib_flush_mrs(void);
+int __init rds_ib_fmr_init(void);
+void __exit rds_ib_fmr_exit(void);
 
 /* ib_recv.c */
-void __init rds_ib_recv_init(void);
+int __init rds_ib_recv_init(void);
 void rds_ib_recv_exit(void);
 int rds_ib_recv(struct rds_connection *conn);
 int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index d4303a730c41..c835c07de4b3 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -689,6 +689,26 @@ out_nolock:
 	return ret;
 }
 
+struct workqueue_struct *rds_ib_fmr_wq;
+
+int __init rds_ib_fmr_init(void)
+{
+	rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
+	if (!rds_ib_fmr_wq)
+		return -ENOMEM;
+	return 0;
+}
+
+/*
+ * By the time this is called all the IB devices should have been torn down and
+ * had their pools freed. As each pool is freed its work struct is waited on,
+ * so the pool flushing work queue should be idle by the time we get here.
+ */
+void __exit rds_ib_fmr_exit(void)
+{
+	destroy_workqueue(rds_ib_fmr_wq);
+}
+
 static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
 {
 	struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
@@ -716,7 +736,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
 	/* If we've pinned too many pages, request a flush */
 	if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
 	    atomic_read(&pool->dirty_count) >= pool->max_items / 10)
-		queue_delayed_work(rds_wq, &pool->flush_worker, 10);
+		queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
 
 	if (invalidate) {
 		if (likely(!in_interrupt())) {
@@ -724,7 +744,8 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
 		} else {
 			/* We get here if the user created a MR marked
 			 * as use_once and invalidate at the same time. */
-			queue_delayed_work(rds_wq, &pool->flush_worker, 10);
+			queue_delayed_work(rds_ib_fmr_wq,
+					   &pool->flush_worker, 10);
 		}
 	}
 
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 5131796dab57..a9a02e9b4505 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -939,23 +939,22 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
 	ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1,
 			DMA_FROM_DEVICE);
 
-	/*
-	 * Also process recvs in connecting state because it is possible
-	 * to get a recv completion _before_ the rdmacm ESTABLISHED
-	 * event is processed.
-	 */
-	if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
+	if (wc->status == IB_WC_SUCCESS) {
+		rds_ib_process_recv(conn, recv, wc->byte_len, state);
+	} else {
 		/* We expect errors as the qp is drained during shutdown */
-		if (wc->status == IB_WC_SUCCESS) {
-			rds_ib_process_recv(conn, recv, wc->byte_len, state);
-		} else {
+		if (rds_conn_up(conn) || rds_conn_connecting(conn))
 			rds_ib_conn_error(conn, "recv completion on "
 			       "%pI4 had status %u, disconnecting and "
 			       "reconnecting\n", &conn->c_faddr,
 			       wc->status);
-		}
 	}
 
+	/*
+	 * It's very important that we only free this ring entry if we've truly
+	 * freed the resources allocated to the entry. The refilling path can
+	 * leak if we don't.
+	 */
 	rds_ib_ring_free(&ic->i_recv_ring, 1);
 
 	/* If we ever end up with a really empty receive ring, we're
@@ -980,7 +979,7 @@ int rds_ib_recv(struct rds_connection *conn)
 	return ret;
 }
 
-void __init rds_ib_recv_init(void)
+int __init rds_ib_recv_init(void)
 {
 	struct sysinfo si;
 
@@ -990,10 +989,19 @@ void __init rds_ib_recv_init(void)
 
 	rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
 					sizeof(struct rds_ib_incoming),
-					SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, NULL);
+					SLAB_HWCACHE_ALIGN, 0, NULL);
+	if (!rds_ib_incoming_slab)
+		return -ENOMEM;
+
 	rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
 					sizeof(struct rds_page_frag),
-					SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, NULL);
+					SLAB_HWCACHE_ALIGN, 0, NULL);
+	if (!rds_ib_frag_slab) {
+		kmem_cache_destroy(rds_ib_incoming_slab);
+		rds_ib_incoming_slab = NULL;
+		return -ENOMEM;
+	}
+	return 0;
 }
 
 void rds_ib_recv_exit(void)
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 2bab9bf07b91..c08c220efac5 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -214,7 +214,7 @@ void rds_threads_exit(void)
 
 int __init rds_threads_init(void)
 {
-	rds_wq = create_workqueue("krdsd");
+	rds_wq = create_singlethread_workqueue("krdsd");
 	if (!rds_wq)
 		return -ENOMEM;
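
For readers following the shutdown ordering in the diff above, the sketch below is a minimal, self-contained illustration of the pattern the patch applies: a subsystem owns a private workqueue, brings it up before anything that can queue work on it, and tears everything down in reverse order so module removal cannot race with a flush still sitting on a shared queue. It is not the RDS implementation; all names (demo_wq, demo_pool, demo_register_client()) are hypothetical stand-ins, and only the workqueue and module APIs are real kernel interfaces.

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;

struct demo_pool {
	struct delayed_work flush_worker;
};

static struct demo_pool demo_pool;

/* Stand-in for ib_register_client(); always succeeds in this sketch. */
static int demo_register_client(void)
{
	return 0;
}

static void demo_flush_worker(struct work_struct *work)
{
	struct demo_pool *pool =
		container_of(work, struct demo_pool, flush_worker.work);

	/* The flush runs on the private queue, never on a shared one. */
	pr_info("demo: flushing pool %p\n", pool);
}

static int __init demo_init(void)
{
	int ret;

	/* The queue exists before any user of it can queue work. */
	demo_wq = create_singlethread_workqueue("demo_flushd");
	if (!demo_wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&demo_pool.flush_worker, demo_flush_worker);

	ret = demo_register_client();
	if (ret)
		goto out_wq;

	queue_delayed_work(demo_wq, &demo_pool.flush_worker, 10);
	return 0;

out_wq:
	/* Unwind in reverse order on failure, as rds_ib_init() now does. */
	destroy_workqueue(demo_wq);
	return ret;
}

static void __exit demo_exit(void)
{
	/*
	 * Reverse order of init: wait for any queued flush to finish,
	 * then destroy the queue last.
	 */
	cancel_delayed_work_sync(&demo_pool.flush_worker);
	destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The same create-before-use, destroy-after-last-user ordering is what lets rds_ib_fmr_exit() assume the flush workqueue is already idle when it calls destroy_workqueue().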