From: Zach Brown Date: Thu, 15 Jul 2010 19:34:33 +0000 (-0700) Subject: RDS/IB: protect the list of IB devices X-Git-Tag: v4.1.12-92~319^2^2~2^2~71 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=7dd4dba0bb6cb531fd30fa57976011e53cf06090;p=users%2Fjedix%2Flinux-maple.git RDS/IB: protect the list of IB devices The RDS IB device list wasn't protected by any locking. Traversal in both the get_mr and FMR flushing paths could race with addition and removal. List manipulation is done with RCU primitives and is protected by the write side of a rwsem. The list traversal in the get_mr fast path is protected by a rcu read critical section. The FMR list traversal is more problematic because it can block while traversing the list. We protect this with the read side of the rwsem. Signed-off-by: Zach Brown Signed-off-by: Chris Mason Signed-off-by: Bang Nguyen --- diff --git a/net/rds/ib.c b/net/rds/ib.c index f17c8d53130a..ca84d32640f0 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -52,6 +52,12 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer"); module_param(rds_ib_retry_count, int, 0444); MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); +/* + * we have a clumsy combination of RCU and a rwsem protecting this list + * because it is used both in the get_mr fast path and while blocking in + * the FMR flushing path. 
+ */ +DECLARE_RWSEM(rds_ib_devices_lock); struct list_head rds_ib_devices; /* NOTE: if also grabbing ibdev lock, grab this first */ @@ -170,7 +176,10 @@ void rds_ib_add_one(struct ib_device *device) INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); INIT_LIST_HEAD(&rds_ibdev->conn_list); - list_add_tail(&rds_ibdev->list, &rds_ib_devices); + + down_write(&rds_ib_devices_lock); + list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices); + up_write(&rds_ib_devices_lock); atomic_inc(&rds_ibdev->refcount); ib_set_client_data(device, &rds_ib_client, rds_ibdev); @@ -229,16 +238,20 @@ void rds_ib_remove_one(struct ib_device *device) rds_ib_dev_shutdown(rds_ibdev); + /* stop connection attempts from getting a reference to this device. */ + ib_set_client_data(device, &rds_ib_client, NULL); + + down_write(&rds_ib_devices_lock); + list_del_rcu(&rds_ibdev->list); + up_write(&rds_ib_devices_lock); + /* - * prevent future connection attempts from getting a reference to this - * device and wait for currently racing connection attempts to finish - * getting their reference + * This synchronize rcu is waiting for readers of both the ib + * client data and the devices list to finish before we drop + * both of those references. 
*/ - ib_set_client_data(device, &rds_ib_client, NULL); synchronize_rcu(); rds_ib_dev_put(rds_ibdev); - - list_del(&rds_ibdev->list); rds_ib_dev_put(rds_ibdev); } diff --git a/net/rds/ib.h b/net/rds/ib.h index e9b103d2abb7..0eeebcd06427 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -24,6 +24,7 @@ #define RDS_IB_RECYCLE_BATCH_COUNT 32 #define RDS_WC_MAX 32 +extern struct rw_semaphore rds_ib_devices_lock; extern struct list_head rds_ib_devices; /* diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index eb1ffe1be63b..8f259604606d 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -93,8 +93,8 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) struct rds_ib_device *rds_ibdev; struct rds_ib_ipaddr *i_ipaddr; - list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { - rcu_read_lock(); + rcu_read_lock(); + list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) { list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) { if (i_ipaddr->ipaddr == ipaddr) { atomic_inc(&rds_ibdev->refcount); @@ -102,8 +102,8 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) return rds_ibdev; } } - rcu_read_unlock(); } + rcu_read_unlock(); return NULL; } @@ -759,12 +759,14 @@ void rds_ib_flush_mrs(void) { struct rds_ib_device *rds_ibdev; + down_read(&rds_ib_devices_lock); list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; if (pool) rds_ib_flush_mr_pool(pool, 0, NULL); } + up_read(&rds_ib_devices_lock); } void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,