From a398103c605f69164d3df78a7b35b974cae4d6b7 Mon Sep 17 00:00:00 2001
From: Chris Mason
Date: Fri, 3 Feb 2012 11:09:23 -0500
Subject: [PATCH] RDS: don't use RCU for the bind hash table

RCU delays are making socket shutdown too slow. Switch to a
reader/writer lock so that we don't risk ooming as we wait for
sockets to free.

Signed-off-by: Chris Mason
Signed-off-by: Bang Nguyen
---
 net/rds/af_rds.c  | 20 +-------------------
 net/rds/bind.c    | 34 ++++++++++++++++------------------
 net/rds/ib.h      |  1 +
 net/rds/ib_rdma.c | 12 ++++++++----
 4 files changed, 26 insertions(+), 41 deletions(-)

diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 63afc6354bab..33ca87138580 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -78,13 +78,7 @@ static int rds_release(struct socket *sock)
 	rds_clear_recv_queue(rs);
 	rds_cong_remove_socket(rs);
 
-	/*
-	 * the binding lookup hash uses rcu, we need to
-	 * make sure we sychronize_rcu before we free our
-	 * entry
-	 */
 	rds_remove_bound(rs);
-	synchronize_rcu();
 
 	rds_send_drop_to(rs, NULL);
 	rds_rdma_drop_keys(rs);
@@ -98,19 +92,7 @@ static int rds_release(struct socket *sock)
 	rds_trans_put(rs->rs_transport);
 	sock->sk = NULL;
 
-	if ((atomic_read(&sk->sk_refcnt) == 0)) {
-		printk(KERN_CRIT "zero refcnt on sock put release\n");
-		WARN_ON(1);
-	}
-
-	if (atomic_dec_and_test(&sk->sk_refcnt)) {
-		if (rs->poison != 0xABABABAB) {
-			printk(KERN_CRIT "bad poison on put release %x\n", rs->poison);
-			WARN_ON(1);
-		}
-		rs->poison = 0xDEADBEEF;
-		sk_free(sk);
-	}
+	debug_sock_put(sk);
 out:
 	return 0;
 }
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 4f0644c0e86b..a9099cc5fcaa 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -39,7 +39,7 @@
 
 #define BIND_HASH_SIZE 1024
 static struct hlist_head bind_hash_table[BIND_HASH_SIZE];
-static DEFINE_SPINLOCK(rds_bind_lock);
+static DEFINE_RWLOCK(rds_bind_lock);
 
 static struct hlist_head *hash_to_bucket(__be32 addr, __be16 port)
 {
@@ -47,6 +47,9 @@ static struct hlist_head *hash_to_bucket(__be32 addr, __be16 port)
 				  (BIND_HASH_SIZE - 1));
 }
 
+/*
+ * must hold either read or write lock (write lock for insert != NULL)
+ */
 static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
 					struct rds_sock *insert)
 {
@@ -56,31 +59,26 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
 	u64 cmp;
 	u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
 
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) {
+	hlist_for_each_entry(rs, node, head, rs_bound_node) {
 		cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
 		      be16_to_cpu(rs->rs_bound_port);
 
 		if (cmp == needle) {
 			rds_sock_addref(rs);
-			rcu_read_unlock();
 			return rs;
 		}
 	}
-	rcu_read_unlock();
 
 	if (insert) {
 		/*
 		 * make sure our addr and port are set before
-		 * we are added to the list, other people
-		 * in rcu will find us as soon as the
-		 * hlist_add_head_rcu is done
+		 * we are added to the list.
 		 */
 		insert->rs_bound_addr = addr;
 		insert->rs_bound_port = port;
 		rds_sock_addref(insert);
-		hlist_add_head_rcu(&insert->rs_bound_node, head);
+		hlist_add_head(&insert->rs_bound_node, head);
 	}
 	return NULL;
 }
@@ -94,8 +92,11 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
 struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
 {
 	struct rds_sock *rs;
+	unsigned long flags;
 
+	read_lock_irqsave(&rds_bind_lock, flags);
 	rs = rds_bind_lookup(addr, port, NULL);
+	read_unlock_irqrestore(&rds_bind_lock, flags);
 
 	if (rs && sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) {
 		rds_sock_put(rs);
@@ -104,6 +105,7 @@ struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
 
 	rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
 		 ntohs(port));
+
 	return rs;
 }
 
@@ -122,7 +124,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
 		last = rover - 1;
 	}
 
-	spin_lock_irqsave(&rds_bind_lock, flags);
+	write_lock_irqsave(&rds_bind_lock, flags);
 
 	do {
 		struct rds_sock *rrs;
@@ -139,7 +141,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
 			rds_sock_put(rrs);
 	} while (rover++ != last);
 
-	spin_unlock_irqrestore(&rds_bind_lock, flags);
+	write_unlock_irqrestore(&rds_bind_lock, flags);
 
 	return ret;
 }
@@ -148,19 +150,19 @@ void rds_remove_bound(struct rds_sock *rs)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&rds_bind_lock, flags);
+	write_lock_irqsave(&rds_bind_lock, flags);
 
 	if (rs->rs_bound_addr) {
 		rdsdebug("rs %p unbinding from %pI4:%d\n",
 			 rs, &rs->rs_bound_addr,
 			 ntohs(rs->rs_bound_port));
 
-		hlist_del_init_rcu(&rs->rs_bound_node);
+		hlist_del_init(&rs->rs_bound_node);
 		rds_sock_put(rs);
 		rs->rs_bound_addr = 0;
 	}
 
-	spin_unlock_irqrestore(&rds_bind_lock, flags);
+	write_unlock_irqrestore(&rds_bind_lock, flags);
 }
 
 int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
@@ -200,9 +202,5 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 out:
 	release_sock(sk);
-
-	/* we might have called rds_remove_bound on error */
-	if (ret)
-		synchronize_rcu();
 	return ret;
 }
diff --git a/net/rds/ib.h b/net/rds/ib.h
index a75f2ad011e9..f0b7c4e1bc6e 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -185,6 +185,7 @@ struct rds_ib_connection {
 struct rds_ib_ipaddr {
 	struct list_head	list;
 	__be32			ipaddr;
+	struct rcu_head		rcu_head;
 };
 
 struct rds_ib_device {
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 6917322e0cd0..504fcb501123 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -125,6 +125,12 @@ static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
 	return 0;
 }
 
+static void ipaddr_free_cb(struct rcu_head *rp)
+{
+	struct rds_ib_ipaddr *ipaddr = container_of(rp, struct rds_ib_ipaddr, rcu_head);
+	kfree(ipaddr);
+}
+
 static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
 {
 	struct rds_ib_ipaddr *i_ipaddr;
@@ -141,10 +147,8 @@ static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
 	}
 	spin_unlock_irq(&rds_ibdev->spinlock);
 
-	if (to_free) {
-		synchronize_rcu();
-		kfree(to_free);
-	}
+	if (to_free)
+		call_rcu(&to_free->rcu_head, ipaddr_free_cb);
 }
 
 int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
-- 
2.50.1
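
Aside (not part of the patch itself): a minimal userspace sketch of the locking
pattern the bind path adopts above, with a pthread rwlock standing in for
rds_bind_lock; lookups take the lock for reading, bind takes it for writing.
The names bind_entry, find_bound and add_bound below are illustrative only and
do not exist in net/rds.

/* Build with: cc -pthread sketch.c */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define BIND_HASH_SIZE 1024

struct bind_entry {
	struct bind_entry *next;
	uint32_t addr;
	uint16_t port;
};

static struct bind_entry *bind_hash_table[BIND_HASH_SIZE];
static pthread_rwlock_t bind_lock = PTHREAD_RWLOCK_INITIALIZER;

static unsigned int hash_to_bucket(uint32_t addr, uint16_t port)
{
	return (addr ^ port) & (BIND_HASH_SIZE - 1);
}

/* Reader side: analogous to rds_find_bound() taking the lock for reading. */
static struct bind_entry *find_bound(uint32_t addr, uint16_t port)
{
	struct bind_entry *e;

	pthread_rwlock_rdlock(&bind_lock);
	for (e = bind_hash_table[hash_to_bucket(addr, port)]; e; e = e->next)
		if (e->addr == addr && e->port == port)
			break;
	pthread_rwlock_unlock(&bind_lock);
	return e;
}

/* Writer side: analogous to rds_add_bound() taking the lock for writing. */
static int add_bound(uint32_t addr, uint16_t port)
{
	unsigned int bucket = hash_to_bucket(addr, port);
	struct bind_entry *e = calloc(1, sizeof(*e));

	if (!e)
		return -1;
	/* addr and port are set before the entry becomes visible in the table. */
	e->addr = addr;
	e->port = port;

	pthread_rwlock_wrlock(&bind_lock);
	e->next = bind_hash_table[bucket];
	bind_hash_table[bucket] = e;
	pthread_rwlock_unlock(&bind_lock);
	return 0;
}

int main(void)
{
	add_bound(0x0a000001, 4791);
	printf("bound entry found: %s\n",
	       find_bound(0x0a000001, 4791) ? "yes" : "no");
	return 0;
}

The trade-off mirrors the commit message: readers now take a lock on every
lookup, but socket teardown no longer has to wait out an RCU grace period
before the bound socket can be freed.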