From: Bang Nguyen
Date: Fri, 3 Feb 2012 16:09:49 +0000 (-0500)
Subject: RDS: issue warning if re-connect stalling for more than 1 min.
X-Git-Tag: v4.1.12-92~319^2^2~2^2~31
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=fa31bc82c3ba9e7a436e2090733896a207942418;p=users%2Fjedix%2Flinux-maple.git

RDS: issue warning if re-connect stalling for more than 1 min.

[review: free the frag when the recv allocation limit rejects it, and
 print the unsigned long drop counter with %lu instead of %d]

Signed-off-by: Chris Mason
Signed-off-by: Bang Nguyen
---

diff --git a/net/rds/connection.c b/net/rds/connection.c
index 6984ad2135ea..af13c524edc8 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -193,6 +193,11 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
 	atomic_set(&conn->c_state, RDS_CONN_DOWN);
 	conn->c_send_gen = 0;
 	conn->c_reconnect_jiffies = 0;
+	conn->c_reconnect_start = get_seconds();
+	conn->c_reconnect_warn = 1;
+	conn->c_reconnect_drops = 0;
+	conn->c_reconnect_err = 0;
+
 	INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
 	INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
 	INIT_DELAYED_WORK(&conn->c_conn_w, rds_connect_worker);
@@ -547,6 +552,28 @@ void rds_conn_exit(void)
  */
 void rds_conn_drop(struct rds_connection *conn)
 {
+	unsigned long now = get_seconds();
+
+	/*
+	 * A drop from the UP state restarts the stall clock and re-arms the
+	 * warning.  A drop from any other state warns at most once, when
+	 * more than 60 seconds have passed since the stall clock was set.
+	 */
+	if (rds_conn_state(conn) == RDS_CONN_UP) {
+		conn->c_reconnect_start = now;
+		conn->c_reconnect_warn = 1;
+		conn->c_reconnect_drops = 0;
+		conn->c_reconnect_err = 0;
+	} else if ((conn->c_reconnect_warn) &&
+		   (now - conn->c_reconnect_start > 60)) {
+		printk(KERN_INFO "RDS/IB: re-connect to %u.%u.%u.%u is "
+			"stalling for more than 1 min...(drops=%lu err=%d)\n",
+			NIPQUAD(conn->c_faddr), conn->c_reconnect_drops,
+			conn->c_reconnect_err);
+		conn->c_reconnect_warn = 0;
+	}
+	conn->c_reconnect_drops++;
+
 	atomic_set(&conn->c_state, RDS_CONN_ERROR);
 	queue_work(rds_wq, &conn->c_down_w);
 }
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index cdd30c19454d..8beeed4c0bae 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -490,6 +490,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 	rdsdebug("conn %p pd %p mr %p cq %p\n", conn, ic->i_pd, ic->i_mr, ic->i_rcq);
 
 out:
+	conn->c_reconnect_err = ret;
 	rds_ib_dev_put(rds_ibdev);
 	return ret;
 }
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index a15da0ddf4dc..fc6aa07609ab 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -41,6 +41,7 @@
 static struct kmem_cache *rds_ib_incoming_slab;
 static struct kmem_cache *rds_ib_frag_slab;
 static atomic_t	rds_ib_allocation = ATOMIC_INIT(0);
+static unsigned long rds_ib_allocation_warn = 1;
 
 void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
 {
@@ -242,21 +243,13 @@ static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *i
 {
 	struct rds_ib_incoming *ibinc;
 	struct list_head *cache_item;
-	int avail_allocs;
 
 	cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs);
 	if (cache_item) {
 		ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
 	} else {
-		avail_allocs = atomic_add_unless(&rds_ib_allocation,
-						 1, rds_ib_sysctl_max_recv_allocation);
-		if (!avail_allocs) {
-			rds_ib_stats_inc(s_ib_rx_alloc_limit);
-			return NULL;
-		}
 		ibinc = kmem_cache_alloc(rds_ib_incoming_slab, slab_mask);
 		if (!ibinc) {
-			atomic_dec(&rds_ib_allocation);
 			return NULL;
 		}
 	}
@@ -272,6 +265,7 @@ static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic
 	struct rds_page_frag *frag;
 	struct list_head *cache_item;
 	int ret;
+	int avail_allocs;
 
 	cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags);
 	if (cache_item) {
@@ -281,11 +275,28 @@ static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic
 		if (!frag)
 			return NULL;
 
+		avail_allocs = atomic_add_unless(&rds_ib_allocation,
+					 1, rds_ib_sysctl_max_recv_allocation);
+
+		if (!avail_allocs) {
+			/* Bit 0 starts set; the first warning clears it. */
+			if (test_and_clear_bit(0, &rds_ib_allocation_warn)) {
+				printk(KERN_NOTICE "RDS/IB: WARNING - "
+				"recv memory exceeded max_recv_allocation %d\n",
+				atomic_read(&rds_ib_allocation));
+			}
+			rds_ib_stats_inc(s_ib_rx_alloc_limit);
+			/* frag was never charged or handed out; don't leak it. */
+			kmem_cache_free(rds_ib_frag_slab, frag);
+			return NULL;
+		}
+
 		sg_init_table(&frag->f_sg, 1);
 		ret = rds_page_remainder_alloc(&frag->f_sg,
 					       RDS_FRAG_SIZE, page_mask);
 		if (ret) {
 			kmem_cache_free(rds_ib_frag_slab, frag);
+			atomic_dec(&rds_ib_allocation);
 			return NULL;
 		}
 	}
diff --git a/net/rds/rds.h b/net/rds/rds.h
index bdf9a8ed1b2a..3aec13d2c766 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -131,6 +131,12 @@ struct rds_connection {
 
 	/* Protocol version */
 	unsigned int		c_version;
+
+	/* Re-connect stall diagnostics */
+	unsigned long		c_reconnect_start;	/* get_seconds() stamp */
+	unsigned long		c_reconnect_drops;	/* drops since last UP */
+	int			c_reconnect_warn;	/* 1 = warning armed */
+	int			c_reconnect_err;	/* last setup_qp result */
 };
 
 #define RDS_FLAG_CONG_BITMAP	0x01