www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: issue warning if re-connect stalling for more than 1 min.
author Bang Nguyen <bang.nguyen@oracle.com>
Fri, 3 Feb 2012 16:09:49 +0000 (11:09 -0500)
committer Mukesh Kacker <mukesh.kacker@oracle.com>
Tue, 7 Jul 2015 23:41:34 +0000 (16:41 -0700)
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Signed-off-by: Bang Nguyen <bang.nguyen@oracle.com>
net/rds/connection.c
net/rds/ib_cm.c
net/rds/ib_recv.c
net/rds/rds.h

index 6984ad2135ea9133d10e1cd9c9bbf3866bbad012..af13c524edc8a7a2574ce89ae2fdc180edb92637 100644 (file)
@@ -193,6 +193,11 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
        atomic_set(&conn->c_state, RDS_CONN_DOWN);
        conn->c_send_gen = 0;
        conn->c_reconnect_jiffies = 0;
+       conn->c_reconnect_start = get_seconds();
+       conn->c_reconnect_warn = 1;
+       conn->c_reconnect_drops = 0;
+       conn->c_reconnect_err = 0;
+
        INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
        INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
        INIT_DELAYED_WORK(&conn->c_conn_w, rds_connect_worker);
@@ -547,6 +552,23 @@ void rds_conn_exit(void)
  */
 void rds_conn_drop(struct rds_connection *conn)
 {
+       unsigned long now = get_seconds();
+
+       if (rds_conn_state(conn) == RDS_CONN_UP) {
+               conn->c_reconnect_start = now;
+               conn->c_reconnect_warn = 1;
+               conn->c_reconnect_drops = 0;
+               conn->c_reconnect_err = 0;
+       } else if ((conn->c_reconnect_warn) &&
+                  (now - conn->c_reconnect_start > 60)) {
+               printk(KERN_INFO "RDS/IB: re-connect to %u.%u.%u.%u is "
+                       "stalling for more than 1 min...(drops=%d err=%d)\n",
+                       NIPQUAD(conn->c_faddr), conn->c_reconnect_drops,
+                       conn->c_reconnect_err);
+               conn->c_reconnect_warn = 0;
+       }
+       conn->c_reconnect_drops++;
+
        atomic_set(&conn->c_state, RDS_CONN_ERROR);
        queue_work(rds_wq, &conn->c_down_w);
 }
index cdd30c19454dcaa09631f17febfc142f16439868..8beeed4c0baeb2b3698adc0fcfbb2e2c9bf83c00 100644 (file)
@@ -490,6 +490,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
        rdsdebug("conn %p pd %p mr %p cq %p\n", conn, ic->i_pd, ic->i_mr, ic->i_rcq);
 
 out:
+       conn->c_reconnect_err = ret;
        rds_ib_dev_put(rds_ibdev);
        return ret;
 }
index a15da0ddf4dc944d57d240d384230497763d7e5c..fc6aa07609ab0e75e169854078ffc265723d28fc 100644 (file)
@@ -41,6 +41,7 @@
 static struct kmem_cache *rds_ib_incoming_slab;
 static struct kmem_cache *rds_ib_frag_slab;
 static atomic_t        rds_ib_allocation = ATOMIC_INIT(0);
+static unsigned long rds_ib_allocation_warn = 1;
 
 void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
 {
@@ -242,21 +243,13 @@ static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *i
 {
        struct rds_ib_incoming *ibinc;
        struct list_head *cache_item;
-       int avail_allocs;
 
        cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs);
        if (cache_item) {
                ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
        } else {
-               avail_allocs = atomic_add_unless(&rds_ib_allocation,
-                                                1, rds_ib_sysctl_max_recv_allocation);
-               if (!avail_allocs) {
-                       rds_ib_stats_inc(s_ib_rx_alloc_limit);
-                       return NULL;
-               }
                ibinc = kmem_cache_alloc(rds_ib_incoming_slab, slab_mask);
                if (!ibinc) {
-                       atomic_dec(&rds_ib_allocation);
                        return NULL;
                }
        }
@@ -272,6 +265,7 @@ static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic
        struct rds_page_frag *frag;
        struct list_head *cache_item;
        int ret;
+       int avail_allocs;
 
        cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags);
        if (cache_item) {
@@ -281,11 +275,25 @@ static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic
                if (!frag)
                        return NULL;
 
+               avail_allocs = atomic_add_unless(&rds_ib_allocation,
+                               1, rds_ib_sysctl_max_recv_allocation);
+
+               if (!avail_allocs) {
+                       if (test_and_clear_bit(0, &rds_ib_allocation_warn)) {
+                         printk(KERN_NOTICE "RDS/IB: WARNING - "
+                              "recv memory exceeded max_recv_allocation %d\n",
+                              atomic_read(&rds_ib_allocation));
+                    }
+                    rds_ib_stats_inc(s_ib_rx_alloc_limit);
+                    return NULL;
+               }
+
                sg_init_table(&frag->f_sg, 1);
                ret = rds_page_remainder_alloc(&frag->f_sg,
                                               RDS_FRAG_SIZE, page_mask);
                if (ret) {
                        kmem_cache_free(rds_ib_frag_slab, frag);
+                       atomic_dec(&rds_ib_allocation);
                        return NULL;
                }
        }
index bdf9a8ed1b2a5e008907aa3d69795cdd1bf4764d..3aec13d2c7666a0ccf743e56959a93bfa5fd80a5 100644 (file)
@@ -131,6 +131,12 @@ struct rds_connection {
 
        /* Protocol version */
        unsigned int            c_version;
+
+       /* Re-connect stall diagnostics */
+       unsigned long           c_reconnect_start;
+       unsigned long           c_reconnect_drops;
+       int                     c_reconnect_warn;
+       int                     c_reconnect_err;
 };
 
 #define RDS_FLAG_CONG_BITMAP   0x01