]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
rds: find connection drop reason
author: Ajaykumar Hotchandani <ajaykumar.hotchandani@oracle.com>
Fri, 4 Mar 2016 03:18:28 +0000 (19:18 -0800)
committer: Chuck Anderson <chuck.anderson@oracle.com>
Thu, 10 Mar 2016 20:11:00 +0000 (12:11 -0800)
This patch attempts to find connection drop details.

The rationale for adding this type of patch is that there are too many
places from which a connection can get dropped.
In some cases, we have no idea what the source of a
connection drop was. This is especially painful for issues that
are reproducible only in a customer environment.

The idea here is to have a tracker variable that keeps the latest value
of the connection drop source.
We can then fetch that tracker variable as needed.

Orabug: 22631108

Signed-off-by: Ajaykumar Hotchandani <ajaykumar.hotchandani@oracle.com>
Reviewed-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Acked-by: Wengang Wang <wen.gang.wang@oracle.com>
Signed-off-by: Brian Maly <brian.maly@oracle.com>
14 files changed:
net/rds/af_rds.c
net/rds/connection.c
net/rds/ib.c
net/rds/ib_cm.c
net/rds/ib_recv.c
net/rds/ib_send.c
net/rds/iw_cm.c
net/rds/iw_recv.c
net/rds/iw_send.c
net/rds/rdma_transport.c
net/rds/rds.h
net/rds/tcp_connect.c
net/rds/tcp_send.c
net/rds/threads.c

index 53e808296ebb3bf96bb9762c3bd8255d0af7bf8b..a94317dfdcfffed07ee63c59d8dd2f1db3b7d52a 100644 (file)
@@ -334,6 +334,7 @@ static int rds_user_reset(struct rds_sock *rs, char __user *optval, int optlen)
                                "<%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
                                NIPQUAD(reset.src.s_addr),
                                NIPQUAD(reset.dst.s_addr), conn->c_tos);
+               conn->c_drop_source = 1;
                rds_conn_drop(conn);
        }
 
index a42470a2ec63c436142b1e73da289723f0083fcd..e9eecf497ee636d60b7ac93bd321e332d4016123 100644 (file)
@@ -322,6 +322,7 @@ void rds_conn_shutdown(struct rds_connection *conn, int restart)
                mutex_lock(&conn->c_cm_lock);
                if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING)
                 && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
+                       conn->c_drop_source = 2;
                        rds_conn_error(conn, "shutdown called in state %d\n",
                                        atomic_read(&conn->c_state));
                        mutex_unlock(&conn->c_cm_lock);
@@ -343,6 +344,7 @@ void rds_conn_shutdown(struct rds_connection *conn, int restart)
                         * Quite reproduceable with loopback connections.
                         * Mostly harmless.
                         */
+                       conn->c_drop_source = 3;
                        rds_conn_error(conn,
                                "%s: failed to transition to state DOWN, "
                                "current state is %d\n",
@@ -398,6 +400,7 @@ void rds_conn_destroy(struct rds_connection *conn)
        synchronize_rcu();
 
        /* shut the connection down */
+       conn->c_drop_source = 4;
        rds_conn_drop(conn);
        flush_work(&conn->c_down_w);
 
@@ -610,6 +613,94 @@ void rds_conn_exit(void)
                                 rds_conn_message_info_retrans);
 }
 
+char *conn_drop_reason_str(u8 reason)
+{
+       /* Translate a connection drop-source code into a printable string.
+        * Codes with no case below, including 0, give "unknown reason".
+        *
+        * Distribution of drop reason codes:
+        * 0-19: rds-core
+        * 20-119: IB
+        *   20-39: ib_cm
+        *   40-59: event handling
+        *   60-79: data path
+        *   80-119: special features like active bonding
+        * 120-139: iWARP
+        * 140-159: TCP
+        * 160-255: any other future additions
+        *
+        * NOTE(review): 26/28 and 122/124 return identical text, so the
+        * handle_connect and initiate_connect setup_qp failures look alike.
+        */
+       switch (reason) {
+       case 1: return "user reset";
+       case 2: return "invalid connection state";
+       case 3: return "failure to move to DOWN state";
+       case 4: return "connection destroy";
+       case 5: return "zero lane went down";
+       case 6: return "conn_connect failure";
+       case 7: return "hb timeout";
+       case 8: return "reconnect timeout";
+
+       case 20: return "race between ESTABLISHED event and drop";
+       case 21: return "conn is not in CONNECTING state";
+       case 22: return "qp event";
+       case 23: return "base conn down";
+       case 24: return "incoming REQ in CONN_UP state";
+       case 25: return "incoming REQ in CONNECTING state";
+       case 26: return "setup_qp failure";
+       case 27: return "rdma_accept failure";
+       case 28: return "setup_qp failure";
+       case 29: return "rdma_connect failure";
+
+       case 40: return "rdma_set_ib_paths failure";
+       case 41: return "resolve_route failure";
+       case 42: return "detected rdma_cm_id mismatch";
+       case 43: return "ROUTE_ERROR event";
+       case 44: return "ADDR_ERROR event";
+       case 45: return "CONNECT_ERROR or UNREACHABLE or DEVICE_REMOVE event";
+       case 46: return "CONSUMER_DEFINED reject";
+       case 47: return "REJECTED event";
+       case 48: return "ADDR_CHANGE event";
+       case 49: return "DISCONNECTED event";
+       case 50: return "TIMEWAIT_EXIT event";
+
+       case 60: return "post_recv failure";
+       case 61: return "send_ack failure";
+       case 62: return "no header in incoming msg";
+       case 63: return "corrupted header in incoming msg";
+       case 64: return "fragment header mismatch";
+       case 65: return "recv completion error";
+       case 66: return "send completion error";
+       case 67: return "post_send failure";
+
+       case 80: return "rds_rdma module unload";
+       case 81: return "active bonding failover";
+       case 82: return "corresponding loopback conn drop";
+       case 83: return "active bonding failback";
+
+       case 120: return "qp_event";
+       case 121: return "incoming REQ in connecting state";
+       case 122: return "setup_qp failure";
+       case 123: return "rdma_accept failure";
+       case 124: return "setup_qp failure";
+       case 125: return "rdma_connect failure";
+
+       case 130: return "post_recv failure";
+       case 131: return "send_ack failure";
+       case 132: return "no header in incoming msg";
+       case 133: return "corrupted header in incoming msg";
+       case 134: return "fragment header mismatch";
+       case 135: return "recv completion error";
+       case 136: return "send completion error";
+
+       case 140: return "sk_state to TCP_CLOSE";
+       case 141: return "tcp_send failure";
+
+       default: return "unknown reason";
+       }
+}
+
 static void rds_conn_probe_lanes(struct rds_connection *conn)
 {
        struct hlist_head *head =
@@ -632,6 +723,7 @@ static void rds_conn_probe_lanes(struct rds_connection *conn)
                                       NIPQUAD(tmp->c_faddr),
                                       tmp->c_tos);
 
+                               conn->c_drop_source = 5;
                                rds_conn_drop(tmp);
                        }
                }
@@ -653,10 +745,11 @@ void rds_conn_drop(struct rds_connection *conn)
                conn->c_reconnect_err = 0;
                conn->c_reconnect_racing = 0;
                printk(KERN_INFO "RDS/IB: connection "
-                       "<%u.%u.%u.%u,%u.%u.%u.%u,%d> dropped\n",
+                       "<%u.%u.%u.%u,%u.%u.%u.%u,%d> dropped due to '%s'\n",
                        NIPQUAD(conn->c_laddr),
                        NIPQUAD(conn->c_faddr),
-                       conn->c_tos);
+                       conn->c_tos,
+                       conn_drop_reason_str(conn->c_drop_source));
 
                if (conn->c_tos == 0)
                        rds_conn_probe_lanes(conn);
index 8d6c123c42d692f0fd35611eb7fdcb1e5233d336..fe9fcbfea0f303e91319b25e3bf6d59b859dfc01 100644 (file)
@@ -197,8 +197,10 @@ void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev)
                "calling rds_conn_drop to drop all connections.\n");
 
        spin_lock_irqsave(&rds_ibdev->spinlock, flags);
-       list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node)
+       list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node) {
+               ic->conn->c_drop_source = 80;
                rds_conn_drop(ic->conn);
+       }
        spin_unlock_irqrestore(&rds_ibdev->spinlock, flags);
 }
 
@@ -625,6 +627,7 @@ static void rds_ib_conn_drop(struct work_struct *_work)
        rds_rtd(RDS_RTD_CM_EXT,
                "conn: %p, calling rds_conn_drop\n", conn);
 
+       conn->c_drop_source = 81;
        rds_conn_drop(conn);
 
        kfree(work);
@@ -852,10 +855,11 @@ static int rds_ib_move_ip(char                    *from_dev,
                                                        ic->conn->c_faddr &&
                                                        ic2->conn->c_faddr ==
                                                        ic->conn->c_laddr) {
-                                                   rds_rtd(RDS_RTD_CM_EXT_P,
-                                                           "conn:%p, tos %d, calling rds_conn_drop\n",
-                                                           ic2->conn,
-                                                           ic2->conn->c_tos);
+                                                       rds_rtd(RDS_RTD_CM_EXT_P,
+                                                               "conn:%p, tos %d, calling rds_conn_drop\n",
+                                                               ic2->conn,
+                                                               ic2->conn->c_tos);
+                                                       ic2->conn->c_drop_source = 82;
                                                        rds_conn_drop(ic2->conn);
                                                }
                                        }
@@ -884,6 +888,7 @@ static int rds_ib_move_ip(char                      *from_dev,
                                        rds_rtd(RDS_RTD_CM_EXT,
                                                "conn: %p, tos %d, calling rds_conn_drop\n",
                                                ic->conn, ic->conn->c_tos);
+                                       ic->conn->c_drop_source = 83;
                                        rds_conn_drop(ic->conn);
                                }
                        }
index fae04c620492e31db53c2380f23dfb3a23147d1f..59a07afb823665684dc95d7e4a385382ee635d11 100644 (file)
@@ -177,6 +177,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
                rds_rtd(RDS_RTD_CM,
                        "ic->i_cm_id is NULL, ic: %p, calling rds_conn_drop\n",
                        ic);
+               conn->c_drop_source = 20;
                rds_conn_drop(conn);
                return;
        }
@@ -187,6 +188,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
                rds_rtd(RDS_RTD_CM,
                        "conn is in connecting state, conn: %p, calling rds_conn_drop\n",
                        conn);
+               conn->c_drop_source = 21;
                rds_conn_drop(conn);
                return;
        }
@@ -530,6 +532,7 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
                        "Fatal QP Event %u (%s) - connection %pI4->%pI4 tos %d, reconnecting\n",
                        event->event, rds_ib_event_str(event->event),
                        &conn->c_laddr, &conn->c_faddr, conn->c_tos);
+               conn->c_drop_source = 22;
                rds_conn_drop(conn);
                break;
        }
@@ -833,6 +836,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                                NIPQUAD(conn->c_laddr),
                                NIPQUAD(conn->c_faddr),
                                conn->c_tos);
+               conn->c_drop_source = 23;
                rds_conn_drop(conn);
        }
 
@@ -855,6 +859,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                if (rds_conn_state(conn) == RDS_CONN_UP) {
                        rds_rtd(RDS_RTD_CM_EXT_P,
                                "incoming connect while connecting\n");
+                       conn->c_drop_source = 24;
                        rds_conn_drop(conn);
                        rds_ib_stats_inc(s_ib_listen_closed_stale);
                } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
@@ -876,6 +881,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                                        NIPQUAD(conn->c_laddr),
                                        NIPQUAD(conn->c_faddr),
                                        conn->c_tos);
+                               conn->c_drop_source = 25;
                                rds_conn_drop(conn);
                                rds_ib_stats_inc(s_ib_listen_closed_stale);
                        } else {
@@ -914,6 +920,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 
        err = rds_ib_setup_qp(conn);
        if (err) {
+               conn->c_drop_source = 26;
                rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
                goto out;
        }
@@ -928,8 +935,10 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 #endif
        /* rdma_accept() calls rdma_reject() internally if it fails */
        err = rdma_accept(cm_id, &conn_param);
-       if (err)
+       if (err) {
+               conn->c_drop_source = 27;
                rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err);
+       }
 #if RDMA_RDS_APM_SUPPORTED
        else if (rds_ib_apm_enabled && !conn->c_loopback) {
                err = rdma_enable_apm(cm_id, RDMA_ALT_PATH_BEST);
@@ -968,6 +977,7 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
 
        ret = rds_ib_setup_qp(conn);
        if (ret) {
+               conn->c_drop_source = 28;
                rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", ret);
                goto out;
        }
@@ -975,8 +985,10 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
        rds_ib_cm_fill_conn_param(conn, &conn_param, &dp,
                                conn->c_proposed_version, UINT_MAX, UINT_MAX);
        ret = rdma_connect(cm_id, &conn_param);
-       if (ret)
+       if (ret) {
+               conn->c_drop_source = 29;
                rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
+       }
 
 out:
        /* Beware - returning non-zero tells the rdma_cm to destroy
index 2e99bcb0fe88d4997f2a7bdc37c411f1cd4a2af8..d5d6700d938a3a4cfe521f9b2fd7ad9f35a23d30 100644 (file)
@@ -596,6 +596,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
                         recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
                         (long) sg_dma_address(&recv->r_frag->f_sg), ret);
                if (ret) {
+                       conn->c_drop_source = 60;
                        rds_ib_conn_error(conn, "recv post on "
                               "%pI4 returned %d, disconnecting and "
                               "reconnecting\n", &conn->c_faddr,
@@ -863,6 +864,7 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi
 
                rds_ib_stats_inc(s_ib_ack_send_failure);
 
+               ic->conn->c_drop_source = 61;
                rds_ib_conn_error(ic->conn, "sending ack failed\n");
        } else
                rds_ib_stats_inc(s_ib_ack_sent);
@@ -1039,6 +1041,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
                 data_len);
 
        if (data_len < sizeof(struct rds_header)) {
+               conn->c_drop_source = 62;
                rds_ib_conn_error(conn, "incoming message "
                       "from %pI4 didn't inclue a "
                       "header, disconnecting and "
@@ -1052,6 +1055,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
 
        /* Validate the checksum. */
        if (!rds_message_verify_checksum(ihdr)) {
+               conn->c_drop_source = 63;
                rds_ib_conn_error(conn, "incoming message "
                       "from %pI4 has corrupted header - "
                       "forcing a reconnect\n",
@@ -1119,6 +1123,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
                 || hdr->h_len != ihdr->h_len
                 || hdr->h_sport != ihdr->h_sport
                 || hdr->h_dport != ihdr->h_dport) {
+                       conn->c_drop_source = 64;
                        rds_ib_conn_error(conn,
                                "fragment header mismatch; forcing reconnect\n");
                        return;
@@ -1279,6 +1284,7 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
        } else {
                /* We expect errors as the qp is drained during shutdown */
                if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
+                       conn->c_drop_source = 65;
                        rds_ib_conn_error(conn, "recv completion "
                                        "<%pI4,%pI4,%d> had "
                                        "status %u, disconnecting and "
index 749e8d88ed993e52858e6ef1e04768191fa7831f..2a1236cdfb87ba6df7dc05568a8134331b7108a8 100644 (file)
@@ -341,6 +341,7 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
 
        /* We expect errors as the qp is drained during shutdown */
        if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
+               conn->c_drop_source = 66;
                rds_ib_conn_error(conn,
                        "send completion <%u.%u.%u.%u,%u.%u.%u.%u,%d> status "
                        "%u vendor_err %u, disconnecting and reconnecting\n",
@@ -807,6 +808,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
                        prev->s_op = NULL;
                }
 
+               ic->conn->c_drop_source = 67;
                rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
                goto out;
        }
index 1d89fe7038807518501d92e518afb4a5bc7c971e..ed0f8078b0bd2d7224d53de134dc47939ffcde86 100644 (file)
@@ -160,6 +160,7 @@ static void rds_iw_qp_event_handler(struct ib_event *event, void *data)
                        "- connection %pI4->%pI4, reconnecting\n",
                        event->event, &conn->c_laddr,
                        &conn->c_faddr);
+               conn->c_drop_source = 120;
                rds_conn_drop(conn);
                break;
        }
@@ -415,6 +416,7 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
        if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
                if (rds_conn_state(conn) == RDS_CONN_UP) {
                        rdsdebug("incoming connect while connecting\n");
+                       conn->c_drop_source = 121;
                        rds_conn_drop(conn);
                        rds_iw_stats_inc(s_iw_listen_closed_stale);
                } else
@@ -451,6 +453,7 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
 
        err = rds_iw_setup_qp(conn);
        if (err) {
+               conn->c_drop_source = 122;
                rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err);
                goto out;
        }
@@ -461,6 +464,7 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
        err = rdma_accept(cm_id, &conn_param);
        mutex_unlock(&conn->c_cm_lock);
        if (err) {
+               conn->c_drop_source = 123;
                rds_iw_conn_error(conn, "rdma_accept failed (%d)\n", err);
                goto out;
        }
@@ -488,6 +492,7 @@ int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id)
 
        ret = rds_iw_setup_qp(conn);
        if (ret) {
+               conn->c_drop_source = 124;
                rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", ret);
                goto out;
        }
@@ -495,9 +500,10 @@ int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id)
        rds_iw_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION);
 
        ret = rdma_connect(cm_id, &conn_param);
-       if (ret)
+       if (ret) {
+               conn->c_drop_source = 125;
                rds_iw_conn_error(conn, "rdma_connect failed (%d)\n", ret);
-
+       }
 out:
        /* Beware - returning non-zero tells the rdma_cm to destroy
         * the cm_id. We should certainly not do it as long as we still
index 545fb526f77b111b085843e3aa691985b185ce48..ec21cc5b21eb38ece439f4ba09e06f1f6a67b192 100644 (file)
@@ -252,6 +252,7 @@ int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
                         recv->r_iwinc, recv->r_frag->f_page,
                         (long) recv->r_frag->f_mapped, ret);
                if (ret) {
+                       conn->c_drop_source = 130;
                        rds_iw_conn_error(conn, "recv post on "
                               "%pI4 returned %d, disconnecting and "
                               "reconnecting\n", &conn->c_faddr,
@@ -454,6 +455,7 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi
 
                rds_iw_stats_inc(s_iw_ack_send_failure);
 
+               ic->conn->c_drop_source = 131;
                rds_iw_conn_error(ic->conn, "sending ack failed\n");
        } else
                rds_iw_stats_inc(s_iw_ack_sent);
@@ -644,6 +646,7 @@ static void rds_iw_process_recv(struct rds_connection *conn,
                 byte_len);
 
        if (byte_len < sizeof(struct rds_header)) {
+               conn->c_drop_source = 132;
                rds_iw_conn_error(conn, "incoming message "
                       "from %pI4 didn't inclue a "
                       "header, disconnecting and "
@@ -657,6 +660,7 @@ static void rds_iw_process_recv(struct rds_connection *conn,
 
        /* Validate the checksum. */
        if (!rds_message_verify_checksum(ihdr)) {
+               conn->c_drop_source = 133;
                rds_iw_conn_error(conn, "incoming message "
                       "from %pI4 has corrupted header - "
                       "forcing a reconnect\n",
@@ -719,6 +723,7 @@ static void rds_iw_process_recv(struct rds_connection *conn,
                 || hdr->h_len != ihdr->h_len
                 || hdr->h_sport != ihdr->h_sport
                 || hdr->h_dport != ihdr->h_dport) {
+                       conn->c_drop_source = 134;
                        rds_iw_conn_error(conn,
                                "fragment header mismatch; forcing reconnect\n");
                        return;
@@ -803,6 +808,7 @@ static inline void rds_poll_cq(struct rds_iw_connection *ic,
                        if (wc.status == IB_WC_SUCCESS) {
                                rds_iw_process_recv(conn, recv, wc.byte_len, state);
                        } else {
+                               conn->c_drop_source = 135;
                                rds_iw_conn_error(conn, "recv completion on "
                                       "%pI4 had status %u, disconnecting and "
                                       "reconnecting\n", &conn->c_faddr,
index 33106547e4a639420663e03b64e6f946fb05468d..0820c4abc2ebd236b43fc6ffe037af5c32e4b833 100644 (file)
@@ -293,6 +293,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
 
                /* We expect errors as the qp is drained during shutdown */
                if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
+                       conn->c_drop_source = 136;
                        rds_iw_conn_error(conn,
                                "send completion on %pI4 "
                                "had status %u, disconnecting and reconnecting\n",
index b5c4834c4385c1afd66d0a15dacca58b20c1b356..c5d5011b15ad6cac2b3ffc4ce0666972688a9c48 100644 (file)
@@ -166,6 +166,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                                        "ADDR_RESOLVED: ret %d, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
                                        ret, NIPQUAD(conn->c_laddr),
                                        NIPQUAD(conn->c_faddr), conn->c_tos);
+                               conn->c_drop_source = 40;
                                rds_conn_drop(conn);
                                ret = 0;
                        }
@@ -190,6 +191,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                                ibic = conn->c_transport_data;
                                if (ibic && ibic->i_cm_id == cm_id)
                                        ibic->i_cm_id = NULL;
+                               conn->c_drop_source = 41;
                                rds_conn_drop(conn);
                        }
                } else if (conn->c_to_index < (RDS_RDMA_RESOLVE_TO_MAX_INDEX-1))
@@ -213,6 +215,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                                        "ROUTE_RESOLVED: calling rds_conn_drop, conn %p <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
                                        conn, NIPQUAD(conn->c_laddr),
                                        NIPQUAD(conn->c_faddr), conn->c_tos);
+                               conn->c_drop_source = 42;
                                rds_conn_drop(conn);
                        }
                }
@@ -256,6 +259,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                                "ROUTE_ERROR: conn %p, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
                                conn, NIPQUAD(conn->c_laddr),
                                NIPQUAD(conn->c_faddr), conn->c_tos);
+                       conn->c_drop_source = 43;
                        rds_conn_drop(conn);
                }
                break;
@@ -270,6 +274,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                                "ADDR_ERROR: conn %p, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
                                conn, NIPQUAD(conn->c_laddr),
                                NIPQUAD(conn->c_faddr), conn->c_tos);
+                       conn->c_drop_source = 44;
                        rds_conn_drop(conn);
                }
                break;
@@ -282,6 +287,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                                "CONN/UNREACHABLE/RMVAL ERR: conn %p, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
                                conn, NIPQUAD(conn->c_laddr),
                                NIPQUAD(conn->c_faddr), conn->c_tos);
+                       conn->c_drop_source = 45;
                        rds_conn_drop(conn);
                }
                break;
@@ -303,6 +309,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                                if (!conn->c_tos) {
                                        conn->c_proposed_version =
                                                RDS_PROTOCOL_COMPAT_VERSION;
+                                       conn->c_drop_source = 46;
                                        rds_conn_drop(conn);
                                } else  {
                                        if (conn->c_loopback)
@@ -321,6 +328,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                                        NIPQUAD(conn->c_laddr),
                                        NIPQUAD(conn->c_faddr),
                                        conn->c_tos);
+                               conn->c_drop_source = 47;
                                rds_conn_drop(conn);
                        }
                }
@@ -337,6 +345,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                                "ADDR_CHANGE: calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
                                NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
                                conn->c_tos);
+                       conn->c_drop_source = 48;
                        rds_conn_drop(conn);
                }
 #else
@@ -345,6 +354,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                                "ADDR_CHANGE: calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
                                NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
                                conn->c_tos);
+                       conn->c_drop_source = 48;
                        rds_conn_drop(conn);
                }
 #endif
@@ -354,6 +364,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                rds_rtd(RDS_RTD_CM,
                        "DISCONNECT event - dropping connection %pI4->%pI4 tos %d\n",
                        &conn->c_laddr, &conn->c_faddr, conn->c_tos);
+               conn->c_drop_source = 49;
                rds_conn_drop(conn);
                break;
 
@@ -363,6 +374,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                                "dropping connection "
                                "%pI4->%pI4\n", &conn->c_laddr,
                                 &conn->c_faddr);
+                       conn->c_drop_source = 50;
                        rds_conn_drop(conn);
                } else
                        printk(KERN_INFO "TIMEWAIT_EXIT event - conn=NULL\n");
index a26152110da46da918eede578c54e71d782d6222..f01c043eb07dde0f69853a798a4ccb447c2d1906 100644 (file)
@@ -175,6 +175,7 @@ struct rds_connection {
 
        unsigned int            c_reconnect_racing;
        unsigned int            c_route_resolved;
+       u8                      c_drop_source;
 };
 
 static inline
@@ -732,6 +733,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
                          struct rds_info_lengths *lens,
                          int (*visitor)(struct rds_connection *, void *),
                          size_t item_len);
+char *conn_drop_reason_str(u8 reason);
 void __rds_conn_error(struct rds_connection *conn, const char *, ...)
                                __attribute__ ((format (printf, 2, 3)));
 #define rds_conn_error(conn, fmt...) \
index 262d996d05f5480fee69266b941a383f934dbbef..acaf420d3e7a4e0a2eeaf8d29754c41b5dbd4a5c 100644 (file)
@@ -64,6 +64,7 @@ void rds_tcp_state_change(struct sock *sk)
                        break;
                case TCP_CLOSE_WAIT:
                case TCP_CLOSE:
+                       conn->c_drop_source = 140;
                        rds_conn_drop(conn);
                default:
                        break;
index e9cffe0220f7e328812874657e6a08000a66f594..6e8e5a7b42afd8fe5594e1be011bbd2b458d6529 100644 (file)
@@ -152,6 +152,7 @@ out:
                        printk(KERN_WARNING "RDS/tcp: send to %u.%u.%u.%u "
                               "returned %d, disconnecting and reconnecting\n",
                               NIPQUAD(conn->c_faddr), ret);
+                       conn->c_drop_source = 141;
                        rds_conn_drop(conn);
                }
        }
index 015598b1aa85e96dad85383403bda0a076769766..0de85acd49c9f723c31a333a869f8f8152e4fea0 100644 (file)
@@ -179,6 +179,7 @@ void rds_connect_worker(struct work_struct *work)
                 * drop the connection if it doesn't work out after a while
                 */
                conn->c_connection_start = get_seconds();
+               conn->c_drop_source = 0;
 
                ret = conn->c_trans->conn_connect(conn);
                rds_rtd(RDS_RTD_CM_EXT,
@@ -190,8 +191,10 @@ void rds_connect_worker(struct work_struct *work)
                                rds_rtd(RDS_RTD_CM_EXT,
                                        "reconnecting..., conn %p\n", conn);
                                rds_queue_reconnect(conn);
-                       } else
+                       } else {
+                               conn->c_drop_source = 6;
                                rds_conn_error(conn, "RDS: connect failed\n");
+                       }
                }
        } else {
                rds_rtd(RDS_RTD_CM,
@@ -280,7 +283,8 @@ void rds_hb_worker(struct work_struct *work)
                                "RDS/IB: connection <%u.%u.%u.%u,%u.%u.%u.%u,%d> timed out (0x%lx,0x%lx)..discon and recon\n",
                                NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
                                conn->c_tos, conn->c_hb_start, now);
-                               rds_conn_drop(conn);
+                       conn->c_drop_source = 7;
+                       rds_conn_drop(conn);
                        return;
                }
                queue_delayed_work(rds_wq, &conn->c_hb_w, HZ);
@@ -300,6 +304,7 @@ void rds_reconnect_timeout(struct work_struct *work)
                        "conn not up, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
                        NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
                        conn->c_tos);
+               conn->c_drop_source = 8;
                rds_conn_drop(conn);
                conn->c_reconnect_racing = 0;
        }