From: Ajaykumar Hotchandani Date: Fri, 4 Mar 2016 03:18:28 +0000 (-0800) Subject: rds: find connection drop reason X-Git-Tag: v4.1.12-92~188^2~2 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=147954a1cd39fb9178782ce14b474c43f8378ad4;p=users%2Fjedix%2Flinux-maple.git rds: find connection drop reason This patch attempts to record connection drop details. The rationale for adding this type of patch is that there are too many places from which a connection can get dropped, and, in some cases, we don't have any idea of the source of the connection drop. This is especially painful for issues which are reproducible only in a customer environment. The idea here is to have a tracker variable which keeps the latest value of the connection drop source. We can fetch that tracker variable as needed. Orabug: 22631108 Signed-off-by: Ajaykumar Hotchandani Reviewed-by: Santosh Shilimkar Acked-by: Wengang Wang Signed-off-by: Brian Maly --- diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 53e808296ebb..a94317dfdcff 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -334,6 +334,7 @@ static int rds_user_reset(struct rds_sock *rs, char __user *optval, int optlen) "<%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", NIPQUAD(reset.src.s_addr), NIPQUAD(reset.dst.s_addr), conn->c_tos); + conn->c_drop_source = 1; rds_conn_drop(conn); } diff --git a/net/rds/connection.c b/net/rds/connection.c index a42470a2ec63..e9eecf497ee6 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -322,6 +322,7 @@ void rds_conn_shutdown(struct rds_connection *conn, int restart) mutex_lock(&conn->c_cm_lock); if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING) && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) { + conn->c_drop_source = 2; rds_conn_error(conn, "shutdown called in state %d\n", atomic_read(&conn->c_state)); mutex_unlock(&conn->c_cm_lock); @@ -343,6 +344,7 @@ void rds_conn_shutdown(struct rds_connection *conn, int restart) * Quite reproduceable with loopback connections. 
* Mostly harmless. */ + conn->c_drop_source = 3; rds_conn_error(conn, "%s: failed to transition to state DOWN, " "current state is %d\n", @@ -398,6 +400,7 @@ void rds_conn_destroy(struct rds_connection *conn) synchronize_rcu(); /* shut the connection down */ + conn->c_drop_source = 4; rds_conn_drop(conn); flush_work(&conn->c_down_w); @@ -610,6 +613,94 @@ void rds_conn_exit(void) rds_conn_message_info_retrans); } +char *conn_drop_reason_str(u8 reason) +{ + /* Here is distribution of drop reason: + * + * 0-19: rds-core + * + * 20-119: IB + * 20-39: ib_cm + * 40-59: event handling + * 60-79: data path + * 80-119: special features like active bonding + * + * 120-139: iWARP + * + * 140-159: TCP + * + * 160-255: any other future additions + * + */ + switch (reason) { + case 1: return "user reset"; + case 2: return "invalid connection state"; + case 3: return "failure to move to DOWN state"; + case 4: return "connection destroy"; + case 5: return "zero lane went down"; + case 6: return "conn_connect failure"; + case 7: return "hb timeout"; + case 8: return "reconnect timeout"; + + case 20: return "race between ESTABLISHED event and drop"; + case 21: return "conn is not in CONNECTING state"; + case 22: return "qp event"; + case 23: return "base conn down"; + case 24: return "incoming REQ in CONN_UP state"; + case 25: return "incoming REQ in CONNECTING state"; + case 26: return "setup_qp failure"; + case 27: return "rdma_accept failure"; + case 28: return "setup_qp failure"; + case 29: return "rdma_connect failure"; + + case 40: return "rdma_set_ib_paths failure"; + case 41: return "resolve_route failure"; + case 42: return "detected rdma_cm_id mismatch"; + case 43: return "ROUTE_ERROR event"; + case 44: return "ADDR_ERROR event"; + case 45: return "CONNECT_ERROR or UNREACHABLE or DEVICE_REMOVE event"; + case 46: return "CONSUMER_DEFINED reject"; + case 47: return "REJECTED event"; + case 48: return "ADDR_CHANGE event"; + case 49: return "DISCONNECTED event"; + case 50: 
return "TIMEWAIT_EXIT event"; + + case 60: return "post_recv failure"; + case 61: return "send_ack failure"; + case 62: return "no header in incoming msg"; + case 63: return "corrupted header in incoming msg"; + case 64: return "fragment header mismatch"; + case 65: return "recv completion error"; + case 66: return "send completion error"; + case 67: return "post_send failure"; + + case 80: return "rds_rdma module unload"; + case 81: return "active bonding failover"; + case 82: return "corresponding loopback conn drop"; + case 83: return "active bonding failback"; + + case 120: return "qp_event"; + case 121: return "incoming REQ in connecting state"; + case 122: return "setup_qp failure"; + case 123: return "rdma_accept failure"; + case 124: return "setup_qp failure"; + case 125: return "rdma_connect failure"; + + case 130: return "post_recv failure"; + case 131: return "send_ack failure"; + case 132: return "no header in incoming msg"; + case 133: return "corrupted header in incoming msg"; + case 134: return "fragment header mismatch"; + case 135: return "recv completion error"; + case 136: return "send completion error"; + + case 140: return "sk_state to TCP_CLOSE"; + case 141: return "tcp_send failure"; + + default: return "unknown reason"; + } +} + static void rds_conn_probe_lanes(struct rds_connection *conn) { struct hlist_head *head = @@ -632,6 +723,7 @@ static void rds_conn_probe_lanes(struct rds_connection *conn) NIPQUAD(tmp->c_faddr), tmp->c_tos); + conn->c_drop_source = 5; rds_conn_drop(tmp); } } @@ -653,10 +745,11 @@ void rds_conn_drop(struct rds_connection *conn) conn->c_reconnect_err = 0; conn->c_reconnect_racing = 0; printk(KERN_INFO "RDS/IB: connection " - "<%u.%u.%u.%u,%u.%u.%u.%u,%d> dropped\n", + "<%u.%u.%u.%u,%u.%u.%u.%u,%d> dropped due to '%s'\n", NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), - conn->c_tos); + conn->c_tos, + conn_drop_reason_str(conn->c_drop_source)); if (conn->c_tos == 0) rds_conn_probe_lanes(conn); diff --git a/net/rds/ib.c 
b/net/rds/ib.c index 8d6c123c42d6..fe9fcbfea0f3 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -197,8 +197,10 @@ void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev) "calling rds_conn_drop to drop all connections.\n"); spin_lock_irqsave(&rds_ibdev->spinlock, flags); - list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node) + list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node) { + ic->conn->c_drop_source = 80; rds_conn_drop(ic->conn); + } spin_unlock_irqrestore(&rds_ibdev->spinlock, flags); } @@ -625,6 +627,7 @@ static void rds_ib_conn_drop(struct work_struct *_work) rds_rtd(RDS_RTD_CM_EXT, "conn: %p, calling rds_conn_drop\n", conn); + conn->c_drop_source = 81; rds_conn_drop(conn); kfree(work); @@ -852,10 +855,11 @@ static int rds_ib_move_ip(char *from_dev, ic->conn->c_faddr && ic2->conn->c_faddr == ic->conn->c_laddr) { - rds_rtd(RDS_RTD_CM_EXT_P, - "conn:%p, tos %d, calling rds_conn_drop\n", - ic2->conn, - ic2->conn->c_tos); + rds_rtd(RDS_RTD_CM_EXT_P, + "conn:%p, tos %d, calling rds_conn_drop\n", + ic2->conn, + ic2->conn->c_tos); + ic2->conn->c_drop_source = 82; rds_conn_drop(ic2->conn); } } @@ -884,6 +888,7 @@ static int rds_ib_move_ip(char *from_dev, rds_rtd(RDS_RTD_CM_EXT, "conn: %p, tos %d, calling rds_conn_drop\n", ic->conn, ic->conn->c_tos); + ic->conn->c_drop_source = 83; rds_conn_drop(ic->conn); } } diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index fae04c620492..59a07afb8236 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -177,6 +177,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even rds_rtd(RDS_RTD_CM, "ic->i_cm_id is NULL, ic: %p, calling rds_conn_drop\n", ic); + conn->c_drop_source = 20; rds_conn_drop(conn); return; } @@ -187,6 +188,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even rds_rtd(RDS_RTD_CM, "conn is in connecting state, conn: %p, calling rds_conn_drop\n", conn); + conn->c_drop_source = 21; rds_conn_drop(conn); return; } @@ -530,6 +532,7 @@ static void 
rds_ib_qp_event_handler(struct ib_event *event, void *data) "Fatal QP Event %u (%s) - connection %pI4->%pI4 tos %d, reconnecting\n", event->event, rds_ib_event_str(event->event), &conn->c_laddr, &conn->c_faddr, conn->c_tos); + conn->c_drop_source = 22; rds_conn_drop(conn); break; } @@ -833,6 +836,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); + conn->c_drop_source = 23; rds_conn_drop(conn); } @@ -855,6 +859,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, if (rds_conn_state(conn) == RDS_CONN_UP) { rds_rtd(RDS_RTD_CM_EXT_P, "incoming connect while connecting\n"); + conn->c_drop_source = 24; rds_conn_drop(conn); rds_ib_stats_inc(s_ib_listen_closed_stale); } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) { @@ -876,6 +881,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); + conn->c_drop_source = 25; rds_conn_drop(conn); rds_ib_stats_inc(s_ib_listen_closed_stale); } else { @@ -914,6 +920,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, err = rds_ib_setup_qp(conn); if (err) { + conn->c_drop_source = 26; rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err); goto out; } @@ -928,8 +935,10 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, #endif /* rdma_accept() calls rdma_reject() internally if it fails */ err = rdma_accept(cm_id, &conn_param); - if (err) + if (err) { + conn->c_drop_source = 27; rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err); + } #if RDMA_RDS_APM_SUPPORTED else if (rds_ib_apm_enabled && !conn->c_loopback) { err = rdma_enable_apm(cm_id, RDMA_ALT_PATH_BEST); @@ -968,6 +977,7 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id) ret = rds_ib_setup_qp(conn); if (ret) { + conn->c_drop_source = 28; rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", ret); goto out; } @@ -975,8 +985,10 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id) 
rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, conn->c_proposed_version, UINT_MAX, UINT_MAX); ret = rdma_connect(cm_id, &conn_param); - if (ret) + if (ret) { + conn->c_drop_source = 29; rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret); + } out: /* Beware - returning non-zero tells the rdma_cm to destroy diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 2e99bcb0fe88..d5d6700d938a 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -596,6 +596,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) recv->r_ibinc, sg_page(&recv->r_frag->f_sg), (long) sg_dma_address(&recv->r_frag->f_sg), ret); if (ret) { + conn->c_drop_source = 60; rds_ib_conn_error(conn, "recv post on " "%pI4 returned %d, disconnecting and " "reconnecting\n", &conn->c_faddr, @@ -863,6 +864,7 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi rds_ib_stats_inc(s_ib_ack_send_failure); + ic->conn->c_drop_source = 61; rds_ib_conn_error(ic->conn, "sending ack failed\n"); } else rds_ib_stats_inc(s_ib_ack_sent); @@ -1039,6 +1041,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, data_len); if (data_len < sizeof(struct rds_header)) { + conn->c_drop_source = 62; rds_ib_conn_error(conn, "incoming message " "from %pI4 didn't inclue a " "header, disconnecting and " @@ -1052,6 +1055,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, /* Validate the checksum. 
*/ if (!rds_message_verify_checksum(ihdr)) { + conn->c_drop_source = 63; rds_ib_conn_error(conn, "incoming message " "from %pI4 has corrupted header - " "forcing a reconnect\n", @@ -1119,6 +1123,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, || hdr->h_len != ihdr->h_len || hdr->h_sport != ihdr->h_sport || hdr->h_dport != ihdr->h_dport) { + conn->c_drop_source = 64; rds_ib_conn_error(conn, "fragment header mismatch; forcing reconnect\n"); return; @@ -1279,6 +1284,7 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, } else { /* We expect errors as the qp is drained during shutdown */ if (rds_conn_up(conn) || rds_conn_connecting(conn)) { + conn->c_drop_source = 65; rds_ib_conn_error(conn, "recv completion " "<%pI4,%pI4,%d> had " "status %u, disconnecting and " diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 749e8d88ed99..2a1236cdfb87 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -341,6 +341,7 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) /* We expect errors as the qp is drained during shutdown */ if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) { + conn->c_drop_source = 66; rds_ib_conn_error(conn, "send completion <%u.%u.%u.%u,%u.%u.%u.%u,%d> status " "%u vendor_err %u, disconnecting and reconnecting\n", @@ -807,6 +808,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, prev->s_op = NULL; } + ic->conn->c_drop_source = 67; rds_ib_conn_error(ic->conn, "ib_post_send failed\n"); goto out; } diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 1d89fe703880..ed0f8078b0bd 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -160,6 +160,7 @@ static void rds_iw_qp_event_handler(struct ib_event *event, void *data) "- connection %pI4->%pI4, reconnecting\n", event->event, &conn->c_laddr, &conn->c_faddr); + conn->c_drop_source = 120; rds_conn_drop(conn); break; } @@ -415,6 +416,7 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, if (!rds_conn_transition(conn, 
RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { if (rds_conn_state(conn) == RDS_CONN_UP) { rdsdebug("incoming connect while connecting\n"); + conn->c_drop_source = 121; rds_conn_drop(conn); rds_iw_stats_inc(s_iw_listen_closed_stale); } else @@ -451,6 +453,7 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, err = rds_iw_setup_qp(conn); if (err) { + conn->c_drop_source = 122; rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err); goto out; } @@ -461,6 +464,7 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, err = rdma_accept(cm_id, &conn_param); mutex_unlock(&conn->c_cm_lock); if (err) { + conn->c_drop_source = 123; rds_iw_conn_error(conn, "rdma_accept failed (%d)\n", err); goto out; } @@ -488,6 +492,7 @@ int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id) ret = rds_iw_setup_qp(conn); if (ret) { + conn->c_drop_source = 124; rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", ret); goto out; } @@ -495,9 +500,10 @@ int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id) rds_iw_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION); ret = rdma_connect(cm_id, &conn_param); - if (ret) + if (ret) { + conn->c_drop_source = 125; rds_iw_conn_error(conn, "rdma_connect failed (%d)\n", ret); - + } out: /* Beware - returning non-zero tells the rdma_cm to destroy * the cm_id. 
We should certainly not do it as long as we still diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index 545fb526f77b..ec21cc5b21eb 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c @@ -252,6 +252,7 @@ int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, recv->r_iwinc, recv->r_frag->f_page, (long) recv->r_frag->f_mapped, ret); if (ret) { + conn->c_drop_source = 130; rds_iw_conn_error(conn, "recv post on " "%pI4 returned %d, disconnecting and " "reconnecting\n", &conn->c_faddr, @@ -454,6 +455,7 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi rds_iw_stats_inc(s_iw_ack_send_failure); + ic->conn->c_drop_source = 131; rds_iw_conn_error(ic->conn, "sending ack failed\n"); } else rds_iw_stats_inc(s_iw_ack_sent); @@ -644,6 +646,7 @@ static void rds_iw_process_recv(struct rds_connection *conn, byte_len); if (byte_len < sizeof(struct rds_header)) { + conn->c_drop_source = 132; rds_iw_conn_error(conn, "incoming message " "from %pI4 didn't inclue a " "header, disconnecting and " @@ -657,6 +660,7 @@ static void rds_iw_process_recv(struct rds_connection *conn, /* Validate the checksum. 
*/ if (!rds_message_verify_checksum(ihdr)) { + conn->c_drop_source = 133; rds_iw_conn_error(conn, "incoming message " "from %pI4 has corrupted header - " "forcing a reconnect\n", @@ -719,6 +723,7 @@ static void rds_iw_process_recv(struct rds_connection *conn, || hdr->h_len != ihdr->h_len || hdr->h_sport != ihdr->h_sport || hdr->h_dport != ihdr->h_dport) { + conn->c_drop_source = 134; rds_iw_conn_error(conn, "fragment header mismatch; forcing reconnect\n"); return; @@ -803,6 +808,7 @@ static inline void rds_poll_cq(struct rds_iw_connection *ic, if (wc.status == IB_WC_SUCCESS) { rds_iw_process_recv(conn, recv, wc.byte_len, state); } else { + conn->c_drop_source = 135; rds_iw_conn_error(conn, "recv completion on " "%pI4 had status %u, disconnecting and " "reconnecting\n", &conn->c_faddr, diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 33106547e4a6..0820c4abc2eb 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -293,6 +293,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) /* We expect errors as the qp is drained during shutdown */ if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) { + conn->c_drop_source = 136; rds_iw_conn_error(conn, "send completion on %pI4 " "had status %u, disconnecting and reconnecting\n", diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index b5c4834c4385..c5d5011b15ad 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -166,6 +166,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, "ADDR_RESOLVED: ret %d, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", ret, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); + conn->c_drop_source = 40; rds_conn_drop(conn); ret = 0; } @@ -190,6 +191,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, ibic = conn->c_transport_data; if (ibic && ibic->i_cm_id == cm_id) ibic->i_cm_id = NULL; + conn->c_drop_source = 41; rds_conn_drop(conn); } } else if (conn->c_to_index < (RDS_RDMA_RESOLVE_TO_MAX_INDEX-1)) @@ 
-213,6 +215,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, "ROUTE_RESOLVED: calling rds_conn_drop, conn %p <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); + conn->c_drop_source = 42; rds_conn_drop(conn); } } @@ -256,6 +259,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, "ROUTE_ERROR: conn %p, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); + conn->c_drop_source = 43; rds_conn_drop(conn); } break; @@ -270,6 +274,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, "ADDR_ERROR: conn %p, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); + conn->c_drop_source = 44; rds_conn_drop(conn); } break; @@ -282,6 +287,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, "CONN/UNREACHABLE/RMVAL ERR: conn %p, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); + conn->c_drop_source = 45; rds_conn_drop(conn); } break; @@ -303,6 +309,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, if (!conn->c_tos) { conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION; + conn->c_drop_source = 46; rds_conn_drop(conn); } else { if (conn->c_loopback) @@ -321,6 +328,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); + conn->c_drop_source = 47; rds_conn_drop(conn); } } @@ -337,6 +345,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, "ADDR_CHANGE: calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); + conn->c_drop_source = 48; rds_conn_drop(conn); } #else @@ -345,6 +354,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, "ADDR_CHANGE: calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", NIPQUAD(conn->c_laddr), 
NIPQUAD(conn->c_faddr), conn->c_tos); + conn->c_drop_source = 48; rds_conn_drop(conn); } #endif @@ -354,6 +364,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, rds_rtd(RDS_RTD_CM, "DISCONNECT event - dropping connection %pI4->%pI4 tos %d\n", &conn->c_laddr, &conn->c_faddr, conn->c_tos); + conn->c_drop_source = 49; rds_conn_drop(conn); break; @@ -363,6 +374,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, "dropping connection " "%pI4->%pI4\n", &conn->c_laddr, &conn->c_faddr); + conn->c_drop_source = 50; rds_conn_drop(conn); } else printk(KERN_INFO "TIMEWAIT_EXIT event - conn=NULL\n"); diff --git a/net/rds/rds.h b/net/rds/rds.h index a26152110da4..f01c043eb07d 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -175,6 +175,7 @@ struct rds_connection { unsigned int c_reconnect_racing; unsigned int c_route_resolved; + u8 c_drop_source; }; static inline @@ -732,6 +733,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_lengths *lens, int (*visitor)(struct rds_connection *, void *), size_t item_len); +char *conn_drop_reason_str(u8 reason); void __rds_conn_error(struct rds_connection *conn, const char *, ...) __attribute__ ((format (printf, 2, 3))); #define rds_conn_error(conn, fmt...) 
\ diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 262d996d05f5..acaf420d3e7a 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -64,6 +64,7 @@ void rds_tcp_state_change(struct sock *sk) break; case TCP_CLOSE_WAIT: case TCP_CLOSE: + conn->c_drop_source = 140; rds_conn_drop(conn); default: break; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index e9cffe0220f7..6e8e5a7b42af 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -152,6 +152,7 @@ out: printk(KERN_WARNING "RDS/tcp: send to %u.%u.%u.%u " "returned %d, disconnecting and reconnecting\n", NIPQUAD(conn->c_faddr), ret); + conn->c_drop_source = 141; rds_conn_drop(conn); } } diff --git a/net/rds/threads.c b/net/rds/threads.c index 015598b1aa85..0de85acd49c9 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -179,6 +179,7 @@ void rds_connect_worker(struct work_struct *work) * drop the connection if it doesn't work out after a while */ conn->c_connection_start = get_seconds(); + conn->c_drop_source = 0; ret = conn->c_trans->conn_connect(conn); rds_rtd(RDS_RTD_CM_EXT, @@ -190,8 +191,10 @@ void rds_connect_worker(struct work_struct *work) rds_rtd(RDS_RTD_CM_EXT, "reconnecting..., conn %p\n", conn); rds_queue_reconnect(conn); - } else + } else { + conn->c_drop_source = 6; rds_conn_error(conn, "RDS: connect failed\n"); + } } } else { rds_rtd(RDS_RTD_CM, @@ -280,7 +283,8 @@ void rds_hb_worker(struct work_struct *work) "RDS/IB: connection <%u.%u.%u.%u,%u.%u.%u.%u,%d> timed out (0x%lx,0x%lx)..discon and recon\n", NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos, conn->c_hb_start, now); - rds_conn_drop(conn); + conn->c_drop_source = 7; + rds_conn_drop(conn); return; } queue_delayed_work(rds_wq, &conn->c_hb_w, HZ); @@ -300,6 +304,7 @@ void rds_reconnect_timeout(struct work_struct *work) "conn not up, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); + conn->c_drop_source = 8; 
rds_conn_drop(conn); conn->c_reconnect_racing = 0; }