From: Qing Huang Date: Wed, 15 Jul 2015 01:36:43 +0000 (-0700) Subject: ib/rds: runtime debuggability enhancement X-Git-Tag: v4.1.12-92~261^2^2^2~1 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=df7707e8c14e29a96e076fb08eabc7d9a9f39194;p=users%2Fjedix%2Flinux-maple.git ib/rds: runtime debuggability enhancement This patch provides the ability to dynamically turn on or off various types of debug/diag prints inside the RDS module. The run-time debug prints are controlled by a rds module parameter, rds_rt_debug_bitmap. Here is the definition for different bits. We have implemented feature related bits, such as Connection Management, Active Bonding, Error prints, Send, Recv. in net/rds/rds_rt_debug.h ... enum { /* bit 0 ~ 19 are feature related bits */ RDS_RTD_ERR = 1 << 0, /* 0x1 */ RDS_RTD_ERR_EXT = 1 << 1, /* 0x2 */ RDS_RTD_CM = 1 << 3, /* 0x8 */ RDS_RTD_CM_EXT = 1 << 4, /* 0x10 */ RDS_RTD_CM_EXT_P = 1 << 5, /* 0x20 */ RDS_RTD_ACT_BND = 1 << 7, /* 0x80 */ RDS_RTD_ACT_BND_EXT = 1 << 8, /* 0x100 */ RDS_RTD_RCV = 1 << 11, /* 0x800 */ RDS_RTD_RCV_EXT = 1 << 12, /* 0x1000 */ RDS_RTD_SND = 1 << 14, /* 0x4000 */ RDS_RTD_SND_EXT = 1 << 15, /* 0x8000 */ ... In general, *EXTRA bits mean that you will get extra information but possible flood prints as well. But every bit can be controlled by users so users can decide how much information they want to see/collect. The current embedded printk level used for this patch is KERN_INFO. Most likely all the msgs will only go to /var/log/messages without showing up on console if we use the default settings for /proc/sys/kernel/printk and /etc/rsyslog.conf in ol6 environment. E.g if we want to turn on RDS_RTD_ERR and RDS_RTD_CM bits. What we can do is echo 0x9 > /sys/module/rds/parameters/rds_rt_debug_bitmap To turn on RDS_RTD_ERR(0x1), RDS_RTD_CM(0x8), and RDS_RTD_RCV(0x800) bits echo 0x809 > /sys/module/rds/parameters/rds_rt_debug_bitmap Performance penalty: with all the debug flag bits set to 0, there should be no porformance impact in a kernel with this patch. Orabug 21314268 Reviewed-by: Ajaykumar Hotchandani Reviewed-by: Shamir Rabinovitch Reviewed-by: Yuval Shaia Signed-off-by: Qing Huang Signed-off-by: Mukesh Kacker --- diff --git a/net/rds/Makefile b/net/rds/Makefile index 56d3f6023ced..60fb4029e0a5 100644 --- a/net/rds/Makefile +++ b/net/rds/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_RDS) += rds.o rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \ recv.o send.o stats.o sysctl.o threads.o transport.o \ - loop.o page.o rdma.o + loop.o page.o rdma.o rds_rt_debug.o obj-$(CONFIG_RDS_RDMA) += rds_rdma.o rds_rdma-y := rdma_transport.o \ diff --git a/net/rds/connection.c b/net/rds/connection.c index 0dd0cef5ca8d..4538d90e2083 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -317,6 +317,10 @@ void rds_conn_shutdown(struct rds_connection *conn, int restart) { /* shut it down unless it's down already */ if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) { + rds_rtd(RDS_RTD_CM_EXT, + "RDS/IB: shutdown init <%u.%u.%u.%u,%u.%u.%u.%u,%d>, cn %p, cn->c_p %p\n", + NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), + conn->c_tos, conn, conn->c_passive); /* * Quiesce the connection mgmt handlers before we start tearing * things down. We don't hold the mutex for the entire @@ -365,6 +369,10 @@ void rds_conn_shutdown(struct rds_connection *conn, int restart) rcu_read_lock(); if (!hlist_unhashed(&conn->c_hash_node) && restart) { rcu_read_unlock(); + rds_rtd(RDS_RTD_CM_EXT, + "queueing reconnect request... <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), + conn->c_tos); rds_queue_reconnect(conn); } else { rcu_read_unlock(); @@ -383,9 +391,9 @@ void rds_conn_destroy(struct rds_connection *conn) struct rds_message *rm, *rtmp; unsigned long flags; - rdsdebug("freeing conn %p for %pI4 -> " - "%pI4\n", conn, &conn->c_laddr, - &conn->c_faddr); + rds_rtd(RDS_RTD_CM, "freeing conn %p <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), + conn->c_tos); /* Ensure conn will not be scheduled for reconnect */ spin_lock_irq(&rds_conn_lock); @@ -676,6 +684,11 @@ void rds_conn_drop(struct rds_connection *conn) atomic_set(&conn->c_state, RDS_CONN_ERROR); + rds_rtd(RDS_RTD_CM_EXT, + "RDS/IB: queueing shutdown work, conn %p, <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), + conn->c_tos); + if (conn->c_loopback) queue_work(rds_local_wq, &conn->c_down_w); else @@ -692,6 +705,10 @@ void rds_conn_connect_if_down(struct rds_connection *conn) { if (rds_conn_state(conn) == RDS_CONN_DOWN && !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) { + rds_rtd(RDS_RTD_CM_EXT, + "queueing connect work, conn %p, <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), + conn->c_tos); if (conn->c_loopback) queue_delayed_work(rds_local_wq, &conn->c_conn_w, 0); else diff --git a/net/rds/ib.c b/net/rds/ib.c index 9291e86d27f5..377d516b2c91 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -180,6 +180,8 @@ void rds_ib_nodev_connect(void) { struct rds_ib_connection *ic; + rds_rtd(RDS_RTD_CM_EXT, "check & build all connections\n"); + spin_lock(&ib_nodev_conns_lock); list_for_each_entry(ic, &ib_nodev_conns, ib_node) rds_conn_connect_if_down(ic->conn); @@ -191,6 +193,9 @@ void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev) struct rds_ib_connection *ic; unsigned long flags; + rds_rtd(RDS_RTD_CM_EXT, + "calling rds_conn_drop to drop all connections.\n"); + spin_lock_irqsave(&rds_ibdev->spinlock, flags); list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node) rds_conn_drop(ic->conn); @@ -296,8 +301,11 @@ void rds_ib_remove_one(struct ib_device *device) int i; rds_ibdev = ib_get_client_data(device, &rds_ib_client); - if (!rds_ibdev) + if (!rds_ibdev) { + rds_rtd(RDS_RTD_ACT_BND, "rds_ibdev is NULL, ib_device %p\n", + device); return; + } if (rds_ib_active_bonding_enabled) { for (i = 1; i <= ip_port_cnt; i++) { @@ -307,6 +315,9 @@ void rds_ib_remove_one(struct ib_device *device) ib_unregister_event_handler(&rds_ibdev->event_handler); } + rds_rtd(RDS_RTD_ACT_BND, + "calling rds_ib_dev_shutdown, ib_device %p, rds_ibdev %p\n", + device, rds_ibdev); rds_ib_dev_shutdown(rds_ibdev); /* stop connection attempts from getting a reference to this device. */ @@ -611,6 +622,9 @@ static void rds_ib_conn_drop(struct work_struct *_work) container_of(_work, struct rds_ib_conn_drop_work, work.work); struct rds_connection *conn = work->conn; + rds_rtd(RDS_RTD_CM_EXT, + "conn: %p, calling rds_conn_drop\n", conn); + rds_conn_drop(conn); kfree(work); @@ -838,6 +852,10 @@ static int rds_ib_move_ip(char *from_dev, ic->conn->c_faddr && ic2->conn->c_faddr == ic->conn->c_laddr) { + rds_rtd(RDS_RTD_CM_EXT_P, + "conn:%p, tos %d, calling rds_conn_drop\n", + ic2->conn, + ic2->conn->c_tos); rds_conn_drop(ic2->conn); } } @@ -862,8 +880,12 @@ static int rds_ib_move_ip(char *from_dev, INIT_DELAYED_WORK(&work->work, rds_ib_conn_drop); queue_delayed_work(rds_aux_wq, &work->work, msecs_to_jiffies(1000 * rds_ib_active_bonding_reconnect_delay)); - } else + } else { + rds_rtd(RDS_RTD_CM_EXT, + "conn: %p, tos %d, calling rds_conn_drop\n", + ic->conn, ic->conn->c_tos); rds_conn_drop(ic->conn); + } } } spin_unlock_bh(&rds_ibdev->spinlock); @@ -1090,11 +1112,10 @@ static void rds_ib_do_failover(u8 from_port, u8 to_port, u8 arp_port, if (!to_port) { /* we tried, but did not get a failover port! */ - rdsdebug("RDS/IB: IP %u.%u.%u.%u failed to " - "migrate from %s: no matching " - "destination port available!\n", - NIPQUAD(ip_config[from_port].ip_addr), - ip_config[from_port].if_name); + rds_rtd(RDS_RTD_ERR, + "RDS/IB: IP %u.%u.%u.%u failed to migrate from %s: no matching dest port avail!\n", + NIPQUAD(ip_config[from_port].ip_addr), + ip_config[from_port].if_name); return; } } else { @@ -1216,9 +1237,9 @@ static void rds_ib_do_failback(u8 port, int event_type) /* Test IP addresses and set them if not already set */ ret = rds_ib_testset_ip(port); if (ret) { - rdsdebug("RDS/IB: failed to ressurrect " - "port index %u devname %s or one if its aliases\n", - port, ip_config[port].dev->name); + rds_rtd(RDS_RTD_ACT_BND, + "RDS/IB: failed to ressrt port idx %u dev %s or one of its aliases\n", + port, ip_config[port].dev->name); } } } @@ -1336,8 +1357,11 @@ static void rds_ib_event_handler(struct ib_event_handler *handler, u8 port; struct rds_ib_port_ud_work *work; - if (!rds_ib_active_bonding_enabled || !ip_port_cnt) + if (!rds_ib_active_bonding_enabled || !ip_port_cnt) { + rds_rtd(RDS_RTD_ACT_BND, "ip_port_cnt %d, event %d\n", + ip_port_cnt, event->event); return; + } if (event->event != IB_EVENT_PORT_ACTIVE && event->event != IB_EVENT_PORT_ERR) @@ -1526,6 +1550,8 @@ static void rds_ib_event_handler(struct ib_event_handler *handler, if (this_port_transition == RDSIBP_TRANSITION_UP) { if (rds_ib_active_bonding_fallback) { + rds_rtd(RDS_RTD_ACT_BND, + "active bonding fallback enabled\n"); INIT_DELAYED_WORK(&work->work, rds_ib_failback); queue_delayed_work(rds_wq, &work->work, 0); } else @@ -1658,8 +1684,10 @@ static void rds_ib_dump_ip_config(void) { int i, j; - if (!rds_ib_active_bonding_enabled || !ip_port_cnt) + if (!rds_ib_active_bonding_enabled || !ip_port_cnt) { + rds_rtd(RDS_RTD_ACT_BND, "ip_port_cnt %d\n", ip_port_cnt); return; + } for (i = 1; i <= ip_port_cnt; i++) { printk(KERN_INFO "RDS/IB: %s/port_%d/%s: " @@ -1833,8 +1861,10 @@ static int rds_ib_ip_config_init(void) unsigned int tot_devs = 0; unsigned int tot_ibdevs = 0; - if (!rds_ib_active_bonding_enabled) + if (!rds_ib_active_bonding_enabled) { + rds_rtd(RDS_RTD_ACT_BND, "active bonding not enabled\n"); return 0; + } ip_config = kzalloc(sizeof(struct rds_ib_port) * (ip_port_max + 1), GFP_KERNEL); @@ -2021,8 +2051,10 @@ void rds_ib_ip_failover_groups_init(void) int i; struct rds_ib_device *rds_ibdev; - if (!rds_ib_active_bonding_enabled) + if (!rds_ib_active_bonding_enabled) { + rds_rtd(RDS_RTD_ACT_BND, "active bonding not enabled\n"); return; + } if (rds_ib_active_bonding_failover_groups == NULL) { list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) { @@ -2092,7 +2124,8 @@ void rds_ib_add_one(struct ib_device *device) return; if (ib_query_device(device, dev_attr)) { - rdsdebug("Query device failed for %s\n", device->name); + rds_rtd(RDS_RTD_ERR, "Query device failed for %s\n", + device->name); goto free_attr; } @@ -2319,8 +2352,10 @@ static int rds_ib_netdev_callback(struct notifier_block *self, unsigned long eve struct rds_ib_port_ud_work *work = NULL; int port_transition = RDSIBP_TRANSITION_NOOP; - if (!rds_ib_active_bonding_enabled) + if (!rds_ib_active_bonding_enabled) { + rds_rtd(RDS_RTD_ACT_BND, "active bonding not enabled\n"); return NOTIFY_DONE; + } if (event != NETDEV_UP && event != NETDEV_DOWN && @@ -2567,6 +2602,8 @@ static int rds_ib_netdev_callback(struct notifier_block *self, unsigned long eve switch (port_transition) { case RDSIBP_TRANSITION_UP: if (rds_ib_active_bonding_fallback) { + rds_rtd(RDS_RTD_ACT_BND, + "active bonding fallback enabled\n"); INIT_DELAYED_WORK(&work->work, rds_ib_failback); queue_delayed_work(rds_wq, &work->work, 0); } else diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index a1bbf0467cd9..153d60344730 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -164,7 +164,8 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even } printk(KERN_NOTICE - "RDS/IB: connected <%u.%u.%u.%u,%u.%u.%u.%u,%d> version %u.%u%s\n", + "RDS/IB: conn %p i_cm_id %p connected <%u.%u.%u.%u,%u.%u.%u.%u,%d> version %u.%u%s\n", + conn, ic->i_cm_id, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos, @@ -174,6 +175,9 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even /* The connection might have been dropped under us*/ if (!ic->i_cm_id) { + rds_rtd(RDS_RTD_CM, + "ic->i_cm_id is NULL, ic: %p, calling rds_conn_drop\n", + ic); rds_conn_drop(conn); return; } @@ -181,6 +185,9 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even /* Drop connection if connection state is not CONNECTING. Potentially connection drop from some other place like rds_conn_probe_lanes() */ if (!rds_conn_connecting(conn)) { + rds_rtd(RDS_RTD_CM, + "conn is in connecting state, conn: %p, calling rds_conn_drop\n", + conn); rds_conn_drop(conn); return; } @@ -517,13 +524,13 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data) break; case IB_EVENT_PATH_MIG_ERR: - rdsdebug("RDS: Path migration error\n"); + rds_rtd(RDS_RTD_ERR, "RDS: Path migration error\n"); break; default: - rdsdebug("Fatal QP Event %u (%s) " - "- connection %pI4->%pI4, reconnecting\n", + rds_rtd(RDS_RTD_ERR, + "Fatal QP Event %u (%s) - connection %pI4->%pI4 tos %d, reconnecting\n", event->event, rds_ib_event_str(event->event), - &conn->c_laddr, &conn->c_faddr); + &conn->c_laddr, &conn->c_faddr, conn->c_tos); rds_conn_drop(conn); break; } @@ -658,7 +665,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) */ ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); if (ret) { - rdsdebug("rdma_create_qp failed: %d\n", ret); + rds_rtd(RDS_RTD_ERR, "rdma_create_qp failed: %d\n", ret); goto out; } @@ -668,7 +675,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) &ic->i_send_hdrs_dma, GFP_KERNEL); if (!ic->i_send_hdrs) { ret = -ENOMEM; - rdsdebug("ib_dma_alloc_coherent send failed\n"); + rds_rtd(RDS_RTD_ERR, "ib_dma_alloc_coherent send failed\n"); goto out; } @@ -679,7 +686,8 @@ static int rds_ib_setup_qp(struct rds_connection *conn) &ic->i_recv_hdrs_dma, GFP_KERNEL); if (!ic->i_recv_hdrs) { ret = -ENOMEM; - rdsdebug("ib_dma_alloc_coherent recv failed\n"); + rds_rtd(RDS_RTD_ERR, + "ib_dma_alloc_coherent recv failed\n"); goto out; } } @@ -688,7 +696,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) &ic->i_ack_dma, GFP_KERNEL); if (!ic->i_ack) { ret = -ENOMEM; - rdsdebug("ib_dma_alloc_coherent ack failed\n"); + rds_rtd(RDS_RTD_ERR, "ib_dma_alloc_coherent ack failed\n"); goto out; } @@ -696,7 +704,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ibdev_to_node(dev)); if (!ic->i_sends) { ret = -ENOMEM; - rdsdebug("send allocation failed\n"); + rds_rtd(RDS_RTD_ERR, "send allocation failed\n"); goto out; } memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); @@ -706,7 +714,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) sizeof(struct rds_ib_recv_work)); if (!ic->i_recvs) { ret = -ENOMEM; - rdsdebug("recv allocation failed\n"); + rds_rtd(RDS_RTD_ERR, "recv allocation failed\n"); goto out; } memset(ic->i_recvs, 0, ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); @@ -786,17 +794,21 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, if (!version) goto out; - rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u lguid 0x%llx fguid " - "0x%llx\n", &dp->dp_saddr, &dp->dp_daddr, - RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version), - (unsigned long long)be64_to_cpu(lguid), - (unsigned long long)be64_to_cpu(fguid)); + rds_rtd(RDS_RTD_CM, + "saddr %pI4 daddr %pI4 RDSv%u.%u lguid 0x%llx fguid 0x%llx tos %d\n", + &dp->dp_saddr, &dp->dp_daddr, + RDS_PROTOCOL_MAJOR(version), + RDS_PROTOCOL_MINOR(version), + (unsigned long long)be64_to_cpu(lguid), + (unsigned long long)be64_to_cpu(fguid), + dp->dp_tos); /* RDS/IB is not currently netns aware, thus init_net */ conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr, &rds_ib_transport, dp->dp_tos, GFP_KERNEL); if (IS_ERR(conn)) { - rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn)); + rds_rtd(RDS_RTD_ERR, "rds_conn_create failed (%ld)\n", + PTR_ERR(conn)); conn = NULL; goto out; } @@ -828,7 +840,8 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, if (ic && ic->i_cm_id == cm_id) destroy = 0; if (rds_conn_state(conn) == RDS_CONN_UP) { - rdsdebug("incoming connect while connecting\n"); + rds_rtd(RDS_RTD_CM_EXT_P, + "incoming connect while connecting\n"); rds_conn_drop(conn); rds_ib_stats_inc(s_ib_listen_closed_stale); } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) { @@ -1079,11 +1092,14 @@ int rds_ib_conn_connect(struct rds_connection *conn) if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); ic->i_cm_id = NULL; - rdsdebug("rdma_create_id() failed: %d\n", ret); + rds_rtd(RDS_RTD_ERR, "rdma_create_id() failed: %d\n", ret); goto out; } - rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn); + rds_rtd(RDS_RTD_CM_EXT, + "RDS/IB: conn init <%u.%u.%u.%u,%u.%u.%u.%u,%d> cm_id %p\n", + NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), + conn->c_tos, ic->i_cm_id); src.sin_family = AF_INET; src.sin_addr.s_addr = (__force u32)conn->c_laddr; @@ -1097,8 +1113,8 @@ int rds_ib_conn_connect(struct rds_connection *conn) (struct sockaddr *)&dest, RDS_RDMA_RESOLVE_TIMEOUT_MS); if (ret) { - rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id, - ret); + rds_rtd(RDS_RTD_ERR, "addr resolve failed for cm id %p: %d\n", + ic->i_cm_id, ret); rdma_destroy_id(ic->i_cm_id); ic->i_cm_id = NULL; @@ -1130,7 +1146,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) /* Actually this may happen quite frequently, when * an outgoing connect raced with an incoming connect. */ - rdsdebug("failed to disconnect, cm: %p err %d\n", + rds_rtd(RDS_RTD_CM_EXT_P, + "failed to disconnect, cm: %p err %d\n", ic->i_cm_id, err); } else if (rds_ib_srq_enabled && ic->rds_ibdev) { /* diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 42dad98f6cbe..bd2cfba0765e 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -128,6 +128,10 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, mutex_unlock(&conn->c_base_conn->c_cm_lock); if (ret) { + rds_rtd(RDS_RTD_CM, + "ADDR_RESOLVED: ret %d, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + ret, NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr), conn->c_tos); rds_conn_drop(conn); ret = 0; } @@ -170,8 +174,13 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, ibic = conn->c_transport_data; if (ibic && ibic->i_cm_id == cm_id) ret = trans->cm_initiate_connect(cm_id); - else + else { + rds_rtd(RDS_RTD_CM, + "ROUTE_RESOLVED: calling rds_conn_drop, conn %p <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + conn, NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr), conn->c_tos); rds_conn_drop(conn); + } } break; @@ -208,7 +217,13 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, __free_page(page); } - rds_conn_drop(conn); + if (conn) { + rds_rtd(RDS_RTD_ERR, + "ROUTE_ERROR: conn %p, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + conn, NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr), conn->c_tos); + rds_conn_drop(conn); + } break; case RDMA_CM_EVENT_ESTABLISHED: @@ -216,15 +231,25 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_ADDR_ERROR: - if (conn) + if (conn) { + rds_rtd(RDS_RTD_ERR, + "ADDR_ERROR: conn %p, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + conn, NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr), conn->c_tos); rds_conn_drop(conn); + } break; case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_DEVICE_REMOVAL: - if (conn) + if (conn) { + rds_rtd(RDS_RTD_ERR, + "CONN/UNREACHABLE/RMVAL ERR: conn %p, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + conn, NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr), conn->c_tos); rds_conn_drop(conn); + } break; case RDMA_CM_EVENT_REJECTED: @@ -238,6 +263,9 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, if (event->status == RDS_REJ_CONSUMER_DEFINED && (*err) == 0) { /* Rejection from RDSV3.1 */ + pr_warn("Rejected: CSR_DEF err 0, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr), conn->c_tos); if (!conn->c_tos) { conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION; @@ -252,27 +280,46 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, &conn->c_reject_w, msecs_to_jiffies(10)); } - } else + } else { + rds_rtd(RDS_RTD_ERR, + "Rejected: *err %d status %d calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + *err, event->status, + NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr), + conn->c_tos); rds_conn_drop(conn); + } } break; case RDMA_CM_EVENT_ADDR_CHANGE: - rdsdebug("ADDR_CHANGE event <%u.%u.%u.%u,%u.%u.%u.%u>\n", - NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr)); + rds_rtd(RDS_RTD_CM_EXT, + "ADDR_CHANGE event <%u.%u.%u.%u,%u.%u.%u.%u>\n", + NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr)); #if RDMA_RDS_APM_SUPPORTED - if (conn && !rds_ib_apm_enabled) + if (conn && !rds_ib_apm_enabled) { + rds_rtd(RDS_RTD_CM, + "ADDR_CHANGE: calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), + conn->c_tos); rds_conn_drop(conn); + } #else - if (conn) + if (conn) { + rds_rtd(RDS_RTD_CM, + "ADDR_CHANGE: calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), + conn->c_tos); rds_conn_drop(conn); + } #endif break; case RDMA_CM_EVENT_DISCONNECTED: - rdsdebug("DISCONNECT event - dropping connection " - "%pI4->%pI4\n", &conn->c_laddr, - &conn->c_faddr); + rds_rtd(RDS_RTD_CM, + "DISCONNECT event - dropping connection %pI4->%pI4 tos %d\n", + &conn->c_laddr, &conn->c_faddr, conn->c_tos); rds_conn_drop(conn); break; diff --git a/net/rds/rds.h b/net/rds/rds.h index be8306c35b37..348324f89cd0 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -11,6 +11,7 @@ #include #include "info.h" +#include "rds_rt_debug.h" /* * RDS Network protocol version diff --git a/net/rds/rds_rt_debug.c b/net/rds/rds_rt_debug.c new file mode 100644 index 000000000000..8012c8079b8d --- /dev/null +++ b/net/rds/rds_rt_debug.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2015 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include + +#include "rds.h" +#include "rds_rt_debug.h" + +u32 kernel_rds_rt_debug_bitmap; +EXPORT_SYMBOL(kernel_rds_rt_debug_bitmap); + +module_param_named(rds_rt_debug_bitmap, kernel_rds_rt_debug_bitmap, uint, 0644); +MODULE_PARM_DESC(rds_rt_debug_bitmap, "RDS Runtime Debug Message Enabling Bitmap [default 0]"); + diff --git a/net/rds/rds_rt_debug.h b/net/rds/rds_rt_debug.h new file mode 100644 index 000000000000..80afccd6bdd7 --- /dev/null +++ b/net/rds/rds_rt_debug.h @@ -0,0 +1,42 @@ +#ifndef _RDS_RT_DEBUG_H +#define _RDS_RT_DEBUG_H + +extern u32 kernel_rds_rt_debug_bitmap; + +enum { + /* bit 0 ~ 19 are feature related bits */ + RDS_RTD_ERR = 1 << 0, /* 0x1 */ + RDS_RTD_ERR_EXT = 1 << 1, /* 0x2 */ + + RDS_RTD_CM = 1 << 3, /* 0x8 */ + RDS_RTD_CM_EXT = 1 << 4, /* 0x10 */ + RDS_RTD_CM_EXT_P = 1 << 5, /* 0x20 */ + + RDS_RTD_ACT_BND = 1 << 7, /* 0x80 */ + RDS_RTD_ACT_BND_EXT = 1 << 8, /* 0x100 */ + + RDS_RTD_RCV = 1 << 11, /* 0x800 */ + RDS_RTD_RCV_EXT = 1 << 12, /* 0x1000 */ + + RDS_RTD_SND = 1 << 14, /* 0x4000 */ + RDS_RTD_SND_EXT = 1 << 15, /* 0x8000 */ + + /* bit 20 ~ 31 are module specific bits */ + RDS_RTD_CORE = 1 << 20, /* 0x100000 */ + RDS_RTD_RDMA_IB = 1 << 23, /* 0x800000 */ + + /* the following are placeholders for now */ + RDS_RTD_RDMA_IW = 1 << 26, /* 0x4000000 */ + RDS_RTD_TCP = 1 << 28, /* 0x10000000 */ +}; + +#define rds_rtd_printk(level, format, arg...) \ + printk(level "RDS_RTD %s:%d: " format, __func__, __LINE__, ## arg) + +#define rds_rtd(enabling_bit, format, arg...) \ + do { if (likely(!(enabling_bit & kernel_rds_rt_debug_bitmap))) break; \ + rds_rtd_printk(KERN_INFO, format, ## arg); \ + } while (0) + +#endif /* _RDS_RT_DEBUG_H */ + diff --git a/net/rds/recv.c b/net/rds/recv.c index b103976a2090..5d5296a00c04 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -237,8 +237,9 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, skb = alloc_skb(sizeof(struct rds_nf_hdr) * 2, gfp); if (NULL == skb) { /* if we have allocation problems, then we just need to depart */ - rdsdebug("failure to allocate space for inc %p, %u.%u.%u.%u -> %u.%d.%u.%u\n", - inc, NIPQUAD(saddr), NIPQUAD(daddr)); + rds_rtd(RDS_RTD_ERR, + "failure to allocate space for inc %p, %u.%u.%u.%u -> %u.%d.%u.%u tos %d\n", + inc, NIPQUAD(saddr), NIPQUAD(daddr), conn->c_tos); rds_recv_local(conn, saddr, daddr, inc, gfp); return; } @@ -281,8 +282,9 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, } /* if we had a failure to convert, then just assuming to continue as local */ else { - rdsdebug("failed to create skb form, conn %p, inc %p, %u.%u.%u.%u -> %u.%u.%u.%u\n", - conn, inc, NIPQUAD(saddr), NIPQUAD(daddr)); + rds_rtd(RDS_RTD_RCV_EXT, + "failed to create skb form, conn %p, inc %p, %u.%u.%u.%u -> %u.%u.%u.%u tos %d\n", + conn, inc, NIPQUAD(saddr), NIPQUAD(daddr), conn->c_tos); ret = 1; } @@ -399,8 +401,10 @@ rds_recv_forward(struct rds_connection *conn, struct rds_incoming *inc, /* find the proper output socket - it should be the local one on which we originated */ rs = rds_find_bound(dst->saddr, dst->sport); if (!rs) { - rdsdebug("failed to find output rds_socket dst %u.%u.%u.%u : %u, inc %p, conn %p\n", - NIPQUAD(dst->daddr), dst->dport, inc, conn); + rds_rtd(RDS_RTD_RCV, + "failed to find output rds_socket dst %u.%u.%u.%u : %u, inc %p, conn %p tos %d\n", + NIPQUAD(dst->daddr), dst->dport, inc, conn, + conn->c_tos); rds_stats_inc(s_recv_drop_no_sock); goto out; } @@ -411,8 +415,10 @@ rds_recv_forward(struct rds_connection *conn, struct rds_incoming *inc, /* now lets see if we can send it all */ ret = rds_send_internal(conn, rs, inc->i_skb, gfp); if (len != ret) { - rdsdebug("failed to send rds_data dst %u.%u.%u.%u : %u, inc %p, conn %p, len %d != ret %d\n", - NIPQUAD(dst->daddr), dst->dport, inc, conn, len, ret); + rds_rtd(RDS_RTD_RCV, + "failed to send rds_data dst %u.%u.%u.%u : %u, inc %p, conn %p tos %d, len %d != ret %d\n", + NIPQUAD(dst->daddr), dst->dport, inc, conn, conn->c_tos, + len, ret); goto out; } @@ -443,6 +449,7 @@ rds_recv_local(struct rds_connection *conn, __be32 saddr, __be32 daddr, struct rds_sock *rs = NULL; struct sock *sk; unsigned long flags; + u64 inc_hdr_h_sequence = 0; inc->i_conn = conn; inc->i_rx_jiffies = jiffies; @@ -478,13 +485,22 @@ rds_recv_local(struct rds_connection *conn, __be32 saddr, __be32 daddr, * XXX we could spend more on the wire to get more robust failure * detection, arguably worth it to avoid data corruption. */ + inc_hdr_h_sequence = be64_to_cpu(inc->i_hdr.h_sequence); - if (be64_to_cpu(inc->i_hdr.h_sequence) < conn->c_next_rx_seq + if (inc_hdr_h_sequence != conn->c_next_rx_seq) { + rds_rtd(RDS_RTD_RCV, + "conn %p <%u.%u.%u.%u,%u.%u.%u.%u,%d> expect seq# %llu, recved seq# %llu, retrans bit %d\n", + conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), + conn->c_tos, conn->c_next_rx_seq, inc_hdr_h_sequence, + inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED); + } + + if (inc_hdr_h_sequence < conn->c_next_rx_seq && (inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED)) { rds_stats_inc(s_recv_drop_old_seq); goto out; } - conn->c_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1; + conn->c_next_rx_seq = inc_hdr_h_sequence + 1; if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) { if (inc->i_hdr.h_flags & RDS_FLAG_HB_PING) { diff --git a/net/rds/send.c b/net/rds/send.c index cc4835a81afe..7efb0642922b 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1259,6 +1259,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) ret = PTR_ERR(conn->c_base_conn); goto out; } + rds_rtd(RDS_RTD_CM_EXT, "checking conn %p\n", + conn->c_base_conn); rds_conn_connect_if_down(conn->c_base_conn); } rs->rs_conn = conn; @@ -1302,6 +1304,9 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) goto out; } + if (rds_conn_state(conn) == RDS_CONN_DOWN) + rds_rtd(RDS_RTD_CM_EXT, "checking conn in down state %p\n", + conn); rds_conn_connect_if_down(conn); ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); @@ -1412,7 +1417,8 @@ int rds_send_internal(struct rds_connection *conn, struct rds_sock *rs, /* create ourselves a new message to send out the data */ rm = rds_message_alloc(ret, gfp); if (!rm) { - rdsdebug("failed to allocate response message rs %p", rs); + rds_rtd(RDS_RTD_ERR, "failed to allocate resp message rs %p", + rs); ret = -ENOMEM; goto out; } @@ -1464,6 +1470,7 @@ int rds_send_internal(struct rds_connection *conn, struct rds_sock *rs, } /* retry the connection if it hasn't actually been made */ + rds_rtd(RDS_RTD_CM_EXT, "checking conn %p\n", conn); rds_conn_connect_if_down(conn); /* simple congestion check */ @@ -1476,7 +1483,7 @@ int rds_send_internal(struct rds_connection *conn, struct rds_sock *rs, /* only take a single pass */ if (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port, dst->dport, &queued)) { - rdsdebug("cannot block on internal send rs %p", rs); + rds_rtd(RDS_RTD_SND, "cannot block on internal send rs %p", rs); rds_stats_inc(s_send_queue_full); /* force a requeue of the work for later */ @@ -1534,6 +1541,7 @@ rds_send_pong(struct rds_connection *conn, __be16 dport) rm->m_daddr = conn->c_faddr; rm->data.op_active = 1; + rds_rtd(RDS_RTD_CM_EXT, "checking conn %p\n", conn); rds_conn_connect_if_down(conn); ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL); diff --git a/net/rds/threads.c b/net/rds/threads.c index 13eff3b134e5..015598b1aa85 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -92,8 +92,8 @@ void rds_connect_complete(struct rds_connection *conn) return; } - rdsdebug("conn %p for %pI4 to %pI4 complete\n", - conn, &conn->c_laddr, &conn->c_faddr); + rds_rtd(RDS_RTD_CM_EXT, "conn %p for %pI4 to %pI4 tos %d complete\n", + conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); conn->c_reconnect_jiffies = 0; set_bit(0, &conn->c_map_queued); @@ -131,9 +131,10 @@ void rds_queue_reconnect(struct rds_connection *conn) { unsigned long rand; - rdsdebug("conn %p for %pI4 to %pI4 reconnect jiffies %lu\n", - conn, &conn->c_laddr, &conn->c_faddr, - conn->c_reconnect_jiffies); + rds_rtd(RDS_RTD_CM_EXT, + "conn %p for %pI4 to %pI4 tos %d reconnect jiffies %lu\n", conn, + &conn->c_laddr, &conn->c_faddr, conn->c_tos, + conn->c_reconnect_jiffies); set_bit(RDS_RECONNECT_PENDING, &conn->c_flags); if (conn->c_reconnect_jiffies == 0) { @@ -143,9 +144,10 @@ void rds_queue_reconnect(struct rds_connection *conn) } get_random_bytes(&rand, sizeof(rand)); - rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n", - rand % conn->c_reconnect_jiffies, conn->c_reconnect_jiffies, - conn, &conn->c_laddr, &conn->c_faddr); + rds_rtd(RDS_RTD_CM_EXT, + "%lu delay %lu ceil conn %p for %pI4 -> %pI4 tos %d\n", + rand % conn->c_reconnect_jiffies, conn->c_reconnect_jiffies, + conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); if (conn->c_loopback) { if (conn->c_laddr >= conn->c_faddr) @@ -179,15 +181,22 @@ void rds_connect_worker(struct work_struct *work) conn->c_connection_start = get_seconds(); ret = conn->c_trans->conn_connect(conn); - rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n", - conn, &conn->c_laddr, &conn->c_faddr, ret); + rds_rtd(RDS_RTD_CM_EXT, + "conn %p for %pI4 to %pI4 tos %d dispatched, ret %d\n", + conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos, ret); if (ret) { if (rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_DOWN)) { + rds_rtd(RDS_RTD_CM_EXT, + "reconnecting..., conn %p\n", conn); rds_queue_reconnect(conn); } else rds_conn_error(conn, "RDS: connect failed\n"); } + } else { + rds_rtd(RDS_RTD_CM, + "conn %p cannot trans from DOWN to CONNECTING state.\n", + conn); } } @@ -200,7 +209,7 @@ void rds_send_worker(struct work_struct *work) clear_bit(RDS_LL_SEND_FULL, &conn->c_flags); ret = rds_send_xmit(conn); cond_resched(); - rdsdebug("conn %p ret %d\n", conn, ret); + rds_rtd(RDS_RTD_SND_EXT, "conn %p ret %d\n", conn, ret); switch (ret) { case -EAGAIN: rds_stats_inc(s_send_immediate_retry); @@ -222,7 +231,7 @@ void rds_recv_worker(struct work_struct *work) if (rds_conn_state(conn) == RDS_CONN_UP) { ret = conn->c_trans->recv(conn); - rdsdebug("conn %p ret %d\n", conn, ret); + rds_rtd(RDS_RTD_RCV_EXT, "conn %p ret %d\n", conn, ret); switch (ret) { case -EAGAIN: rds_stats_inc(s_recv_immediate_retry); @@ -242,6 +251,7 @@ void rds_reject_worker(struct work_struct *work) struct rds_connection *conn = container_of(work, struct rds_connection, c_reject_w.work); atomic_set(&conn->c_state, RDS_CONN_ERROR); + rds_rtd(RDS_RTD_CM, "calling rds_conn_shutdown, conn %p:0\n", conn); rds_conn_shutdown(conn, 0); rds_route_to_base(conn); } @@ -259,17 +269,17 @@ void rds_hb_worker(struct work_struct *work) if (!conn->c_hb_start) { ret = rds_send_hb(conn, 0); if (ret) { - rdsdebug("RDS/IB: rds_hb_worker: failed %d\n", ret); + rds_rtd(RDS_RTD_ERR_EXT, + "RDS/IB: rds_hb_worker: failed %d\n", + ret); return; } conn->c_hb_start = now; } else if (now - conn->c_hb_start > rds_conn_hb_timeout) { - printk(KERN_NOTICE - "RDS/IB: connection <%u.%u.%u.%u,%u.%u.%u.%u,%d> " - "timed out (0x%lx,0x%lx)..disconnecting and reconnecting\n", - NIPQUAD(conn->c_laddr), - NIPQUAD(conn->c_faddr), conn->c_tos, - conn->c_hb_start, now); + rds_rtd(RDS_RTD_CM, + "RDS/IB: connection <%u.%u.%u.%u,%u.%u.%u.%u,%d> timed out (0x%lx,0x%lx)..discon and recon\n", + NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), + conn->c_tos, conn->c_hb_start, now); rds_conn_drop(conn); return; } @@ -286,6 +296,10 @@ void rds_reconnect_timeout(struct work_struct *work) * reconnect. */ if (!rds_conn_up(conn)) { + rds_rtd(RDS_RTD_CM, + "conn not up, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), + conn->c_tos); rds_conn_drop(conn); conn->c_reconnect_racing = 0; } @@ -300,6 +314,10 @@ void rds_shutdown_worker(struct work_struct *work) * drives the reconnect (one-sided reconnect) */ if (conn->c_laddr < conn->c_faddr && conn->c_reconnect_racing) { + rds_rtd(RDS_RTD_CM, + "calling rds_conn_shutdown, conn %p:0 <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), + conn->c_tos); rds_conn_shutdown(conn, 0); if (conn->c_loopback) queue_delayed_work(rds_local_wq, &conn->c_reconn_w, @@ -307,8 +325,13 @@ void rds_shutdown_worker(struct work_struct *work) else queue_delayed_work(rds_wq, &conn->c_reconn_w, msecs_to_jiffies(5000)); - } else + } else { + rds_rtd(RDS_RTD_CM, + "calling rds_conn_shutdown, conn %p:1 <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), + conn->c_tos); rds_conn_shutdown(conn, 1); + } }