As of today, all the TOS connections can only be established after their
base connections are up. This is due to the fact that TOS connections rely
on their base connections to perform route resolution. Nevertheless, when
all the connections drop and reconnect (e.g., on an ADDR_CHANGE event), the
TOS connections consume CPU resources by constantly retrying connection
establishment until their base connections are up.
Thus, this patch delays all TOS connections whose associated base
connections are not up. By doing so, priority is given to establishing the
base connections. Consequently, the base connections can be established
faster, and subsequently so can their associated TOS connections.
Orabug: 25521901
Signed-off-by: Wei Lin Guay <wei.lin.guay@oracle.com>
Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
Reviewed-by: Ajaykumar Hotchandani <ajaykumar.hotchandani@oracle.com>
Tested-by: Dib Chatterjee <dib.chatterjee@oracle.com>
Tested-by: Rosa Isela Lopez Romero <rosa.lopez@oracle.com>
INIT_DELAYED_WORK(&cp->cp_hb_w, rds_hb_worker);
INIT_DELAYED_WORK(&cp->cp_reconn_w, rds_reconnect_timeout);
INIT_DELAYED_WORK(&cp->cp_reject_w, rds_reject_worker);
- INIT_WORK(&cp->cp_down_w, rds_shutdown_worker);
+ INIT_DELAYED_WORK(&cp->cp_down_w, rds_shutdown_worker);
mutex_init(&cp->cp_cm_lock);
cp->cp_flags = 0;
}
rcu_read_lock();
if (!hlist_unhashed(&conn->c_hash_node) && restart) {
rcu_read_unlock();
- rds_queue_reconnect(cp);
+ rds_queue_reconnect(cp, DR_DEFAULT);
} else {
rcu_read_unlock();
}
return;
rds_conn_path_drop(cp, DR_CONN_DESTROY);
- flush_work(&cp->cp_down_w);
+ flush_delayed_work(&cp->cp_down_w);
/* now that conn down worker is flushed; there cannot be any
* more posting of reconn timeout work. But cancel any already
unsigned long now = get_seconds();
struct rds_connection *conn = cp->cp_conn;
+ unsigned long delay = 0;
cp->cp_drop_source = reason;
if (rds_conn_path_state(cp) == RDS_CONN_UP) {
cp->cp_reconnect_start = now;
atomic_set(&cp->cp_state, RDS_CONN_ERROR);
+ if ((conn->c_tos && reason == DR_IB_ADDR_CHANGE) ||
+ reason == DR_IB_BASE_CONN_DOWN)
+ delay = msecs_to_jiffies(100);
rds_rtd(RDS_RTD_CM_EXT,
"RDS/%s: queueing shutdown work, conn %p, <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
conn->c_trans->t_type == RDS_TRANS_TCP ? "TCP" : "IB",
conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
conn->c_tos);
- queue_work(cp->cp_wq, &cp->cp_down_w);
+ queue_delayed_work(cp->cp_wq, &cp->cp_down_w, delay);
}
EXPORT_SYMBOL_GPL(rds_conn_path_drop);
if (rds_conn_path_state(cp) == RDS_CONN_DOWN &&
!test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags)) {
+ if (conn->c_tos == 0 ||
+ (conn->c_tos && rds_conn_state(cp->cp_base_conn) == RDS_CONN_UP)) {
rds_rtd(RDS_RTD_CM_EXT,
"queueing connect work, conn %p, <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
conn->c_tos);
queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
+ } else
+ rds_rtd(RDS_RTD_CM_EXT,
+ "skip, base conn %p down, conn %p, <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
+ cp->cp_base_conn, conn, NIPQUAD(conn->c_laddr),
+ NIPQUAD(conn->c_faddr), conn->c_tos);
}
}
EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);
struct rds_connection *conn = cp->cp_conn;
struct rds_ib_connection *ic = conn->c_transport_data;
struct sockaddr_in src, dest;
- int ret;
+ int ret = 0;
conn->c_route_resolved = 0;
+
+ if (conn->c_tos) {
+ mutex_lock(&conn->c_base_conn->c_cm_lock);
+ if (!rds_conn_transition(conn->c_base_conn, RDS_CONN_UP,
+ RDS_CONN_UP)) {
+ rds_rtd(RDS_RTD_CM_EXT,
+ "RDS/IB: base conn %p (%p) is not up\n",
+ conn->c_base_conn, conn);
+ ret = DR_IB_BASE_CONN_DOWN;
+ }
+ mutex_unlock(&conn->c_base_conn->c_cm_lock);
+ if (ret)
+ goto out;
+ }
+
/* XXX I wonder what affect the port space has */
/* delegate cm event handler to rdma_transport */
ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
struct delayed_work cp_reject_w;
struct delayed_work cp_hb_w;
struct delayed_work cp_reconn_w;
- struct work_struct cp_down_w;
+ struct delayed_work cp_down_w;
struct mutex cp_cm_lock; /* protect cp_state & cm */
wait_queue_head_t cp_waitq;
extern struct workqueue_struct *rds_wq;
extern struct workqueue_struct *rds_tos_wq;
extern struct workqueue_struct *rds_local_wq;
-void rds_queue_reconnect(struct rds_conn_path *cp);
+void rds_queue_reconnect(struct rds_conn_path *cp, int reason);
void rds_connect_worker(struct work_struct *);
void rds_shutdown_worker(struct work_struct *);
void rds_send_worker(struct work_struct *);
* We should *always* start with a random backoff; otherwise a broken connection
* will always take several iterations to be re-established.
*/
-void rds_queue_reconnect(struct rds_conn_path *cp)
+void rds_queue_reconnect(struct rds_conn_path *cp, int reason)
{
unsigned long delay = 0;
unsigned long rand;
bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP;
rds_rtd(RDS_RTD_CM_EXT,
- "conn %p for %pI4 to %pI4 tos %d reconnect jiffies %lu\n", conn,
+ "conn %p for %pI4 to %pI4 tos %d reconnect jiffies %lu %s\n", conn,
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
- cp->cp_reconnect_jiffies);
+ cp->cp_reconnect_jiffies, conn_drop_reason_str(reason));
/* let peer with smaller addr initiate reconnect, to avoid duels */
if (is_tcp && !IS_CANONICAL(conn->c_laddr, conn->c_faddr))
return;
+ if (reason == DR_IB_BASE_CONN_DOWN) {
+ cp->cp_reconnect_jiffies = 0;
+ delay = msecs_to_jiffies(500);
+ }
+
set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
if (cp->cp_reconnect_jiffies == 0) {
set_bit(RDS_INITIAL_RECONNECT, &cp->cp_flags);
+ get_random_bytes(&rand, sizeof(rand));
cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
- queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, rand % conn->c_reconnect_jiffies);
+ queue_delayed_work(cp->cp_wq, &cp->cp_conn_w,
+ delay + (rand % cp->cp_reconnect_jiffies));
return;
}
- clear_bit(RDS_INITIAL_RECONNECT, &conn->c_flags);
+ clear_bit(RDS_INITIAL_RECONNECT, &cp->cp_flags);
if ((conn->c_laddr > conn->c_faddr) ||
rds_conn_self_loopback_passive(conn))
delay = msecs_to_jiffies(15000);
RDS_CONN_DOWN)) {
rds_rtd(RDS_RTD_CM_EXT,
"reconnecting..., conn %p\n", conn);
- rds_queue_reconnect(cp);
+ rds_queue_reconnect(cp, ret == DR_IB_BASE_CONN_DOWN ?
+ DR_IB_BASE_CONN_DOWN : DR_DEFAULT);
} else {
rds_conn_path_drop(cp, DR_CONN_CONNECT_FAIL);
}
{
struct rds_conn_path *cp = container_of(work,
struct rds_conn_path,
- cp_down_w);
+ cp_down_w.work);
unsigned long now = get_seconds();
bool is_tcp = cp->cp_conn->c_trans->t_type == RDS_TRANS_TCP;
struct rds_connection *conn = cp->cp_conn;