Working with the following SRs:
Exadata SR# 3-15640329311
Linux SR# 3-15675579325
it was discovered that inserting IB_SEND_SOLICITED at regular
intervals removed the endless RNR Retry situation. The test was made
by inserting IB_SEND_SOLICITED at the same interval as
IB_SEND_SIGNALED was inserted, that is, by default for every 17th
fragment.
This commit introduces the sysctl variable
net.rds.ib.max_unsolicited_wr. A value of zero disables the
functionality of inserting IB_SEND_SOLICITED. A value of N will insert
IB_SEND_SOLICITED for every Nth fragment.
net.rds.ib.max_unsolicited_wr is by default 16, in order to avoid
customization when this fix is applied at the customer site.
This fix also has the nice side-effect that it improves IOPS for 1Q,
1D, 1T cases:
-q 1M -a 256:
Without fix:
tsks tx/s rx/s tx+rx K/s mbi K/s mbo K/s tx us/c rtt us cpu %
1 1161 0 1189243.20 0.00 0.00 203.52 857.34 -1.00
(average)
With fix (with default net.rds.ib.max_unsolicited_wr = 16):
tsks tx/s rx/s tx+rx K/s mbi K/s mbo K/s tx us/c rtt us cpu %
1 1323 0 1355849.36 0.00 0.00 203.76 751.50 -1.00
(average)
-q $[32*1024+256] -a 256:
With fix (net.rds.ib.max_unsolicited_wr = 0, i.e. disabled):
tsks tx/s rx/s tx+rx K/s mbi K/s mbo K/s tx us/c rtt us cpu %
1 15243 0 492547.75 0.00 0.00 10.58 62.01 -1.00
(average)
Ditto with net.rds.ib.max_unsolicited_wr = 4 (two SEND_SOLICITED per ~32K):
tsks tx/s rx/s tx+rx K/s mbi K/s mbo K/s tx us/c rtt us cpu %
1 16422 0 530641.03 0.00 0.00 10.28 57.25 -1.00
(average)
Orabug: 28857027
Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
Signed-off-by: Brian Maly <brian.maly@oracle.com>
/* Batched completions */
unsigned int i_unsignaled_wrs;
+
+ /* Wake up receiver once in a while */
+ unsigned int i_unsolicited_wrs;
u8 i_sl;
atomic_t i_cache_allocs;
extern unsigned long rds_ib_sysctl_max_send_wr;
extern unsigned long rds_ib_sysctl_max_recv_wr;
extern unsigned long rds_ib_sysctl_max_unsig_wrs;
+extern unsigned long rds_ib_sysctl_max_unsolicited_wrs;
extern unsigned long rds_ib_sysctl_max_unsig_bytes;
extern unsigned long rds_ib_sysctl_max_recv_allocation;
extern unsigned int rds_ib_sysctl_flow_control;
if (ic->i_unsignaled_wrs-- == 0 || notify) {
ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
send->s_wr.send_flags |= IB_SEND_SIGNALED;
- return 1;
}
- return 0;
+
+ /* To keep the rx pipeline going, add SEND_SOLICITED once in a while */
+ if (rds_ib_sysctl_max_unsolicited_wrs && --ic->i_unsolicited_wrs == 0) {
+ ic->i_unsolicited_wrs = rds_ib_sysctl_max_unsolicited_wrs;
+ send->s_wr.send_flags |= IB_SEND_SOLICITED;
+ }
+
+ return !!(send->s_wr.send_flags & IB_SEND_SIGNALED);
}
/*
rm->data.op_count = 0;
}
+ ic->i_unsolicited_wrs = rds_ib_sysctl_max_unsolicited_wrs;
rds_message_addref(rm);
rm->data.op_dmasg = 0;
rm->data.op_dmaoff = 0;
static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1;
static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64;
+unsigned long rds_ib_sysctl_max_unsolicited_wrs = 16;
+
+/* Zero means inserting SEND_SOLICITED in the middle of an RDS message
+ * is disabled
+ */
+static unsigned long rds_ib_sysctl_max_unsolicited_wr_min;
+/* Number of fragments needed for 1MB of data plus a 256B RDBMS header */
+static unsigned long rds_ib_sysctl_max_unsolicited_wr_max =
+ (1 * 1024 * 1024 + RDS_FRAG_SIZE) / RDS_FRAG_SIZE;
+
/*
* This sysctl does nothing.
*
.extra1 = &rds_ib_sysctl_max_unsig_wr_min,
.extra2 = &rds_ib_sysctl_max_unsig_wr_max,
},
+ {
+ .procname = "max_unsolicited_wr",
+ .data = &rds_ib_sysctl_max_unsolicited_wrs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &proc_doulongvec_minmax,
+ .extra1 = &rds_ib_sysctl_max_unsolicited_wr_min,
+ .extra2 = &rds_ib_sysctl_max_unsolicited_wr_max,
+ },
{
.procname = "max_recv_allocation",
.data = &rds_ib_sysctl_max_recv_allocation,