]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
IB: Add RNR timer workaround for PSIF
authorSantosh Shilimkar <santosh.shilimkar@oracle.com>
Sat, 18 Jun 2016 20:06:29 +0000 (13:06 -0700)
committerChuck Anderson <chuck.anderson@oracle.com>
Wed, 13 Jul 2016 08:01:16 +0000 (01:01 -0700)
The RNR NAK Retry timer on Titan and Sonoma 1&2 IB subsystems runs 500
times faster than desired. This means that retries are started a lot
sooner than they should be.

The software workaround is a bit involved and intrusive because it needs
to work in mixed HCA environments. It uses CM protocol to detect the
involvement of the offending IB requestor and then enables the
workaround in the peer responder. To keep the workaround flag
persistent, struct ib_qp needs to carry the flag. This impacts the
IB core kABI, so the new field is wrapped under __GENKSYMS__.

The workaround matches the desired RNR NAK Retry timer value when the
encodings 1 to 14 (decimal) are supplied. For encodings larger than 14
and for zero, the work-around will set the largest possible RNR NAK
Timer value for the offending requestor, which is 1.31 ms.

Thanks to Trivino, Haakon for updates and wide range of testing for
kernel as well as userland with mixed HCA configurations.

Orabug: 23633926

Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
Reviewed-by: David Brean <david.brean@oracle.com>
Tested-by: Francisco Triviño García <francisco.trivino@oracle.com>
Signed-off-by: Francisco Triviño García <francisco.trivino@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
drivers/infiniband/core/cm.c
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/verbs.c
include/rdma/ib_verbs.h

index 912fa921b6aa83d15fb44165c05567bf72d2887f..ba45b53379f54b72984e664645c3c9ab572118c3 100644 (file)
@@ -52,6 +52,7 @@
 #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
 #include "cm_msgs.h"
+#include "core_priv.h"
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("InfiniBand CM");
@@ -1054,6 +1055,10 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
                rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
                timewait_info->inserted_remote_qp = 0;
        }
+
+       /* Clean-up the overloaded MBIT */
+       if (timewait_info->remote_ca_guid & IB_GUID_MBIT)
+               timewait_info->remote_ca_guid &= ~IB_GUID_MBIT;
 }
 
 static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
@@ -1275,19 +1280,46 @@ static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
        hdr->tid           = tid;
 }
 
+/* Number of entries in sif_family_vendor_part_id[]. */
+#define SIF_DEVICES            6
+
+/* Vendor part IDs that is_vendor_sif_family() accepts. */
+const u32 sif_family_vendor_part_id[SIF_DEVICES] = {
+       0x2088, 0x2089,
+       0x2188, 0x2189,
+       0x2198, 0x2199,
+};
+
+/*
+ * Return true when @part_id equals one of the entries of
+ * sif_family_vendor_part_id[], false otherwise.
+ */
+static inline bool is_vendor_sif_family(u32 part_id)
+{
+       const u32 *id = sif_family_vendor_part_id;
+       const u32 *end = id + SIF_DEVICES;
+
+       while (id != end) {
+               if (*id++ == part_id)
+                       return true;
+       }
+       return false;
+}
+
 static void cm_format_req(struct cm_req_msg *req_msg,
                          struct cm_id_private *cm_id_priv,
                          struct ib_cm_req_param *param)
 {
        struct ib_sa_path_rec *pri_path = param->primary_path;
        struct ib_sa_path_rec *alt_path = param->alternate_path;
+       struct ib_device_attr attr;
+       u32 vendor_part_id;
+
+       if (ib_query_device(cm_id_priv->id.device, &attr))
+               vendor_part_id = 0;
+       else
+               vendor_part_id = attr.vendor_part_id;
 
        cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
                          cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
 
        req_msg->local_comm_id = cm_id_priv->id.local_id;
        req_msg->service_id = param->service_id;
-       req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
+
+       if (is_vendor_sif_family(vendor_part_id))
+               req_msg->local_ca_guid = cm_id_priv->id.device->node_guid | IB_GUID_MBIT;
+       else
+               req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
+
        cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
        cm_req_set_init_depth(req_msg, param->initiator_depth);
        cm_req_set_remote_resp_timeout(req_msg,
@@ -1962,6 +1994,14 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
                          struct cm_id_private *cm_id_priv,
                          struct ib_cm_rep_param *param)
 {
+       struct ib_device_attr attr;
+       u32 vendor_part_id;
+
+       if (ib_query_device(cm_id_priv->id.device, &attr))
+               vendor_part_id = 0;
+       else
+               vendor_part_id = attr.vendor_part_id;
+
        cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
        rep_msg->local_comm_id = cm_id_priv->id.local_id;
        rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
@@ -1971,7 +2011,11 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
                                    cm_id_priv->av.port->cm_dev->ack_delay);
        cm_rep_set_failover(rep_msg, param->failover_accepted);
        cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
-       rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
+
+       if (is_vendor_sif_family(vendor_part_id))
+               rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid | IB_GUID_MBIT;
+       else
+               rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
 
        if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
                rep_msg->initiator_depth = param->initiator_depth;
@@ -3826,6 +3870,9 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
 {
        unsigned long flags;
        int ret;
+       u64 remote_guid;
+
+       remote_guid = cm_id_priv->timewait_info->remote_ca_guid;
 
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->id.state) {
@@ -3840,7 +3887,12 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
        case IB_CM_ESTABLISHED:
                *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
                                IB_QP_PKEY_INDEX | IB_QP_PORT;
+
                qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
+
+               if (remote_guid & IB_GUID_MBIT)
+                       qp_attr->qp_access_flags |= IB_GUID_RNR_TWEAK;
+
                if (cm_id_priv->responder_resources)
                        qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
                                                    IB_ACCESS_REMOTE_ATOMIC;
@@ -4302,4 +4354,3 @@ static void __exit ib_cm_cleanup(void)
 
 module_init(ib_cm_init);
 module_exit(ib_cm_cleanup);
-
index 01cd259aa4bd2d2b73a8b7631c4619edce604430..21a25f6e1b9c58d5cee18bd511dca885b19c9cd6 100644 (file)
 
 #include <rdma/ib_verbs.h>
 
+/*
+ * Used for an HCA workaround.  IB_GUID_RNR_TWEAK overloads
+ * qp_access_flags (an int); IB_GUID_MBIT is a marker bit OR-ed into
+ * 64-bit CA GUID values.
+ */
+#define IB_GUID_RNR_TWEAK              (1 << 12)
+#define IB_GUID_MBIT                   (1ULL << 56)
+
 int  ib_device_register_sysfs(struct ib_device *device,
                              int (*port_callback)(struct ib_device *,
                                                   u8, struct kobject *));
index 12b766512ade3447b213ce0e82c415fc82f962f8..586629c680c233cb52d20f486e1b65183f61f7b7 100644 (file)
@@ -2734,6 +2734,13 @@ out:
        return ret ? ret : in_len;
 }
 
+/*
+ * Replacement min_rnr_timer encodings, indexed by the requested
+ * encoding.  ib_uverbs_modify_qp() applies this table to QPs that
+ * carry IB_GUID_RNR_TWEAK in qp_flag.  Indexes 1..13 have explicit
+ * replacements; every other index (0 and 14..31) yields 0.
+ */
+const u8 ib_uverbs_rnr_timeout_sif[32] = {
+       [1]  = 18, [2]  = 20, [3]  = 21, [4]  = 22,
+       [5]  = 23, [6]  = 24, [7]  = 25, [8]  = 26,
+       [9]  = 27, [10] = 28, [11] = 29, [12] = 30,
+       [13] = 31,
+};
+
 ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
                            const char __user *buf, int in_len,
                            int out_len)
@@ -2810,6 +2817,16 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
                ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask);
                if (ret)
                        goto release_qp;
+
+               if ((cmd.attr_mask & IB_QP_STATE) &&
+                       (attr->qp_state == IB_QPS_INIT) &&
+                       (attr->qp_access_flags & IB_GUID_RNR_TWEAK))
+                       qp->qp_flag |= IB_GUID_RNR_TWEAK;
+
+               if ((qp->qp_flag & IB_GUID_RNR_TWEAK) &&
+                       (cmd.attr_mask & IB_QP_MIN_RNR_TIMER))
+                       attr->min_rnr_timer = ib_uverbs_rnr_timeout_sif[attr->min_rnr_timer];
+
                ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
        } else {
                ret = ib_modify_qp(qp, attr, cmd.attr_mask);
index 7619e2872bb4db03745ab37ac6a9eff7e2cd8dc5..cb63b692472fc07a6bc71c1e2639e8776363ef2a 100644 (file)
@@ -924,17 +924,33 @@ out:
 }
 EXPORT_SYMBOL(ib_resolve_eth_l2_attrs);
 
+/*
+ * Replacement min_rnr_timer encodings, indexed by the requested
+ * encoding.  ib_modify_qp() applies this table to QPs that carry
+ * IB_GUID_RNR_TWEAK in qp_flag.  Indexes 1..13 have explicit
+ * replacements; every other index (0 and 14..31) yields 0.
+ */
+const u8 ib_rnr_timeout_sif[32] = {
+       [1]  = 18, [2]  = 20, [3]  = 21, [4]  = 22,
+       [5]  = 23, [6]  = 24, [7]  = 25, [8]  = 26,
+       [9]  = 27, [10] = 28, [11] = 29, [12] = 30,
+       [13] = 31,
+};
 
 int ib_modify_qp(struct ib_qp *qp,
                 struct ib_qp_attr *qp_attr,
                 int qp_attr_mask)
 {
+       u8 idx = qp_attr->min_rnr_timer;
        int ret;
 
        ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask);
        if (ret)
                return ret;
 
+       if ((qp_attr_mask & IB_QP_STATE) &&
+               (qp_attr->qp_state == IB_QPS_INIT) &&
+               (qp_attr->qp_access_flags & IB_GUID_RNR_TWEAK))
+               qp->qp_flag |= IB_GUID_RNR_TWEAK;
+
+       if ((qp->qp_flag & IB_GUID_RNR_TWEAK) &&
+               (qp_attr_mask & IB_QP_MIN_RNR_TIMER))
+               qp_attr->min_rnr_timer = ib_rnr_timeout_sif[idx];
+
        return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_qp);
index 4f939728280ea4a9d36d26b357bf75153594655f..f526b363a32e3c1b271c8b6e0a09414ff02f9f58 100644 (file)
@@ -1281,6 +1281,9 @@ struct ib_qp {
        void                   *qp_context;
        u32                     qp_num;
        enum ib_qp_type         qp_type;
+#ifndef __GENKSYMS__
+       int                     qp_flag;
+#endif
 };
 
 struct ib_mr {