From: Santosh Shilimkar Date: Sat, 18 Jun 2016 20:06:29 +0000 (-0700) Subject: IB: Add RNR timer workaround for PSIF X-Git-Tag: v4.1.12-92~108^2 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=40429d5d3e84656fdc8d2cedc9c7169ec3d682ac;p=users%2Fjedix%2Flinux-maple.git IB: Add RNR timer workaround for PSIF The RNR NAK Retry timer on Titan and Sonoma 1&2 IB subsystems runs 500 times faster than desired. This means that retries are started a lot sooner than they should be. The software workaround is a bit involved and intrusive because it needs to work in mixed HCA environments. It uses the CM protocol to detect the involvement of the offending IB requestor and then enables the workaround in the peer responder. To keep the workaround flag persistent, ib_qp verbs need to carry the flag, which impacts the IB core kABI; the new field is therefore wrapped under __GENKSYMS__. The workaround matches the desired RNR NAK Retry timer value when the encodings 1 to 14 (decimal) are supplied. For encodings larger than 14 and for zero, the workaround will set the largest possible RNR NAK Timer value for the offending requestor, which is 1.31 ms. Thanks to Trivino, Haakon for updates and a wide range of testing for kernel as well as userland with mixed HCA configurations. 
Orabug: 23633926 Reviewed-by: Yuval Shaia Reviewed-by: Håkon Bugge Reviewed-by: David Brean Tested-by: Francisco Triviño García Signed-off-by: Francisco Triviño García Signed-off-by: Santosh Shilimkar --- diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 912fa921b6aa..ba45b53379f5 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -52,6 +52,7 @@ #include #include #include "cm_msgs.h" +#include "core_priv.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); @@ -1054,6 +1055,10 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table); timewait_info->inserted_remote_qp = 0; } + + /* Clean-up the overloaded MBIT */ + if (timewait_info->remote_ca_guid & IB_GUID_MBIT) + timewait_info->remote_ca_guid &= ~IB_GUID_MBIT; } static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) @@ -1275,19 +1280,46 @@ static void cm_format_mad_hdr(struct ib_mad_hdr *hdr, hdr->tid = tid; } +#define SIF_DEVICES 6 +const u32 sif_family_vendor_part_id[SIF_DEVICES] = { + 0x2088, 0x2089, 0x2188, 0x2189, 0x2198, 0x2199}; + +static inline bool is_vendor_sif_family(u32 part_id) +{ + int i; + + for (i = 0; i < SIF_DEVICES; i++) { + if (part_id == sif_family_vendor_part_id[i]) + return true; + } + return false; +} + static void cm_format_req(struct cm_req_msg *req_msg, struct cm_id_private *cm_id_priv, struct ib_cm_req_param *param) { struct ib_sa_path_rec *pri_path = param->primary_path; struct ib_sa_path_rec *alt_path = param->alternate_path; + struct ib_device_attr attr; + u32 vendor_part_id; + + if (ib_query_device(cm_id_priv->id.device, &attr)) + vendor_part_id = 0; + else + vendor_part_id = attr.vendor_part_id; cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); req_msg->local_comm_id = cm_id_priv->id.local_id; req_msg->service_id = param->service_id; - req_msg->local_ca_guid = 
cm_id_priv->id.device->node_guid; + + if (is_vendor_sif_family(vendor_part_id)) + req_msg->local_ca_guid = cm_id_priv->id.device->node_guid | IB_GUID_MBIT; + else + req_msg->local_ca_guid = cm_id_priv->id.device->node_guid; + cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); cm_req_set_init_depth(req_msg, param->initiator_depth); cm_req_set_remote_resp_timeout(req_msg, @@ -1962,6 +1994,14 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg, struct cm_id_private *cm_id_priv, struct ib_cm_rep_param *param) { + struct ib_device_attr attr; + u32 vendor_part_id; + + if (ib_query_device(cm_id_priv->id.device, &attr)) + vendor_part_id = 0; + else + vendor_part_id = attr.vendor_part_id; + cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); rep_msg->local_comm_id = cm_id_priv->id.local_id; rep_msg->remote_comm_id = cm_id_priv->id.remote_id; @@ -1971,7 +2011,11 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg, cm_id_priv->av.port->cm_dev->ack_delay); cm_rep_set_failover(rep_msg, param->failover_accepted); cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); - rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; + + if (is_vendor_sif_family(vendor_part_id)) + rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid | IB_GUID_MBIT; + else + rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) { rep_msg->initiator_depth = param->initiator_depth; @@ -3826,6 +3870,9 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, { unsigned long flags; int ret; + u64 remote_guid; + + remote_guid = cm_id_priv->timewait_info->remote_ca_guid; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->id.state) { @@ -3840,7 +3887,12 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; + qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; + + if (remote_guid 
& IB_GUID_MBIT) + qp_attr->qp_access_flags |= IB_GUID_RNR_TWEAK; + if (cm_id_priv->responder_resources) qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC; @@ -4302,4 +4354,3 @@ static void __exit ib_cm_cleanup(void) module_init(ib_cm_init); module_exit(ib_cm_cleanup); - diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 01cd259aa4bd..21a25f6e1b9c 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -38,6 +38,10 @@ #include +/* Used for a HCA workaound and overloads qp_access_flags(int) */ +#define IB_GUID_RNR_TWEAK (1 << 12) +#define IB_GUID_MBIT (1ULL << 56) + int ib_device_register_sysfs(struct ib_device *device, int (*port_callback)(struct ib_device *, u8, struct kobject *)); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 12b766512ade..586629c680c2 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2734,6 +2734,13 @@ out: return ret ? 
ret : in_len; } +const u8 ib_uverbs_rnr_timeout_sif[32] = { + 0, 18, 20, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -2810,6 +2817,16 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask); if (ret) goto release_qp; + + if ((cmd.attr_mask & IB_QP_STATE) && + (attr->qp_state == IB_QPS_INIT) && + (attr->qp_access_flags & IB_GUID_RNR_TWEAK)) + qp->qp_flag |= IB_GUID_RNR_TWEAK; + + if ((qp->qp_flag & IB_GUID_RNR_TWEAK) && + (cmd.attr_mask & IB_QP_MIN_RNR_TIMER)) + attr->min_rnr_timer = ib_uverbs_rnr_timeout_sif[attr->min_rnr_timer]; + ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata); } else { ret = ib_modify_qp(qp, attr, cmd.attr_mask); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 7619e2872bb4..cb63b692472f 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -924,17 +924,33 @@ out: } EXPORT_SYMBOL(ib_resolve_eth_l2_attrs); +const u8 ib_rnr_timeout_sif[32] = { + 0, 18, 20, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { + u8 idx = qp_attr->min_rnr_timer; int ret; ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask); if (ret) return ret; + if ((qp_attr_mask & IB_QP_STATE) && + (qp_attr->qp_state == IB_QPS_INIT) && + (qp_attr->qp_access_flags & IB_GUID_RNR_TWEAK)) + qp->qp_flag |= IB_GUID_RNR_TWEAK; + + if ((qp->qp_flag & IB_GUID_RNR_TWEAK) && + (qp_attr_mask & IB_QP_MIN_RNR_TIMER)) + qp_attr->min_rnr_timer = ib_rnr_timeout_sif[idx]; + return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_qp); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 
4f939728280e..f526b363a32e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1281,6 +1281,9 @@ struct ib_qp { void *qp_context; u32 qp_num; enum ib_qp_type qp_type; +#ifndef __GENKSYMS__ + int qp_flag; +#endif }; struct ib_mr {