#define LPFC_HB_MBOX_INTERVAL   5      /* Heart beat interval in seconds. */
 #define LPFC_HB_MBOX_TIMEOUT    30     /* Heart beat timeout  in seconds. */
 
-#define LPFC_LOOK_AHEAD_OFF    0       /* Look ahead logic is turned off */
-
 /* Error Attention event polling interval */
 #define LPFC_ERATT_POLL_INTERVAL       5 /* EATT poll interval in seconds */
 
        uint32_t cfg_fcp_imax;
        uint32_t cfg_fcp_cpu_map;
        uint32_t cfg_hdw_queue;
+       uint32_t cfg_irq_chann;
        uint32_t cfg_suppress_rsp;
        uint32_t cfg_nvme_oas;
        uint32_t cfg_nvme_embed_cmd;
        struct dentry *debug_nvmeio_trc;
        struct lpfc_debugfs_nvmeio_trc *nvmeio_trc;
        struct dentry *debug_hdwqinfo;
+#ifdef LPFC_HDWQ_LOCK_STAT
+       struct dentry *debug_lockstat;
+#endif
        atomic_t nvmeio_trc_cnt;
        uint32_t nvmeio_trc_size;
        uint32_t nvmeio_trc_output_idx;
 #define LPFC_CHECK_NVME_IO     1
 #define LPFC_CHECK_NVMET_RCV   2
 #define LPFC_CHECK_NVMET_IO    4
+#define LPFC_CHECK_SCSI_IO     8
        uint16_t ktime_on;
        uint64_t ktime_data_samples;
        uint64_t ktime_status_samples;
 
        phba->cfg_fcp_imax = (uint32_t)val;
        phba->initial_imax = phba->cfg_fcp_imax;
 
-       for (i = 0; i < phba->cfg_hdw_queue; i += LPFC_MAX_EQ_DELAY_EQID_CNT)
+       for (i = 0; i < phba->cfg_irq_chann; i += LPFC_MAX_EQ_DELAY_EQID_CNT)
                lpfc_modify_hba_eq_delay(phba, i, LPFC_MAX_EQ_DELAY_EQID_CNT,
                                         val);
 
                                phba->cfg_fcp_cpu_map,
                                phba->sli4_hba.num_online_cpu);
                break;
-       case 2:
-               len += snprintf(buf + len, PAGE_SIZE-len,
-                               "fcp_cpu_map: Driver centric mapping (%d): "
-                               "%d online CPUs\n",
-                               phba->cfg_fcp_cpu_map,
-                               phba->sli4_hba.num_online_cpu);
-               break;
        }
 
        while (phba->sli4_hba.curr_disp_cpu < phba->sli4_hba.num_present_cpu) {
                                len += snprintf(
                                        buf + len, PAGE_SIZE - len,
                                        "CPU %02d hdwq None "
-                                       "physid %d coreid %d\n",
+                                       "physid %d coreid %d ht %d\n",
                                        phba->sli4_hba.curr_disp_cpu,
                                        cpup->phys_id,
-                                       cpup->core_id);
+                                       cpup->core_id, cpup->hyper);
                        else
                                len += snprintf(
                                        buf + len, PAGE_SIZE - len,
-                                       "CPU %02d hdwq %04d "
-                                       "physid %d coreid %d\n",
+                                       "CPU %02d EQ %04d hdwq %04d "
+                                       "physid %d coreid %d ht %d\n",
                                        phba->sli4_hba.curr_disp_cpu,
-                                       cpup->hdwq, cpup->phys_id,
-                                       cpup->core_id);
+                                       cpup->eq, cpup->hdwq, cpup->phys_id,
+                                       cpup->core_id, cpup->hyper);
                } else {
                        if (cpup->hdwq == LPFC_VECTOR_MAP_EMPTY)
                                len += snprintf(
                                        buf + len, PAGE_SIZE - len,
                                        "CPU %02d hdwq None "
-                                       "physid %d coreid %d IRQ %d\n",
+                                       "physid %d coreid %d ht %d IRQ %d\n",
                                        phba->sli4_hba.curr_disp_cpu,
                                        cpup->phys_id,
-                                       cpup->core_id, cpup->irq);
+                                       cpup->core_id, cpup->hyper, cpup->irq);
                        else
                                len += snprintf(
                                        buf + len, PAGE_SIZE - len,
-                                       "CPU %02d hdwq %04d "
-                                       "physid %d coreid %d IRQ %d\n",
+                                       "CPU %02d EQ %04d hdwq %04d "
+                                       "physid %d coreid %d ht %d IRQ %d\n",
                                        phba->sli4_hba.curr_disp_cpu,
-                                       cpup->hdwq, cpup->phys_id,
-                                       cpup->core_id, cpup->irq);
+                                       cpup->eq, cpup->hdwq, cpup->phys_id,
+                                       cpup->core_id, cpup->hyper, cpup->irq);
                }
 
                phba->sli4_hba.curr_disp_cpu++;
 # lpfc_fcp_cpu_map: Defines how to map CPUs to IRQ vectors
 # for the HBA.
 #
-# Value range is [0 to 2]. Default value is LPFC_DRIVER_CPU_MAP (2).
+# Value range is [0 to 1]. Default value is LPFC_HBA_CPU_MAP (1).
 #      0 - Do not affinitize IRQ vectors
 #      1 - Affinitize HBA vectors with respect to each HBA
 #          (start with CPU0 for each HBA)
-#      2 - Affintize HBA vectors with respect to the entire driver
-#          (round robin thru all CPUs across all HBAs)
+# This also defines how Hardware Queues are mapped to specific CPUs.
 */
-static int lpfc_fcp_cpu_map = LPFC_DRIVER_CPU_MAP;
+static int lpfc_fcp_cpu_map = LPFC_HBA_CPU_MAP;
 module_param(lpfc_fcp_cpu_map, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(lpfc_fcp_cpu_map,
                 "Defines how to map CPUs to IRQ vectors per HBA");
        lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                        "3326 lpfc_fcp_cpu_map: %d out of range, using "
                        "default\n", val);
-       phba->cfg_fcp_cpu_map = LPFC_DRIVER_CPU_MAP;
+       phba->cfg_fcp_cpu_map = LPFC_HBA_CPU_MAP;
 
        return 0;
 }
  * CPU. Otherwise, the default 0 (Round Robin) scheduling of FCP/NVME I/Os
  * through WQs will be used.
  */
-LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_BY_HDWQ,
+LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_BY_CPU,
             LPFC_FCP_SCHED_BY_HDWQ,
             LPFC_FCP_SCHED_BY_CPU,
             "Determine scheduling algorithm for "
             "Embed NVME Command in WQE");
 
 /*
- * lpfc_hdw_queue: Set the number of IO channels the driver
+ * lpfc_hdw_queue: Set the number of Hardware Queues the driver
 * will advertise it supports to the NVME and SCSI layers. This also
- * will map to the number of EQ/CQ/WQs the driver will create.
+ * will map to the number of CQ/WQ pairs the driver will create.
  *
  * The NVME Layer will try to create this many, plus 1 administrative
 * hardware queue. The administrative queue will always map to WQ 0.
- * A hardware IO queue maps (qidx) to a specific driver WQ.
+ * A hardware IO queue maps (qidx) to a specific driver CQ/WQ.
  *
  *      0    = Configure the number of hdw queues to the number of active CPUs.
- *      1,64 = Manually specify how many hdw queues to use.
+ *      1,128 = Manually specify how many hdw queues to use.
  *
- * Value range is [0,64]. Default value is 0.
+ * Value range is [0,128]. Default value is 0.
  */
 LPFC_ATTR_R(hdw_queue,
            LPFC_HBA_HDWQ_DEF,
            LPFC_HBA_HDWQ_MIN, LPFC_HBA_HDWQ_MAX,
            "Set the number of I/O Hardware Queues");
 
+/*
+ * lpfc_irq_chann: Set the number of IRQ vectors that are available
+ * for Hardware Queues to utilize.  This also will map to the number
+ * of EQ / MSI-X vectors the driver will create. This should never be
+ * more than the number of Hardware Queues.
+ *
+ *      0     = Configure number of IRQ Channels to the number of active CPUs.
+ *      1,128 = Manually specify how many IRQ Channels to use.
+ *
+ * Value range is [0,128]. Default value is 0.
+ */
+LPFC_ATTR_R(irq_chann,
+           LPFC_HBA_HDWQ_DEF,
+           LPFC_HBA_HDWQ_MIN, LPFC_HBA_HDWQ_MAX,
+           "Set the number of I/O IRQ Channels");
+
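For illustration only (parameter names follow the LPFC_ATTR_R definitions above; the 32-CPU host is an assumption): loading with

    modprobe lpfc lpfc_hdw_queue=16 lpfc_irq_chann=0

creates 16 Hardware Queues, and cfg_irq_chann, defaulting to the active CPU count (32), is capped at cfg_hdw_queue later in this patch, leaving 16 EQ/MSI-X vectors.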
 /*
 # lpfc_enable_hba_reset: Allow or prevent HBA resets to the hardware.
 #       0  = HBA resets disabled
 */
 LPFC_ATTR_R(enable_bg, 0, 0, 1, "Enable BlockGuard Support");
 
-/*
-# lpfc_fcp_look_ahead: Look ahead for completions in FCP start routine
-#       0  = disabled (default)
-#       1  = enabled
-# Value range is [0,1]. Default value is 0.
-#
-# This feature in under investigation and may be supported in the future.
-*/
-unsigned int lpfc_fcp_look_ahead = LPFC_LOOK_AHEAD_OFF;
-
 /*
 # lpfc_prot_mask: i
 #      - Bit mask of host protection capabilities used to register with the
        &dev_attr_lpfc_fcp_imax,
        &dev_attr_lpfc_fcp_cpu_map,
        &dev_attr_lpfc_hdw_queue,
+       &dev_attr_lpfc_irq_chann,
        &dev_attr_lpfc_suppress_rsp,
        &dev_attr_lpfc_nvmet_mrq,
        &dev_attr_lpfc_nvmet_mrq_post,
        lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb);
        lpfc_nvmet_fb_size_init(phba, lpfc_nvmet_fb_size);
        lpfc_hdw_queue_init(phba, lpfc_hdw_queue);
+       lpfc_irq_chann_init(phba, lpfc_irq_chann);
        lpfc_enable_bbcr_init(phba, lpfc_enable_bbcr);
        lpfc_enable_dpp_init(phba, lpfc_enable_dpp);
 
        /* A value of 0 means use the number of CPUs found in the system */
        if (phba->cfg_hdw_queue == 0)
                phba->cfg_hdw_queue = phba->sli4_hba.num_present_cpu;
+       if (phba->cfg_irq_chann == 0)
+               phba->cfg_irq_chann = phba->sli4_hba.num_present_cpu;
+       if (phba->cfg_irq_chann > phba->cfg_hdw_queue)
+               phba->cfg_irq_chann = phba->cfg_hdw_queue;
 
        phba->cfg_soft_wwnn = 0L;
        phba->cfg_soft_wwpn = 0L;
 {
        if (phba->cfg_hdw_queue > phba->sli4_hba.num_present_cpu)
                phba->cfg_hdw_queue = phba->sli4_hba.num_present_cpu;
+       if (phba->cfg_irq_chann > phba->sli4_hba.num_present_cpu)
+               phba->cfg_irq_chann = phba->sli4_hba.num_present_cpu;
+       if (phba->cfg_irq_chann > phba->cfg_hdw_queue)
+               phba->cfg_irq_chann = phba->cfg_hdw_queue;
 
        if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME &&
            phba->nvmet_support) {
                }
 
                if (!phba->cfg_nvmet_mrq)
-                       phba->cfg_nvmet_mrq = phba->cfg_hdw_queue;
+                       phba->cfg_nvmet_mrq = phba->cfg_irq_chann;
 
                /* Adjust lpfc_nvmet_mrq to avoid running out of WQE slots */
-               if (phba->cfg_nvmet_mrq > phba->cfg_hdw_queue) {
-                       phba->cfg_nvmet_mrq = phba->cfg_hdw_queue;
+               if (phba->cfg_nvmet_mrq > phba->cfg_irq_chann) {
+                       phba->cfg_nvmet_mrq = phba->cfg_irq_chann;
                        lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
                                        "6018 Adjust lpfc_nvmet_mrq to %d\n",
                                        phba->cfg_nvmet_mrq);
 
 extern int _dump_buf_done;
 extern spinlock_t pgcnt_lock;
 extern unsigned int pgcnt;
-extern unsigned int lpfc_fcp_look_ahead;
 
 /* Interface exported by fabric iocb scheduler */
 void lpfc_fabric_abort_nport(struct lpfc_nodelist *);
 
        return len;
 }
 
+static int lpfc_debugfs_last_xripool;
+
+/**
+ * lpfc_debugfs_commonxripools_data - Dump XRI buffer pool info to a buffer
+ * @phba: The HBA to gather host buffer info from.
+ * @buf: The buffer to dump log into.
+ * @size: The maximum amount of data to process.
+ *
+ * Description:
+ * This routine dumps the common XRI buffer pool info for each Hardware Queue
+ * of the @phba to @buf, up to @size bytes. One line of pool counters per hdwq
+ * entry is dumped to @buf until @size bytes have been dumped or all the hdwq
+ * info has been dumped.
+ *
+ * Notes:
+ * This routine will rotate through each configured Hardware Queue each
+ * time called.
+ *
+ * Return Value:
+ * This routine returns the amount of bytes that were dumped into @buf and will
+ * not exceed @size.
+ **/
+static int
+lpfc_debugfs_commonxripools_data(struct lpfc_hba *phba, char *buf, int size)
+{
+       struct lpfc_sli4_hdw_queue *qp;
+       int len = 0;
+       int i, out;
+       unsigned long iflag;
+
+       for (i = 0; i < phba->cfg_hdw_queue; i++) {
+               if (len > (LPFC_DUMP_MULTIXRIPOOL_SIZE - 80))
+                       break;
+               qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_xripool];
+
+               len +=  snprintf(buf + len, size - len, "HdwQ %d Info ", i);
+               spin_lock_irqsave(&qp->abts_scsi_buf_list_lock, iflag);
+               spin_lock(&qp->abts_nvme_buf_list_lock);
+               spin_lock(&qp->io_buf_list_get_lock);
+               spin_lock(&qp->io_buf_list_put_lock);
+               out = qp->total_io_bufs - (qp->get_io_bufs + qp->put_io_bufs +
+                       qp->abts_scsi_io_bufs + qp->abts_nvme_io_bufs);
+               len +=  snprintf(buf + len, size - len,
+                                "tot:%d get:%d put:%d mt:%d "
+                                "ABTS scsi:%d nvme:%d Out:%d\n",
+                       qp->total_io_bufs, qp->get_io_bufs, qp->put_io_bufs,
+                       qp->empty_io_bufs, qp->abts_scsi_io_bufs,
+                       qp->abts_nvme_io_bufs, out);
+               spin_unlock(&qp->io_buf_list_put_lock);
+               spin_unlock(&qp->io_buf_list_get_lock);
+               spin_unlock(&qp->abts_nvme_buf_list_lock);
+               spin_unlock_irqrestore(&qp->abts_scsi_buf_list_lock, iflag);
+
+               lpfc_debugfs_last_xripool++;
+               if (lpfc_debugfs_last_xripool >= phba->cfg_hdw_queue)
+                       lpfc_debugfs_last_xripool = 0;
+       }
+
+       return len;
+}
+
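Reading one output line with invented numbers: "HdwQ 3 Info tot:512 get:200 put:250 mt:0 ABTS scsi:2 nvme:0 Out:60" means 512 total IO buffers for that Hardware Queue, 200 on the get list, 250 on the put list, 2 aborted SCSI buffers pending, and Out = 512 - (200 + 250 + 2 + 0) = 60 buffers currently held by outstanding I/O, matching the "out" computation above.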
 /**
  * lpfc_debugfs_multixripools_data - Display multi-XRI pools information
  * @phba: The HBA to gather host buffer info from.
        u32 txcmplq_cnt;
        char tmp[LPFC_DEBUG_OUT_LINE_SZ] = {0};
 
+       if (phba->sli_rev != LPFC_SLI_REV4)
+               return 0;
+
+       if (!phba->sli4_hba.hdwq)
+               return 0;
+
+       if (!phba->cfg_xri_rebalancing) {
+               i = lpfc_debugfs_commonxripools_data(phba, buf, size);
+               return i;
+       }
+
        /*
         * Pbl: Current number of free XRIs in public pool
         * Pvt: Current number of free XRIs in private pool
        return strnlen(buf, size);
 }
 
-static int lpfc_debugfs_last_hdwq;
+
+#ifdef LPFC_HDWQ_LOCK_STAT
+static int lpfc_debugfs_last_lock;
 
 /**
- * lpfc_debugfs_hdwqinfo_data - Dump Hardware Queue info to a buffer
+ * lpfc_debugfs_lockstat_data - Dump Hardware Queue lock statistics to a buffer
  * @phba: The HBA to gather host buffer info from.
  * @buf: The buffer to dump log into.
  * @size: The maximum amount of data to process.
  * not exceed @size.
  **/
 static int
-lpfc_debugfs_hdwqinfo_data(struct lpfc_hba *phba, char *buf, int size)
+lpfc_debugfs_lockstat_data(struct lpfc_hba *phba, char *buf, int size)
 {
        struct lpfc_sli4_hdw_queue *qp;
        int len = 0;
-       int i, out;
-       unsigned long iflag;
+       int i;
 
        if (phba->sli_rev != LPFC_SLI_REV4)
                return 0;
                return 0;
 
        for (i = 0; i < phba->cfg_hdw_queue; i++) {
-               if (len > (LPFC_HDWQINFO_SIZE - 80))
+               if (len > (LPFC_HDWQINFO_SIZE - 100))
                        break;
-               qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_hdwq];
+               qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_lock];
 
-               len +=  snprintf(buf + len, size - len, "HdwQ %d Info ", i);
-               spin_lock_irqsave(&qp->abts_scsi_buf_list_lock, iflag);
-               spin_lock(&qp->abts_nvme_buf_list_lock);
-               spin_lock(&qp->io_buf_list_get_lock);
-               spin_lock(&qp->io_buf_list_put_lock);
-               out = qp->total_io_bufs - (qp->get_io_bufs + qp->put_io_bufs +
-                       qp->abts_scsi_io_bufs + qp->abts_nvme_io_bufs);
-               len +=  snprintf(buf + len, size - len,
-                                "tot:%d get:%d put:%d mt:%d "
-                                "ABTS scsi:%d nvme:%d Out:%d\n",
-                       qp->total_io_bufs, qp->get_io_bufs, qp->put_io_bufs,
-                       qp->empty_io_bufs, qp->abts_scsi_io_bufs,
-                       qp->abts_nvme_io_bufs, out);
-               spin_unlock(&qp->io_buf_list_put_lock);
-               spin_unlock(&qp->io_buf_list_get_lock);
-               spin_unlock(&qp->abts_nvme_buf_list_lock);
-               spin_unlock_irqrestore(&qp->abts_scsi_buf_list_lock, iflag);
+               len +=  snprintf(buf + len, size - len, "HdwQ %03d Lock ", i);
+               if (phba->cfg_xri_rebalancing) {
+                       len +=  snprintf(buf + len, size - len,
+                                        "get_pvt:%d mv_pvt:%d "
+                                        "mv2pub:%d mv2pvt:%d "
+                                        "put_pvt:%d put_pub:%d wq:%d\n",
+                                        qp->lock_conflict.alloc_pvt_pool,
+                                        qp->lock_conflict.mv_from_pvt_pool,
+                                        qp->lock_conflict.mv_to_pub_pool,
+                                        qp->lock_conflict.mv_to_pvt_pool,
+                                        qp->lock_conflict.free_pvt_pool,
+                                        qp->lock_conflict.free_pub_pool,
+                                        qp->lock_conflict.wq_access);
+               } else {
+                       len +=  snprintf(buf + len, size - len,
+                                        "get:%d put:%d free:%d wq:%d\n",
+                                        qp->lock_conflict.alloc_xri_get,
+                                        qp->lock_conflict.alloc_xri_put,
+                                        qp->lock_conflict.free_xri,
+                                        qp->lock_conflict.wq_access);
+               }
 
-               lpfc_debugfs_last_hdwq++;
-               if (lpfc_debugfs_last_hdwq >= phba->cfg_hdw_queue)
-                       lpfc_debugfs_last_hdwq = 0;
+               lpfc_debugfs_last_lock++;
+               if (lpfc_debugfs_last_lock >= phba->cfg_hdw_queue)
+                       lpfc_debugfs_last_lock = 0;
        }
 
        return len;
 }
+#endif
 
 static int lpfc_debugfs_last_hba_slim_off;
 
        struct lpfc_nvme_lport *lport;
        uint64_t data1, data2, data3;
        uint64_t tot, totin, totout;
-       int cnt, i, maxch;
+       int cnt, i;
        int len = 0;
 
        if (phba->nvmet_support) {
                                atomic_read(&lport->fc4NvmeLsRequests),
                                atomic_read(&lport->fc4NvmeLsCmpls));
 
-               if (phba->cfg_hdw_queue < LPFC_HBA_HDWQ_MAX)
-                       maxch = phba->cfg_hdw_queue;
-               else
-                       maxch = LPFC_HBA_HDWQ_MAX;
                totin = 0;
                totout = 0;
                for (i = 0; i < phba->cfg_hdw_queue; i++) {
 {
        struct lpfc_hba   *phba = vport->phba;
        struct lpfc_sli4_hdw_queue *qp;
-       int i, j;
+       int i, j, max_cnt;
        int len = 0;
        uint32_t tot_xmt;
        uint32_t tot_rcv;
        } else {
                len += snprintf(buf + len, PAGE_SIZE - len, "\n");
        }
+       max_cnt = size - LPFC_DEBUG_OUT_LINE_SZ;
 
        for (i = 0; i < phba->cfg_hdw_queue; i++) {
                qp = &phba->sli4_hba.hdwq[i];
                }
                len += snprintf(buf + len, PAGE_SIZE - len,
                                "Total: %x\n", tot_xmt);
+               if (len >= max_cnt) {
+                       len += snprintf(buf + len, PAGE_SIZE - len,
+                                       "Truncated ...\n");
+                       return len;
+               }
        }
        return len;
 }
                goto out;
        }
 
-       if (phba->cfg_xri_rebalancing)
-               debug->len = lpfc_debugfs_multixripools_data(
-                       phba, debug->buffer, LPFC_DUMP_MULTIXRIPOOL_SIZE);
-       else
-               debug->len = 0;
+       debug->len = lpfc_debugfs_multixripools_data(
+               phba, debug->buffer, LPFC_DUMP_MULTIXRIPOOL_SIZE);
 
        debug->i_private = inode->i_private;
        file->private_data = debug;
        return rc;
 }
 
+#ifdef LPFC_HDWQ_LOCK_STAT
 /**
- * lpfc_debugfs_hdwqinfo_open - Open the hdwqinfo debugfs buffer
+ * lpfc_debugfs_lockstat_open - Open the lockstat debugfs buffer
  * @inode: The inode pointer that contains a vport pointer.
  * @file: The file pointer to attach the log output.
  *
  * error value.
  **/
 static int
-lpfc_debugfs_hdwqinfo_open(struct inode *inode, struct file *file)
+lpfc_debugfs_lockstat_open(struct inode *inode, struct file *file)
 {
        struct lpfc_hba *phba = inode->i_private;
        struct lpfc_debug *debug;
                goto out;
        }
 
-       debug->len = lpfc_debugfs_hdwqinfo_data(phba, debug->buffer,
+       debug->len = lpfc_debugfs_lockstat_data(phba, debug->buffer,
                LPFC_HBQINFO_SIZE);
        file->private_data = debug;
 
        return rc;
 }
 
+static ssize_t
+lpfc_debugfs_lockstat_write(struct file *file, const char __user *buf,
+                           size_t nbytes, loff_t *ppos)
+{
+       struct lpfc_debug *debug = file->private_data;
+       struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private;
+       struct lpfc_sli4_hdw_queue *qp;
+       char mybuf[64];
+       char *pbuf;
+       int i;
+
+       /* Protect copy from user */
+       if (!access_ok(buf, nbytes))
+               return -EFAULT;
+
+       memset(mybuf, 0, sizeof(mybuf));
+
+       /* Clamp to the local buffer so a large write cannot overflow it */
+       if (copy_from_user(mybuf, buf,
+                          (nbytes > sizeof(mybuf) - 1) ?
+                          (sizeof(mybuf) - 1) : nbytes))
+               return -EFAULT;
+       pbuf = &mybuf[0];
+
+       if ((strncmp(pbuf, "reset", strlen("reset")) == 0) ||
+           (strncmp(pbuf, "zero", strlen("zero")) == 0)) {
+               for (i = 0; i < phba->cfg_hdw_queue; i++) {
+                       qp = &phba->sli4_hba.hdwq[i];
+                       qp->lock_conflict.alloc_xri_get = 0;
+                       qp->lock_conflict.alloc_xri_put = 0;
+                       qp->lock_conflict.free_xri = 0;
+                       qp->lock_conflict.wq_access = 0;
+                       qp->lock_conflict.alloc_pvt_pool = 0;
+                       qp->lock_conflict.mv_from_pvt_pool = 0;
+                       qp->lock_conflict.mv_to_pub_pool = 0;
+                       qp->lock_conflict.mv_to_pvt_pool = 0;
+                       qp->lock_conflict.free_pvt_pool = 0;
+                       qp->lock_conflict.free_pub_pool = 0;
+               }
+       }
+       return nbytes;
+}
+#endif
+
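Hedged usage note (the debugfs path is an assumption based on the usual lpfc layout, not something this patch shows): with LPFC_HDWQ_LOCK_STAT defined, reading the lockstat file cycles through the per-HDWQ lock-conflict counters, and writing "reset" or "zero" to it, for example

    echo reset > /sys/kernel/debug/lpfc/fn0/lockstat

clears the counters for every configured Hardware Queue.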
 /**
  * lpfc_debugfs_dumpHBASlim_open - Open the Dump HBA SLIM debugfs buffer
  * @inode: The inode pointer that contains a vport pointer.
        }
 
        debug->len = lpfc_debugfs_cpucheck_data(vport, debug->buffer,
-               LPFC_NVMEKTIME_SIZE);
+               LPFC_CPUCHECK_SIZE);
 
        debug->i_private = inode->i_private;
        file->private_data = debug;
        pbuf = &mybuf[0];
 
        if ((strncmp(pbuf, "on", sizeof("on") - 1) == 0)) {
+               if (phba->nvmet_support)
+                       phba->cpucheck_on |= LPFC_CHECK_NVMET_IO;
+               else
+                       phba->cpucheck_on |= (LPFC_CHECK_NVME_IO |
+                               LPFC_CHECK_SCSI_IO);
+               return strlen(pbuf);
+       } else if ((strncmp(pbuf, "nvme_on", sizeof("nvme_on") - 1) == 0)) {
                if (phba->nvmet_support)
                        phba->cpucheck_on |= LPFC_CHECK_NVMET_IO;
                else
                        phba->cpucheck_on |= LPFC_CHECK_NVME_IO;
                return strlen(pbuf);
+       } else if ((strncmp(pbuf, "scsi_on", sizeof("scsi_on") - 1) == 0)) {
+               phba->cpucheck_on |= LPFC_CHECK_SCSI_IO;
+               return strlen(pbuf);
        } else if ((strncmp(pbuf, "rcv",
                   sizeof("rcv") - 1) == 0)) {
                if (phba->nvmet_support)
                int *len, int max_cnt, int eqidx, int eq_id)
 {
        struct lpfc_queue *qp;
-       int qidx, rc;
+       int rc;
 
-       for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
-               qp = phba->sli4_hba.hdwq[qidx].fcp_cq;
-               if (qp->assoc_qid != eq_id)
-                       continue;
+       qp = phba->sli4_hba.hdwq[eqidx].fcp_cq;
 
-               *len = __lpfc_idiag_print_cq(qp, "FCP", pbuffer, *len);
+       *len = __lpfc_idiag_print_cq(qp, "FCP", pbuffer, *len);
 
-               /* Reset max counter */
-               qp->CQ_max_cqe = 0;
+       /* Reset max counter */
+       qp->CQ_max_cqe = 0;
 
-               if (*len >= max_cnt)
-                       return 1;
+       if (*len >= max_cnt)
+               return 1;
 
-               rc = lpfc_idiag_wqs_for_cq(phba, "FCP", pbuffer, len,
-                               max_cnt, qp->queue_id);
-               if (rc)
-                       return 1;
-       }
+       rc = lpfc_idiag_wqs_for_cq(phba, "FCP", pbuffer, len,
+                                  max_cnt, qp->queue_id);
+       if (rc)
+               return 1;
 
        if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-               for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
-                       qp = phba->sli4_hba.hdwq[qidx].nvme_cq;
-                       if (qp->assoc_qid != eq_id)
-                               continue;
+               qp = phba->sli4_hba.hdwq[eqidx].nvme_cq;
 
-                       *len = __lpfc_idiag_print_cq(qp, "NVME", pbuffer, *len);
+               *len = __lpfc_idiag_print_cq(qp, "NVME", pbuffer, *len);
 
-                       /* Reset max counter */
-                       qp->CQ_max_cqe = 0;
+               /* Reset max counter */
+               qp->CQ_max_cqe = 0;
 
-                       if (*len >= max_cnt)
-                               return 1;
+               if (*len >= max_cnt)
+                       return 1;
 
-                       rc = lpfc_idiag_wqs_for_cq(phba, "NVME", pbuffer, len,
-                                                  max_cnt, qp->queue_id);
-                       if (rc)
-                               return 1;
-               }
+               rc = lpfc_idiag_wqs_for_cq(phba, "NVME", pbuffer, len,
+                                          max_cnt, qp->queue_id);
+               if (rc)
+                       return 1;
        }
 
        if ((eqidx < phba->cfg_nvmet_mrq) && phba->nvmet_support) {
                        (unsigned long long)qp->q_cnt_4, qp->q_mode);
        len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
                        "EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
-                       "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
+                       "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d] AFFIN[%03d]",
                        qp->queue_id, qp->entry_count, qp->entry_size,
-                       qp->host_index, qp->hba_index, qp->entry_repost);
+                       qp->host_index, qp->hba_index, qp->entry_repost,
+                       qp->chann);
        len +=  snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 
        return len;
                        phba->lpfc_idiag_last_eq = 0;
 
                len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-                                       "EQ %d out of %d HBA EQs\n",
+                                       "HDWQ %d out of %d HBA HDWQs\n",
                                        x, phba->cfg_hdw_queue);
 
                /* Fast-path EQ */
        .release =      lpfc_debugfs_release,
 };
 
-#undef lpfc_debugfs_op_hdwqinfo
-static const struct file_operations lpfc_debugfs_op_hdwqinfo = {
+#ifdef LPFC_HDWQ_LOCK_STAT
+#undef lpfc_debugfs_op_lockstat
+static const struct file_operations lpfc_debugfs_op_lockstat = {
        .owner =        THIS_MODULE,
-       .open =         lpfc_debugfs_hdwqinfo_open,
+       .open =         lpfc_debugfs_lockstat_open,
        .llseek =       lpfc_debugfs_lseek,
        .read =         lpfc_debugfs_read,
+       .write =        lpfc_debugfs_lockstat_write,
        .release =      lpfc_debugfs_release,
 };
+#endif
 
 #undef lpfc_debugfs_op_dumpHBASlim
 static const struct file_operations lpfc_debugfs_op_dumpHBASlim = {
                                            phba->hba_debugfs_root,
                                            phba, &lpfc_debugfs_op_hbqinfo);
 
-               /* Setup hdwqinfo */
-               snprintf(name, sizeof(name), "hdwqinfo");
-               phba->debug_hdwqinfo =
+#ifdef LPFC_HDWQ_LOCK_STAT
+               /* Setup lockstat */
+               snprintf(name, sizeof(name), "lockstat");
+               phba->debug_lockstat =
                        debugfs_create_file(name, S_IFREG | 0644,
                                            phba->hba_debugfs_root,
-                                           phba, &lpfc_debugfs_op_hdwqinfo);
-               if (!phba->debug_hdwqinfo) {
+                                           phba, &lpfc_debugfs_op_lockstat);
+               if (!phba->debug_lockstat) {
                        lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-                                        "0511 Cant create debugfs hdwqinfo\n");
+                                        "0913 Cannot create debugfs lockstat\n");
                        goto debug_failed;
                }
+#endif
 
                /* Setup dumpHBASlim */
                if (phba->sli_rev < LPFC_SLI_REV4) {
                                    vport, &lpfc_debugfs_op_scsistat);
        if (!vport->debug_scsistat) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-                                "0811 Cannot create debugfs scsistat\n");
+                                "0914 Cannot create debugfs scsistat\n");
                goto debug_failed;
        }
 
                debugfs_remove(phba->debug_hbqinfo); /* hbqinfo */
                phba->debug_hbqinfo = NULL;
 
-               debugfs_remove(phba->debug_hdwqinfo); /* hdwqinfo */
-               phba->debug_hdwqinfo = NULL;
-
+#ifdef LPFC_HDWQ_LOCK_STAT
+               debugfs_remove(phba->debug_lockstat); /* lockstat */
+               phba->debug_lockstat = NULL;
+#endif
                debugfs_remove(phba->debug_dumpHBASlim); /* HBASlim */
                phba->debug_dumpHBASlim = NULL;
 
 
 /* multixripool output buffer size */
 #define LPFC_DUMP_MULTIXRIPOOL_SIZE 8192
 
-/* hdwqinfo output buffer size */
-#define LPFC_HDWQINFO_SIZE 8192
-
 enum {
        DUMP_FCP,
        DUMP_NVME,
 
 #define LPFC_DEF_IMAX          150000
 
 #define LPFC_MIN_CPU_MAP       0
-#define LPFC_MAX_CPU_MAP       2
+#define LPFC_MAX_CPU_MAP       1
 #define LPFC_HBA_CPU_MAP       1
-#define LPFC_DRIVER_CPU_MAP    2  /* Default */
 
 /* PORT_CAPABILITIES constants. */
 #define LPFC_MAX_SUPPORTED_PAGES       8
 
 #include <linux/miscdevice.h>
 #include <linux/percpu.h>
 #include <linux/msi.h>
+#include <linux/irq.h>
 #include <linux/bitops.h>
 
 #include <scsi/scsi.h>
 static void lpfc_sli4_disable_intr(struct lpfc_hba *);
 static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *, uint32_t);
 static void lpfc_sli4_oas_verify(struct lpfc_hba *phba);
+static uint16_t lpfc_find_eq_handle(struct lpfc_hba *, uint16_t);
+static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int);
 
 static struct scsi_transport_template *lpfc_transport_template = NULL;
 static struct scsi_transport_template *lpfc_vport_transport_template = NULL;
                }
 
                /* Interrupts per sec per EQ */
-               val = phba->cfg_fcp_imax / phba->cfg_hdw_queue;
+               val = phba->cfg_fcp_imax / phba->cfg_irq_chann;
                tick_cqe = val / CONFIG_HZ; /* Per tick per EQ */
 
                /* Assume 1 CQE/ISR, calc max CQEs allowed for time duration */
                max_cqe = time_elapsed * tick_cqe;
 
-               for (i = 0; i < phba->cfg_hdw_queue; i++) {
+               for (i = 0; i < phba->cfg_irq_chann; i++) {
                        /* Fast-path EQ */
                        qp = phba->sli4_hba.hdwq[i].hba_eq;
                        if (!qp)
                                if (val) {
                                        /* First, interrupts per sec per EQ */
                                        val = phba->cfg_fcp_imax /
-                                               phba->cfg_hdw_queue;
+                                               phba->cfg_irq_chann;
 
                                        /* us delay between each interrupt */
                                        val = LPFC_SEC_TO_USEC / val;
        shost->max_lun = vport->cfg_max_luns;
        shost->this_id = -1;
        shost->max_cmd_len = 16;
+
        if (phba->sli_rev == LPFC_SLI_REV4) {
-               shost->nr_hw_queues = phba->cfg_hdw_queue;
+               if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ)
+                       shost->nr_hw_queues = phba->cfg_hdw_queue;
+               else
+                       shost->nr_hw_queues = phba->sli4_hba.num_present_cpu;
+
                shost->dma_boundary =
                        phba->sli4_hba.pc_sli4_params.sge_supp_len-1;
                shost->sg_tablesize = phba->cfg_scsi_seg_cnt;
                goto out_remove_rpi_hdrs;
        }
 
-       phba->sli4_hba.hba_eq_hdl = kcalloc(phba->cfg_hdw_queue,
+       phba->sli4_hba.hba_eq_hdl = kcalloc(phba->cfg_irq_chann,
                                            sizeof(struct lpfc_hba_eq_hdl),
                                            GFP_KERNEL);
        if (!phba->sli4_hba.hba_eq_hdl) {
        struct lpfc_rsrc_desc_fcfcoe *desc;
        char *pdesc_0;
        uint16_t forced_link_speed;
-       uint32_t if_type;
+       uint32_t if_type, qmin;
        int length, i, rc = 0, rc2;
 
        pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
                                phba->sli4_hba.max_cfg_param.max_rq);
 
                /*
-                * Calculate NVME queue resources based on how
-                * many WQ/CQs are available.
+                * Calculate queue resources based on how
+                * many WQ/CQ/EQs are available.
                 */
-               if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-                       length = phba->sli4_hba.max_cfg_param.max_wq;
-                       if (phba->sli4_hba.max_cfg_param.max_cq <
-                           phba->sli4_hba.max_cfg_param.max_wq)
-                               length = phba->sli4_hba.max_cfg_param.max_cq;
+               qmin = phba->sli4_hba.max_cfg_param.max_wq;
+               if (phba->sli4_hba.max_cfg_param.max_cq < qmin)
+                       qmin = phba->sli4_hba.max_cfg_param.max_cq;
+               if (phba->sli4_hba.max_cfg_param.max_eq < qmin)
+                       qmin = phba->sli4_hba.max_cfg_param.max_eq;
+               /*
+                * What's left after this can go toward NVME / FCP.
+                * The minus 4 accounts for ELS, NVME LS, MBOX
+                * plus one extra. When configured for
+                * NVMET, FCP io channel WQs are not created.
+                */
+               qmin -= 4;
 
-                       /*
-                        * Whats left after this can go toward NVME.
-                        * The minus 6 accounts for ELS, NVME LS, MBOX
-                        * plus a couple extra. When configured for
-                        * NVMET, FCP io channel WQs are not created.
-                        */
-                       length -= 6;
-
-                       /* Take off FCP queues */
-                       if (!phba->nvmet_support)
-                               length -= phba->cfg_hdw_queue;
-
-                       /* Check to see if there is enough for NVME */
-                       if (phba->cfg_hdw_queue > length) {
-                               lpfc_printf_log(
-                                       phba, KERN_ERR, LOG_SLI,
-                                       "2005 Reducing NVME IO channel to %d: "
-                                       "WQ %d CQ %d CommonIO %d\n",
-                                       length,
+               /* If NVME is configured, double the number of CQ/WQs needed */
+               if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
+                   !phba->nvmet_support)
+                       qmin /= 2;
+
+               /* Check to see if there is enough for NVME */
+               if ((phba->cfg_irq_chann > qmin) ||
+                   (phba->cfg_hdw_queue > qmin)) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+                                       "2005 Reducing Queues: "
+                                       "WQ %d CQ %d EQ %d: min %d: "
+                                       "IRQ %d HDWQ %d\n",
                                        phba->sli4_hba.max_cfg_param.max_wq,
                                        phba->sli4_hba.max_cfg_param.max_cq,
+                                       phba->sli4_hba.max_cfg_param.max_eq,
+                                       qmin, phba->cfg_irq_chann,
                                        phba->cfg_hdw_queue);
 
-                               phba->cfg_hdw_queue = length;
-                       }
+                       if (phba->cfg_irq_chann > qmin)
+                               phba->cfg_irq_chann = qmin;
+                       if (phba->cfg_hdw_queue > qmin)
+                               phba->cfg_hdw_queue = qmin;
                }
        }
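Worked example with invented firmware limits: if the port reports max_wq = 128, max_cq = 128 and max_eq = 64, then qmin = 64; subtracting 4 for ELS, NVME LS, MBOX and the one extra leaves 60; with NVME enabled (and not in NVMET mode) each Hardware Queue needs two CQ/WQ pairs, so qmin becomes 30 and both cfg_irq_chann and cfg_hdw_queue are capped at 30.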
 
         * device parameters
         */
 
-       if (phba->cfg_hdw_queue > phba->sli4_hba.max_cfg_param.max_eq) {
-               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                               "2575 Reducing IO channels to match number of "
-                               "available EQs: from %d to %d\n",
-                               phba->cfg_hdw_queue,
-                               phba->sli4_hba.max_cfg_param.max_eq);
-               phba->cfg_hdw_queue = phba->sli4_hba.max_cfg_param.max_eq;
-       }
-
        if (phba->nvmet_support) {
-               if (phba->cfg_hdw_queue < phba->cfg_nvmet_mrq)
-                       phba->cfg_nvmet_mrq = phba->cfg_hdw_queue;
+               if (phba->cfg_irq_chann < phba->cfg_nvmet_mrq)
+                       phba->cfg_nvmet_mrq = phba->cfg_irq_chann;
        }
        if (phba->cfg_nvmet_mrq > LPFC_NVMET_MRQ_MAX)
                phba->cfg_nvmet_mrq = LPFC_NVMET_MRQ_MAX;
 
        lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                       "2574 IO channels: hdwQ %d MRQ: %d\n",
-                       phba->cfg_hdw_queue, phba->cfg_nvmet_mrq);
+                       "2574 IO channels: hdwQ %d IRQ %d MRQ: %d\n",
+                       phba->cfg_hdw_queue, phba->cfg_irq_chann,
+                       phba->cfg_nvmet_mrq);
 
        /* Get EQ depth from module parameter, fake the default for now */
        phba->sli4_hba.eq_esize = LPFC_EQE_SIZE_4B;
        }
        qdesc->qe_valid = 1;
        qdesc->hdwq = wqidx;
+       qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
        phba->sli4_hba.hdwq[wqidx].nvme_cq = qdesc;
 
        qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
                return 1;
        }
        qdesc->hdwq = wqidx;
+       qdesc->chann = wqidx;
        phba->sli4_hba.hdwq[wqidx].nvme_wq = qdesc;
        list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
        return 0;
        }
        qdesc->qe_valid = 1;
        qdesc->hdwq = wqidx;
+       qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
        phba->sli4_hba.hdwq[wqidx].fcp_cq = qdesc;
 
        /* Create Fast Path FCP WQs */
                return 1;
        }
        qdesc->hdwq = wqidx;
+       qdesc->chann = wqidx;
        phba->sli4_hba.hdwq[wqidx].fcp_wq = qdesc;
        list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
        return 0;
 lpfc_sli4_queue_create(struct lpfc_hba *phba)
 {
        struct lpfc_queue *qdesc;
-       int idx;
+       int idx, eqidx;
        struct lpfc_sli4_hdw_queue *qp;
 
        /*
 
        /* Create HBA Event Queues (EQs) */
        for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
-               /* Create EQs */
+               /*
+                * If there are more Hardware Queues than available
+                * IRQ vectors (EQs), multiple Hardware Queues may share
+                * a common EQ.
+                */
+               if (idx >= phba->cfg_irq_chann) {
+                       /* Share an existing EQ */
+                       eqidx = lpfc_find_eq_handle(phba, idx);
+                       phba->sli4_hba.hdwq[idx].hba_eq =
+                               phba->sli4_hba.hdwq[eqidx].hba_eq;
+                       continue;
+               }
+               /* Create an EQ */
                qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
                                              phba->sli4_hba.eq_esize,
                                              phba->sli4_hba.eq_ecount);
                }
                qdesc->qe_valid = 1;
                qdesc->hdwq = idx;
+
+               /* Save the CPU this EQ is affinitized to */
+               eqidx = lpfc_find_eq_handle(phba, idx);
+               qdesc->chann = lpfc_find_cpu_handle(phba, eqidx,
+                                                   LPFC_FIND_BY_EQ);
                phba->sli4_hba.hdwq[idx].hba_eq = qdesc;
        }
 
 
        /* Allocate SCSI SLI4 CQ/WQs */
-       for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
+       for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
                if (lpfc_alloc_fcp_wq_cq(phba, idx))
                        goto out_error;
+       }
 
        /* Allocate NVME SLI4 CQ/WQs */
        if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-               for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
+               for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
                        if (lpfc_alloc_nvme_wq_cq(phba, idx))
                                goto out_error;
+               }
 
                if (phba->nvmet_support) {
                        for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
                                }
                                qdesc->qe_valid = 1;
                                qdesc->hdwq = idx;
+                               qdesc->chann = idx;
                                phba->sli4_hba.nvmet_cqset[idx] = qdesc;
                        }
                }
                goto out_error;
        }
        qdesc->qe_valid = 1;
+       qdesc->chann = 0;
        phba->sli4_hba.els_cq = qdesc;
 
 
                                "0505 Failed allocate slow-path MQ\n");
                goto out_error;
        }
+       qdesc->chann = 0;
        phba->sli4_hba.mbx_wq = qdesc;
 
        /*
                                "0504 Failed allocate slow-path ELS WQ\n");
                goto out_error;
        }
+       qdesc->chann = 0;
        phba->sli4_hba.els_wq = qdesc;
        list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
 
                                        "6079 Failed allocate NVME LS CQ\n");
                        goto out_error;
                }
+               qdesc->chann = 0;
                qdesc->qe_valid = 1;
                phba->sli4_hba.nvmels_cq = qdesc;
 
                                        "6080 Failed allocate NVME LS WQ\n");
                        goto out_error;
                }
+               qdesc->chann = 0;
                phba->sli4_hba.nvmels_wq = qdesc;
                list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
        }
 }
 
 static inline void
-lpfc_sli4_release_hdwq(struct lpfc_sli4_hdw_queue *hdwq, int max)
+lpfc_sli4_release_hdwq(struct lpfc_hba *phba)
 {
+       struct lpfc_sli4_hdw_queue *hdwq;
        uint32_t idx;
 
-       for (idx = 0; idx < max; idx++) {
-               lpfc_sli4_queue_free(hdwq[idx].hba_eq);
+       hdwq = phba->sli4_hba.hdwq;
+       for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+               if (idx < phba->cfg_irq_chann)
+                       lpfc_sli4_queue_free(hdwq[idx].hba_eq);
+               hdwq[idx].hba_eq = NULL;
+
                lpfc_sli4_queue_free(hdwq[idx].fcp_cq);
                lpfc_sli4_queue_free(hdwq[idx].nvme_cq);
                lpfc_sli4_queue_free(hdwq[idx].fcp_wq);
                lpfc_sli4_queue_free(hdwq[idx].nvme_wq);
-               hdwq[idx].hba_eq = NULL;
                hdwq[idx].fcp_cq = NULL;
                hdwq[idx].nvme_cq = NULL;
                hdwq[idx].fcp_wq = NULL;
 {
        /* Release HBA eqs */
        if (phba->sli4_hba.hdwq)
-               lpfc_sli4_release_hdwq(phba->sli4_hba.hdwq,
-                                      phba->cfg_hdw_queue);
+               lpfc_sli4_release_hdwq(phba);
 
        if (phba->nvmet_support) {
                lpfc_sli4_release_queues(&phba->sli4_hba.nvmet_cqset,
                        qidx, (uint32_t)rc);
                return rc;
        }
-       cq->chann = qidx;
 
        if (qtype != LPFC_MBOX) {
                /* Setup cq_map for fast lookup */
                        /* no need to tear down cq - caller will do so */
                        return rc;
                }
-               wq->chann = qidx;
 
                /* Bind this CQ/WQ to the NVME ring */
                pring = wq->pring;
        return 0;
 }
 
+/**
+ * lpfc_setup_cq_lookup - Setup the CQ lookup table
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine populates the cq_lookup table with all
+ * available CQ queue_ids.
+ **/
+void
+lpfc_setup_cq_lookup(struct lpfc_hba *phba)
+{
+       struct lpfc_queue *eq, *childq;
+       struct lpfc_sli4_hdw_queue *qp;
+       int qidx;
+
+       qp = phba->sli4_hba.hdwq;
+       memset(phba->sli4_hba.cq_lookup, 0,
+              (sizeof(struct lpfc_queue *) * (phba->sli4_hba.cq_max + 1)));
+       for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
+               eq = qp[qidx].hba_eq;
+               if (!eq)
+                       continue;
+               list_for_each_entry(childq, &eq->child_list, list) {
+                       if (childq->queue_id > phba->sli4_hba.cq_max)
+                               continue;
+                       if ((childq->subtype == LPFC_FCP) ||
+                           (childq->subtype == LPFC_NVME))
+                               phba->sli4_hba.cq_lookup[childq->queue_id] =
+                                       childq;
+               }
+       }
+}
+
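The table costs one pointer per possible CQ id but lets the interrupt path resolve a completed CQ in constant time instead of walking the EQ child list. A minimal sketch of how a fast-path handler could consume it; the helper name is invented and the caller is assumed to have already extracted the CQID from the EQE:

/* Illustrative only: resolve a completed CQ by its queue_id. */
static inline struct lpfc_queue *
lpfc_example_cqid_to_cq(struct lpfc_hba *phba, uint16_t cqid)
{
        /* Fall back to a list walk if the table is absent or the CQID
         * is outside the range captured when the table was built.
         */
        if (!phba->sli4_hba.cq_lookup || cqid > phba->sli4_hba.cq_max)
                return NULL;
        return phba->sli4_hba.cq_lookup[cqid];
}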
 /**
  * lpfc_sli4_queue_setup - Set up all the SLI4 queues
  * @phba: pointer to lpfc hba data structure.
                rc = -ENOMEM;
                goto out_error;
        }
-       for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+       for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
                if (!qp[qidx].hba_eq) {
                        lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                        "0522 Fast-path EQ (%d) not "
                        phba->sli4_hba.dat_rq->queue_id,
                        phba->sli4_hba.els_cq->queue_id);
 
-       for (qidx = 0; qidx < phba->cfg_hdw_queue;
+       for (qidx = 0; qidx < phba->cfg_irq_chann;
             qidx += LPFC_MAX_EQ_DELAY_EQID_CNT)
                lpfc_modify_hba_eq_delay(phba, qidx, LPFC_MAX_EQ_DELAY_EQID_CNT,
                                         phba->cfg_fcp_imax);
 
+       if (phba->sli4_hba.cq_max) {
+               kfree(phba->sli4_hba.cq_lookup);
+               phba->sli4_hba.cq_lookup = kcalloc((phba->sli4_hba.cq_max + 1),
+                       sizeof(struct lpfc_queue *), GFP_KERNEL);
+               if (!phba->sli4_hba.cq_lookup) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                       "0549 Failed setup of CQ Lookup table: "
+                                       "size 0x%x\n", phba->sli4_hba.cq_max);
+                       goto out_destroy;
+               }
+               lpfc_setup_cq_lookup(phba);
+       }
        return 0;
 
 out_destroy:
                        lpfc_wq_destroy(phba, qp->nvme_wq);
                        lpfc_cq_destroy(phba, qp->fcp_cq);
                        lpfc_cq_destroy(phba, qp->nvme_cq);
-                       lpfc_eq_destroy(phba, qp->hba_eq);
+                       if (qidx < phba->cfg_irq_chann)
+                               lpfc_eq_destroy(phba, qp->hba_eq);
                }
        }
+
+       kfree(phba->sli4_hba.cq_lookup);
+       phba->sli4_hba.cq_lookup = NULL;
+       phba->sli4_hba.cq_max = 0;
 }
 
 /**
        phba->sli.slistat.sli_intr = 0;
 }
 
+/**
+ * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified EQ/HDWQ
+ * @phba: pointer to lpfc hba data structure.
+ * @id: EQ vector index or Hardware Queue index
+ * @match: LPFC_FIND_BY_EQ = match by EQ
+ *         LPFC_FIND_BY_HDWQ = match by Hardware Queue
+ */
+static uint16_t
+lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match)
+{
+       struct lpfc_vector_map_info *cpup;
+       int cpu;
+
+       /* Find the CPU that matches the specified EQ or Hardware Queue */
+       cpup = phba->sli4_hba.cpu_map;
+       for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+               if ((match == LPFC_FIND_BY_EQ) &&
+                   (cpup->irq != LPFC_VECTOR_MAP_EMPTY) &&
+                   (cpup->eq == id))
+                       return cpu;
+               if ((match == LPFC_FIND_BY_HDWQ) && (cpup->hdwq == id))
+                       return cpu;
+               cpup++;
+       }
+       return 0;
+}
+
+/**
+ * lpfc_find_eq_handle - Find the EQ that corresponds to the specified
+ *                       Hardware Queue
+ * @phba: pointer to lpfc hba data structure.
+ * @hdwq: Hardware Queue index
+ */
+static uint16_t
+lpfc_find_eq_handle(struct lpfc_hba *phba, uint16_t hdwq)
+{
+       struct lpfc_vector_map_info *cpup;
+       int cpu;
+
+       /* Find the EQ associated with the specified Hardware Queue */
+       cpup = phba->sli4_hba.cpu_map;
+       for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+               if (cpup->hdwq == hdwq)
+                       return cpup->eq;
+               cpup++;
+       }
+       return 0;
+}
+
+/**
+ * lpfc_find_phys_id_eq - Find the next EQ that corresponds to the specified
+ *                        Physical Id.
+ * @phba: pointer to lpfc hba data structure.
+ * @eqidx: EQ index
+ * @phys_id: CPU package physical id
+ */
+static uint16_t
+lpfc_find_phys_id_eq(struct lpfc_hba *phba, uint16_t eqidx, uint16_t phys_id)
+{
+       struct lpfc_vector_map_info *cpup;
+       int cpu, desired_phys_id;
+
+       desired_phys_id = LPFC_VECTOR_MAP_EMPTY;
+
+       /* Find the desired phys_id for the specified EQ */
+       cpup = phba->sli4_hba.cpu_map;
+       for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+               if ((cpup->irq != LPFC_VECTOR_MAP_EMPTY) &&
+                   (cpup->eq == eqidx)) {
+                       desired_phys_id = cpup->phys_id;
+                       break;
+               }
+               cpup++;
+       }
+       if (phys_id == desired_phys_id)
+               return eqidx;
+
+       /* Find an EQ that is on the specified phys_id */
+       cpup = phba->sli4_hba.cpu_map;
+       for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+               if ((cpup->irq != LPFC_VECTOR_MAP_EMPTY) &&
+                   (cpup->phys_id == phys_id))
+                       return cpup->eq;
+               cpup++;
+       }
+       return 0;
+}
+
+/**
+ * lpfc_find_cpu_map - Find next available CPU map entry that matches the
+ *                     phys_id and core_id.
+ * @phba: pointer to lpfc hba data structure.
+ * @phys_id: CPU package physical id
+ * @core_id: CPU core id
+ * @hdwqidx: Hardware Queue index
+ * @eqidx: EQ index
+ * @isr_avail: Should an IRQ be associated with this entry
+ */
+static struct lpfc_vector_map_info *
+lpfc_find_cpu_map(struct lpfc_hba *phba, uint16_t phys_id, uint16_t core_id,
+                 uint16_t hdwqidx, uint16_t eqidx, int isr_avail)
+{
+       struct lpfc_vector_map_info *cpup;
+       int cpu;
+
+       cpup = phba->sli4_hba.cpu_map;
+       for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+               /* Does the cpup match the one we are looking for */
+               if ((cpup->phys_id == phys_id) &&
+                   (cpup->core_id == core_id)) {
+                       /* If it has been already assigned, then skip it */
+                       if (cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) {
+                               cpup++;
+                               continue;
+                       }
+                       /* Ensure we are on the same phys_id as the first one */
+                       if (!isr_avail)
+                               cpup->eq = lpfc_find_phys_id_eq(phba, eqidx,
+                                                               phys_id);
+                       else
+                               cpup->eq = eqidx;
+
+                       cpup->hdwq = hdwqidx;
+                       if (isr_avail) {
+                               cpup->irq =
+                                   pci_irq_vector(phba->pcidev, eqidx);
+
+                               /* Now affinitize to the selected CPU */
+                               irq_set_affinity_hint(cpup->irq,
+                                                     get_cpu_mask(cpu));
+                               irq_set_status_flags(cpup->irq,
+                                                    IRQ_NO_BALANCING);
+
+                               lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+                                               "3330 Set Affinity: CPU %d "
+                                               "EQ %d irq %d (HDWQ %x)\n",
+                                               cpu, cpup->eq,
+                                               cpup->irq, cpup->hdwq);
+                       }
+                       return cpup;
+               }
+               cpup++;
+       }
+       return NULL;
+}
+
+#ifdef CONFIG_X86
+/**
+ * lpfc_find_hyper - Determine if the CPU map entry is hyper-threaded
+ * @phba: pointer to lpfc hba data structure.
+ * @cpu: CPU map index
+ * @phys_id: CPU package physical id
+ * @core_id: CPU core id
+ */
+static int
+lpfc_find_hyper(struct lpfc_hba *phba, int cpu,
+               uint16_t phys_id, uint16_t core_id)
+{
+       struct lpfc_vector_map_info *cpup;
+       int idx;
+
+       cpup = phba->sli4_hba.cpu_map;
+       for (idx = 0; idx < phba->sli4_hba.num_present_cpu; idx++) {
+               /* Does the cpup match the one we are looking for */
+               if ((cpup->phys_id == phys_id) &&
+                   (cpup->core_id == core_id) &&
+                   (cpu != idx)) {
+                       return 1;
+               }
+               cpup++;
+       }
+       return 0;
+}
+#endif
+
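For a concrete (hypothetical) topology: on a two-socket x86 host with hyper-threading, logical CPUs 0 and 16 may both report phys_id 0 and core_id 0. When the map entry for CPU 16 is filled in, lpfc_find_hyper() finds CPU 0 already carrying the same ids and returns 1, which is what the new "ht" column in the lpfc_fcp_cpu_map output reports for that entry.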
 /**
  * lpfc_cpu_affinity_check - Check vector CPU affinity mappings
  * @phba: pointer to lpfc hba data structure.
+ * @vectors: number of msix vectors allocated.
  *
  * The routine will figure out the CPU affinity assignment for every
- * MSI-X vector allocated for the HBA.  The hba_eq_hdl will be updated
- * with a pointer to the CPU mask that defines ALL the CPUs this vector
- * can be associated with. If the vector can be unquely associated with
- * a single CPU, that CPU will be recorded in hba_eq_hdl[index].cpu.
+ * MSI-X vector allocated for the HBA.
  * In addition, the CPU to IO channel mapping will be calculated
  * and the phba->sli4_hba.cpu_map array will reflect this.
  */
 static void
-lpfc_cpu_affinity_check(struct lpfc_hba *phba)
+lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
 {
+       int i, j, idx, phys_id;
+       int max_phys_id, min_phys_id;
+       int max_core_id, min_core_id;
        struct lpfc_vector_map_info *cpup;
-       int cpu, idx;
+       int cpu, eqidx, hdwqidx, isr_avail;
 #ifdef CONFIG_X86
        struct cpuinfo_x86 *cpuinfo;
 #endif
               (sizeof(struct lpfc_vector_map_info) *
               phba->sli4_hba.num_present_cpu));
 
+       max_phys_id = 0;
+       min_phys_id = 0xffff;
+       max_core_id = 0;
+       min_core_id = 0xffff;
+       phys_id = 0;
+
        /* Update CPU map with physical id and core id of each CPU */
        cpup = phba->sli4_hba.cpu_map;
        for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
                cpuinfo = &cpu_data(cpu);
                cpup->phys_id = cpuinfo->phys_proc_id;
                cpup->core_id = cpuinfo->cpu_core_id;
+               cpup->hyper = lpfc_find_hyper(phba, cpu,
+                                             cpup->phys_id, cpup->core_id);
 #else
                /* No distinction between CPUs for other platforms */
                cpup->phys_id = 0;
-               cpup->core_id = 0;
+               cpup->core_id = cpu;
+               cpup->hyper = 0;
 #endif
+
                lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
                                "3328 CPU physid %d coreid %d\n",
                                cpup->phys_id, cpup->core_id);
+
+               if (cpup->phys_id > max_phys_id)
+                       max_phys_id = cpup->phys_id;
+               if (cpup->phys_id < min_phys_id)
+                       min_phys_id = cpup->phys_id;
+
+               if (cpup->core_id > max_core_id)
+                       max_core_id = cpup->core_id;
+               if (cpup->core_id < min_core_id)
+                       min_core_id = cpup->core_id;
+
                cpup++;
        }
 
-       for (idx = 0; idx <  phba->cfg_hdw_queue; idx++) {
-               cpup = &phba->sli4_hba.cpu_map[idx];
-               cpup->irq = pci_irq_vector(phba->pcidev, idx);
+       /*
+        * If the number of IRQ vectors equals the number of present CPUs,
+        * the mapping is a simple 1:1 assignment of CPU to EQ, hardware
+        * queue, and MSI-X vector.
+        * This is the desired path when NVME is enabled.
+        */
+       if (vectors == phba->sli4_hba.num_present_cpu) {
+               cpup = phba->sli4_hba.cpu_map;
+               for (idx = 0; idx < vectors; idx++) {
+                       cpup->eq = idx;
+                       cpup->hdwq = idx;
+                       cpup->irq = pci_irq_vector(phba->pcidev, idx);
+
+                       /* Now affinitize to the selected CPU */
+                       irq_set_affinity_hint(
+                               pci_irq_vector(phba->pcidev, idx),
+                               get_cpu_mask(idx));
+                       irq_set_status_flags(cpup->irq, IRQ_NO_BALANCING);
 
-               /* For now assume vector N maps to CPU N */
-               irq_set_affinity_hint(cpup->irq, get_cpu_mask(idx));
-               cpup->hdwq = idx;
+                       lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+                                       "3336 Set Affinity: CPU %d "
+                                       "EQ %d irq %d\n",
+                                       idx, cpup->eq,
+                                       pci_irq_vector(phba->pcidev, idx));
+                       cpup++;
+               }
+               return;
+       }
 
-               lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-                       "3336 Set Affinity: CPU %d "
-                       "hdwq %d irq %d\n",
-                       cpu, cpup->hdwq, cpup->irq);
+       idx = 0;
+       isr_avail = 1;
+       eqidx = 0;
+       hdwqidx = 0;
+
+       /* The mapping is more involved in this case. Hardware Queues and
+        * EQs are assigned in a "ping pong" fashion, alternating between
+        * the available phys_ids (and core_ids) until every present CPU
+        * is mapped. Once the IRQ vectors are exhausted, the remaining
+        * CPUs are assigned a hardware queue only.
+        */
+       while (idx < phba->sli4_hba.num_present_cpu) {
+               for (i = min_core_id; i <= max_core_id; i++) {
+                       for (j = min_phys_id; j <= max_phys_id; j++) {
+                               cpup = lpfc_find_cpu_map(phba, j, i, hdwqidx,
+                                                        eqidx, isr_avail);
+                               if (!cpup)
+                                       continue;
+                               idx++;
+                               hdwqidx++;
+                               if (hdwqidx >= phba->cfg_hdw_queue)
+                                       hdwqidx = 0;
+                               eqidx++;
+                               if (eqidx >= phba->cfg_irq_chann) {
+                                       isr_avail = 0;
+                                       eqidx = 0;
+                               }
+                       }
+               }
        }
        return;
 }
 
-
 /**
  * lpfc_sli4_enable_msix - Enable MSI-X interrupt mode to SLI-4 device
  * @phba: pointer to lpfc hba data structure.
        char *name;
 
        /* Set up MSI-X multi-message vectors */
-       vectors = phba->cfg_hdw_queue;
+       vectors = phba->cfg_irq_chann;
 
        rc = pci_alloc_irq_vectors(phba->pcidev,
                                (phba->nvmet_support) ? 1 : 2,
 
                phba->sli4_hba.hba_eq_hdl[index].idx = index;
                phba->sli4_hba.hba_eq_hdl[index].phba = phba;
-               atomic_set(&phba->sli4_hba.hba_eq_hdl[index].hba_eq_in_use, 1);
                rc = request_irq(pci_irq_vector(phba->pcidev, index),
                         &lpfc_sli4_hba_intr_handler, 0,
                         name,
                }
        }
 
-       if (vectors != phba->cfg_hdw_queue) {
+       if (vectors != phba->cfg_irq_chann) {
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                "3238 Reducing IO channels to match number of "
                                "MSI-X vectors, requested %d got %d\n",
-                               phba->cfg_hdw_queue, vectors);
-               if (phba->cfg_hdw_queue > vectors)
-                       phba->cfg_hdw_queue = vectors;
+                               phba->cfg_irq_chann, vectors);
+               if (phba->cfg_irq_chann > vectors)
+                       phba->cfg_irq_chann = vectors;
                if (phba->cfg_nvmet_mrq > vectors)
                        phba->cfg_nvmet_mrq = vectors;
        }
-       lpfc_cpu_affinity_check(phba);
 
        return rc;
 
                return rc;
        }
 
-       for (index = 0; index < phba->cfg_hdw_queue; index++) {
+       for (index = 0; index < phba->cfg_irq_chann; index++) {
                phba->sli4_hba.hba_eq_hdl[index].idx = index;
                phba->sli4_hba.hba_eq_hdl[index].phba = phba;
        }
                        phba->intr_type = INTx;
                        intr_mode = 0;
 
-                       for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+                       for (idx = 0; idx < phba->cfg_irq_chann; idx++) {
                                eqhdl = &phba->sli4_hba.hba_eq_hdl[idx];
                                eqhdl->idx = idx;
                                eqhdl->phba = phba;
-                               atomic_set(&eqhdl->hba_eq_in_use, 1);
                        }
                }
        }
                int index;
 
                /* Free up MSI-X multi-message vectors */
-               for (index = 0; index < phba->cfg_hdw_queue; index++) {
+               for (index = 0; index < phba->cfg_irq_chann; index++) {
                        irq_set_affinity_hint(
                                pci_irq_vector(phba->pcidev, index),
                                NULL);
        }
        /* Default to single EQ for non-MSI-X */
        if (phba->intr_type != MSIX) {
-               phba->cfg_hdw_queue = 1;
+               phba->cfg_irq_chann = 1;
                if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
                        if (phba->nvmet_support)
                                phba->cfg_nvmet_mrq = 1;
                }
        }
+       lpfc_cpu_affinity_check(phba, phba->cfg_irq_chann);
 
        /* Create SCSI host to the physical port */
        error = lpfc_create_shost(phba);
 
        if (qidx) {
                str = "IO ";  /* IO queue */
                qhandle->index = ((qidx - 1) %
-                       vport->phba->cfg_hdw_queue);
+                       lpfc_nvme_template.max_hw_queues);
        } else {
                str = "ADM";  /* Admin queue */
                qhandle->index = qidx;
                }
        }
 
+       /* Look up Hardware Queue index based on fcp_io_sched module parameter */
        if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) {
                idx = lpfc_queue_info->index;
        } else {
                cpu = smp_processor_id();
-               if (cpu < phba->cfg_hdw_queue)
-                       idx = cpu;
-               else
-                       idx = cpu % phba->cfg_hdw_queue;
+               idx = phba->sli4_hba.cpu_map[cpu].hdwq;
        }
 
        lpfc_ncmd = lpfc_get_nvme_buf(phba, ndlp, idx, expedite);
         * allocate + 3, one for cmd, one for rsp and one for this alignment
         */
        lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
-       lpfc_nvme_template.max_hw_queues = phba->cfg_hdw_queue;
+
+       /* Advertise how many hw queues we support based on fcp_io_sched */
+       if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ)
+               lpfc_nvme_template.max_hw_queues = phba->cfg_hdw_queue;
+       else
+               lpfc_nvme_template.max_hw_queues =
+                       phba->sli4_hba.num_present_cpu;
 
        /* localport is allocated from the stack, but the registration
         * call allocates heap memory as well as the private area.
         * WQEs have been removed from the txcmplqs.
         */
        for (i = 0; i < phba->cfg_hdw_queue; i++) {
+               if (!phba->sli4_hba.hdwq[i].nvme_wq)
+                       continue;
                pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
 
                if (!pring)
 
                tag = blk_mq_unique_tag(cmnd->request);
                idx = blk_mq_unique_tag_to_hwq(tag);
        } else {
-               if (cpu < phba->cfg_hdw_queue)
-                       idx = cpu;
-               else
-                       idx = cpu % phba->cfg_hdw_queue;
+               idx = phba->sli4_hba.cpu_map[cpu].hdwq;
        }
 
        lpfc_cmd = lpfc_get_io_buf(phba, ndlp, idx,
        struct Scsi_Host *shost;
        int idx;
        uint32_t logit = LOG_FCP;
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+       int cpu;
+#endif
 
        /* Sanity check on return of outstanding command */
        cmd = lpfc_cmd->pCmd;
        if (phba->sli4_hba.hdwq)
                phba->sli4_hba.hdwq[idx].scsi_cstat.io_cmpls++;
 
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
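+       /* Count this completion against the current CPU for the debugfs
+        * cpucheck statistics.
+        */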
+       if (phba->cpucheck_on & LPFC_CHECK_SCSI_IO) {
+               cpu = smp_processor_id();
+               if (cpu < LPFC_CHECK_CPU_CNT)
+                       phba->sli4_hba.hdwq[idx].cpucheck_cmpl_io[cpu]++;
+       }
+#endif
        shost = cmd->device->host;
 
        lpfc_cmd->result = (pIocbOut->iocb.un.ulpWord[4] & IOERR_PARAM_MASK);
        struct lpfc_io_buf *lpfc_cmd;
        struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
        int err, idx;
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+       int cpu;
+#endif
 
        rdata = lpfc_rport_data_from_scsi_device(cmnd->device);
 
 
        lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp);
 
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
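+       /* Count this submission against the current CPU for the debugfs
+        * cpucheck statistics.
+        */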
+       if (phba->cpucheck_on & LPFC_CHECK_SCSI_IO) {
+               cpu = smp_processor_id();
+               if (cpu < LPFC_CHECK_CPU_CNT) {
+                       struct lpfc_sli4_hdw_queue *hdwq =
+                                       &phba->sli4_hba.hdwq[lpfc_cmd->hdwq_no];
+                       hdwq->cpucheck_xmt_io[cpu]++;
+               }
+       }
+#endif
        err = lpfc_sli_issue_iocb(phba, LPFC_FCP_RING,
                                  &lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB);
        if (err) {
 
                                                LPFC_QUEUE_REARM);
                }
 
-               for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++)
+               for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++)
                        sli4_hba->sli4_eq_release(qp[qidx].hba_eq,
                                                LPFC_QUEUE_REARM);
        }
        /* Find the eq associated with the mcq */
 
        if (sli4_hba->hdwq)
-               for (eqidx = 0; eqidx < phba->cfg_hdw_queue; eqidx++)
+               for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++)
                        if (sli4_hba->hdwq[eqidx].hba_eq->queue_id ==
                            sli4_hba->mbx_cq->assoc_qid) {
                                fpeq = sli4_hba->hdwq[eqidx].hba_eq;
 lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
                    struct lpfc_iocbq *piocb, uint32_t flag)
 {
-       struct lpfc_hba_eq_hdl *hba_eq_hdl;
        struct lpfc_sli_ring *pring;
-       struct lpfc_queue *fpeq;
-       struct lpfc_eqe *eqe;
        unsigned long iflags;
-       int rc, idx;
+       int rc;
 
        if (phba->sli_rev == LPFC_SLI_REV4) {
                pring = lpfc_sli4_calc_ring(phba, piocb);
                spin_lock_irqsave(&pring->ring_lock, iflags);
                rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
                spin_unlock_irqrestore(&pring->ring_lock, iflags);
-
-               if (lpfc_fcp_look_ahead && (piocb->iocb_flag &  LPFC_IO_FCP)) {
-                       idx = piocb->hba_wqidx;
-                       hba_eq_hdl = &phba->sli4_hba.hba_eq_hdl[idx];
-
-                       if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use)) {
-
-                               /* Get associated EQ with this index */
-                               fpeq = phba->sli4_hba.hdwq[idx].hba_eq;
-
-                               /* Turn off interrupts from this EQ */
-                               phba->sli4_hba.sli4_eq_clr_intr(fpeq);
-
-                               /*
-                                * Process all the events on FCP EQ
-                                */
-                               while ((eqe = lpfc_sli4_eq_get(fpeq))) {
-                                       lpfc_sli4_hba_handle_eqe(phba,
-                                               eqe, idx);
-                                       fpeq->EQ_processed++;
-                               }
-
-                               /* Always clear and re-arm the EQ */
-                               phba->sli4_hba.sli4_eq_release(fpeq,
-                                       LPFC_QUEUE_REARM);
-                       }
-                       atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-               }
        } else {
                /* For now, SLI2/3 will still use hbalock */
                spin_lock_irqsave(&phba->hbalock, iflags);
        /* Save EQ associated with this CQ */
        cq->assoc_qp = speq;
 
-       if (!queue_work(phba->wq, &cq->spwork))
+       if (!queue_work_on(cq->chann, phba->wq, &cq->spwork))
                lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
                                "0390 Cannot schedule soft IRQ "
                                "for CQ eqcqid=%d, cqid=%d on CPU %d\n",
        /* Get the reference to the corresponding CQ */
        cqid = bf_get_le32(lpfc_eqe_resource_id, eqe);
 
-       /* First check for NVME/SCSI completion */
-       if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
-           (cqid == phba->sli4_hba.hdwq[qidx].nvme_cq_map)) {
-               /* Process NVME / NVMET command completion */
-               cq = phba->sli4_hba.hdwq[qidx].nvme_cq;
-               goto  process_cq;
-       }
-
-       if (cqid == phba->sli4_hba.hdwq[qidx].fcp_cq_map) {
-               /* Process FCP command completion */
-               cq = phba->sli4_hba.hdwq[qidx].fcp_cq;
-               goto  process_cq;
+       /* Use the fast lookup method first */
+       if (cqid <= phba->sli4_hba.cq_max) {
+               cq = phba->sli4_hba.cq_lookup[cqid];
+               if (cq)
+                       goto  work_cq;
        }
 
        /* Next check for NVMET completion */
                return;
        }
 
-       /* Save EQ associated with this CQ */
-       cq->assoc_qp = phba->sli4_hba.hdwq[qidx].hba_eq;
-
+work_cq:
        if (!queue_work_on(cq->chann, phba->wq, &cq->irqwork))
                lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
                                "0363 Cannot schedule soft IRQ "
        if (unlikely(!fpeq))
                return IRQ_NONE;
 
-       if (lpfc_fcp_look_ahead) {
-               if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use))
-                       phba->sli4_hba.sli4_eq_clr_intr(fpeq);
-               else {
-                       atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-                       return IRQ_NONE;
-               }
-       }
-
        /* Check device state for handling interrupt */
        if (unlikely(lpfc_intr_state_check(phba))) {
                /* Check again for link_state with lock held */
                        /* Flush, clear interrupt, and rearm the EQ */
                        lpfc_sli4_eq_flush(phba, fpeq);
                spin_unlock_irqrestore(&phba->hbalock, iflag);
-               if (lpfc_fcp_look_ahead)
-                       atomic_inc(&hba_eq_hdl->hba_eq_in_use);
                return IRQ_NONE;
        }
 
 
        if (unlikely(ecount == 0)) {
                fpeq->EQ_no_entry++;
-
-               if (lpfc_fcp_look_ahead) {
-                       atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-                       return IRQ_NONE;
-               }
-
                if (phba->intr_type == MSIX)
                        /* MSI-X treated interrupt served as no EQ share INT */
                        lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
                        return IRQ_NONE;
        }
 
-       if (lpfc_fcp_look_ahead)
-               atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-
        return IRQ_HANDLED;
 } /* lpfc_sli4_fp_intr_handler */
 
        /*
         * Invoke fast-path host attention interrupt handling as appropriate.
         */
-       for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+       for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
                hba_irq_rc = lpfc_sli4_hba_intr_handler(irq,
                                        &phba->sli4_hba.hba_eq_hdl[qidx]);
                if (hba_irq_rc == IRQ_HANDLED)
        union lpfc_sli4_cfg_shdr *shdr;
        uint16_t dmult;
 
-       if (startq >= phba->cfg_hdw_queue)
+       if (startq >= phba->cfg_irq_chann)
                return 0;
 
        mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
        eq_delay = &mbox->u.mqe.un.eq_delay;
 
        /* Calculate delay multiplier from maximum interrupts per second */
-       result = imax / phba->cfg_hdw_queue;
+       result = imax / phba->cfg_irq_chann;
        if (result > LPFC_DMULT_CONST || result == 0)
                dmult = 0;
        else
                dmult = LPFC_DMULT_MAX;
 
        cnt = 0;
-       for (qidx = startq; qidx < phba->cfg_hdw_queue; qidx++) {
+       for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) {
                eq = phba->sli4_hba.hdwq[qidx].hba_eq;
                if (!eq)
                        continue;
                        val =  phba->cfg_fcp_imax;
                        if (val) {
                                /* First, interrupts per sec per EQ */
-                               val = phba->cfg_fcp_imax / phba->cfg_hdw_queue;
+                               val = phba->cfg_fcp_imax / phba->cfg_irq_chann;
 
                                /* us delay between each interrupt */
                                val = LPFC_SEC_TO_USEC / val;
        cq->subtype = subtype;
        cq->queue_id = bf_get(lpfc_mbx_cq_create_q_id, &cq_create->u.response);
        cq->assoc_qid = eq->queue_id;
+       cq->assoc_qp = eq;
        cq->host_index = 0;
        cq->hba_index = 0;
        cq->entry_repost = LPFC_CQ_REPOST;
 
+       if (cq->queue_id > phba->sli4_hba.cq_max)
+               phba->sli4_hba.cq_max = cq->queue_id;
 out:
        mempool_free(mbox, phba->mbox_mem_pool);
        return status;
                cq->type = type;
                cq->subtype = subtype;
                cq->assoc_qid = eq->queue_id;
+               cq->assoc_qp = eq;
                cq->host_index = 0;
                cq->hba_index = 0;
                cq->entry_repost = LPFC_CQ_REPOST;
        for (idx = 0; idx < numcq; idx++) {
                cq = cqp[idx];
                cq->queue_id = rc + idx;
+               if (cq->queue_id > phba->sli4_hba.cq_max)
+                       phba->sli4_hba.cq_max = cq->queue_id;
        }
 
 out:
        /* NVME_LS and NVME_LS ABTS requests. */
        if (pwqe->iocb_flag & LPFC_IO_NVME_LS) {
                pring =  phba->sli4_hba.nvmels_wq->pring;
-               spin_lock_irqsave(&pring->ring_lock, iflags);
+               lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+                                         qp, wq_access);
                sglq = __lpfc_sli_get_els_sglq(phba, pwqe);
                if (!sglq) {
                        spin_unlock_irqrestore(&pring->ring_lock, iflags);
 
                bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
 
-               spin_lock_irqsave(&pring->ring_lock, iflags);
+               lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+                                         qp, wq_access);
                ret = lpfc_sli4_wq_put(wq, wqe);
                if (ret) {
                        spin_unlock_irqrestore(&pring->ring_lock, iflags);
                       pwqe->sli4_xritag);
                bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
 
-               spin_lock_irqsave(&pring->ring_lock, iflags);
+               lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+                                         qp, wq_access);
                ret = lpfc_sli4_wq_put(wq, wqe);
                if (ret) {
                        spin_unlock_irqrestore(&pring->ring_lock, iflags);
 {
        struct lpfc_pbl_pool *pbl_pool;
        struct lpfc_pvt_pool *pvt_pool;
+       struct lpfc_sli4_hdw_queue *qp;
        struct lpfc_io_buf *lpfc_ncmd;
        struct lpfc_io_buf *lpfc_ncmd_next;
        unsigned long iflag;
        struct list_head tmp_list;
        u32 tmp_count;
 
-       pbl_pool = &phba->sli4_hba.hdwq[hwqid].p_multixri_pool->pbl_pool;
-       pvt_pool = &phba->sli4_hba.hdwq[hwqid].p_multixri_pool->pvt_pool;
+       qp = &phba->sli4_hba.hdwq[hwqid];
+       pbl_pool = &qp->p_multixri_pool->pbl_pool;
+       pvt_pool = &qp->p_multixri_pool->pvt_pool;
        tmp_count = 0;
 
-       spin_lock_irqsave(&pbl_pool->lock, iflag);
-       spin_lock(&pvt_pool->lock);
+       lpfc_qp_spin_lock_irqsave(&pbl_pool->lock, iflag, qp, mv_to_pub_pool);
+       lpfc_qp_spin_lock(&pvt_pool->lock, qp, mv_from_pvt_pool);
 
        if (pvt_pool->count > pvt_pool->low_watermark) {
                /* Step 1: move (all - low_watermark) from pvt_pool
  *   false - if the specified pbl_pool is empty or locked by someone else
  **/
 static bool
-_lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, struct lpfc_pbl_pool *pbl_pool,
+_lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
+                         struct lpfc_pbl_pool *pbl_pool,
                          struct lpfc_pvt_pool *pvt_pool, u32 count)
 {
        struct lpfc_io_buf *lpfc_ncmd;
        if (ret) {
                if (pbl_pool->count) {
                        /* Move a batch of XRIs from public to private pool */
-                       spin_lock(&pvt_pool->lock);
+                       lpfc_qp_spin_lock(&pvt_pool->lock, qp, mv_to_pvt_pool);
                        list_for_each_entry_safe(lpfc_ncmd,
                                                 lpfc_ncmd_next,
                                                 &pbl_pool->list,
        struct lpfc_multixri_pool *next_multixri_pool;
        struct lpfc_pvt_pool *pvt_pool;
        struct lpfc_pbl_pool *pbl_pool;
+       struct lpfc_sli4_hdw_queue *qp;
        u32 next_hwqid;
        u32 hwq_count;
        int ret;
 
-       multixri_pool = phba->sli4_hba.hdwq[hwqid].p_multixri_pool;
+       qp = &phba->sli4_hba.hdwq[hwqid];
+       multixri_pool = qp->p_multixri_pool;
        pvt_pool = &multixri_pool->pvt_pool;
        pbl_pool = &multixri_pool->pbl_pool;
 
        /* Check if local pbl_pool is available */
-       ret = _lpfc_move_xri_pbl_to_pvt(phba, pbl_pool, pvt_pool, count);
+       ret = _lpfc_move_xri_pbl_to_pvt(phba, qp, pbl_pool, pvt_pool, count);
        if (ret) {
 #ifdef LPFC_MXP_STAT
                multixri_pool->local_pbl_hit_count++;
 
                /* Check if the public free xri pool is available */
                ret = _lpfc_move_xri_pbl_to_pvt(
-                       phba, pbl_pool, pvt_pool, count);
+                       phba, qp, pbl_pool, pvt_pool, count);
 
                /* Exit while-loop if success or all hwqid are checked */
        } while (!ret && next_hwqid != multixri_pool->rrb_next_hwqid);
                if ((pvt_pool->count < pvt_pool->low_watermark) ||
                    (xri_owned < xri_limit &&
                     pvt_pool->count < pvt_pool->high_watermark)) {
-                       spin_lock_irqsave(&pvt_pool->lock, iflag);
+                       lpfc_qp_spin_lock_irqsave(&pvt_pool->lock, iflag,
+                                                 qp, free_pvt_pool);
                        list_add_tail(&lpfc_ncmd->list,
                                      &pvt_pool->list);
                        pvt_pool->count++;
                        spin_unlock_irqrestore(&pvt_pool->lock, iflag);
                } else {
-                       spin_lock_irqsave(&pbl_pool->lock, iflag);
+                       lpfc_qp_spin_lock_irqsave(&pbl_pool->lock, iflag,
+                                                 qp, free_pub_pool);
                        list_add_tail(&lpfc_ncmd->list,
                                      &pbl_pool->list);
                        pbl_pool->count++;
                        spin_unlock_irqrestore(&pbl_pool->lock, iflag);
                }
        } else {
-               spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag);
+               lpfc_qp_spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag,
+                                         qp, free_xri);
                list_add_tail(&lpfc_ncmd->list,
                              &qp->lpfc_io_buf_list_put);
                qp->put_io_bufs++;
  **/
 static struct lpfc_io_buf *
 lpfc_get_io_buf_from_private_pool(struct lpfc_hba *phba,
+                                 struct lpfc_sli4_hdw_queue *qp,
                                  struct lpfc_pvt_pool *pvt_pool,
                                  struct lpfc_nodelist *ndlp)
 {
        struct lpfc_io_buf *lpfc_ncmd_next;
        unsigned long iflag;
 
-       spin_lock_irqsave(&pvt_pool->lock, iflag);
+       lpfc_qp_spin_lock_irqsave(&pvt_pool->lock, iflag, qp, alloc_pvt_pool);
        list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
                                 &pvt_pool->list, list) {
                if (lpfc_test_rrq_active(
                lpfc_move_xri_pbl_to_pvt(phba, hwqid, XRI_BATCH);
 
        /* Get one XRI from private free xri pool */
-       lpfc_ncmd = lpfc_get_io_buf_from_private_pool(phba, pvt_pool, ndlp);
+       lpfc_ncmd = lpfc_get_io_buf_from_private_pool(phba, qp, pvt_pool, ndlp);
 
        if (lpfc_ncmd) {
                lpfc_ncmd->hdwq = qp;
                lpfc_cmd = lpfc_get_io_buf_from_multixri_pools(
                        phba, ndlp, hwqid, expedite);
        else {
-               spin_lock_irqsave(&qp->io_buf_list_get_lock, iflag);
+               lpfc_qp_spin_lock_irqsave(&qp->io_buf_list_get_lock, iflag,
+                                         qp, alloc_xri_get);
                if (qp->get_io_bufs > LPFC_NVME_EXPEDITE_XRICNT || expedite)
                        lpfc_cmd = lpfc_io_buf(phba, ndlp, hwqid);
                if (!lpfc_cmd) {
-                       spin_lock(&qp->io_buf_list_put_lock);
+                       lpfc_qp_spin_lock(&qp->io_buf_list_put_lock,
+                                         qp, alloc_xri_put);
                        list_splice(&qp->lpfc_io_buf_list_put,
                                    &qp->lpfc_io_buf_list_get);
                        qp->get_io_bufs += qp->put_io_bufs;
 
 
 /* Multi-queue arrangement for FCP EQ/CQ/WQ tuples */
 #define LPFC_HBA_HDWQ_MIN      0
-#define LPFC_HBA_HDWQ_MAX      64
+#define LPFC_HBA_HDWQ_MAX      128
 #define LPFC_HBA_HDWQ_DEF      0
 
 /* Common buffer size to accommodate SCSI and NVME IO buffers */
        uint32_t assoc_qid;     /* Queue ID associated with, for CQ/WQ/MQ */
        uint32_t host_index;    /* The host's index for putting or getting */
        uint32_t hba_index;     /* The last known hba index for get or put */
+       uint32_t q_mode;
 
        struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */
        struct lpfc_rqb *rqbp;  /* ptr to RQ buffers */
 
-       uint32_t q_mode;
        uint16_t page_count;    /* Number of pages allocated for this queue */
        uint16_t page_size;     /* size of page allocated for this queue */
 #define LPFC_EXPANDED_PAGE_SIZE        16384
 #define LPFC_DEFAULT_PAGE_SIZE 4096
-       uint16_t chann;         /* IO channel this queue is associated with */
+       uint16_t chann;         /* For WQ/CQ: the Hardware Queue association */
+                               /* For EQ: the CPU affinity */
+#define LPFC_FIND_BY_EQ                0
+#define LPFC_FIND_BY_HDWQ      1
        uint8_t db_format;
 #define LPFC_DB_RING_FORMAT    0x01
 #define LPFC_DB_LIST_FORMAT    0x02
        uint32_t idx;
        char handler_name[LPFC_SLI4_HANDLER_NAME_SZ];
        struct lpfc_hba *phba;
-       atomic_t hba_eq_in_use;
-       struct cpumask *cpumask;
-       /* CPU affinitsed to or 0xffffffff if multiple */
-       uint32_t cpu;
-#define LPFC_MULTI_CPU_AFFINITY 0xffffffff
 };
 
 /*BB Credit recovery value*/
        uint16_t        phys_id;
        uint16_t        core_id;
        uint16_t        irq;
+       uint16_t        eq;     /* EQ (IRQ vector) index for this CPU */
        uint16_t        hdwq;
+       uint16_t        hyper;  /* set when another CPU shares this core (SMT) */
 };
 #define LPFC_VECTOR_MAP_EMPTY  0xffff
 
        u32 io_cmpls;
 };
 
+#ifdef LPFC_HDWQ_LOCK_STAT
+struct lpfc_lock_stat {
+       uint32_t alloc_xri_get;
+       uint32_t alloc_xri_put;
+       uint32_t free_xri;
+       uint32_t wq_access;
+       uint32_t alloc_pvt_pool;
+       uint32_t mv_from_pvt_pool;
+       uint32_t mv_to_pub_pool;
+       uint32_t mv_to_pvt_pool;
+       uint32_t free_pub_pool;
+       uint32_t free_pvt_pool;
+};
+#endif
+
 /* SLI4 HBA data structure entries */
 struct lpfc_sli4_hdw_queue {
        /* Pointers to the constructed SLI4 queues */
        /* FC-4 Stats counters */
        struct lpfc_fc4_ctrl_stat nvme_cstat;
        struct lpfc_fc4_ctrl_stat scsi_cstat;
+#ifdef LPFC_HDWQ_LOCK_STAT
+       struct lpfc_lock_stat lock_conflict;
+#endif
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 #define LPFC_CHECK_CPU_CNT    128
 #endif
 };
 
+#ifdef LPFC_HDWQ_LOCK_STAT
+/* compile time trylock stats */
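+/*
+ * Each wrapper spins on a trylock and increments the named lock_conflict
+ * counter on the hardware queue once per contended acquisition.
+ */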
+#define lpfc_qp_spin_lock_irqsave(lock, flag, qp, lstat) \
+       { \
+       int only_once = 1; \
+       while (spin_trylock_irqsave(lock, flag) == 0) { \
+               if (only_once) { \
+                       only_once = 0; \
+                       qp->lock_conflict.lstat++; \
+               } \
+       } \
+       }
+#define lpfc_qp_spin_lock(lock, qp, lstat) \
+       { \
+       int only_once = 1; \
+       while (spin_trylock(lock) == 0) { \
+               if (only_once) { \
+                       only_once = 0; \
+                       qp->lock_conflict.lstat++; \
+               } \
+       } \
+       }
+#else
+#define lpfc_qp_spin_lock_irqsave(lock, flag, qp, lstat) \
+       spin_lock_irqsave(lock, flag)
+#define lpfc_qp_spin_lock(lock, qp, lstat) spin_lock(lock)
+#endif
+
 struct lpfc_sli4_hba {
        void __iomem *conf_regs_memmap_p; /* Kernel memory mapped address for
                                           * config space registers
        uint16_t nvmet_xri_cnt;
        uint16_t nvmet_io_wait_cnt;
        uint16_t nvmet_io_wait_total;
+       uint16_t cq_max;                /* highest CQ queue_id created */
+       struct lpfc_queue **cq_lookup;  /* cqid-indexed CQ lookup table */
        struct list_head lpfc_els_sgl_list;
        struct list_head lpfc_abts_els_sgl_list;
        spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */