static void lpfc_sli4_disable_intr(struct lpfc_hba *);
 static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *, uint32_t);
 static void lpfc_sli4_oas_verify(struct lpfc_hba *phba);
-static uint16_t lpfc_find_eq_handle(struct lpfc_hba *, uint16_t);
 static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int);
 
 static struct scsi_transport_template *lpfc_transport_template = NULL;
        if (!eqcnt)
                goto requeue;
 
+       /* Loop thru all IRQ vectors */
        for (i = 0; i < phba->cfg_irq_chann; i++) {
-               eq = phba->sli4_hba.hdwq[i].hba_eq;
+               /* Get the EQ corresponding to the IRQ vector */
+               eq = phba->sli4_hba.hba_eq_hdl[i].eq;
                if (eq && eqcnt[eq->last_cpu] < 2)
                        eqcnt[eq->last_cpu]++;
                continue;
 lpfc_sli4_queue_create(struct lpfc_hba *phba)
 {
        struct lpfc_queue *qdesc;
-       int idx, eqidx, cpu;
+       int idx, cpu, eqcpu;
        struct lpfc_sli4_hdw_queue *qp;
+       struct lpfc_vector_map_info *cpup;
+       struct lpfc_vector_map_info *eqcpup;
        struct lpfc_eq_intr_info *eqi;
 
        /*
        INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list);
 
        /* Create HBA Event Queues (EQs) */
-       for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
-               /* determine EQ affinity */
-               eqidx = lpfc_find_eq_handle(phba, idx);
-               cpu = lpfc_find_cpu_handle(phba, eqidx, LPFC_FIND_BY_EQ);
-               /*
-                * If there are more Hardware Queues than available
-                * EQs, multiple Hardware Queues may share a common EQ.
+       for_each_present_cpu(cpu) {
+       /* We only want to create 1 EQ per vector, even though
+        * multiple CPUs might be using that vector, so only
+        * select the CPUs that are LPFC_CPU_FIRST_IRQ.
                 */
-               if (idx >= phba->cfg_irq_chann) {
-                       /* Share an existing EQ */
-                       phba->sli4_hba.hdwq[idx].hba_eq =
-                               phba->sli4_hba.hdwq[eqidx].hba_eq;
+               cpup = &phba->sli4_hba.cpu_map[cpu];
+               if (!(cpup->flag & LPFC_CPU_FIRST_IRQ))
                        continue;
-               }
-               /* Create an EQ */
+
+               /* Get a ptr to the Hardware Queue associated with this CPU */
+               qp = &phba->sli4_hba.hdwq[cpup->hdwq];
+
+               /* Allocate an EQ */
                qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
                                              phba->sli4_hba.eq_esize,
                                              phba->sli4_hba.eq_ecount, cpu);
                if (!qdesc) {
                        lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                                       "0497 Failed allocate EQ (%d)\n", idx);
+                                       "0497 Failed allocate EQ (%d)\n",
+                                       cpup->hdwq);
                        goto out_error;
                }
                qdesc->qe_valid = 1;
-               qdesc->hdwq = idx;
-
-               /* Save the CPU this EQ is affinitised to */
-               qdesc->chann = cpu;
-               phba->sli4_hba.hdwq[idx].hba_eq = qdesc;
+               qdesc->hdwq = cpup->hdwq;
+               qdesc->chann = cpu; /* First CPU this EQ is affinitised to */
                qdesc->last_cpu = qdesc->chann;
+
+               /* Save the allocated EQ in the Hardware Queue */
+               qp->hba_eq = qdesc;
+
                eqi = per_cpu_ptr(phba->sli4_hba.eq_info, qdesc->last_cpu);
                list_add(&qdesc->cpu_list, &eqi->list);
        }
 
+       /* Now we need to populate the other Hardware Queues that share
+        * an IRQ vector with the associated EQ ptr.
+        */
+       for_each_present_cpu(cpu) {
+               cpup = &phba->sli4_hba.cpu_map[cpu];
+
+               /* Check for EQ already allocated in previous loop */
+               if (cpup->flag & LPFC_CPU_FIRST_IRQ)
+                       continue;
+
+               /* Check for multiple CPUs per hdwq */
+               qp = &phba->sli4_hba.hdwq[cpup->hdwq];
+               if (qp->hba_eq)
+                       continue;
+
+               /* We need to share an EQ for this hdwq */
+               eqcpu = lpfc_find_cpu_handle(phba, cpup->eq, LPFC_FIND_BY_EQ);
+               eqcpup = &phba->sli4_hba.cpu_map[eqcpu];
+               qp->hba_eq = phba->sli4_hba.hdwq[eqcpup->hdwq].hba_eq;
+       }
 
        /* Allocate SCSI SLI4 CQ/WQs */
        for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
 lpfc_sli4_release_hdwq(struct lpfc_hba *phba)
 {
        struct lpfc_sli4_hdw_queue *hdwq;
+       struct lpfc_queue *eq;
        uint32_t idx;
 
        hdwq = phba->sli4_hba.hdwq;
-       for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
-               if (idx < phba->cfg_irq_chann)
-                       lpfc_sli4_queue_free(hdwq[idx].hba_eq);
-               hdwq[idx].hba_eq = NULL;
 
+       /* Loop thru all Hardware Queues */
+       for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+               /* Free the CQ/WQ corresponding to the Hardware Queue */
                lpfc_sli4_queue_free(hdwq[idx].fcp_cq);
                lpfc_sli4_queue_free(hdwq[idx].nvme_cq);
                lpfc_sli4_queue_free(hdwq[idx].fcp_wq);
                lpfc_sli4_queue_free(hdwq[idx].nvme_wq);
+               hdwq[idx].hba_eq = NULL;
                hdwq[idx].fcp_cq = NULL;
                hdwq[idx].nvme_cq = NULL;
                hdwq[idx].fcp_wq = NULL;
                hdwq[idx].nvme_wq = NULL;
        }
+       /* Loop thru all IRQ vectors */
+       for (idx = 0; idx < phba->cfg_irq_chann; idx++) {
+               /* Free the EQ corresponding to the IRQ vector */
+               eq = phba->sli4_hba.hba_eq_hdl[idx].eq;
+               lpfc_sli4_queue_free(eq);
+               phba->sli4_hba.hba_eq_hdl[idx].eq = NULL;
+       }
 }
 
 /**
        qp = phba->sli4_hba.hdwq;
        memset(phba->sli4_hba.cq_lookup, 0,
               (sizeof(struct lpfc_queue *) * (phba->sli4_hba.cq_max + 1)));
+       /* Loop thru all IRQ vectors */
        for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
-               eq = qp[qidx].hba_eq;
+               /* Get the EQ corresponding to the IRQ vector */
+               eq = phba->sli4_hba.hba_eq_hdl[qidx].eq;
                if (!eq)
                        continue;
+               /* Loop through all CQs associated with that EQ */
                list_for_each_entry(childq, &eq->child_list, list) {
                        if (childq->queue_id > phba->sli4_hba.cq_max)
                                continue;
 {
        uint32_t shdr_status, shdr_add_status;
        union lpfc_sli4_cfg_shdr *shdr;
+       struct lpfc_vector_map_info *cpup;
        struct lpfc_sli4_hdw_queue *qp;
        LPFC_MBOXQ_t *mboxq;
-       int qidx;
+       int qidx, cpu;
        uint32_t length, usdelay;
        int rc = -ENOMEM;
 
                rc = -ENOMEM;
                goto out_error;
        }
+
+       /* Loop thru all IRQ vectors */
        for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
-               if (!qp[qidx].hba_eq) {
-                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                                       "0522 Fast-path EQ (%d) not "
-                                       "allocated\n", qidx);
-                       rc = -ENOMEM;
-                       goto out_destroy;
-               }
-               rc = lpfc_eq_create(phba, qp[qidx].hba_eq,
-                                   phba->cfg_fcp_imax);
-               if (rc) {
-                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                                       "0523 Failed setup of fast-path EQ "
-                                       "(%d), rc = 0x%x\n", qidx,
-                                       (uint32_t)rc);
-                       goto out_destroy;
+               /* Create HBA Event Queues (EQs) in order */
+               for_each_present_cpu(cpu) {
+                       cpup = &phba->sli4_hba.cpu_map[cpu];
+
+                       /* Look for the CPU that's using that vector with
+                        * LPFC_CPU_FIRST_IRQ set.
+                        */
+                       if (!(cpup->flag & LPFC_CPU_FIRST_IRQ))
+                               continue;
+                       if (qidx != cpup->eq)
+                               continue;
+
+                       /* Create an EQ for that vector */
+                       rc = lpfc_eq_create(phba, qp[cpup->hdwq].hba_eq,
+                                           phba->cfg_fcp_imax);
+                       if (rc) {
+                               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                               "0523 Failed setup of fast-path"
+                                               " EQ (%d), rc = 0x%x\n",
+                                               cpup->eq, (uint32_t)rc);
+                               goto out_destroy;
+                       }
+
+                       /* Save the EQ for that vector in the hba_eq_hdl */
+                       phba->sli4_hba.hba_eq_hdl[cpup->eq].eq =
+                               qp[cpup->hdwq].hba_eq;
+
+                       lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+                                       "2584 HBA EQ setup: queue[%d]-id=%d\n",
+                                       cpup->eq,
+                                       qp[cpup->hdwq].hba_eq->queue_id);
                }
-               lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-                               "2584 HBA EQ setup: queue[%d]-id=%d\n", qidx,
-                               qp[qidx].hba_eq->queue_id);
        }
 
+       /* Loop thru all Hardware Queues */
        if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
                for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+                       cpu = lpfc_find_cpu_handle(phba, qidx,
+                                                  LPFC_FIND_BY_HDWQ);
+                       cpup = &phba->sli4_hba.cpu_map[cpu];
+
+                       /* Create the CQ/WQ corresponding to the
+                        * Hardware Queue
+                        */
                        rc = lpfc_create_wq_cq(phba,
-                                       qp[qidx].hba_eq,
+                                       phba->sli4_hba.hdwq[cpup->hdwq].hba_eq,
                                        qp[qidx].nvme_cq,
                                        qp[qidx].nvme_wq,
                                        &phba->sli4_hba.hdwq[qidx].nvme_cq_map,
        }
 
        for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+               cpu = lpfc_find_cpu_handle(phba, qidx, LPFC_FIND_BY_HDWQ);
+               cpup = &phba->sli4_hba.cpu_map[cpu];
+
+               /* Create the CQ/WQ corresponding to the Hardware Queue */
                rc = lpfc_create_wq_cq(phba,
-                                      qp[qidx].hba_eq,
+                                      phba->sli4_hba.hdwq[cpup->hdwq].hba_eq,
                                       qp[qidx].fcp_cq,
                                       qp[qidx].fcp_wq,
                                       &phba->sli4_hba.hdwq[qidx].fcp_cq_map,
 lpfc_sli4_queue_unset(struct lpfc_hba *phba)
 {
        struct lpfc_sli4_hdw_queue *qp;
+       struct lpfc_queue *eq;
        int qidx;
 
        /* Unset mailbox command work queue */
 
        /* Unset fast-path SLI4 queues */
        if (phba->sli4_hba.hdwq) {
+               /* Loop thru all Hardware Queues */
                for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+                       /* Destroy the CQ/WQ corresponding to Hardware Queue */
                        qp = &phba->sli4_hba.hdwq[qidx];
                        lpfc_wq_destroy(phba, qp->fcp_wq);
                        lpfc_wq_destroy(phba, qp->nvme_wq);
                        lpfc_cq_destroy(phba, qp->fcp_cq);
                        lpfc_cq_destroy(phba, qp->nvme_cq);
-                       if (qidx < phba->cfg_irq_chann)
-                               lpfc_eq_destroy(phba, qp->hba_eq);
+               }
+               /* Loop thru all IRQ vectors */
+               for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
+                       /* Destroy the EQ corresponding to the IRQ vector */
+                       eq = phba->sli4_hba.hba_eq_hdl[qidx].eq;
+                       lpfc_eq_destroy(phba, eq);
                }
        }
 
 }
 
 /**
- * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified EQ
+ * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified Queue
  * @phba: pointer to lpfc hba data structure.
  * @id: EQ vector index or Hardware Queue index
  * @match: LPFC_FIND_BY_EQ = match by EQ
  *         LPFC_FIND_BY_HDWQ = match by Hardware Queue
+ * Return the CPU that matches the selection criteria
  */
 static uint16_t
 lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match)
        struct lpfc_vector_map_info *cpup;
        int cpu;
 
-       /* Find the desired phys_id for the specified EQ */
+       /* Loop through all CPUs */
        for_each_present_cpu(cpu) {
                cpup = &phba->sli4_hba.cpu_map[cpu];
+
+               /* If we are matching by EQ, there may be multiple CPUs
+                * using the same vector, so select the one with
+                * LPFC_CPU_FIRST_IRQ set.
+                */
                if ((match == LPFC_FIND_BY_EQ) &&
+                   (cpup->flag & LPFC_CPU_FIRST_IRQ) &&
                    (cpup->irq != LPFC_VECTOR_MAP_EMPTY) &&
                    (cpup->eq == id))
                        return cpu;
+
+               /* If matching by HDWQ, select the first CPU that matches */
                if ((match == LPFC_FIND_BY_HDWQ) && (cpup->hdwq == id))
                        return cpu;
        }
        return 0;
 }
 
-/**
- * lpfc_find_eq_handle - Find the EQ that corresponds to the specified
- *                       Hardware Queue
- * @phba: pointer to lpfc hba data structure.
- * @hdwq: Hardware Queue index
- */
-static uint16_t
-lpfc_find_eq_handle(struct lpfc_hba *phba, uint16_t hdwq)
-{
-       struct lpfc_vector_map_info *cpup;
-       int cpu;
-
-       /* Find the desired phys_id for the specified EQ */
-       for_each_present_cpu(cpu) {
-               cpup = &phba->sli4_hba.cpu_map[cpu];
-               if (cpup->hdwq == hdwq)
-                       return cpup->eq;
-       }
-       return 0;
-}
-
 #ifdef CONFIG_X86
 /**
  * lpfc_find_hyper - Determine if the CPU map entry is hyper-threaded
 
        /* This loop sets up all CPUs that are affinitized with a
         * irq vector assigned to the driver. All affinitized CPUs
-        * will get a link to that vectors IRQ and EQ. For now we
-        * are assuming all CPUs using the same EQ will all share
-        * the same hardware queue.
+        * will get a link to that vectors IRQ and EQ.
         */
        for (idx = 0; idx <  phba->cfg_irq_chann; idx++) {
+               /* Get a CPU mask for all CPUs affinitized to this vector */
                maskp = pci_irq_get_affinity(phba->pcidev, idx);
                if (!maskp)
                        continue;
 
+               i = 0;
+               /* Loop through all CPUs associated with vector idx */
                for_each_cpu_and(cpu, maskp, cpu_present_mask) {
+                       /* Set the EQ index and IRQ for that vector */
                        cpup = &phba->sli4_hba.cpu_map[cpu];
                        cpup->eq = idx;
-                       cpup->hdwq = idx;
                        cpup->irq = pci_irq_vector(phba->pcidev, idx);
 
-                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                       lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
                                        "3336 Set Affinity: CPU %d "
-                                       "hdwq %d irq %d\n",
-                                       cpu, cpup->hdwq, cpup->irq);
+                                       "irq %d eq %d\n",
+                                       cpu, cpup->irq, cpup->eq);
+
+                       /* If this is the first CPU that's assigned to this
+                        * vector, set LPFC_CPU_FIRST_IRQ.
+                        */
+                       if (!i)
+                               cpup->flag |= LPFC_CPU_FIRST_IRQ;
+                       i++;
                }
        }
 
        /* After looking at each irq vector assigned to this pcidev, its
         * possible to see that not ALL CPUs have been accounted for.
-        * Next we will set any unassigned cpu map entries to a IRQ
-        * on the same phys_id
+        * Next we will set any unassigned (unaffinitized) cpu map
+        * entries to a IRQ on the same phys_id.
         */
        first_cpu = cpumask_first(cpu_present_mask);
        start_cpu = first_cpu;
                        /* Mark CPU as IRQ not assigned by the kernel */
                        cpup->flag |= LPFC_CPU_MAP_UNASSIGN;
 
-                       /* If so, find a new_cpup thats on the the same
+                       /* If so, find a new_cpup that's on the SAME
                         * phys_id as cpup. start_cpu will start where we
                         * left off so all unassigned entries don't get assgined
                         * the IRQ of the first entry.
 found_same:
                        /* We found a matching phys_id, so copy the IRQ info */
                        cpup->eq = new_cpup->eq;
-                       cpup->hdwq = new_cpup->hdwq;
                        cpup->irq = new_cpup->irq;
 
                        /* Bump start_cpu to the next slot to minmize the
                        if (start_cpu == nr_cpumask_bits)
                                start_cpu = first_cpu;
 
-                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                       lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
                                        "3337 Set Affinity: CPU %d "
-                                       "hdwq %d irq %d from id %d same "
+                                       "irq %d from id %d same "
                                        "phys_id (%d)\n",
-                                       cpu, cpup->hdwq, cpup->irq,
-                                       new_cpu, cpup->phys_id);
+                                       cpu, cpup->irq, new_cpu, cpup->phys_id);
                }
        }
 
                        /* Mark it as IRQ not assigned by the kernel */
                        cpup->flag |= LPFC_CPU_MAP_UNASSIGN;
 
-                       /* If so, find a new_cpup thats on any phys_id
+                       /* If so, find a new_cpup that's on ANY phys_id
                         * as the cpup. start_cpu will start where we
                         * left off so all unassigned entries don't get
                         * assigned the IRQ of the first entry.
                        /* We should never leave an entry unassigned */
                        lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                        "3339 Set Affinity: CPU %d "
-                                       "hdwq %d irq %d UNASSIGNED\n",
-                                       cpu, cpup->hdwq, cpup->irq);
+                                       "irq %d UNASSIGNED\n",
+                                       cpu, cpup->irq);
                        continue;
 found_any:
                        /* We found an available entry, copy the IRQ info */
                        cpup->eq = new_cpup->eq;
-                       cpup->hdwq = new_cpup->hdwq;
                        cpup->irq = new_cpup->irq;
 
                        /* Bump start_cpu to the next slot to minmize the
                        if (start_cpu == nr_cpumask_bits)
                                start_cpu = first_cpu;
 
-                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                       lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
                                        "3338 Set Affinity: CPU %d "
-                                       "hdwq %d irq %d from id %d (%d/%d)\n",
-                                       cpu, cpup->hdwq, cpup->irq, new_cpu,
+                                       "irq %d from id %d (%d/%d)\n",
+                                       cpu, cpup->irq, new_cpu,
                                        new_cpup->phys_id, new_cpup->core_id);
                }
        }
+
+       /* Finally we need to associate a hdwq with each cpu_map entry
+        * This will be 1 to 1 - hdwq to cpu, unless there are fewer
+        * hardware queues than CPUs. For that case we will just round-robin
+        * the available hardware queues as they get assigned to CPUs.
+        */
+       idx = 0;
+       start_cpu = 0;
+       for_each_present_cpu(cpu) {
+               cpup = &phba->sli4_hba.cpu_map[cpu];
+               if (idx >=  phba->cfg_hdw_queue) {
+                       /* We need to reuse a Hardware Queue for another CPU,
+                        * so be smart about it and pick one that has its
+                        * IRQ/EQ mapped to the same phys_id (CPU package)
+                        * and core_id.
+                        */
+                       new_cpu = start_cpu;
+                       for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+                               new_cpup = &phba->sli4_hba.cpu_map[new_cpu];
+                               if ((new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) &&
+                                   (new_cpup->phys_id == cpup->phys_id) &&
+                                   (new_cpup->core_id == cpup->core_id))
+                                       goto found_hdwq;
+                               new_cpu = cpumask_next(
+                                       new_cpu, cpu_present_mask);
+                               if (new_cpu == nr_cpumask_bits)
+                                       new_cpu = first_cpu;
+                       }
+
+                       /* If we can't match both phys_id and core_id,
+                        * settle for just a phys_id match.
+                        */
+                       new_cpu = start_cpu;
+                       for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+                               new_cpup = &phba->sli4_hba.cpu_map[new_cpu];
+                               if ((new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) &&
+                                   (new_cpup->phys_id == cpup->phys_id))
+                                       goto found_hdwq;
+                               new_cpu = cpumask_next(
+                                       new_cpu, cpu_present_mask);
+                               if (new_cpu == nr_cpumask_bits)
+                                       new_cpu = first_cpu;
+                       }
+
+                       /* Otherwise just round robin on cfg_hdw_queue */
+                       cpup->hdwq = idx % phba->cfg_hdw_queue;
+                       goto logit;
+found_hdwq:
+                       /* We found an available entry, copy its hdwq */
+                       start_cpu = cpumask_next(new_cpu, cpu_present_mask);
+                       if (start_cpu == nr_cpumask_bits)
+                               start_cpu = first_cpu;
+                       cpup->hdwq = new_cpup->hdwq;
+               } else {
+                       /* 1 to 1, CPU to hdwq */
+                       cpup->hdwq = idx;
+               }
+logit:
+               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                               "3335 Set Affinity: CPU %d (phys %d core %d): "
+                               "hdwq %d eq %d irq %d flg x%x\n",
+                               cpu, cpup->phys_id, cpup->core_id,
+                               cpup->hdwq, cpup->eq, cpup->irq, cpup->flag);
+               idx++;
+       }
+
+       /* The cpu_map array will be used later during initialization
+        * when EQ / CQ / WQs are allocated and configured.
+        */
        return;
 }