"\t\tdefault - default perf_mode is 'balanced'"
        );
 
+static int poll_queues;
+module_param(poll_queues, int, 0444);
+MODULE_PARM_DESC(poll_queues, "Number of queues to be used for io_uring poll mode.\n\t\t"
+       "This parameter is effective only if host_tagset_enable=1.\n\t\t"
+       "When poll_queues are enabled, perf_mode is set to latency mode.\n\t\t"
+       );
+
 enum mpt3sas_perf_mode {
        MPT_PERF_MODE_DEFAULT   = -1,
        MPT_PERF_MODE_BALANCED  = 0,
                 * and this call is safe since dead ioc will never return any
                 * command back from HW.
                 */
+               mpt3sas_base_pause_mq_polling(ioc);
                ioc->schedule_dead_ioc_flush_running_cmds(ioc);
                /*
                 * Set remove_host flag early since kernel thread will
                        spin_unlock_irqrestore(
                            &ioc->ioc_reset_in_progress_lock, flags);
                        mpt3sas_base_mask_interrupts(ioc);
+                       mpt3sas_base_pause_mq_polling(ioc);
                        _base_clear_outstanding_commands(ioc);
                }
 
        return cb_idx;
 }
 
+/**
+ * mpt3sas_base_pause_mq_polling - pause polling on the mq poll queues
+ *                             when the driver is flushing out the IOs.
+ * @ioc: per adapter object
+ *
+ * Pause polling on the mq poll (io_uring) queues while the driver is flushing
+ * out the IOs. Otherwise the same IO may be completed from two paths.
+ *
+ * Returns nothing.
+ */
+void
+mpt3sas_base_pause_mq_polling(struct MPT3SAS_ADAPTER *ioc)
+{
+       int iopoll_q_count =
+           ioc->reply_queue_count - ioc->iopoll_q_start_index;
+       int qid;
+
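+       /* mpt3sas_blk_mq_poll() returns early once this flag is set. */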
+       for (qid = 0; qid < iopoll_q_count; qid++)
+               atomic_set(&ioc->io_uring_poll_queues[qid].pause, 1);
+
+       /*
+        * wait for current poll to complete.
+        */
+       for (qid = 0; qid < iopoll_q_count; qid++) {
+               while (atomic_read(&ioc->io_uring_poll_queues[qid].busy))
+                       udelay(500);
+       }
+}
+
+/**
+ * mpt3sas_base_resume_mq_polling - Resume polling on mq poll queues.
+ * @ioc: per adapter object
+ *
+ * Returns nothing.
+ */
+void
+mpt3sas_base_resume_mq_polling(struct MPT3SAS_ADAPTER *ioc)
+{
+       int iopoll_q_count =
+           ioc->reply_queue_count - ioc->iopoll_q_start_index;
+       int qid;
+
+       for (qid = 0; qid < iopoll_q_count; qid++)
+               atomic_set(&ioc->io_uring_poll_queues[qid].pause, 0);
+}
+
 /**
  * mpt3sas_base_mask_interrupts - disable interrupts
  * @ioc: per adapter object
                                                 MPI2_RPHI_MSIX_INDEX_SHIFT),
                                                &ioc->chip->ReplyPostHostIndex);
                        }
-                       if (!reply_q->irq_poll_scheduled) {
+                       if (!reply_q->is_iouring_poll_q &&
+                           !reply_q->irq_poll_scheduled) {
                                reply_q->irq_poll_scheduled = true;
                                irq_poll_sched(&reply_q->irqpoll);
                        }
        return completed_cmds;
 }
 
+/**
+ * mpt3sas_blk_mq_poll - poll the blk mq poll queue
+ * @shost: Scsi_Host object
+ * @queue_num: hw ctx queue number
+ *
+ * Returns the number of entries processed from the poll queue.
+ */
+int mpt3sas_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
+{
+       struct MPT3SAS_ADAPTER *ioc =
+           (struct MPT3SAS_ADAPTER *)shost->hostdata;
+       struct adapter_reply_queue *reply_q;
+       int num_entries = 0;
+       int qid = queue_num - ioc->iopoll_q_start_index;
+
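+       /*
+        * Bail out if polling is paused, and take the busy count so that
+        * only one poller runs on this queue at a time.
+        */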
+       if (atomic_read(&ioc->io_uring_poll_queues[qid].pause) ||
+           !atomic_add_unless(&ioc->io_uring_poll_queues[qid].busy, 1, 1))
+               return 0;
+
+       reply_q = ioc->io_uring_poll_queues[qid].reply_q;
+
+       num_entries = _base_process_reply_queue(reply_q);
+       atomic_dec(&ioc->io_uring_poll_queues[qid].busy);
+
+       return num_entries;
+}
+
 /**
  * _base_interrupt - MPT adapter (IOC) specific interrupt handler.
  * @irq: irq number (not used)
                return;
 
        list_for_each_entry_safe(reply_q, next, &ioc->reply_queue_list, list) {
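+               /* Poll queues have no IRQ and hence no irq_poll context. */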
+               if (reply_q->is_iouring_poll_q)
+                       continue;
                irq_poll_init(&reply_q->irqpoll,
                        ioc->hba_queue_depth/4, _base_irqpoll);
                reply_q->irq_poll_scheduled = false;
                /* TMs are on msix_index == 0 */
                if (reply_q->msix_index == 0)
                        continue;
+
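+               /* Poll queues have no IRQ to synchronize; drain them directly. */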
+               if (reply_q->is_iouring_poll_q) {
+                       _base_process_reply_queue(reply_q);
+                       continue;
+               }
+
                synchronize_irq(pci_irq_vector(ioc->pdev, reply_q->msix_index));
                if (reply_q->irq_poll_scheduled) {
                        /* Calling irq_poll_disable will wait for any pending
 
        list_for_each_entry_safe(reply_q, next, &ioc->reply_queue_list, list) {
                list_del(&reply_q->list);
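+               /* Poll queues have no IRQ or affinity hint to release. */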
+               if (reply_q->is_iouring_poll_q) {
+                       kfree(reply_q);
+                       continue;
+               }
+
                if (ioc->smp_affinity_enable)
                        irq_set_affinity_hint(pci_irq_vector(ioc->pdev,
                            reply_q->msix_index), NULL);
 {
        struct pci_dev *pdev = ioc->pdev;
        struct adapter_reply_queue *reply_q;
-       int r;
+       int r, qid;
 
        reply_q =  kzalloc(sizeof(struct adapter_reply_queue), GFP_KERNEL);
        if (!reply_q) {
        reply_q->msix_index = index;
 
        atomic_set(&reply_q->busy, 0);
+
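+       /*
+        * Reply queues at or beyond iopoll_q_start_index are io_uring poll
+        * queues; name them separately and skip the IRQ request.
+        */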
+       if (index >= ioc->iopoll_q_start_index) {
+               qid = index - ioc->iopoll_q_start_index;
+               snprintf(reply_q->name, MPT_NAME_LENGTH, "%s%d-mq-poll%d",
+                   ioc->driver_name, ioc->id, qid);
+               reply_q->is_iouring_poll_q = 1;
+               ioc->io_uring_poll_queues[qid].reply_q = reply_q;
+               goto out;
+       }
+
        if (ioc->msix_enable)
                snprintf(reply_q->name, MPT_NAME_LENGTH, "%s%d-msix%d",
                    ioc->driver_name, ioc->id, index);
                kfree(reply_q);
                return -EBUSY;
        }
-
+out:
        INIT_LIST_HEAD(&reply_q->list);
        list_add_tail(&reply_q->list, &ioc->reply_queue_list);
        return 0;
        unsigned int cpu, nr_cpus, nr_msix, index = 0;
        struct adapter_reply_queue *reply_q;
        int local_numa_node;
+       int iopoll_q_count = ioc->reply_queue_count -
+           ioc->iopoll_q_start_index;
 
        if (!_base_is_controller_msix_enabled(ioc))
                return;
                list_for_each_entry(reply_q, &ioc->reply_queue_list, list) {
                        const cpumask_t *mask;
 
-                       if (reply_q->msix_index < ioc->high_iops_queues)
+                       if (reply_q->msix_index < ioc->high_iops_queues ||
+                           reply_q->msix_index >= ioc->iopoll_q_start_index)
                                continue;
 
                        mask = pci_irq_get_affinity(ioc->pdev,
 
 fall_back:
        cpu = cpumask_first(cpu_online_mask);
-       nr_msix -= ioc->high_iops_queues;
+       nr_msix -= (ioc->high_iops_queues - iopoll_q_count);
        index = 0;
 
        list_for_each_entry(reply_q, &ioc->reply_queue_list, list) {
                unsigned int i, group = nr_cpus / nr_msix;
 
-               if (reply_q->msix_index < ioc->high_iops_queues)
+               if (reply_q->msix_index < ioc->high_iops_queues ||
+                   reply_q->msix_index >= ioc->iopoll_q_start_index)
                        continue;
 
                if (cpu >= nr_cpus)
 {
        u16 lnksta, speed;
 
+       /*
+        * Disable high iops queues if io uring poll queues are enabled.
+        */
        if (perf_mode == MPT_PERF_MODE_IOPS ||
-           perf_mode == MPT_PERF_MODE_LATENCY) {
+           perf_mode == MPT_PERF_MODE_LATENCY ||
+           ioc->io_uring_poll_queues) {
                ioc->high_iops_queues = 0;
                return;
        }
                return;
        pci_free_irq_vectors(ioc->pdev);
        ioc->msix_enable = 0;
+       kfree(ioc->io_uring_poll_queues);
 }
 
 /**
        int i, irq_flags = PCI_IRQ_MSIX;
        struct irq_affinity desc = { .pre_vectors = ioc->high_iops_queues };
        struct irq_affinity *descp = &desc;
+       /*
+        * Don't allocate MSI-X vectors for the poll queues.
+        * The MSI-X vector count always stays within the range of reply
+        * queues supported by the firmware.
+        */
+       int nr_msix_vectors = ioc->iopoll_q_start_index;
 
        if (ioc->smp_affinity_enable)
-               irq_flags |= PCI_IRQ_AFFINITY;
+               irq_flags |= PCI_IRQ_AFFINITY | PCI_IRQ_ALL_TYPES;
        else
                descp = NULL;
 
-       ioc_info(ioc, " %d %d\n", ioc->high_iops_queues,
-           ioc->reply_queue_count);
+       ioc_info(ioc, " %d %d %d\n", ioc->high_iops_queues,
+           ioc->reply_queue_count, nr_msix_vectors);
 
        i = pci_alloc_irq_vectors_affinity(ioc->pdev,
            ioc->high_iops_queues,
-           ioc->reply_queue_count, irq_flags, descp);
+           nr_msix_vectors, irq_flags, descp);
 
        return i;
 }
        int r;
        int i, local_max_msix_vectors;
        u8 try_msix = 0;
+       int iopoll_q_count = 0;
 
        ioc->msix_load_balance = false;
 
        ioc_info(ioc, "MSI-X vectors supported: %d\n", ioc->msix_vector_count);
        pr_info("\t no of cores: %d, max_msix_vectors: %d\n",
                ioc->cpu_count, max_msix_vectors);
-       if (ioc->is_aero_ioc)
-               _base_check_and_enable_high_iops_queues(ioc,
-                       ioc->msix_vector_count);
+
        ioc->reply_queue_count =
-               min_t(int, ioc->cpu_count + ioc->high_iops_queues,
-               ioc->msix_vector_count);
+               min_t(int, ioc->cpu_count, ioc->msix_vector_count);
 
        if (!ioc->rdpq_array_enable && max_msix_vectors == -1)
                local_max_msix_vectors = (reset_devices) ? 1 : 8;
        else
                local_max_msix_vectors = max_msix_vectors;
 
-       if (local_max_msix_vectors > 0)
-               ioc->reply_queue_count = min_t(int, local_max_msix_vectors,
-                       ioc->reply_queue_count);
-       else if (local_max_msix_vectors == 0)
+       if (local_max_msix_vectors == 0)
                goto try_ioapic;
 
        /*
        if (ioc->msix_load_balance)
                ioc->smp_affinity_enable = 0;
 
+       if (!ioc->smp_affinity_enable || ioc->reply_queue_count <= 1)
+               ioc->shost->host_tagset = 0;
+
+       /*
+        * Enable io uring poll queues only if host_tagset is enabled.
+        */
+       if (ioc->shost->host_tagset)
+               iopoll_q_count = poll_queues;
+
+       if (iopoll_q_count) {
+               ioc->io_uring_poll_queues = kcalloc(iopoll_q_count,
+                   sizeof(struct io_uring_poll_queue), GFP_KERNEL);
+               if (!ioc->io_uring_poll_queues)
+                       iopoll_q_count = 0;
+       }
+
+       if (ioc->is_aero_ioc)
+               _base_check_and_enable_high_iops_queues(ioc,
+                   ioc->msix_vector_count);
+
+       /*
+        * Add the high iops queue count to the reply queue count if high
+        * iops queues are enabled.
+        */
+       ioc->reply_queue_count = min_t(int,
+           ioc->reply_queue_count + ioc->high_iops_queues,
+           ioc->msix_vector_count);
+
+       /*
+        * Adjust the reply queue count in case it exceeds the
+        * user provided MSIx vectors count.
+        */
+       if (local_max_msix_vectors > 0)
+               ioc->reply_queue_count = min_t(int, local_max_msix_vectors,
+                   ioc->reply_queue_count);
+       /*
+        * Add the io_uring poll queue count to the reply queue count
+        * if io_uring poll queues are enabled in the driver.
+        */
+       if (iopoll_q_count) {
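+               /*
+                * Give up on the poll queues unless at least MPT3_MIN_IRQS
+                * interrupt-driven reply queues can be retained.
+                */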
+               if (ioc->reply_queue_count < (iopoll_q_count + MPT3_MIN_IRQS))
+                       iopoll_q_count = 0;
+               ioc->reply_queue_count = min_t(int,
+                   ioc->reply_queue_count + iopoll_q_count,
+                   ioc->msix_vector_count);
+       }
+
+       /*
+        * Starting index of io uring poll queues in reply queue list.
+        */
+       ioc->iopoll_q_start_index =
+           ioc->reply_queue_count - iopoll_q_count;
+
        r = _base_alloc_irq_vectors(ioc);
        if (r < 0) {
                ioc_info(ioc, "pci_alloc_irq_vectors failed (r=%d) !!!\n", r);
                goto try_ioapic;
        }
 
+       /*
+        * Adjust the reply queue count if the number of allocated
+        * MSIx vectors is less than the requested number of MSIx
+        * vectors.
+        */
+       if (r < ioc->iopoll_q_start_index) {
+               ioc->reply_queue_count = r + iopoll_q_count;
+               ioc->iopoll_q_start_index =
+                   ioc->reply_queue_count - iopoll_q_count;
+       }
+
        ioc->msix_enable = 1;
-       ioc->reply_queue_count = r;
        for (i = 0; i < ioc->reply_queue_count; i++) {
                r = _base_request_irq(ioc, i);
                if (r) {
        ioc->high_iops_queues = 0;
        ioc_info(ioc, "High IOPs queues : disabled\n");
        ioc->reply_queue_count = 1;
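+       /* No io_uring poll queues in the legacy/IO-APIC fallback path. */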
+       ioc->iopoll_q_start_index = ioc->reply_queue_count - 0;
        r = pci_alloc_irq_vectors(ioc->pdev, 1, 1, PCI_IRQ_LEGACY);
        if (r < 0) {
                dfailprintk(ioc,
        u64 pio_chip = 0;
        phys_addr_t chip_phys = 0;
        struct adapter_reply_queue *reply_q;
+       int iopoll_q_count = 0;
 
        dinitprintk(ioc, ioc_info(ioc, "%s\n", __func__));
 
        if (r)
                goto out_fail;
 
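+       /* Reset the per-queue poll state before polling is (re)enabled. */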
+       iopoll_q_count = ioc->reply_queue_count - ioc->iopoll_q_start_index;
+       for (i = 0; i < iopoll_q_count; i++) {
+               atomic_set(&ioc->io_uring_poll_queues[i].busy, 0);
+               atomic_set(&ioc->io_uring_poll_queues[i].pause, 0);
+       }
+
        if (!ioc->is_driver_loading)
                _base_init_irqpolls(ioc);
        /* Use the Combined reply queue feature only for SAS3 C0 & higher
                        * 4)));
        }
 
-       list_for_each_entry(reply_q, &ioc->reply_queue_list, list)
+       list_for_each_entry(reply_q, &ioc->reply_queue_list, list) {
+               if (reply_q->msix_index >= ioc->iopoll_q_start_index) {
+                       pr_info("%s: enabled: index: %d\n",
+                           reply_q->name, reply_q->msix_index);
+                       continue;
+               }
+
                pr_info("%s: %s enabled: IRQ %d\n",
                        reply_q->name,
                        ioc->msix_enable ? "PCI-MSI-X" : "IO-APIC",
                        pci_irq_vector(ioc->pdev, reply_q->msix_index));
+       }
 
        ioc_info(ioc, "iomem(%pap), mapped(0x%p), size(%d)\n",
                 &chip_phys, ioc->chip, memap_sz);
        _base_pre_reset_handler(ioc);
        mpt3sas_wait_for_commands_to_complete(ioc);
        mpt3sas_base_mask_interrupts(ioc);
+       mpt3sas_base_pause_mq_polling(ioc);
        r = _base_make_ioc_ready(ioc, type);
        if (r)
                goto out;
        spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags);
        ioc->ioc_reset_count++;
        mutex_unlock(&ioc->reset_in_progress_mutex);
+       mpt3sas_base_resume_mq_polling(ioc);
 
  out_unlocked:
        if ((r == 0) && is_trigger) {
 
 
        ioc->remove_host = 1;
 
-       if (!pci_device_is_present(pdev))
+       if (!pci_device_is_present(pdev)) {
+               mpt3sas_base_pause_mq_polling(ioc);
                _scsih_flush_running_cmds(ioc);
+       }
 
        _scsih_fw_event_cleanup_queue(ioc);
 
 
        ioc->remove_host = 1;
 
-       if (!pci_device_is_present(pdev))
+       if (!pci_device_is_present(pdev)) {
+               mpt3sas_base_pause_mq_polling(ioc);
                _scsih_flush_running_cmds(ioc);
+       }
 
        _scsih_fw_event_cleanup_queue(ioc);
 
 {
        struct MPT3SAS_ADAPTER *ioc =
            (struct MPT3SAS_ADAPTER *)shost->hostdata;
+       struct blk_mq_queue_map *map;
+       int i, qoff, offset;
+       int nr_msix_vectors = ioc->iopoll_q_start_index;
+       int iopoll_q_count = ioc->reply_queue_count - nr_msix_vectors;
 
-       if (ioc->shost->nr_hw_queues == 1)
+       if (shost->nr_hw_queues == 1)
                return 0;
 
-       return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
-           ioc->pdev, ioc->high_iops_queues);
+       for (i = 0, qoff = 0; i < shost->nr_maps; i++) {
+               map = &shost->tag_set.map[i];
+               map->nr_queues = 0;
+               offset = 0;
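+               /* Regular queues follow the high iops queues in MSI-X order. */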
+               if (i == HCTX_TYPE_DEFAULT) {
+                       map->nr_queues =
+                           nr_msix_vectors - ioc->high_iops_queues;
+                       offset = ioc->high_iops_queues;
+               } else if (i == HCTX_TYPE_POLL)
+                       map->nr_queues = iopoll_q_count;
+
+               if (!map->nr_queues)
+                       BUG_ON(i == HCTX_TYPE_DEFAULT);
+
+               /*
+                * The poll queues don't have an IRQ (and hence no IRQ
+                * affinity), so use the regular blk-mq cpu mapping.
+                */
+               map->queue_offset = qoff;
+               if (i != HCTX_TYPE_POLL)
+                       blk_mq_pci_map_queues(map, ioc->pdev, offset);
+               else
+                       blk_mq_map_queues(map);
+
+               qoff += map->nr_queues;
+       }
+       return 0;
 }
 
 /* shost template for SAS 2.0 HBA devices */
        .track_queue_depth              = 1,
        .cmd_size                       = sizeof(struct scsiio_tracker),
        .map_queues                     = scsih_map_queues,
+       .mq_poll                        = mpt3sas_blk_mq_poll,
 };
 
 /* raid transport support for SAS 3.0 HBA devices */
        struct Scsi_Host *shost = NULL;
        int rv;
        u16 hba_mpi_version;
+       int iopoll_q_count = 0;
 
        /* Determine in which MPI version class this pci device belongs */
        hba_mpi_version = _scsih_determine_hba_mpi_version(pdev);
                goto out_thread_fail;
        }
 
+       shost->host_tagset = 0;
+
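+       /*
+        * Use host_tagset only on gen35 controllers and only when the
+        * host_tagset_enable module parameter is set.
+        */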
+       if (ioc->is_gen35_ioc && host_tagset_enable)
+               shost->host_tagset = 1;
+
        ioc->is_driver_loading = 1;
        if ((mpt3sas_base_attach(ioc))) {
                ioc_err(ioc, "failure at %s:%d/%s()!\n",
        } else
                ioc->hide_drives = 0;
 
-       shost->host_tagset = 0;
        shost->nr_hw_queues = 1;
 
-       if (ioc->is_gen35_ioc && ioc->reply_queue_count > 1 &&
-           host_tagset_enable && ioc->smp_affinity_enable) {
-
-               shost->host_tagset = 1;
+       if (shost->host_tagset) {
                shost->nr_hw_queues =
                    ioc->reply_queue_count - ioc->high_iops_queues;
 
+               iopoll_q_count =
+                   ioc->reply_queue_count - ioc->iopoll_q_start_index;
+
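+               /*
+                * With poll queues, three maps (default, read, poll) are
+                * needed; otherwise only the default map is used.
+                */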
+               shost->nr_maps = iopoll_q_count ? 3 : 1;
+
                dev_info(&ioc->pdev->dev,
                    "Max SCSIIO MPT commands: %d shared with nr_hw_queues = %d\n",
                    shost->can_queue, shost->nr_hw_queues);
                /* Permanent error, prepare for device removal */
                ioc->pci_error_recovery = 1;
                mpt3sas_base_stop_watchdog(ioc);
+               mpt3sas_base_pause_mq_polling(ioc);
                _scsih_flush_running_cmds(ioc);
                return PCI_ERS_RESULT_DISCONNECT;
        }