static int megasas_get_ld_vf_affiliation(struct megasas_instance *instance,
                                         int initial);
 static int
-megasas_set_dma_mask(struct pci_dev *pdev);
+megasas_set_dma_mask(struct megasas_instance *instance);
 static int
 megasas_alloc_ctrl_mem(struct megasas_instance *instance);
 static inline void
 megasas_init_ctrl_params(struct megasas_instance *instance);
 
+/**
+ * megasas_set_dma_settings -  Populate DMA address, length and flags for DCMDs
+ * @instance:                  Adapter soft state
+ * @dcmd:                      DCMD frame inside MFI command
+ * @dma_addr:                  DMA address of buffer to be passed to FW
+ * @dma_len:                   Length of DMA buffer to be passed to FW
+ * Return:                     void
+ */
+void megasas_set_dma_settings(struct megasas_instance *instance,
+                             struct megasas_dcmd_frame *dcmd,
+                             dma_addr_t dma_addr, u32 dma_len)
+{
+       if (instance->consistent_mask_64bit) {
+               dcmd->sgl.sge64[0].phys_addr = cpu_to_le64(dma_addr);
+               dcmd->sgl.sge64[0].length = cpu_to_le32(dma_len);
+               dcmd->flags = cpu_to_le16(dcmd->flags | MFI_FRAME_SGL64);
+
+       } else {
+               dcmd->sgl.sge32[0].phys_addr =
+                               cpu_to_le32(lower_32_bits(dma_addr));
+               dcmd->sgl.sge32[0].length = cpu_to_le32(dma_len);
+               dcmd->flags = cpu_to_le16(dcmd->flags);
+       }
+}
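+
+/*
+ * Note on the convention above: callers now assign dcmd->flags in CPU byte
+ * order and rely on this helper for the single cpu_to_le16() conversion,
+ * which also ORs in MFI_FRAME_SGL64 when the coherent mask is 64-bit. That
+ * is why the cpu_to_le16() wrappers disappear from the DCMD call sites
+ * below. Illustrative call pattern (a sketch only; dummy_buf_h and buf_len
+ * are hypothetical names):
+ *
+ *     dcmd->sge_count = 1;
+ *     dcmd->flags = MFI_FRAME_DIR_READ;       (CPU byte order here)
+ *     dcmd->data_xfer_len = cpu_to_le32(buf_len);
+ *     megasas_set_dma_settings(instance, dcmd, dummy_buf_h, buf_len);
+ */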
+
 void
 megasas_issue_dcmd(struct megasas_instance *instance, struct megasas_cmd *cmd)
 {
        dcmd->pad_0 = 0;
        dcmd->data_xfer_len = cpu_to_le32(sizeof(struct MR_CTRL_HB_HOST_MEM));
        dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_SHARED_HOST_MEM_ALLOC);
-       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(instance->hb_host_mem_h);
-       dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct MR_CTRL_HB_HOST_MEM));
+
+       megasas_set_dma_settings(instance, dcmd, instance->hb_host_mem_h,
+                                sizeof(struct MR_CTRL_HB_HOST_MEM));
 
        dev_warn(&instance->pdev->dev, "SR-IOV: Starting heartbeat for scsi%d\n",
               instance->host->host_no);
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0xFF;
        dcmd->sge_count = 1;
-       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
+       dcmd->flags = MFI_FRAME_DIR_READ;
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
        dcmd->data_xfer_len = cpu_to_le32(sizeof(struct MR_PD_INFO));
        dcmd->opcode = cpu_to_le32(MR_DCMD_PD_GET_INFO);
-       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(instance->pd_info_h);
-       dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct MR_PD_INFO));
+
+       megasas_set_dma_settings(instance, dcmd, instance->pd_info_h,
+                                sizeof(struct MR_PD_INFO));
 
        if ((instance->adapter_type != MFI_SERIES) &&
            !instance->mask_interrupts)
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = MFI_STAT_INVALID_STATUS;
        dcmd->sge_count = 1;
-       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
+       dcmd->flags = MFI_FRAME_DIR_READ;
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
        dcmd->data_xfer_len = cpu_to_le32(MEGASAS_MAX_PD * sizeof(struct MR_PD_LIST));
        dcmd->opcode = cpu_to_le32(MR_DCMD_PD_LIST_QUERY);
-       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h);
-       dcmd->sgl.sge32[0].length = cpu_to_le32(MEGASAS_MAX_PD * sizeof(struct MR_PD_LIST));
+
+       megasas_set_dma_settings(instance, dcmd, instance->pd_list_buf_h,
+                                (MEGASAS_MAX_PD * sizeof(struct MR_PD_LIST)));
 
        if ((instance->adapter_type != MFI_SERIES) &&
            !instance->mask_interrupts)
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = MFI_STAT_INVALID_STATUS;
        dcmd->sge_count = 1;
-       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
+       dcmd->flags = MFI_FRAME_DIR_READ;
        dcmd->timeout = 0;
        dcmd->data_xfer_len = cpu_to_le32(sizeof(struct MR_LD_LIST));
        dcmd->opcode = cpu_to_le32(MR_DCMD_LD_GET_LIST);
-       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h);
-       dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct MR_LD_LIST));
        dcmd->pad_0  = 0;
 
+       megasas_set_dma_settings(instance, dcmd, ci_h,
+                                sizeof(struct MR_LD_LIST));
+
        if ((instance->adapter_type != MFI_SERIES) &&
            !instance->mask_interrupts)
                ret = megasas_issue_blocked_cmd(instance, cmd,
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = MFI_STAT_INVALID_STATUS;
        dcmd->sge_count = 1;
-       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
+       dcmd->flags = MFI_FRAME_DIR_READ;
        dcmd->timeout = 0;
        dcmd->data_xfer_len = cpu_to_le32(sizeof(struct MR_LD_TARGETID_LIST));
        dcmd->opcode = cpu_to_le32(MR_DCMD_LD_LIST_QUERY);
-       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h);
-       dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct MR_LD_TARGETID_LIST));
        dcmd->pad_0  = 0;
 
+       megasas_set_dma_settings(instance, dcmd, ci_h,
+                                sizeof(struct MR_LD_TARGETID_LIST));
+
        if ((instance->adapter_type != MFI_SERIES) &&
            !instance->mask_interrupts)
                ret = megasas_issue_blocked_cmd(instance, cmd, MFI_IO_TIMEOUT_SECS);
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = MFI_STAT_INVALID_STATUS;
        dcmd->sge_count = 1;
-       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
+       dcmd->flags = MFI_FRAME_DIR_READ;
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
        dcmd->data_xfer_len = cpu_to_le32(sizeof(struct megasas_ctrl_info));
        dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_GET_INFO);
-       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h);
-       dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct megasas_ctrl_info));
        dcmd->mbox.b[0] = 1;
 
+       megasas_set_dma_settings(instance, dcmd, ci_h,
+                                sizeof(struct megasas_ctrl_info));
+
        if ((instance->adapter_type != MFI_SERIES) &&
            !instance->mask_interrupts)
                ret = megasas_issue_blocked_cmd(instance, cmd, MFI_IO_TIMEOUT_SECS);
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = MFI_STAT_INVALID_STATUS;
        dcmd->sge_count = 1;
-       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_NONE);
+       dcmd->flags = MFI_FRAME_DIR_NONE;
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
        dcmd->data_xfer_len = cpu_to_le32(CRASH_DMA_BUF_SIZE);
        dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_SET_CRASH_DUMP_PARAMS);
-       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(instance->crash_dump_h);
-       dcmd->sgl.sge32[0].length = cpu_to_le32(CRASH_DMA_BUF_SIZE);
+
+       megasas_set_dma_settings(instance, dcmd, instance->crash_dump_h,
+                                CRASH_DMA_BUF_SIZE);
 
        if ((instance->adapter_type != MFI_SERIES) &&
            !instance->mask_interrupts)
 
        megasas_init_ctrl_params(instance);
 
-       if (megasas_set_dma_mask(instance->pdev))
+       if (megasas_set_dma_mask(instance))
                goto fail_ready_state;
 
        if (megasas_alloc_ctrl_mem(instance))
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0x0;
        dcmd->sge_count = 1;
-       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
+       dcmd->flags = MFI_FRAME_DIR_READ;
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
        dcmd->data_xfer_len = cpu_to_le32(sizeof(struct megasas_evt_log_info));
        dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_EVENT_GET_INFO);
-       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(el_info_h);
-       dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct megasas_evt_log_info));
+
+       megasas_set_dma_settings(instance, dcmd, el_info_h,
+                                sizeof(struct megasas_evt_log_info));
 
        if (megasas_issue_blocked_cmd(instance, cmd, MFI_IO_TIMEOUT_SECS) ==
                DCMD_SUCCESS) {
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0x0;
        dcmd->sge_count = 1;
-       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
+       dcmd->flags = MFI_FRAME_DIR_READ;
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
        dcmd->data_xfer_len = cpu_to_le32(sizeof(struct megasas_evt_detail));
        dcmd->mbox.w[0] = cpu_to_le32(seq_num);
        instance->last_seq_num = seq_num;
        dcmd->mbox.w[1] = cpu_to_le32(curr_aen.word);
-       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(instance->evt_detail_h);
-       dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct megasas_evt_detail));
+
+       megasas_set_dma_settings(instance, dcmd, instance->evt_detail_h,
+                                sizeof(struct megasas_evt_detail));
 
        if (instance->aen_cmd != NULL) {
                megasas_return_cmd(instance, cmd);
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0xFF;
        dcmd->sge_count = 1;
-       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
+       dcmd->flags = MFI_FRAME_DIR_READ;
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
        dcmd->data_xfer_len =
                cpu_to_le32(sizeof(struct MR_TARGET_PROPERTIES));
        dcmd->opcode = cpu_to_le32(MR_DCMD_DRV_GET_TARGET_PROP);
-       dcmd->sgl.sge32[0].phys_addr =
-               cpu_to_le32(instance->tgt_prop_h);
-       dcmd->sgl.sge32[0].length =
-               cpu_to_le32(sizeof(struct MR_TARGET_PROPERTIES));
+
+       megasas_set_dma_settings(instance, dcmd, instance->tgt_prop_h,
+                                sizeof(struct MR_TARGET_PROPERTIES));
 
        if ((instance->adapter_type != MFI_SERIES) &&
            !instance->mask_interrupts)
        return 0;
 }
 
+/**
+ * megasas_set_dma_mask -      Set DMA mask for supported controllers
+ * @instance:          Adapter soft state
+ *
+ * Description:
+ *
+ * For Ventura, driver/FW will operate with 64-bit DMA addresses.
+ *
+ * For Invader:
+ *     By default, driver/FW will operate with 32-bit DMA addresses
+ *     for consistent DMA mapping, but if the 32-bit consistent
+ *     DMA mask fails, the driver will try a 64-bit consistent
+ *     mask, provided the FW is truly 64-bit DMA capable.
+ *
+ * For older controllers (Thunderbolt and MFI based adapters):
+ *     driver/FW will operate with 32-bit consistent DMA addresses.
+ */
 static int
-megasas_set_dma_mask(struct pci_dev *pdev)
+megasas_set_dma_mask(struct megasas_instance *instance)
 {
-       /*
-        * All our controllers are capable of performing 64-bit DMA
-        */
+       u64 consistent_mask;
+       struct pci_dev *pdev;
+       u32 scratch_pad_2;
+
+       pdev = instance->pdev;
+       consistent_mask = (instance->adapter_type == VENTURA_SERIES) ?
+                               DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
+
        if (IS_DMA64) {
-               if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
+               if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) &&
+                   dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
+                       goto fail_set_dma_mask;
+
+               if ((*pdev->dev.dma_mask == DMA_BIT_MASK(64)) &&
+                   (dma_set_coherent_mask(&pdev->dev, consistent_mask) &&
+                    dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))) {
+                       /*
+                        * If 32 bit DMA mask fails, then try for 64 bit mask
+                        * for FW capable of handling 64 bit DMA.
+                        */
+                       scratch_pad_2 =
+                               readl(&instance->reg_set->outbound_scratch_pad_2);
 
-                       if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)
+                       if (!(scratch_pad_2 & MR_CAN_HANDLE_64_BIT_DMA_OFFSET))
+                               goto fail_set_dma_mask;
+                       else if (dma_set_mask_and_coherent(&pdev->dev,
+                                                          DMA_BIT_MASK(64)))
                                goto fail_set_dma_mask;
                }
-       } else {
-               if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)
-                       goto fail_set_dma_mask;
-       }
-       /*
-        * Ensure that all data structures are allocated in 32-bit
-        * memory.
-        */
-       if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
-               /* Try 32bit DMA mask and 32 bit Consistent dma mask */
-               if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))
-                       && !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)))
-                       dev_info(&pdev->dev, "set 32bit DMA mask"
-                               "and 32 bit consistent mask\n");
-               else
-                       goto fail_set_dma_mask;
-       }
+       } else if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
+               goto fail_set_dma_mask;
+
+       if (pdev->dev.coherent_dma_mask == DMA_BIT_MASK(32))
+               instance->consistent_mask_64bit = false;
+       else
+               instance->consistent_mask_64bit = true;
+
+       dev_info(&pdev->dev, "%s bit DMA mask and %s bit consistent mask\n",
+                ((*pdev->dev.dma_mask == DMA_BIT_MASK(64)) ? "64" : "32"),
+                (instance->consistent_mask_64bit ? "64" : "32"));
 
        return 0;
 
 fail_set_dma_mask:
-       return 1;
+       dev_err(&pdev->dev, "Failed to set DMA mask\n");
+       return -1;
 }
 
 /*
 
        pci_set_master(pdev);
 
-       if (megasas_set_dma_mask(pdev))
+       /*
+        * We expect the FW state to be READY
+        */
+       if (megasas_transition_to_ready(instance, 0))
+               goto fail_ready_state;
+
+       if (megasas_set_dma_mask(instance))
                goto fail_set_dma_mask;
 
        /*
 
        atomic_set(&instance->fw_outstanding, 0);
 
-       /*
-        * We expect the FW state to be READY
-        */
-       if (megasas_transition_to_ready(instance, 0))
-               goto fail_ready_state;
-
        /* Now re-enable MSI-X */
        if (instance->msix_vectors) {
                irq_flags = PCI_IRQ_MSIX;
        megasas_free_ctrl_mem(instance);
        scsi_host_put(host);
 
+fail_reenable_msix:
 fail_set_dma_mask:
 fail_ready_state:
-fail_reenable_msix:
 
        pci_disable_device(pdev);
 
                      struct megasas_iocpacket __user * user_ioc,
                      struct megasas_iocpacket *ioc)
 {
-       struct megasas_sge32 *kern_sge32;
+       struct megasas_sge64 *kern_sge64 = NULL;
+       struct megasas_sge32 *kern_sge32 = NULL;
        struct megasas_cmd *cmd;
        void *kbuff_arr[MAX_IOCTL_SGE];
        dma_addr_t buf_handle = 0;
        memcpy(cmd->frame, ioc->frame.raw, 2 * MEGAMFI_FRAME_SIZE);
        cmd->frame->hdr.context = cpu_to_le32(cmd->index);
        cmd->frame->hdr.pad_0 = 0;
-       cmd->frame->hdr.flags &= cpu_to_le16(~(MFI_FRAME_IEEE |
-                                              MFI_FRAME_SGL64 |
+
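+       /* Override the SGL/sense addressing flags supplied by user space so
+        * that they match the driver's current coherent DMA mask.
+        */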
+       cmd->frame->hdr.flags &= cpu_to_le16(~MFI_FRAME_IEEE);
+
+       if (instance->consistent_mask_64bit)
+               cmd->frame->hdr.flags |= cpu_to_le16((MFI_FRAME_SGL64 |
+                                      MFI_FRAME_SENSE64));
+       else
+               cmd->frame->hdr.flags &= cpu_to_le16(~(MFI_FRAME_SGL64 |
                                               MFI_FRAME_SENSE64));
 
        if (cmd->frame->hdr.cmd == MFI_CMD_DCMD)
         * kernel buffers in SGLs. The location of SGL is embedded in the
         * struct iocpacket itself.
         */
-       kern_sge32 = (struct megasas_sge32 *)
-           ((unsigned long)cmd->frame + ioc->sgl_off);
+       if (instance->consistent_mask_64bit)
+               kern_sge64 = (struct megasas_sge64 *)
+                       ((unsigned long)cmd->frame + ioc->sgl_off);
+       else
+               kern_sge32 = (struct megasas_sge32 *)
+                       ((unsigned long)cmd->frame + ioc->sgl_off);
 
        /*
         * For each user buffer, create a mirror buffer and copy in
-                * We don't change the dma_coherent_mask, so
-                * pci_alloc_consistent only returns 32bit addresses
+                * With a 64-bit coherent DMA mask the mirror buffer may be
+                * allocated above 4GB, hence the SGE64 variant below.
                 */
-               kern_sge32[i].phys_addr = cpu_to_le32(buf_handle);
-               kern_sge32[i].length = cpu_to_le32(ioc->sgl[i].iov_len);
+               if (instance->consistent_mask_64bit) {
+                       kern_sge64[i].phys_addr = cpu_to_le64(buf_handle);
+                       kern_sge64[i].length = cpu_to_le32(ioc->sgl[i].iov_len);
+               } else {
+                       kern_sge32[i].phys_addr = cpu_to_le32(buf_handle);
+                       kern_sge32[i].length = cpu_to_le32(ioc->sgl[i].iov_len);
+               }
 
                /*
                 * We created a kernel buffer corresponding to the
 
                sense_ptr =
                (unsigned long *) ((unsigned long)cmd->frame + ioc->sense_off);
-               *sense_ptr = cpu_to_le32(sense_handle);
+               if (instance->consistent_mask_64bit)
+                       *sense_ptr = cpu_to_le64(sense_handle);
+               else
+                       *sense_ptr = cpu_to_le32(sense_handle);
        }
 
        /*
 
        for (i = 0; i < ioc->sge_count; i++) {
                if (kbuff_arr[i]) {
-                       dma_free_coherent(&instance->pdev->dev,
-                                         le32_to_cpu(kern_sge32[i].length),
-                                         kbuff_arr[i],
-                                         le32_to_cpu(kern_sge32[i].phys_addr));
+                       if (instance->consistent_mask_64bit)
+                               dma_free_coherent(&instance->pdev->dev,
+                                       le32_to_cpu(kern_sge64[i].length),
+                                       kbuff_arr[i],
+                                       le64_to_cpu(kern_sge64[i].phys_addr));
+                       else
+                               dma_free_coherent(&instance->pdev->dev,
+                                       le32_to_cpu(kern_sge32[i].length),
+                                       kbuff_arr[i],
+                                       le32_to_cpu(kern_sge32[i].phys_addr));
                        kbuff_arr[i] = NULL;
                }
        }
 
 static inline
 void megasas_configure_queue_sizes(struct megasas_instance *instance);
 
+/**
+ * megasas_check_same_4gb_region -     check whether the allocation
+ *                                     crosses a 4GB boundary or not
+ * @instance:                          adapter's soft instance
+ * @start_addr:                        start address of DMA allocation
+ * @size:                              size of allocation in bytes
+ *
+ * Return:                             true  - allocation does not cross a
+ *                                             4GB boundary
+ *                                     false - allocation crosses a
+ *                                             4GB boundary
+ */
+static inline bool megasas_check_same_4gb_region
+       (struct megasas_instance *instance, dma_addr_t start_addr, size_t size)
+{
+       dma_addr_t end_addr;
+
+       end_addr = start_addr + size;
+
+       if (upper_32_bits(start_addr) != upper_32_bits(end_addr)) {
+               dev_err(&instance->pdev->dev,
+                       "Failed to get same 4GB boundary: start_addr: 0x%llx end_addr: 0x%llx\n",
+                       (unsigned long long)start_addr,
+                       (unsigned long long)end_addr);
+               return false;
+       }
+
+       return true;
+}
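+
+/*
+ * Usage pattern (a sketch, not code from this patch; pool and variable
+ * names are hypothetical): allocate from a pool, run the check, and on
+ * failure re-create the pool with roundup_pow_of_two(size) alignment and
+ * allocate again. A buffer of size sz aligned to a power of two >= sz
+ * cannot cross any power-of-two boundary >= sz, hence never a 4GB
+ * boundary for sz <= 4GB:
+ *
+ *     buf = dma_pool_alloc(pool, GFP_KERNEL, &buf_h);
+ *     if (buf && !megasas_check_same_4gb_region(instance, buf_h, sz)) {
+ *             dma_pool_free(pool, buf, buf_h);
+ *             dma_pool_destroy(pool);
+ *             pool = dma_pool_create("mr_align", &instance->pdev->dev,
+ *                                    sz, roundup_pow_of_two(sz), 0);
+ *             buf = dma_pool_alloc(pool, GFP_KERNEL, &buf_h);
+ *     }
+ */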
 
 /**
  * megasas_enable_intr_fusion -        Enables interrupts
        struct fusion_context *fusion = instance->ctrl_context;
        struct megasas_cmd_fusion *cmd;
 
-       /* SG, Sense */
-       for (i = 0; i < instance->max_mpt_cmds; i++) {
-               cmd = fusion->cmd_list[i];
-               if (cmd) {
-                       if (cmd->sg_frame)
-                               dma_pool_free(fusion->sg_dma_pool, cmd->sg_frame,
-                                     cmd->sg_frame_phys_addr);
-                       if (cmd->sense)
-                               dma_pool_free(fusion->sense_dma_pool, cmd->sense,
-                                     cmd->sense_phys_addr);
+       if (fusion->sense)
+               dma_pool_free(fusion->sense_dma_pool, fusion->sense,
+                             fusion->sense_phys_addr);
+
+       /* SG */
+       if (fusion->cmd_list) {
+               for (i = 0; i < instance->max_mpt_cmds; i++) {
+                       cmd = fusion->cmd_list[i];
+                       if (cmd) {
+                               if (cmd->sg_frame)
+                                       dma_pool_free(fusion->sg_dma_pool,
+                                                     cmd->sg_frame,
+                                                     cmd->sg_frame_phys_addr);
+                       }
+                       kfree(cmd);
                }
+               kfree(fusion->cmd_list);
        }
 
        if (fusion->sg_dma_pool) {
                dma_pool_destroy(fusion->io_request_frames_pool);
                fusion->io_request_frames_pool = NULL;
        }
-
-
-       /* cmd_list */
-       for (i = 0; i < instance->max_mpt_cmds; i++)
-               kfree(fusion->cmd_list[i]);
-
-       kfree(fusion->cmd_list);
 }
 
 /**
        u16 max_cmd;
        struct fusion_context *fusion;
        struct megasas_cmd_fusion *cmd;
+       int sense_sz;
+       u32 offset;
 
        fusion = instance->ctrl_context;
        max_cmd = instance->max_fw_cmds;
-
+       sense_sz = instance->max_mpt_cmds * SCSI_SENSE_BUFFERSIZE;
 
        fusion->sg_dma_pool =
                        dma_pool_create("mr_sg", &instance->pdev->dev,
        /* SCSI_SENSE_BUFFERSIZE  = 96 bytes */
        fusion->sense_dma_pool =
                        dma_pool_create("mr_sense", &instance->pdev->dev,
-                               SCSI_SENSE_BUFFERSIZE, 64, 0);
+                               sense_sz, 64, 0);
 
        if (!fusion->sense_dma_pool || !fusion->sg_dma_pool) {
                dev_err(&instance->pdev->dev,
                return -ENOMEM;
        }
 
+       fusion->sense = dma_pool_alloc(fusion->sense_dma_pool,
+                                      GFP_KERNEL, &fusion->sense_phys_addr);
+       if (!fusion->sense) {
+               dev_err(&instance->pdev->dev,
+                       "failed from %s %d\n",  __func__, __LINE__);
+               return -ENOMEM;
+       }
+
+       /* The sense buffer, request frame and reply descriptor pools are
+        * required to be in the same 4GB region. The function below checks
+        * this. In case of failure, a new pci pool will be created with
+        * updated alignment, and the older allocation and pool will be
+        * destroyed. The alignment is chosen such that the next allocation,
+        * if successful, will always meet the same-4GB-region requirement:
+        * a buffer aligned to a power of two not smaller than its size
+        * cannot cross a 4GB boundary. The actual requirement is not
+        * alignment as such, but that the start and end of the DMA buffer
+        * have the same upper 32-bit address.
+        */
+
+       if (!megasas_check_same_4gb_region(instance, fusion->sense_phys_addr,
+                                          sense_sz)) {
+               dma_pool_free(fusion->sense_dma_pool, fusion->sense,
+                             fusion->sense_phys_addr);
+               fusion->sense = NULL;
+               dma_pool_destroy(fusion->sense_dma_pool);
+
+               fusion->sense_dma_pool =
+                       dma_pool_create("mr_sense_align", &instance->pdev->dev,
+                                       sense_sz, roundup_pow_of_two(sense_sz),
+                                       0);
+               if (!fusion->sense_dma_pool) {
+                       dev_err(&instance->pdev->dev,
+                               "Failed from %s %d\n",  __func__, __LINE__);
+                       return -ENOMEM;
+               }
+               fusion->sense = dma_pool_alloc(fusion->sense_dma_pool,
+                                              GFP_KERNEL,
+                                              &fusion->sense_phys_addr);
+               if (!fusion->sense) {
+                       dev_err(&instance->pdev->dev,
+                               "failed from %s %d\n",  __func__, __LINE__);
+                       return -ENOMEM;
+               }
+       }
+
        /*
         * Allocate and attach a frame to each of the commands in cmd_list
         */
                cmd->sg_frame = dma_pool_alloc(fusion->sg_dma_pool,
                                        GFP_KERNEL, &cmd->sg_frame_phys_addr);
 
-               cmd->sense = dma_pool_alloc(fusion->sense_dma_pool,
-                                       GFP_KERNEL, &cmd->sense_phys_addr);
-               if (!cmd->sg_frame || !cmd->sense) {
+               offset = SCSI_SENSE_BUFFERSIZE * i;
+               cmd->sense = (u8 *)fusion->sense + offset;
+               cmd->sense_phys_addr = fusion->sense_phys_addr + offset;
+
+               if (!cmd->sg_frame) {
                        dev_err(&instance->pdev->dev,
                                "Failed from %s %d\n",  __func__, __LINE__);
                        return -ENOMEM;
        /* create sense buffer for the raid 1/10 fp */
        for (i = max_cmd; i < instance->max_mpt_cmds; i++) {
                cmd = fusion->cmd_list[i];
-               cmd->sense = dma_pool_alloc(fusion->sense_dma_pool,
-                       GFP_KERNEL, &cmd->sense_phys_addr);
-               if (!cmd->sense) {
-                       dev_err(&instance->pdev->dev,
-                               "Failed from %s %d\n",  __func__, __LINE__);
-                       return -ENOMEM;
-               }
+               offset = SCSI_SENSE_BUFFERSIZE * i;
+               cmd->sense = (u8 *)fusion->sense + offset;
+               cmd->sense_phys_addr = fusion->sense_phys_addr + offset;
        }
 
        return 0;
                }
        }
 
+       if (!megasas_check_same_4gb_region(instance,
+                                          fusion->io_request_frames_phys,
+                                          fusion->io_frames_alloc_sz)) {
+               dma_pool_free(fusion->io_request_frames_pool,
+                             fusion->io_request_frames,
+                             fusion->io_request_frames_phys);
+               fusion->io_request_frames = NULL;
+               dma_pool_destroy(fusion->io_request_frames_pool);
+
+               fusion->io_request_frames_pool =
+                       dma_pool_create("mr_ioreq_align",
+                                       &instance->pdev->dev,
+                                       fusion->io_frames_alloc_sz,
+                                       roundup_pow_of_two(fusion->io_frames_alloc_sz),
+                                       0);
+
+               if (!fusion->io_request_frames_pool) {
+                       dev_err(&instance->pdev->dev,
+                               "Failed from %s %d\n",  __func__, __LINE__);
+                       return -ENOMEM;
+               }
+
+               fusion->io_request_frames =
+                       dma_pool_alloc(fusion->io_request_frames_pool,
+                                      GFP_KERNEL,
+                                      &fusion->io_request_frames_phys);
+
+               if (!fusion->io_request_frames) {
+                       dev_err(&instance->pdev->dev,
+                               "Failed from %s %d\n",  __func__, __LINE__);
+                       return -ENOMEM;
+               }
+       }
+
        fusion->req_frames_desc =
                dma_alloc_coherent(&instance->pdev->dev,
                                   fusion->request_alloc_sz,
                        "Failed from %s %d\n",  __func__, __LINE__);
                return -ENOMEM;
        }
+
+       if (!megasas_check_same_4gb_region(instance,
+                                          fusion->reply_frames_desc_phys[0],
+                                          (fusion->reply_alloc_sz * count))) {
+               dma_pool_free(fusion->reply_frames_desc_pool,
+                             fusion->reply_frames_desc[0],
+                             fusion->reply_frames_desc_phys[0]);
+               fusion->reply_frames_desc[0] = NULL;
+               dma_pool_destroy(fusion->reply_frames_desc_pool);
+
+               fusion->reply_frames_desc_pool =
+                       dma_pool_create("mr_reply_align",
+                                       &instance->pdev->dev,
+                                       fusion->reply_alloc_sz * count,
+                                       roundup_pow_of_two(fusion->reply_alloc_sz * count),
+                                       0);
+
+               if (!fusion->reply_frames_desc_pool) {
+                       dev_err(&instance->pdev->dev,
+                               "Failed from %s %d\n",  __func__, __LINE__);
+                       return -ENOMEM;
+               }
+
+               fusion->reply_frames_desc[0] =
+                       dma_pool_alloc(fusion->reply_frames_desc_pool,
+                                      GFP_KERNEL,
+                                      &fusion->reply_frames_desc_phys[0]);
+
+               if (!fusion->reply_frames_desc[0]) {
+                       dev_err(&instance->pdev->dev,
+                               "Failed from %s %d\n",  __func__, __LINE__);
+                       return -ENOMEM;
+               }
+       }
+
        reply_desc = fusion->reply_frames_desc[0];
        for (i = 0; i < fusion->reply_q_depth * count; i++, reply_desc++)
                reply_desc->Words = cpu_to_le64(ULLONG_MAX);
 int
 megasas_alloc_rdpq_fusion(struct megasas_instance *instance)
 {
-       int i, j, count;
+       int i, j, k, msix_count;
        struct fusion_context *fusion;
        union MPI2_REPLY_DESCRIPTORS_UNION *reply_desc;
+       union MPI2_REPLY_DESCRIPTORS_UNION *rdpq_chunk_virt[RDPQ_MAX_CHUNK_COUNT];
+       dma_addr_t rdpq_chunk_phys[RDPQ_MAX_CHUNK_COUNT];
+       u8 dma_alloc_count, abs_index;
+       u32 chunk_size, array_size, offset;
 
        fusion = instance->ctrl_context;
+       chunk_size = fusion->reply_alloc_sz * RDPQ_MAX_INDEX_IN_ONE_CHUNK;
+       array_size = sizeof(struct MPI2_IOC_INIT_RDPQ_ARRAY_ENTRY) *
+                    MAX_MSIX_QUEUES_FUSION;
 
-       fusion->rdpq_virt = pci_alloc_consistent(instance->pdev,
-                               sizeof(struct MPI2_IOC_INIT_RDPQ_ARRAY_ENTRY) * MAX_MSIX_QUEUES_FUSION,
-                               &fusion->rdpq_phys);
+       fusion->rdpq_virt = pci_alloc_consistent(instance->pdev, array_size,
+                                                &fusion->rdpq_phys);
        if (!fusion->rdpq_virt) {
                dev_err(&instance->pdev->dev,
                        "Failed from %s %d\n",  __func__, __LINE__);
                return -ENOMEM;
        }
 
-       memset(fusion->rdpq_virt, 0,
-                       sizeof(struct MPI2_IOC_INIT_RDPQ_ARRAY_ENTRY) * MAX_MSIX_QUEUES_FUSION);
-       count = instance->msix_vectors > 0 ? instance->msix_vectors : 1;
+       memset(fusion->rdpq_virt, 0, array_size);
+       msix_count = instance->msix_vectors > 0 ? instance->msix_vectors : 1;
+
        fusion->reply_frames_desc_pool = dma_pool_create("mr_rdpq",
                                                         &instance->pdev->dev,
-                                                        fusion->reply_alloc_sz,
-                                                        16, 0);
-
-       if (!fusion->reply_frames_desc_pool) {
+                                                        chunk_size, 16, 0);
+       fusion->reply_frames_desc_pool_align =
+                               dma_pool_create("mr_rdpq_align",
+                                               &instance->pdev->dev,
+                                               chunk_size,
+                                               roundup_pow_of_two(chunk_size),
+                                               0);
+
+       if (!fusion->reply_frames_desc_pool ||
+           !fusion->reply_frames_desc_pool_align) {
                dev_err(&instance->pdev->dev,
                        "Failed from %s %d\n",  __func__, __LINE__);
                return -ENOMEM;
        }
 
-       for (i = 0; i < count; i++) {
-               fusion->reply_frames_desc[i] =
-                               dma_pool_alloc(fusion->reply_frames_desc_pool,
-                                       GFP_KERNEL, &fusion->reply_frames_desc_phys[i]);
-               if (!fusion->reply_frames_desc[i]) {
+/*
+ * For INVADER_SERIES each set of 8 reply queues (0-7, 8-15, ..) and for
+ * VENTURA_SERIES each set of 16 reply queues (0-15, 16-31, ..) should be
+ * within a 4GB boundary, and the reply queues in a set must also have the
+ * same upper 32 bits in their memory address. So here the driver allocates
+ * the DMA'able memory for the reply queues accordingly. The driver applies
+ * the VENTURA_SERIES limitation to INVADER_SERIES as well.
+ */
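+/*
+ * Worked example (illustrative numbers only): with a chunk holding 16
+ * reply queues, as the comment above suggests, and msix_count = 20,
+ * dma_alloc_count = DIV_ROUND_UP(20, 16) = 2; chunk 0 then backs reply
+ * queues 0-15 and chunk 1 backs queues 16-19, each queue at offset
+ * (index within chunk) * reply_alloc_sz.
+ */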
+       dma_alloc_count = DIV_ROUND_UP(msix_count, RDPQ_MAX_INDEX_IN_ONE_CHUNK);
+
+       for (i = 0; i < dma_alloc_count; i++) {
+               rdpq_chunk_virt[i] =
+                       dma_pool_alloc(fusion->reply_frames_desc_pool,
+                                      GFP_KERNEL, &rdpq_chunk_phys[i]);
+               if (!rdpq_chunk_virt[i]) {
                        dev_err(&instance->pdev->dev,
                                "Failed from %s %d\n",  __func__, __LINE__);
                        return -ENOMEM;
                }
+               /* The reply descriptor pool must stay within one 4GB
+                * region; megasas_check_same_4gb_region() verifies this.
+                * In case of failure, the buffer is re-allocated from a
+                * second pci pool with updated alignment; for RDPQ buffers
+                * the driver therefore always creates two separate pci
+                * pools. The alignment is chosen such that the next
+                * allocation, if successful, will always meet the
+                * same-4GB-region requirement. rdpq_tracker keeps track of
+                * each buffer's physical and virtual address and its pci
+                * pool descriptor, which helps the driver while freeing
+                * the resources.
+                */
+               if (!megasas_check_same_4gb_region(instance, rdpq_chunk_phys[i],
+                                                  chunk_size)) {
+                       dma_pool_free(fusion->reply_frames_desc_pool,
+                                     rdpq_chunk_virt[i],
+                                     rdpq_chunk_phys[i]);
+
+                       rdpq_chunk_virt[i] =
+                               dma_pool_alloc(fusion->reply_frames_desc_pool_align,
+                                              GFP_KERNEL, &rdpq_chunk_phys[i]);
+                       if (!rdpq_chunk_virt[i]) {
+                               dev_err(&instance->pdev->dev,
+                                       "Failed from %s %d\n",
+                                       __func__, __LINE__);
+                               return -ENOMEM;
+                       }
+                       fusion->rdpq_tracker[i].dma_pool_ptr =
+                                       fusion->reply_frames_desc_pool_align;
+               } else {
+                       fusion->rdpq_tracker[i].dma_pool_ptr =
+                                       fusion->reply_frames_desc_pool;
+               }
 
-               fusion->rdpq_virt[i].RDPQBaseAddress =
-                       cpu_to_le64(fusion->reply_frames_desc_phys[i]);
+               fusion->rdpq_tracker[i].pool_entry_phys = rdpq_chunk_phys[i];
+               fusion->rdpq_tracker[i].pool_entry_virt = rdpq_chunk_virt[i];
+       }
 
-               reply_desc = fusion->reply_frames_desc[i];
-               for (j = 0; j < fusion->reply_q_depth; j++, reply_desc++)
-                       reply_desc->Words = cpu_to_le64(ULLONG_MAX);
+       for (k = 0; k < dma_alloc_count; k++) {
+               for (i = 0; i < RDPQ_MAX_INDEX_IN_ONE_CHUNK; i++) {
+                       abs_index = (k * RDPQ_MAX_INDEX_IN_ONE_CHUNK) + i;
+
+                       if (abs_index == msix_count)
+                               break;
+                       offset = fusion->reply_alloc_sz * i;
+                       fusion->rdpq_virt[abs_index].RDPQBaseAddress =
+                                       cpu_to_le64(rdpq_chunk_phys[k] + offset);
+                       fusion->reply_frames_desc_phys[abs_index] =
+                                       rdpq_chunk_phys[k] + offset;
+                       fusion->reply_frames_desc[abs_index] =
+                                       (union MPI2_REPLY_DESCRIPTORS_UNION *)((u8 *)rdpq_chunk_virt[k] + offset);
+
+                       reply_desc = fusion->reply_frames_desc[abs_index];
+                       for (j = 0; j < fusion->reply_q_depth; j++, reply_desc++)
+                               reply_desc->Words = ULLONG_MAX;
+               }
        }
+
        return 0;
 }
 
 
        fusion = instance->ctrl_context;
 
-       for (i = 0; i < MAX_MSIX_QUEUES_FUSION; i++) {
-               if (fusion->reply_frames_desc[i])
-                       dma_pool_free(fusion->reply_frames_desc_pool,
-                               fusion->reply_frames_desc[i],
-                               fusion->reply_frames_desc_phys[i]);
+       for (i = 0; i < RDPQ_MAX_CHUNK_COUNT; i++) {
+               if (fusion->rdpq_tracker[i].pool_entry_virt)
+                       dma_pool_free(fusion->rdpq_tracker[i].dma_pool_ptr,
+                                     fusion->rdpq_tracker[i].pool_entry_virt,
+                                     fusion->rdpq_tracker[i].pool_entry_phys);
        }
 
        if (fusion->reply_frames_desc_pool)
                dma_pool_destroy(fusion->reply_frames_desc_pool);
+       if (fusion->reply_frames_desc_pool_align)
+               dma_pool_destroy(fusion->reply_frames_desc_pool_align);
 
        if (fusion->rdpq_virt)
                pci_free_consistent(instance->pdev,
        u32 scratch_pad_2;
        unsigned long flags;
        struct timeval tv;
+       bool cur_fw_64bit_dma_capable;
 
        fusion = instance->ctrl_context;
 
 
        cur_rdpq_mode = (scratch_pad_2 & MR_RDPQ_MODE_OFFSET) ? 1 : 0;
 
+       if (instance->adapter_type == INVADER_SERIES) {
+               cur_fw_64bit_dma_capable =
+                       (scratch_pad_2 & MR_CAN_HANDLE_64_BIT_DMA_OFFSET) ? true : false;
+
+               if (instance->consistent_mask_64bit && !cur_fw_64bit_dma_capable) {
+                       dev_err(&instance->pdev->dev,
+                               "Driver was operating with a 64-bit DMA mask, but the upcoming FW does not support 64-bit DMA\n");
+                       megaraid_sas_kill_hba(instance);
+                       ret = 1;
+                       goto fail_fw_init;
+               }
+       }
+
        if (instance->is_rdpq && !cur_rdpq_mode) {
                dev_err(&instance->pdev->dev, "Firmware downgrade *NOT SUPPORTED*"
                        " from RDPQ mode to non RDPQ mode\n");
        IOCInitMessage->MsgFlags = instance->is_rdpq ?
                        MPI2_IOCINIT_MSGFLAG_RDPQ_ARRAY_MODE : 0;
        IOCInitMessage->SystemRequestFrameBaseAddress = cpu_to_le64(fusion->io_request_frames_phys);
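+       /* Sense buffers are now allocated with the coherent DMA mask and may
+        * sit above 4GB, so the upper 32 bits of the sense buffer base
+        * address must be programmed as well.
+        */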
+       IOCInitMessage->SenseBufferAddressHigh = cpu_to_le32(upper_32_bits(fusion->sense_phys_addr));
        IOCInitMessage->HostMSIxVectors = instance->msix_vectors;
        IOCInitMessage->HostPageSize = MR_DEFAULT_NVME_PAGE_SHIFT;
 
 
        drv_ops->mfi_capabilities.support_qd_throttling = 1;
        drv_ops->mfi_capabilities.support_pd_map_target_id = 1;
+
+       if (instance->consistent_mask_64bit)
+               drv_ops->mfi_capabilities.support_64bit_mode = 1;
+
        /* Convert capability to LE32 */
        cpu_to_le32s((u32 *)&init_frame->driver_operations.mfi_capabilities);
 
                        strlen(sys_info) > 64 ? 64 : strlen(sys_info));
                instance->system_info_buf->systemIdLength =
                        strlen(sys_info) > 64 ? 64 : strlen(sys_info);
-               init_frame->system_info_lo = instance->system_info_h;
-               init_frame->system_info_hi = 0;
+               init_frame->system_info_lo = cpu_to_le32(lower_32_bits(instance->system_info_h));
+               init_frame->system_info_hi = cpu_to_le32(upper_32_bits(instance->system_info_h));
        }
 
        init_frame->queue_info_new_phys_addr_hi =
 
        memset(pd_sync, 0, pd_seq_map_sz);
        memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
+
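+       /* Set the frame direction before megasas_set_dma_settings() below,
+        * since that helper applies the final cpu_to_le16() conversion to
+        * dcmd->flags.
+        */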
+       if (pend) {
+               dcmd->mbox.b[0] = MEGASAS_DCMD_MBOX_PEND_FLAG;
+               dcmd->flags = MFI_FRAME_DIR_WRITE;
+               instance->jbod_seq_cmd = cmd;
+       } else {
+               dcmd->flags = MFI_FRAME_DIR_READ;
+       }
+
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0xFF;
        dcmd->sge_count = 1;
        dcmd->pad_0 = 0;
        dcmd->data_xfer_len = cpu_to_le32(pd_seq_map_sz);
        dcmd->opcode = cpu_to_le32(MR_DCMD_SYSTEM_PD_MAP_GET_INFO);
-       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(pd_seq_h);
-       dcmd->sgl.sge32[0].length = cpu_to_le32(pd_seq_map_sz);
+
+       megasas_set_dma_settings(instance, dcmd, pd_seq_h, pd_seq_map_sz);
 
        if (pend) {
-               dcmd->mbox.b[0] = MEGASAS_DCMD_MBOX_PEND_FLAG;
-               dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_WRITE);
-               instance->jbod_seq_cmd = cmd;
                instance->instancet->issue_dcmd(instance, cmd);
                return 0;
        }
 
-       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
-
        /* Below code is only for non pended DCMD */
        if (!instance->mask_interrupts)
                ret = megasas_issue_blocked_cmd(instance, cmd,
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0xFF;
        dcmd->sge_count = 1;
-       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
+       dcmd->flags = MFI_FRAME_DIR_READ;
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
        dcmd->data_xfer_len = cpu_to_le32(size_map_info);
        dcmd->opcode = cpu_to_le32(MR_DCMD_LD_MAP_GET_INFO);
-       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h);
-       dcmd->sgl.sge32[0].length = cpu_to_le32(size_map_info);
+
+       megasas_set_dma_settings(instance, dcmd, ci_h, size_map_info);
 
        if (!instance->mask_interrupts)
                ret = megasas_issue_blocked_cmd(instance, cmd,
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0xFF;
        dcmd->sge_count = 1;
-       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_WRITE);
+       dcmd->flags = MFI_FRAME_DIR_WRITE;
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
        dcmd->data_xfer_len = cpu_to_le32(size_map_info);
        dcmd->mbox.b[0] = num_lds;
        dcmd->mbox.b[1] = MEGASAS_DCMD_MBOX_PEND_FLAG;
        dcmd->opcode = cpu_to_le32(MR_DCMD_LD_MAP_GET_INFO);
-       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h);
-       dcmd->sgl.sge32[0].length = cpu_to_le32(size_map_info);
+
+       megasas_set_dma_settings(instance, dcmd, ci_h, size_map_info);
 
        instance->map_update_cmd = cmd;
 
        io_request->SGLOffset0 =
                offsetof(struct MPI2_RAID_SCSI_IO_REQUEST, SGL) / 4;
 
-       io_request->SenseBufferLowAddress = cpu_to_le32(cmd->sense_phys_addr);
+       io_request->SenseBufferLowAddress =
+               cpu_to_le32(lower_32_bits(cmd->sense_phys_addr));
        io_request->SenseBufferLength = SCSI_SENSE_BUFFERSIZE;
 
        cmd->scmd = scp;
               (fusion->max_sge_in_main_msg * sizeof(union MPI2_SGE_IO_UNION)));
        /*sense buffer is different for r1 command*/
        r1_cmd->io_request->SenseBufferLowAddress =
-                       cpu_to_le32(r1_cmd->sense_phys_addr);
+                       cpu_to_le32(lower_32_bits(r1_cmd->sense_phys_addr));
        r1_cmd->scmd = cmd->scmd;
        req_desc2 = megasas_get_request_descriptor(instance,
                                                   (r1_cmd->index - 1));