        int dma_mask;
 
        if (ioc->is_mcpu_endpoint ||
-           sizeof(dma_addr_t) == 4 ||
+           sizeof(dma_addr_t) == 4 || ioc->use_32bit_dma ||
            dma_get_required_mask(&pdev->dev) <= 32)
                dma_mask = 32;
        /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */
 {
        int i = 0;
        int j = 0;
+       int dma_alloc_count = 0;
        struct chain_tracker *ct;
-       struct reply_post_struct *rps;
+       int count = ioc->rdpq_array_enable ? ioc->reply_queue_count : 1;
 
        dexitprintk(ioc, ioc_info(ioc, "%s\n", __func__));
 
        }
 
        if (ioc->reply_post) {
-               do {
-                       rps = &ioc->reply_post[i];
-                       if (rps->reply_post_free) {
-                               dma_pool_free(
-                                   ioc->reply_post_free_dma_pool,
-                                   rps->reply_post_free,
-                                   rps->reply_post_free_dma);
-                               dexitprintk(ioc,
-                                           ioc_info(ioc, "reply_post_free_pool(0x%p): free\n",
-                                                    rps->reply_post_free));
-                               rps->reply_post_free = NULL;
+               dma_alloc_count = DIV_ROUND_UP(count,
+                               RDPQ_MAX_INDEX_IN_ONE_CHUNK);
+               for (i = 0; i < count; i++) {
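+                       /*
+                        * Only the first reply queue of each chunk owns a DMA
+                        * pool allocation; the remaining queues in the chunk
+                        * point into that same buffer, so free only every
+                        * RDPQ_MAX_INDEX_IN_ONE_CHUNK-th element.
+                        */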
+                       if (i % RDPQ_MAX_INDEX_IN_ONE_CHUNK == 0
+                           && dma_alloc_count) {
+                               if (ioc->reply_post[i].reply_post_free) {
+                                       dma_pool_free(
+                                           ioc->reply_post_free_dma_pool,
+                                           ioc->reply_post[i].reply_post_free,
+                                           ioc->reply_post[i].reply_post_free_dma);
+                                       dexitprintk(ioc, ioc_info(ioc,
+                                          "reply_post_free_pool(0x%p): free\n",
+                                          ioc->reply_post[i].reply_post_free));
+                                       ioc->reply_post[i].reply_post_free =
+                                                                       NULL;
+                               }
+                               --dma_alloc_count;
                        }
-               } while (ioc->rdpq_array_enable &&
-                          (++i < ioc->reply_queue_count));
+               }
+               dma_pool_destroy(ioc->reply_post_free_dma_pool);
                if (ioc->reply_post_free_array &&
                        ioc->rdpq_array_enable) {
                        dma_pool_free(ioc->reply_post_free_array_dma_pool,
-                               ioc->reply_post_free_array,
-                               ioc->reply_post_free_array_dma);
+                           ioc->reply_post_free_array,
+                           ioc->reply_post_free_array_dma);
                        ioc->reply_post_free_array = NULL;
                }
                dma_pool_destroy(ioc->reply_post_free_array_dma_pool);
-               dma_pool_destroy(ioc->reply_post_free_dma_pool);
                kfree(ioc->reply_post);
        }
 
 static int
 base_alloc_rdpq_dma_pool(struct MPT3SAS_ADAPTER *ioc, int sz)
 {
-       int i;
+       int i = 0;
+       u32 dma_alloc_count = 0;
+       int reply_post_free_sz = ioc->reply_post_queue_depth *
+               sizeof(Mpi2DefaultReplyDescriptor_t);
        int count = ioc->rdpq_array_enable ? ioc->reply_queue_count : 1;
 
        ioc->reply_post = kcalloc(count, sizeof(struct reply_post_struct),
                        GFP_KERNEL);
        if (!ioc->reply_post)
                return -ENOMEM;
+       /*
+        * For INVADER_SERIES, each set of 8 reply queues (0-7, 8-15, ...) and
+        * for VENTURA_SERIES, each set of 16 reply queues (0-15, 16-31, ...)
+        * must lie within a 4GB boundary, i.e. the reply queues in a set must
+        * share the same upper 32 bits of their memory address. So the driver
+        * allocates the DMA-able memory for the reply queues accordingly, and
+        * uses the VENTURA_SERIES grouping to manage INVADER_SERIES as well.
+        */
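+       /*
+        * Example (with RDPQ_MAX_INDEX_IN_ONE_CHUNK == 16): for 32 reply
+        * queues in RDPQ mode, dma_alloc_count is 2; queues 0 and 16 each get
+        * a fresh chunk from the pool, and queues 1-15 and 17-31 are placed
+        * at reply_post_free_sz offsets inside those two chunks.
+        */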
+       dma_alloc_count = DIV_ROUND_UP(count,
+                               RDPQ_MAX_INDEX_IN_ONE_CHUNK);
        ioc->reply_post_free_dma_pool =
-           dma_pool_create("reply_post_free pool",
-           &ioc->pdev->dev, sz, 16, 0);
+               dma_pool_create("reply_post_free pool",
+                   &ioc->pdev->dev, sz, 16, 0);
        if (!ioc->reply_post_free_dma_pool)
                return -ENOMEM;
-       i = 0;
-       do {
-               ioc->reply_post[i].reply_post_free =
-                   dma_pool_zalloc(ioc->reply_post_free_dma_pool,
-                   GFP_KERNEL,
-                   &ioc->reply_post[i].reply_post_free_dma);
-               if (!ioc->reply_post[i].reply_post_free)
-                       return -ENOMEM;
-               dinitprintk(ioc,
-                       ioc_info(ioc, "reply post free pool (0x%p): depth(%d),"
-                           "element_size(%d), pool_size(%d kB)\n",
-                           ioc->reply_post[i].reply_post_free,
-                           ioc->reply_post_queue_depth, 8, sz / 1024));
-               dinitprintk(ioc,
-                       ioc_info(ioc, "reply_post_free_dma = (0x%llx)\n",
-                           (u64)ioc->reply_post[i].reply_post_free_dma));
+       for (i = 0; i < count; i++) {
+               if ((i % RDPQ_MAX_INDEX_IN_ONE_CHUNK == 0) && dma_alloc_count) {
+                       ioc->reply_post[i].reply_post_free =
+                           dma_pool_alloc(ioc->reply_post_free_dma_pool,
+                               GFP_KERNEL,
+                               &ioc->reply_post[i].reply_post_free_dma);
+                       if (!ioc->reply_post[i].reply_post_free)
+                               return -ENOMEM;
+                       /*
+                        * Each set of RDPQ pools must satisfy the 4GB boundary
+                        * restriction:
+                        * 1) Check if the resources allocated for the RDPQ
+                        *    pool lie in the same 4GB range.
+                        * 2) If #1 is true, continue with 64 bit DMA.
+                        * 3) If #1 is false, return -EAGAIN, meaning the
+                        *    caller must free all resources, set the DMA mask
+                        *    to 32 bit, and reallocate.
+                        */
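+                       /*
+                        * mpt3sas_check_same_4gb_region() returns nonzero only
+                        * when the start and end of the sz-byte buffer share
+                        * the same upper 32 address bits, i.e. the buffer does
+                        * not cross a 4GB boundary.
+                        */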
+                       if (!mpt3sas_check_same_4gb_region(
+                               (long)ioc->reply_post[i].reply_post_free, sz)) {
+                               dinitprintk(ioc, ioc_err(ioc,
+                                   "bad reply_post_free pool(0x%p) reply_post_free_dma = (0x%llx)\n",
+                                   ioc->reply_post[i].reply_post_free,
+                                   (unsigned long long)
+                                   ioc->reply_post[i].reply_post_free_dma));
+                               return -EAGAIN;
+                       }
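+                       /*
+                        * Zero the entire chunk: sz covers the whole pool
+                        * element, which in non-RDPQ mode spans all reply
+                        * queues rather than RDPQ_MAX_INDEX_IN_ONE_CHUNK of
+                        * them.
+                        */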
+                       memset(ioc->reply_post[i].reply_post_free, 0, sz);
+                       dma_alloc_count--;
 
-       } while (ioc->rdpq_array_enable && ++i < ioc->reply_queue_count);
+               } else {
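+                       /*
+                        * Non-leader queues reuse the chunk allocated for the
+                        * previous queue: both the virtual and DMA addresses
+                        * advance by one queue's descriptor area
+                        * (reply_post_free_sz), which keeps the whole set in
+                        * the same 4GB region as its chunk.
+                        */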
+                       ioc->reply_post[i].reply_post_free =
+                           (Mpi2ReplyDescriptorsUnion_t *)
+                           ((long)ioc->reply_post[i-1].reply_post_free
+                           + reply_post_free_sz);
+                       ioc->reply_post[i].reply_post_free_dma =
+                           (dma_addr_t)
+                           (ioc->reply_post[i-1].reply_post_free_dma +
+                           reply_post_free_sz);
+               }
+       }
        return 0;
 }
 
        u16 chains_needed_per_io;
        u32 sz, total_sz, reply_post_free_sz, reply_post_free_array_sz;
        u32 retry_sz;
+       u32 rdpq_sz = 0;
        u16 max_request_credit, nvme_blocks_needed;
        unsigned short sg_tablesize;
        u16 sge_size;
        int i, j;
+       int ret = 0;
        struct chain_tracker *ct;
 
        dinitprintk(ioc, ioc_info(ioc, "%s\n", __func__));
        /* reply post queue, 16 byte align */
        reply_post_free_sz = ioc->reply_post_queue_depth *
            sizeof(Mpi2DefaultReplyDescriptor_t);
-
-       sz = reply_post_free_sz;
+       rdpq_sz = reply_post_free_sz * RDPQ_MAX_INDEX_IN_ONE_CHUNK;
        if (_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable)
-               sz *= ioc->reply_queue_count;
-       if (base_alloc_rdpq_dma_pool(ioc, sz))
-               goto out;
-       total_sz += sz * (!ioc->rdpq_array_enable ? 1 : ioc->reply_queue_count);
-
+               rdpq_sz = reply_post_free_sz * ioc->reply_queue_count;
+       ret = base_alloc_rdpq_dma_pool(ioc, rdpq_sz);
+       if (ret == -EAGAIN) {
+               /*
+                * Free the allocated bad RDPQ memory pools, change the DMA
+                * coherent mask to 32 bit, and reallocate the RDPQ pools.
+                */
+               _base_release_memory_pools(ioc);
+               ioc->use_32bit_dma = true;
+               if (_base_config_dma_addressing(ioc, ioc->pdev) != 0) {
+                       ioc_err(ioc,
+                           "32 bit DMA mask failed %s\n", pci_name(ioc->pdev));
+                       return -ENODEV;
+               }
+               if (base_alloc_rdpq_dma_pool(ioc, rdpq_sz))
+                       return -ENOMEM;
+       } else if (ret == -ENOMEM)
+               return -ENOMEM;
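+       /*
+        * total_sz accounts for one rdpq_sz chunk per set of
+        * RDPQ_MAX_INDEX_IN_ONE_CHUNK reply queues in RDPQ mode, or a single
+        * chunk otherwise.
+        */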
+       total_sz = rdpq_sz * (!ioc->rdpq_array_enable ? 1 :
+           DIV_ROUND_UP(ioc->reply_queue_count, RDPQ_MAX_INDEX_IN_ONE_CHUNK));
        ioc->scsiio_depth = ioc->hba_queue_depth -
            ioc->hi_priority_depth - ioc->internal_depth;
 
                    ioc_info(ioc, "scsi host: can_queue depth (%d)\n",
                             ioc->shost->can_queue));
 
-
        /* contiguous pool for request and chains, 16 byte align, one extra "
         * "frame for smid=0
         */
        ioc->smp_affinity_enable = smp_affinity_enable;
 
        ioc->rdpq_array_enable_assigned = 0;
+       ioc->use_32bit_dma = false;
        if (ioc->is_aero_ioc)
                ioc->base_readl = &_base_readl_aero;
        else