static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask)
 {
-       unsigned long hartid, cpuid, hmask = 0, hbase = 0;
+       unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0;
        struct sbiret ret = {0};
        int result;
 
 
        for_each_cpu(cpuid, cpu_mask) {
                hartid = cpuid_to_hartid_map(cpuid);
-               if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
-                       ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
-                                       hmask, hbase, 0, 0, 0, 0);
-                       if (ret.error)
-                               goto ecall_failed;
-                       hmask = 0;
-                       hbase = 0;
+               if (hmask) {
+                       if (hartid + BITS_PER_LONG <= htop ||
+                           hbase + BITS_PER_LONG <= hartid) {
+                               ret = sbi_ecall(SBI_EXT_IPI,
+                                               SBI_EXT_IPI_SEND_IPI, hmask,
+                                               hbase, 0, 0, 0, 0);
+                               if (ret.error)
+                                       goto ecall_failed;
+                               hmask = 0;
+                       } else if (hartid < hbase) {
+                               /* shift the mask to fit lower hartid */
+                               hmask <<= hbase - hartid;
+                               hbase = hartid;
+                       }
                }
-               if (!hmask)
+               if (!hmask) {
                        hbase = hartid;
+                       htop = hartid;
+               } else if (hartid > htop) {
+                       htop = hartid;
+               }
                hmask |= BIT(hartid - hbase);
        }
 
                            unsigned long start, unsigned long size,
                            unsigned long arg4, unsigned long arg5)
 {
-       unsigned long hartid, cpuid, hmask = 0, hbase = 0;
+       unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0;
        int result;
 
        if (!cpu_mask || cpumask_empty(cpu_mask))
 
        for_each_cpu(cpuid, cpu_mask) {
                hartid = cpuid_to_hartid_map(cpuid);
-               if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
-                       result = __sbi_rfence_v02_call(fid, hmask, hbase,
-                                                      start, size, arg4, arg5);
-                       if (result)
-                               return result;
-                       hmask = 0;
-                       hbase = 0;
+               if (hmask) {
+                       if (hartid + BITS_PER_LONG <= htop ||
+                           hbase + BITS_PER_LONG <= hartid) {
+                               result = __sbi_rfence_v02_call(fid, hmask,
+                                               hbase, start, size, arg4, arg5);
+                               if (result)
+                                       return result;
+                               hmask = 0;
+                       } else if (hartid < hbase) {
+                               /* shift the mask to fit lower hartid */
+                               hmask <<= hbase - hartid;
+                               hbase = hartid;
+                       }
                }
-               if (!hmask)
+               if (!hmask) {
                        hbase = hartid;
+                       htop = hartid;
+               } else if (hartid > htop) {
+                       htop = hartid;
+               }
                hmask |= BIT(hartid - hbase);
        }