mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+       mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
+       mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
+       mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
+       mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
        mqd->compute_misc_reserved = 0x00000003;
 
        mqd->dynamic_cu_mask_addr_lo =
 
                uint32_t *se_mask)
 {
        struct kfd_cu_info cu_info;
-       uint32_t cu_per_sh[4] = {0};
-       int i, se, cu = 0;
+       uint32_t cu_per_se[KFD_MAX_NUM_SE] = {0};
+       int i, se, sh, cu = 0;
 
        amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info);
 
                cu_mask_count = cu_info.cu_active_number;
 
        for (se = 0; se < cu_info.num_shader_engines; se++)
-               for (i = 0; i < 4; i++)
-                       cu_per_sh[se] += hweight32(cu_info.cu_bitmap[se][i]);
+               for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
+                       cu_per_se[se] += hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]);
 
        /* Symmetrically map cu_mask to all SEs:
         * cu_mask[0] bit0 -> se_mask[0] bit0;
                                se = 0;
                                cu++;
                        }
-               } while (cu >= cu_per_sh[se] && cu < 32);
+               } while (cu >= cu_per_se[se] && cu < 32);
        }
 }
 
                        struct queue_properties *q)
 {
        struct v9_mqd *m;
-       uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
+       uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
 
        if (q->cu_mask_count == 0)
                return;
        m->compute_static_thread_mgmt_se1 = se_mask[1];
        m->compute_static_thread_mgmt_se2 = se_mask[2];
        m->compute_static_thread_mgmt_se3 = se_mask[3];
+       m->compute_static_thread_mgmt_se4 = se_mask[4];
+       m->compute_static_thread_mgmt_se5 = se_mask[5];
+       m->compute_static_thread_mgmt_se6 = se_mask[6];
+       m->compute_static_thread_mgmt_se7 = se_mask[7];
 
-       pr_debug("update cu mask to %#x %#x %#x %#x\n",
+       pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
                m->compute_static_thread_mgmt_se0,
                m->compute_static_thread_mgmt_se1,
                m->compute_static_thread_mgmt_se2,
-               m->compute_static_thread_mgmt_se3);
+               m->compute_static_thread_mgmt_se3,
+               m->compute_static_thread_mgmt_se4,
+               m->compute_static_thread_mgmt_se5,
+               m->compute_static_thread_mgmt_se6,
+               m->compute_static_thread_mgmt_se7);
 }
 
 static void set_priority(struct v9_mqd *m, struct queue_properties *q)
        m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
        m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
        m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+       m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
+       m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
+       m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
+       m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;
 
        m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
                        0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
 
        uint32_t compute_wave_restore_addr_lo;
        uint32_t compute_wave_restore_addr_hi;
        uint32_t compute_wave_restore_control;
-       uint32_t reserved_39;
-       uint32_t reserved_40;
-       uint32_t reserved_41;
-       uint32_t reserved_42;
+       uint32_t compute_static_thread_mgmt_se4;
+       uint32_t compute_static_thread_mgmt_se5;
+       uint32_t compute_static_thread_mgmt_se6;
+       uint32_t compute_static_thread_mgmt_se7;
        uint32_t reserved_43;
        uint32_t reserved_44;
        uint32_t reserved_45;