.set_address_watch = kgd_gfx_aldebaran_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
.hqd_reset = kgd_gfx_v9_hqd_reset,
.set_address_watch = kgd_gfx_v9_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings =
kgd_gfx_v9_program_trap_handler_settings,
- .build_grace_period_packet_info =
- kgd_gfx_v9_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info =
+ kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.enable_debug_trap = kgd_aldebaran_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
}
-void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
- uint32_t grace_period,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data)
{
*reg_data = wait_times;
- /*
- * The CP cannont handle a 0 grace period input and will result in
- * an infinite grace period being set so set to 1 to prevent this.
- */
- if (grace_period == 0)
- grace_period = 1;
-
- *reg_data = REG_SET_FIELD(*reg_data,
- CP_IQ_WAIT_TIME2,
- SCH_WAVE,
- grace_period);
+ if (sch_wave)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ sch_wave);
+ if (que_sleep)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
.set_address_watch = kgd_gfx_v10_set_address_watch,
.clear_address_watch = kgd_gfx_v10_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
.program_trap_handler_settings = program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
.hqd_reset = kgd_gfx_v10_hqd_reset,
void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
uint32_t *wait_times,
uint32_t inst);
-void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
- uint32_t grace_period,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
.program_trap_handler_settings = program_trap_handler_settings_v10_3,
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
.enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
.validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
adev->gfx.cu_info.max_waves_per_simd;
}
-void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
- uint32_t grace_period,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data)
{
*reg_data = wait_times;
- /*
- * The CP cannot handle a 0 grace period input and will result in
- * an infinite grace period being set so set to 1 to prevent this.
- */
- if (grace_period == 0)
- grace_period = 1;
-
- *reg_data = REG_SET_FIELD(*reg_data,
- CP_IQ_WAIT_TIME2,
- SCH_WAVE,
- grace_period);
+ if (sch_wave)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ sch_wave);
+ if (que_sleep)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
.set_address_watch = kgd_gfx_v9_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
uint32_t *wait_times,
uint32_t inst);
-void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
- uint32_t grace_period,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
}
static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm,
- uint32_t sch_value, uint32_t *reg_offset,
+ uint32_t sch_value, uint32_t que_sleep, uint32_t *reg_offset,
uint32_t *reg_data)
{
- pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
+ pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info(
pm->dqm->dev->adev,
pm->dqm->wait_times,
sch_value,
+ que_sleep,
reg_offset,
reg_data);
}
uint32_t reg_data = 0;
switch (cmd) {
- case KFD_DEQUEUE_WAIT_INIT:
- /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
- if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
- (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
- pm_build_dequeue_wait_counts_packet_info(pm, 1, ®_offset, ®_data);
- else
+ case KFD_DEQUEUE_WAIT_INIT: {
+ uint32_t sch_wave = 0, que_sleep = 0;
+ /* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
+ * On a 1GHz machine this is roughly 1 microsecond, which is
+ * about how long it takes to load data out of memory during
+ * queue connect
+ * QUE_SLEEP: Wait Count for Dequeue Retry.
+ */
+ if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) &&
+ KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) {
+ que_sleep = 1;
+
+ /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
+ if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
+ (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
+ sch_wave = 1;
+ } else {
return 0;
+ }
+ pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep,
+ ®_offset, ®_data);
+
break;
+ }
case KFD_DEQUEUE_WAIT_RESET:
- /* function called only to get reg_offset */
- pm_build_dequeue_wait_counts_packet_info(pm, 0, ®_offset, ®_data);
- reg_data = pm->dqm->wait_times;
+ /* reg_data would be set to dqm->wait_times */
+ pm_build_dequeue_wait_counts_packet_info(pm, 0, 0, ®_offset, ®_data);
break;
case KFD_DEQUEUE_WAIT_SET_SCH_WAVE:
/* The CP cannot handle value 0 and it will result in
- * an infinite grace period being set so set to 1 to prevent this.
+ * an infinite grace period being set so set to 1 to prevent this. Also
+ * avoid debugger API breakage as it sets 0 and expects a low value.
*/
if (!value)
value = 1;
- pm_build_dequeue_wait_counts_packet_info(pm, value, ®_offset, ®_data);
+ pm_build_dequeue_wait_counts_packet_info(pm, value, 0, ®_offset, ®_data);
break;
default:
pr_err("Invalid dequeue wait cmd\n");
void (*get_iq_wait_times)(struct amdgpu_device *adev,
uint32_t *wait_times,
uint32_t inst);
- void (*build_grace_period_packet_info)(struct amdgpu_device *adev,
+ void (*build_dequeue_wait_counts_packet_info)(struct amdgpu_device *adev,
uint32_t wait_times,
- uint32_t grace_period,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
void (*get_cu_occupancy)(struct amdgpu_device *adev,