When a poison is consumed on the guest before the guest receives the host's poison creation msg, a corner case may occur to have poison_handler complete processing earlier than it should to cause the guest to hang waiting for the req_bad_pages reply during a VF FLR, resulting in the VM becoming inaccessible in stress tests.
To fix this issue, this patch refactored the mailbox sequence by seperating the bad_page_work into two parts req_bad_pages_work and handle_bad_pages_work.
Old sequence:
1.Stop data exchange work
2.Guest sends MB_REQ_RAS_BAD_PAGES to host and keep polling for IDH_RAS_BAD_PAGES_READY
3.If the IDH_RAS_BAD_PAGES_READY arrives within timeout limit, re-init the data exchange region for updated bad page info
else timeout with error message
New sequence:
req_bad_pages_work:
1.Stop data exhange work
2.Guest sends MB_REQ_RAS_BAD_PAGES to host
Once Guest receives IDH_RAS_BAD_PAGES_READY event
handle_bad_pages_work:
3.re-init the data exchange region for updated bad page info
Signed-off-by: Chenglei Xie <Chenglei.Xie@amd.com>
Reviewed-by: Shravan Kumar Gande <Shravankumar.Gande@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
struct amdgpu_irq_src rcv_irq;
struct work_struct flr_work;
- struct work_struct bad_pages_work;
+ struct work_struct req_bad_pages_work;
+ struct work_struct handle_bad_pages_work;
struct amdgpu_mm_table mm_table;
const struct amdgpu_virt_ops *ops;
}
}
-static void xgpu_ai_mailbox_bad_pages_work(struct work_struct *work)
+static void xgpu_ai_mailbox_req_bad_pages_work(struct work_struct *work)
{
- struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work);
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, req_bad_pages_work);
struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
if (down_read_trylock(&adev->reset_domain->sem)) {
amdgpu_virt_fini_data_exchange(adev);
amdgpu_virt_request_bad_pages(adev);
+ up_read(&adev->reset_domain->sem);
+ }
+}
+
+/**
+ * xgpu_ai_mailbox_handle_bad_pages_work - Reinitialize the data exchange region to get fresh bad page information
+ * @work: pointer to the work_struct
+ *
+ * This work handler is triggered when bad pages are ready, and it reinitializes
+ * the data exchange region to retrieve updated bad page information from the host.
+ */
+static void xgpu_ai_mailbox_handle_bad_pages_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, handle_bad_pages_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_virt_fini_data_exchange(adev);
amdgpu_virt_init_data_exchange(adev);
up_read(&adev->reset_domain->sem);
}
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
switch (event) {
+ case IDH_RAS_BAD_PAGES_READY:
+ xgpu_ai_mailbox_send_ack(adev);
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.handle_bad_pages_work);
+ break;
case IDH_RAS_BAD_PAGES_NOTIFICATION:
xgpu_ai_mailbox_send_ack(adev);
if (amdgpu_sriov_runtime(adev))
- schedule_work(&adev->virt.bad_pages_work);
+ schedule_work(&adev->virt.req_bad_pages_work);
break;
case IDH_UNRECOV_ERR_NOTIFICATION:
xgpu_ai_mailbox_send_ack(adev);
}
INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work);
- INIT_WORK(&adev->virt.bad_pages_work, xgpu_ai_mailbox_bad_pages_work);
+ INIT_WORK(&adev->virt.req_bad_pages_work, xgpu_ai_mailbox_req_bad_pages_work);
+ INIT_WORK(&adev->virt.handle_bad_pages_work, xgpu_ai_mailbox_handle_bad_pages_work);
return 0;
}
case IDH_REQ_RAS_CPER_DUMP:
event = IDH_RAS_CPER_DUMP_READY;
break;
- case IDH_REQ_RAS_BAD_PAGES:
- event = IDH_RAS_BAD_PAGES_READY;
- break;
default:
break;
}
}
}
-static void xgpu_nv_mailbox_bad_pages_work(struct work_struct *work)
+static void xgpu_nv_mailbox_req_bad_pages_work(struct work_struct *work)
{
- struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work);
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, req_bad_pages_work);
struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
if (down_read_trylock(&adev->reset_domain->sem)) {
amdgpu_virt_fini_data_exchange(adev);
amdgpu_virt_request_bad_pages(adev);
+ up_read(&adev->reset_domain->sem);
+ }
+}
+
+/**
+ * xgpu_nv_mailbox_handle_bad_pages_work - Reinitialize the data exchange region to get fresh bad page information
+ * @work: pointer to the work_struct
+ *
+ * This work handler is triggered when bad pages are ready, and it reinitializes
+ * the data exchange region to retrieve updated bad page information from the host.
+ */
+static void xgpu_nv_mailbox_handle_bad_pages_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, handle_bad_pages_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_virt_fini_data_exchange(adev);
amdgpu_virt_init_data_exchange(adev);
up_read(&adev->reset_domain->sem);
}
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
switch (event) {
+ case IDH_RAS_BAD_PAGES_READY:
+ xgpu_nv_mailbox_send_ack(adev);
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.handle_bad_pages_work);
+ break;
case IDH_RAS_BAD_PAGES_NOTIFICATION:
xgpu_nv_mailbox_send_ack(adev);
if (amdgpu_sriov_runtime(adev))
- schedule_work(&adev->virt.bad_pages_work);
+ schedule_work(&adev->virt.req_bad_pages_work);
break;
case IDH_UNRECOV_ERR_NOTIFICATION:
xgpu_nv_mailbox_send_ack(adev);
}
INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work);
- INIT_WORK(&adev->virt.bad_pages_work, xgpu_nv_mailbox_bad_pages_work);
+ INIT_WORK(&adev->virt.req_bad_pages_work, xgpu_nv_mailbox_req_bad_pages_work);
+ INIT_WORK(&adev->virt.handle_bad_pages_work, xgpu_nv_mailbox_handle_bad_pages_work);
return 0;
}
void soc15_set_virt_ops(struct amdgpu_device *adev)
{
adev->virt.ops = &xgpu_ai_virt_ops;
-
/* init soc15 reg base early enough so we can
* request request full access for sriov before
* set_ip_blocks. */