From ba757a65d2a28d46a8ccf50538f4f05036983f1b Mon Sep 17 00:00:00 2001 From: Satyanarayana K V P Date: Mon, 24 Feb 2025 15:58:07 +0530 Subject: [PATCH] drm/xe/vf: Retry sending MMIO request to GUC on timeout error MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add support to allow retrying the sending of MMIO requests from the VF to the GUC in the event of a timeout error. During the suspend/resume process, VFs begin resuming only after the PF has resumed. Although the PF resumes, the GUC reset and provisioning occur later in a separate worker process. When there are a large number of VFs, some may attempt to resume before the PF has completed its provisioning. Therefore, if an MMIO request from a VF fails with a timeout (-ETIMEDOUT) during this period, we will retry sending the request up to GUC_RESET_VF_STATE_RETRY_MAX times, which is set to a maximum of 10 attempts. Signed-off-by: Satyanarayana K V P Cc: Michał Wajdeczko Cc: Michał Winiarski Cc: Piotr Piórkowski Reviewed-by: Piotr Piorkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20250224102807.11065-3-satyanarayana.k.v.p@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 4831549da319..a439261bf4d7 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -47,12 +47,19 @@ static int guc_action_vf_reset(struct xe_guc *guc) return ret > 0 ? 
-EPROTO : ret; } +#define GUC_RESET_VF_STATE_RETRY_MAX 10 static int vf_reset_guc_state(struct xe_gt *gt) { + unsigned int retry = GUC_RESET_VF_STATE_RETRY_MAX; struct xe_guc *guc = &gt->uc.guc; int err; - err = guc_action_vf_reset(guc); + do { + err = guc_action_vf_reset(guc); + if (!err || err != -ETIMEDOUT) + break; + } while (--retry); + if (unlikely(err)) xe_gt_sriov_err(gt, "Failed to reset GuC state (%pe)\n", ERR_PTR(err)); return err; -- 2.50.1