drm/xe/guc: Set upper limit of H2G retries over CTB

author Michal Wajdeczko <michal.wajdeczko@intel.com>

Wed, 3 Sep 2025 22:33:30 +0000 (00:33 +0200)

committer Michal Wajdeczko <michal.wajdeczko@intel.com>

Thu, 4 Sep 2025 20:24:51 +0000 (22:24 +0200)
author Michal Wajdeczko <michal.wajdeczko@intel.com>
Wed, 3 Sep 2025 22:33:30 +0000 (00:33 +0200)
committer Michal Wajdeczko <michal.wajdeczko@intel.com>
Thu, 4 Sep 2025 20:24:51 +0000 (22:24 +0200)
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c

index e431ff73227cc34d1dfc179f238b4cfb100d4e81..f40543b040d6a8e2e57b16852f4affc96f850251 100644 (file)
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -1079,11 +1079,15 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret)
         return true;
  }
  
+#define GUC_SEND_RETRY_LIMIT   50
+#define GUC_SEND_RETRY_MSLEEP  5
+
  static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
                             u32 *response_buffer, bool no_fail)
  {
         struct xe_gt *gt = ct_to_gt(ct);
         struct g2h_fence g2h_fence;
+       unsigned int retries = 0;
         int ret = 0;
  
         /*
@@ -1148,6 +1152,12 @@ retry_same_fence:
                 xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
                           action[0], g2h_fence.reason);
                 mutex_unlock(&ct->lock);
+               if (++retries > GUC_SEND_RETRY_LIMIT) {
+                       xe_gt_err(gt, "H2G action %#x reached retry limit=%u, aborting\n",
+                                 action[0], GUC_SEND_RETRY_LIMIT);
+                       return -ELOOP;
+               }
+               msleep(GUC_SEND_RETRY_MSLEEP * retries);
                 goto retry;
         }
         if (g2h_fence.fail) {
author	Michal Wajdeczko <michal.wajdeczko@intel.com>
	Wed, 3 Sep 2025 22:33:30 +0000 (00:33 +0200)
committer	Michal Wajdeczko <michal.wajdeczko@intel.com>
	Thu, 4 Sep 2025 20:24:51 +0000 (22:24 +0200)