]> www.infradead.org Git - users/hch/block.git/commitdiff
drm/i915/guc: Track all sent actions to GuC
authorMichal Wajdeczko <michal.wajdeczko@intel.com>
Fri, 26 May 2023 23:55:38 +0000 (16:55 -0700)
committerJohn Harrison <John.C.Harrison@Intel.com>
Tue, 30 May 2023 22:18:21 +0000 (15:18 -0700)
For easier debug of any unexpected error responses from GuC that
might be related to non-blocking fast requests, track action code (and
stack if under DEBUG_GUC config) for every H2G request.

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230526235538.2230780-4-John.C.Harrison@Intel.com
drivers/gpu/drm/i915/Kconfig.debug
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h

index 47e845353ffad82547ec6cf864a2ff4bf09f0e13..2d21930d5501580a1ecb83c91f44c3fcf05d4a00 100644 (file)
@@ -157,6 +157,7 @@ config DRM_I915_SW_FENCE_CHECK_DAG
 config DRM_I915_DEBUG_GUC
        bool "Enable additional driver debugging for GuC"
        depends on DRM_I915
+       select STACKDEPOT
        default n
        help
          Choose this option to turn on extra driver debugging that may affect
index 3a71bb582089e16d34375712417c6ad115ae306f..4aa903be1317bdfe79cd63a665067f782cecaae3 100644 (file)
@@ -376,6 +376,24 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct)
        }
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static void ct_track_lost_and_found(struct intel_guc_ct *ct, u32 fence, u32 action)
+{
+       unsigned int lost = fence % ARRAY_SIZE(ct->requests.lost_and_found);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+       unsigned long entries[SZ_32];
+       unsigned int n;
+
+       n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+
+       /* May be called under spinlock, so avoid sleeping */
+       ct->requests.lost_and_found[lost].stack = stack_depot_save(entries, n, GFP_NOWAIT);
+#endif
+       ct->requests.lost_and_found[lost].fence = fence;
+       ct->requests.lost_and_found[lost].action = action;
+}
+#endif
+
 static u32 ct_get_next_fence(struct intel_guc_ct *ct)
 {
        /* For now it's trivial */
@@ -447,6 +465,11 @@ static int ct_write(struct intel_guc_ct *ct,
        }
        GEM_BUG_ON(tail > size);
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+       ct_track_lost_and_found(ct, fence,
+                               FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0]));
+#endif
+
        /*
         * make sure H2G buffer update and LRC tail update (if this triggering a
         * submission) are visible before updating the descriptor tail
@@ -953,6 +976,43 @@ corrupted:
        return -EPIPE;
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
+{
+       unsigned int n;
+       char *buf = NULL;
+       bool found = false;
+
+       lockdep_assert_held(&ct->requests.lock);
+
+       for (n = 0; n < ARRAY_SIZE(ct->requests.lost_and_found); n++) {
+               if (ct->requests.lost_and_found[n].fence != fence)
+                       continue;
+               found = true;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+               buf = kmalloc(SZ_4K, GFP_NOWAIT);
+               if (buf && stack_depot_snprint(ct->requests.lost_and_found[n].stack,
+                                              buf, SZ_4K, 0)) {
+                       CT_ERROR(ct, "Fence %u was used by action %#04x sent at\n%s",
+                                fence, ct->requests.lost_and_found[n].action, buf);
+                       break;
+               }
+#endif
+               CT_ERROR(ct, "Fence %u was used by action %#04x\n",
+                        fence, ct->requests.lost_and_found[n].action);
+               break;
+       }
+       kfree(buf);
+       return found;
+}
+#else
+static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
+{
+       return false;
+}
+#endif
+
 static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *response)
 {
        u32 len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, response->msg[0]);
@@ -996,9 +1056,11 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
        if (!found) {
                CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
                         len, hxg[0], fence, ct->requests.last_fence);
-               list_for_each_entry(req, &ct->requests.pending, link)
-                       CT_ERROR(ct, "request %u awaits response\n",
-                                req->fence);
+               if (!ct_check_lost_and_found(ct, fence)) {
+                       list_for_each_entry(req, &ct->requests.pending, link)
+                               CT_ERROR(ct, "request %u awaits response\n",
+                                        req->fence);
+               }
                err = -ENOKEY;
        }
        spin_unlock_irqrestore(&ct->requests.lock, flags);
index 818415b64f4d1bc1097483e43717764707f4d9f1..58e42901ff498fae1715a3e274eaaf4a12c5091d 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
+#include <linux/stackdepot.h>
 #include <linux/workqueue.h>
 #include <linux/ktime.h>
 #include <linux/wait.h>
@@ -81,6 +82,16 @@ struct intel_guc_ct {
 
                struct list_head incoming; /* incoming requests */
                struct work_struct worker; /* handler for incoming requests */
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+               struct {
+                       u16 fence;
+                       u16 action;
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+                       depot_stack_handle_t stack;
+#endif
+               } lost_and_found[SZ_16];
+#endif
        } requests;
 
        /** @stall_time: time of first time a CTB submission is stalled */