]> www.infradead.org Git - linux.git/commitdiff
drm/xe/guc: Dump entire CTB on errors
author John Harrison <John.C.Harrison@Intel.com>
Thu, 3 Oct 2024 00:46:09 +0000 (17:46 -0700)
committer John Harrison <John.C.Harrison@Intel.com>
Tue, 8 Oct 2024 01:35:00 +0000 (18:35 -0700)
The dump of the CT buffers only showed the unprocessed data, which is
not generally useful for working out why a hang occurred, because the
hang was probably caused by the commands that had just been processed.
So save and dump the entire buffer, but in a more compact dump format.
Also zero-fill it on allocation to avoid confusion over uninitialised
data in the dump.

v2: Add kerneldoc - review feedback from Michal W.
v3: Fix kerneldoc.
v4: Use ascii85 instead of hexdump (review feedback from Matthew B).
v5: Dump the entire CTB object rather than separately dumping just the
H2G and G2H sections. That way it includes the full header info.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Julia Filipchuk <julia.filipchuk@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241003004611.2323493-10-John.C.Harrison@Intel.com
drivers/gpu/drm/xe/xe_guc_ct.c
drivers/gpu/drm/xe/xe_guc_ct.h
drivers/gpu/drm/xe/xe_guc_ct_types.h

index 79571c9d180e8171cdc0c6e74f3a363812060392..b7f80449bd83877e9e2caba72b127813a67d4274 100644 (file)
@@ -18,6 +18,7 @@
 #include "abi/guc_actions_sriov_abi.h"
 #include "abi/guc_klvs_abi.h"
 #include "xe_bo.h"
+#include "xe_devcoredump.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_gt_pagefault.h"
@@ -437,6 +438,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
 
        xe_gt_assert(gt, !xe_guc_ct_enabled(ct));
 
+       xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size);
        guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
        guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
 
@@ -1585,48 +1587,33 @@ static void g2h_worker_func(struct work_struct *w)
        receive_g2h(ct);
 }
 
-static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
-                                    struct guc_ctb_snapshot *snapshot,
-                                    bool atomic)
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic)
 {
-       u32 head, tail;
+       struct xe_guc_ct_snapshot *snapshot;
 
-       xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
-                          sizeof(struct guc_ct_buffer_desc));
-       memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
+       snapshot = kzalloc(sizeof(*snapshot), atomic ? GFP_ATOMIC : GFP_KERNEL);
+       if (!snapshot)
+               return NULL;
 
-       snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32),
-                                      atomic ? GFP_ATOMIC : GFP_KERNEL);
-       if (!snapshot->cmds) {
-               drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CT info will be available.\n");
-               return;
+       if (ct->bo) {
+               snapshot->ctb_size = ct->bo->size;
+               snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL);
        }
 
-       head = snapshot->desc.head;
-       tail = snapshot->desc.tail;
-
-       if (head != tail) {
-               struct iosys_map map =
-                       IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
-
-               while (head != tail) {
-                       snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32);
-                       ++head;
-                       if (head == ctb->info.size) {
-                               head = 0;
-                               map = ctb->cmds;
-                       } else {
-                               iosys_map_incr(&map, sizeof(u32));
-                       }
-               }
-       }
+       return snapshot;
+}
+
+static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+                                    struct guc_ctb_snapshot *snapshot)
+{
+       xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
+                          sizeof(struct guc_ct_buffer_desc));
+       memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
 }
 
 static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
                                   struct drm_printer *p)
 {
-       u32 head, tail;
-
        drm_printf(p, "\tsize: %d\n", snapshot->info.size);
        drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
        drm_printf(p, "\thead: %d\n", snapshot->info.head);
@@ -1636,25 +1623,6 @@ static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
        drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
        drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
        drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
-
-       if (!snapshot->cmds)
-               return;
-
-       head = snapshot->desc.head;
-       tail = snapshot->desc.tail;
-
-       while (head != tail) {
-               drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
-                          snapshot->cmds[head]);
-               ++head;
-               if (head == snapshot->info.size)
-                       head = 0;
-       }
-}
-
-static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot)
-{
-       kfree(snapshot->cmds);
 }
 
 /**
@@ -1675,9 +1643,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
        struct xe_device *xe = ct_to_xe(ct);
        struct xe_guc_ct_snapshot *snapshot;
 
-       snapshot = kzalloc(sizeof(*snapshot),
-                          atomic ? GFP_ATOMIC : GFP_KERNEL);
-
+       snapshot = xe_guc_ct_snapshot_alloc(ct, atomic);
        if (!snapshot) {
                xe_gt_err(ct_to_gt(ct), "Skipping CTB snapshot entirely.\n");
                return NULL;
@@ -1686,12 +1652,13 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
        if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) {
                snapshot->ct_enabled = true;
                snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
-               guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
-                                        &snapshot->h2g, atomic);
-               guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h,
-                                        &snapshot->g2h, atomic);
+               guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, &snapshot->h2g);
+               guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h, &snapshot->g2h);
        }
 
+       if (ct->bo && snapshot->ctb)
+               xe_map_memcpy_from(xe, snapshot->ctb, &ct->bo->vmap, 0, snapshot->ctb_size);
+
        return snapshot;
 }
 
@@ -1714,9 +1681,15 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
 
                drm_puts(p, "G2H CTB (all sizes in DW):\n");
                guc_ctb_snapshot_print(&snapshot->g2h, p);
-
                drm_printf(p, "\tg2h outstanding: %d\n",
                           snapshot->g2h_outstanding);
+
+               if (snapshot->ctb) {
+                       xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
+               } else {
+                       drm_printf(p, "CTB snapshot missing!\n");
+                       return;
+               }
        } else {
                drm_puts(p, "CT disabled\n");
        }
@@ -1734,8 +1707,7 @@ void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
        if (!snapshot)
                return;
 
-       guc_ctb_snapshot_free(&snapshot->h2g);
-       guc_ctb_snapshot_free(&snapshot->g2h);
+       kfree(snapshot->ctb);
        kfree(snapshot);
 }
 
index 293041bed7ed0f0c285ca406da2000e8a65128e3..338f0b75d29f9c7b05b53aa6a412d89833e054b9 100644 (file)
@@ -9,6 +9,7 @@
 #include "xe_guc_ct_types.h"
 
 struct drm_printer;
+struct xe_device;
 
 int xe_guc_ct_init(struct xe_guc_ct *ct);
 int xe_guc_ct_enable(struct xe_guc_ct *ct);
@@ -16,10 +17,9 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct);
 void xe_guc_ct_stop(struct xe_guc_ct *ct);
 void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
 
-struct xe_guc_ct_snapshot *
-xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
-void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
-                             struct drm_printer *p);
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic);
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_printer *p);
 void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
 void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
 
index 85e127ec91d7af22468ef7ebb9c6a9c9a0eeebc3..8e1b9d981d61ecd9313825665d8f3e1227f436ec 100644 (file)
@@ -52,8 +52,6 @@ struct guc_ctb {
 struct guc_ctb_snapshot {
        /** @desc: snapshot of the CTB descriptor */
        struct guc_ct_buffer_desc desc;
-       /** @cmds: snapshot of the CTB commands */
-       u32 *cmds;
        /** @info: snapshot of the CTB info */
        struct guc_ctb_info info;
 };
@@ -70,6 +68,10 @@ struct xe_guc_ct_snapshot {
        struct guc_ctb_snapshot g2h;
        /** @h2g: H2G CTB snapshot */
        struct guc_ctb_snapshot h2g;
+       /** @ctb_size: size of the snapshot of the CTB */
+       size_t ctb_size;
+       /** @ctb: snapshot of the entire CTB */
+       u32 *ctb;
 };
 
 /**