]> www.infradead.org Git - linux.git/commitdiff
drm/xe/guc: Extract GuC error capture lists
authorZhanjun Dong <zhanjun.dong@intel.com>
Fri, 4 Oct 2024 19:34:26 +0000 (12:34 -0700)
committerMatt Roper <matthew.d.roper@intel.com>
Tue, 8 Oct 2024 16:34:45 +0000 (09:34 -0700)
Upon the G2H Notify-Err-Capture event, parse through the
GuC Log Buffer (error-capture-subregion) and generate one or
more capture-nodes. A single node represents a single "engine-
instance-capture-dump" and contains at least 3 register lists:
global, engine-class and engine-instance. An internal linked
list is maintained to store one or more nodes.

Because the linked-list node generation happens before the call
to devcoredump, duplicate the global and engine-class register
lists for each engine-instance register dump if we find
dependent-engine resets in an engine-capture-group.

To avoid dynamically allocating the output nodes during gt reset,
pre-allocate a fixed number of empty nodes up front (at the
time of ADS registration) that we can consume from or return to
an internal cached list of nodes.

Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241004193428.3311145-5-zhanjun.dong@intel.com
drivers/gpu/drm/xe/abi/guc_actions_abi.h
drivers/gpu/drm/xe/abi/guc_log_abi.h
drivers/gpu/drm/xe/xe_guc_capture.c
drivers/gpu/drm/xe/xe_guc_capture.h
drivers/gpu/drm/xe/xe_guc_ct.c
drivers/gpu/drm/xe/xe_guc_log.c
drivers/gpu/drm/xe/xe_guc_log.h
drivers/gpu/drm/xe/xe_guc_log_types.h
drivers/gpu/drm/xe/xe_guc_submit.c
drivers/gpu/drm/xe/xe_guc_submit.h

index 43ad4652c2b257fd900c7237c595e1806307599f..b54fe40fc5a92d81c86793acf675ab3453431d07 100644 (file)
@@ -176,6 +176,14 @@ enum xe_guc_sleep_state_status {
 #define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT)
 #define GUC_LOG_CONTROL_DEFAULT_LOGGING        (1 << 8)
 
+enum xe_guc_state_capture_event_status {
+       XE_GUC_STATE_CAPTURE_EVENT_STATUS_SUCCESS = 0x0,
+       XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE = 0x1,
+};
+
+#define XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK      0x000000FF
+#define XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN 1
+
 #define XE_GUC_TLB_INVAL_TYPE_SHIFT 0
 #define XE_GUC_TLB_INVAL_MODE_SHIFT 8
 /* Flush PPC or SMRO caches along with TLB invalidation request */
index 10db4ffaa17f9eec29ff8191aae406f309fa42b4..554630b7ccd97f9e3e035258e03ffbf07dace679 100644 (file)
@@ -17,4 +17,59 @@ enum guc_log_buffer_type {
 
 #define GUC_LOG_BUFFER_TYPE_MAX                3
 
+/**
+ * struct guc_log_buffer_state - GuC log buffer state
+ *
+ * Below state structure is used for coordination of retrieval of GuC firmware
+ * logs. Separate state is maintained for each log buffer type.
+ * read_ptr points to the location where Xe read last in log buffer and
+ * is read only for GuC firmware. write_ptr is incremented by GuC with number
+ * of bytes written for each log entry and is read only for Xe.
+ * When any type of log buffer becomes half full, GuC sends a flush interrupt.
+ * GuC firmware expects that while it is writing to 2nd half of the buffer,
+ * first half would get consumed by Host and then get a flush completed
+ * acknowledgment from Host, so that it does not end up doing any overwrite
+ * causing loss of logs. So when buffer gets half filled & Xe has requested
+ * for interrupt, GuC will set flush_to_file field, set the sampled_write_ptr
+ * to the value of write_ptr and raise the interrupt.
+ * On receiving the interrupt Xe should read the buffer, clear flush_to_file
+ * field and also update read_ptr with the value of sampled_write_ptr, before
+ * sending an acknowledgment to GuC. marker & version fields are for internal
+ * usage of GuC and opaque to Xe. buffer_full_cnt field is incremented every
+ * time GuC detects the log buffer overflow.
+ */
+struct guc_log_buffer_state {
+       /** @marker: buffer state start marker */
+       u32 marker[2];
+       /** @read_ptr: the last byte offset that was read by KMD previously */
+       u32 read_ptr;
+       /**
+        * @write_ptr: the next byte offset location that will be written by
+        * GuC
+        */
+       u32 write_ptr;
+       /** @size: Log buffer size */
+       u32 size;
+       /**
+        * @sampled_write_ptr: Log buffer write pointer
+        * This is written by GuC to the byte offset of the next free entry in
+        * the buffer on log buffer half full or state capture notification
+        */
+       u32 sampled_write_ptr;
+       /**
+        * @wrap_offset: wraparound offset
+        * This is the byte offset of location 1 byte after last valid guc log
+        * event entry written by Guc firmware before there was a wraparound.
+        * This field is updated by guc firmware and should be used by Host
+        * when copying buffer contents to file.
+        */
+       u32 wrap_offset;
+       /** @flags: Flush to file flag and buffer full count */
+       u32 flags;
+#define        GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE      GENMASK(0, 0)
+#define        GUC_LOG_BUFFER_STATE_BUFFER_FULL_CNT    GENMASK(4, 1)
+       /** @version: The Guc-Log-Entry format version */
+       u32 version;
+} __packed;
+
 #endif
index f1b9ddcb2d896ed9aaaceca8818fc853e3e01b2e..d5cea907d2e680604127373bb4f9f56cc15cff04 100644 (file)
@@ -10,6 +10,7 @@
 
 #include "abi/guc_actions_abi.h"
 #include "abi/guc_capture_abi.h"
+#include "abi/guc_log_abi.h"
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_guc_regs.h"
 #include "xe_macros.h"
 #include "xe_map.h"
 
+/*
+ * struct __guc_capture_bufstate
+ *
+ * Book-keeping structure used to track read and write pointers
+ * as we extract error capture data from the GuC-log-buffer's
+ * error-capture region as a stream of dwords.
+ */
+struct __guc_capture_bufstate {
+       u32 size;
+       u32 data_offset;
+       u32 rd;
+       u32 wr;
+};
+
+/*
+ * struct __guc_capture_parsed_output - extracted error capture node
+ *
+ * A single unit of extracted error-capture output data grouped together
+ * at an engine-instance level. We keep these nodes in a linked list.
+ * See cachelist and outlist below.
+ */
+struct __guc_capture_parsed_output {
+       /*
+        * A single set of 3 capture lists: a global-list
+        * an engine-class-list and an engine-instance list.
+        * outlist in xe_guc_state_capture will keep
+        * a linked list of these nodes that will eventually
+        * be detached from outlist and attached to
+        * xe_devcoredump in response to a context reset
+        */
+       struct list_head link;
+       bool is_partial;
+       u32 eng_class;
+       u32 eng_inst;
+       u32 guc_id;
+       u32 lrca;
+       struct gcap_reg_list_info {
+               u32 vfid;
+               u32 num_regs;
+               struct guc_mmio_reg *regs;
+       } reginfo[GUC_STATE_CAPTURE_TYPE_MAX];
+#define GCAP_PARSED_REGLIST_INDEX_GLOBAL   BIT(GUC_STATE_CAPTURE_TYPE_GLOBAL)
+#define GCAP_PARSED_REGLIST_INDEX_ENGCLASS BIT(GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS)
+};
+
 /*
  * Define all device tables of GuC error capture register lists
  * NOTE:
@@ -221,6 +267,12 @@ struct xe_guc_state_capture {
                                                [GUC_STATE_CAPTURE_TYPE_MAX]
                                                [GUC_CAPTURE_LIST_CLASS_MAX];
        void *ads_null_cache;
+       struct list_head cachelist;
+#define PREALLOC_NODES_MAX_COUNT (3 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS)
+#define PREALLOC_NODES_DEFAULT_NUMREGS 64
+
+       int max_mmio_per_node;
+       struct list_head outlist;
 };
 
 static const struct __guc_mmio_reg_descr_group *
@@ -450,8 +502,17 @@ guc_cap_list_num_regs(struct xe_guc *guc, u32 owner, u32 type,
        if (match)
                num_regs += match->num_regs;
        else
-               /* Estimate steering register size for rcs/ccs */
-               if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE)
+               /*
+                * If a caller wants the full register dump size but we have
+                * not yet got the hw-config, which is before max_mmio_per_node
+                * is initialized, then provide a worst-case number for
+                * extlists based on max dss fuse bits, but only ever for
+                * render/compute
+                */
+               if (owner == GUC_CAPTURE_LIST_INDEX_PF &&
+                   type == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
+                   capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE &&
+                   !guc->capture->max_mmio_per_node)
                        num_regs += guc_capture_get_steer_reg_num(guc_to_xe(guc)) *
                                    XE_MAX_DSS_FUSE_BITS;
 
@@ -749,11 +810,664 @@ static void check_guc_capture_size(struct xe_guc *guc)
                          buffer_size, spare_size, capture_size);
 }
 
+static void
+guc_capture_add_node_to_list(struct __guc_capture_parsed_output *node,
+                            struct list_head *list)
+{
+       list_add_tail(&node->link, list);
+}
+
+static void
+guc_capture_add_node_to_outlist(struct xe_guc_state_capture *gc,
+                               struct __guc_capture_parsed_output *node)
+{
+       guc_capture_add_node_to_list(node, &gc->outlist);
+}
+
+static void
+guc_capture_add_node_to_cachelist(struct xe_guc_state_capture *gc,
+                                 struct __guc_capture_parsed_output *node)
+{
+       guc_capture_add_node_to_list(node, &gc->cachelist);
+}
+
+static void
+guc_capture_init_node(struct xe_guc *guc, struct __guc_capture_parsed_output *node)
+{
+       struct guc_mmio_reg *tmp[GUC_STATE_CAPTURE_TYPE_MAX];
+       int i;
+
+       for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
+               tmp[i] = node->reginfo[i].regs;
+               memset(tmp[i], 0, sizeof(struct guc_mmio_reg) *
+                      guc->capture->max_mmio_per_node);
+       }
+       memset(node, 0, sizeof(*node));
+       for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i)
+               node->reginfo[i].regs = tmp[i];
+
+       INIT_LIST_HEAD(&node->link);
+}
+
+/**
+ * DOC: Init, G2H-event and reporting flows for GuC-error-capture
+ *
+ * KMD Init time flows:
+ * --------------------
+ *     --> alloc A: GuC input capture regs lists (registered to GuC via ADS).
+ *                  xe_guc_ads acquires the register lists by calling
+ *                  xe_guc_capture_getlistsize and xe_guc_capture_getlist 'n' times,
+ *                  where n = 1 for global-reg-list +
+ *                            num_engine_classes for class-reg-list +
+ *                            num_engine_classes for instance-reg-list
+ *                               (since all instances of the same engine-class type
+ *                                have an identical engine-instance register-list).
+ *                  ADS module also calls separately for PF vs VF.
+ *
+ *     --> alloc B: GuC output capture buf (registered via guc_init_params(log_param))
+ *                  Size = #define CAPTURE_BUFFER_SIZE (warns if too small)
+ *                  Note: 'x 3' to hold multiple capture groups
+ *
+ * GUC Runtime notify capture:
+ * --------------------------
+ *     --> G2H STATE_CAPTURE_NOTIFICATION
+ *                   L--> xe_guc_capture_process
+ *                           L--> Loop through B (head..tail) and for each engine instance's
+ *                                err-state-captured register-list we find, we alloc 'C':
+ *      --> alloc C: A capture-output-node structure that includes misc capture info along
+ *                   with 3 register list dumps (global, engine-class and engine-instance)
+ *                   This node is created from a pre-allocated list of blank nodes in
+ *                   guc->capture->cachelist and populated with the error-capture
+ *                   data from GuC and then it's added into guc->capture->outlist linked
+ *                   list. This list is used for matchup and printout by xe_devcoredump_read
+ *                   and xe_hw_engine_snapshot_print, (when user invokes the devcoredump sysfs).
+ *
+ * GUC --> notify context reset:
+ * -----------------------------
+ *     --> guc_exec_queue_timedout_job
+ *                   L--> xe_devcoredump
+ *                          L--> devcoredump_snapshot
+ *                               --> xe_hw_engine_snapshot_capture
+ *
+ * User Sysfs / Debugfs
+ * --------------------
+ *      --> xe_devcoredump_read->
+ *             L--> xxx_snapshot_print
+ *                    L--> xe_hw_engine_snapshot_print
+ *                         Print register lists values saved at
+ *                         guc->capture->outlist
+ *
+ */
+
+static int guc_capture_buf_cnt(struct __guc_capture_bufstate *buf)
+{
+       if (buf->wr >= buf->rd)
+               return (buf->wr - buf->rd);
+       return (buf->size - buf->rd) + buf->wr;
+}
+
+static int guc_capture_buf_cnt_to_end(struct __guc_capture_bufstate *buf)
+{
+       if (buf->rd > buf->wr)
+               return (buf->size - buf->rd);
+       return (buf->wr - buf->rd);
+}
+
+/*
+ * GuC's error-capture output is a ring buffer populated in a byte-stream fashion:
+ *
+ * The GuC Log buffer region for error-capture is managed like a ring buffer.
+ * The GuC firmware dumps error capture logs into this ring in a byte-stream flow.
+ * Additionally, as per the current and foreseeable future, all packed error-
+ * capture output structures are dword aligned.
+ *
+ * That said, if the GuC firmware is in the midst of writing a structure that is larger
+ * than one dword but the tail end of the err-capture buffer-region has less space left,
+ * we would need to extract that structure one dword at a time straddled across the end,
+ * onto the start of the ring.
+ *
+ * Below function, guc_capture_log_remove_bytes, is a helper for that. The header and
+ * register extraction helpers further below all go through it: it copies the dword-aligned
+ * data available up to the end of the ring and then wraps to the start of the ring for the
+ * rest. GuC firmware does not add any padding. The reason for the no-padding is to ease
+ * scalability for future expansion of output data types without requiring a redesign
+ * of the flow controls.
+ */
+static int
+guc_capture_log_remove_bytes(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+                            void *out, int bytes_needed)
+{
+#define GUC_CAPTURE_LOG_BUF_COPY_RETRY_MAX     3
+
+       int fill_size = 0, tries = GUC_CAPTURE_LOG_BUF_COPY_RETRY_MAX;
+       int copy_size, avail;
+
+       xe_assert(guc_to_xe(guc), bytes_needed % sizeof(u32) == 0);
+
+       if (bytes_needed > guc_capture_buf_cnt(buf))
+               return -1;
+
+       while (bytes_needed > 0 && tries--) {
+               int misaligned;
+
+               avail = guc_capture_buf_cnt_to_end(buf);
+               misaligned = avail % sizeof(u32);
+               /* wrap if at end */
+               if (!avail) {
+                       /* output stream clipped */
+                       if (!buf->rd)
+                               return fill_size;
+                       buf->rd = 0;
+                       continue;
+               }
+
+               /* Only copy to u32 aligned data */
+               copy_size = avail < bytes_needed ? avail - misaligned : bytes_needed;
+               xe_map_memcpy_from(guc_to_xe(guc), out + fill_size, &guc->log.bo->vmap,
+                                  buf->data_offset + buf->rd, copy_size);
+               buf->rd += copy_size;
+               fill_size += copy_size;
+               bytes_needed -= copy_size;
+
+               if (misaligned)
+                       xe_gt_warn(guc_to_gt(guc),
+                                  "Bytes extraction not dword aligned, clipping.\n");
+       }
+
+       return fill_size;
+}
+
+static int
+guc_capture_log_get_group_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+                             struct guc_state_capture_group_header_t *ghdr)
+{
+       int fullsize = sizeof(struct guc_state_capture_group_header_t);
+
+       if (guc_capture_log_remove_bytes(guc, buf, ghdr, fullsize) != fullsize)
+               return -1;
+       return 0;
+}
+
+static int
+guc_capture_log_get_data_hdr(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+                            struct guc_state_capture_header_t *hdr)
+{
+       int fullsize = sizeof(struct guc_state_capture_header_t);
+
+       if (guc_capture_log_remove_bytes(guc, buf, hdr, fullsize) != fullsize)
+               return -1;
+       return 0;
+}
+
+static int
+guc_capture_log_get_register(struct xe_guc *guc, struct __guc_capture_bufstate *buf,
+                            struct guc_mmio_reg *reg)
+{
+       int fullsize = sizeof(struct guc_mmio_reg);
+
+       if (guc_capture_log_remove_bytes(guc, buf, reg, fullsize) != fullsize)
+               return -1;
+       return 0;
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_get_prealloc_node(struct xe_guc *guc)
+{
+       struct __guc_capture_parsed_output *found = NULL;
+
+       if (!list_empty(&guc->capture->cachelist)) {
+               struct __guc_capture_parsed_output *n, *ntmp;
+
+               /* get first avail node from the cache list */
+               list_for_each_entry_safe(n, ntmp, &guc->capture->cachelist, link) {
+                       found = n;
+                       break;
+               }
+       } else {
+               struct __guc_capture_parsed_output *n, *ntmp;
+
+               /* traverse down and steal back the oldest node already allocated */
+               list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
+                       found = n;
+               }
+       }
+       if (found) {
+               list_del(&found->link);
+               guc_capture_init_node(guc, found);
+       }
+
+       return found;
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_clone_node(struct xe_guc *guc, struct __guc_capture_parsed_output *original,
+                      u32 keep_reglist_mask)
+{
+       struct __guc_capture_parsed_output *new;
+       int i;
+
+       new = guc_capture_get_prealloc_node(guc);
+       if (!new)
+               return NULL;
+       if (!original)
+               return new;
+
+       new->is_partial = original->is_partial;
+
+       /* copy reg-lists that we want to clone */
+       for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
+               if (keep_reglist_mask & BIT(i)) {
+                       XE_WARN_ON(original->reginfo[i].num_regs  >
+                                  guc->capture->max_mmio_per_node);
+
+                       memcpy(new->reginfo[i].regs, original->reginfo[i].regs,
+                              original->reginfo[i].num_regs * sizeof(struct guc_mmio_reg));
+
+                       new->reginfo[i].num_regs = original->reginfo[i].num_regs;
+                       new->reginfo[i].vfid  = original->reginfo[i].vfid;
+
+                       if (i == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS) {
+                               new->eng_class = original->eng_class;
+                       } else if (i == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
+                               new->eng_inst = original->eng_inst;
+                               new->guc_id = original->guc_id;
+                               new->lrca = original->lrca;
+                       }
+               }
+       }
+
+       return new;
+}
+
+static int
+guc_capture_extract_reglists(struct xe_guc *guc, struct __guc_capture_bufstate *buf)
+{
+       struct xe_gt *gt = guc_to_gt(guc);
+       struct guc_state_capture_group_header_t ghdr = {0};
+       struct guc_state_capture_header_t hdr = {0};
+       struct __guc_capture_parsed_output *node = NULL;
+       struct guc_mmio_reg *regs = NULL;
+       int i, numlists, numregs, ret = 0;
+       enum guc_state_capture_type datatype;
+       struct guc_mmio_reg tmp;
+       bool is_partial = false;
+
+       i = guc_capture_buf_cnt(buf);
+       if (!i)
+               return -ENODATA;
+
+       if (i % sizeof(u32)) {
+               xe_gt_warn(gt, "Got mis-aligned register capture entries\n");
+               ret = -EIO;
+               goto bailout;
+       }
+
+       /* first get the capture group header */
+       if (guc_capture_log_get_group_hdr(guc, buf, &ghdr)) {
+               ret = -EIO;
+               goto bailout;
+       }
+       /*
+        * we would typically expect a layout as below where num_captures would be
+        * 3, or more than 3 if we are seeing multiple dependent engine
+        * instances being reset together.
+        * ____________________________________________
+        * | Capture Group                            |
+        * | ________________________________________ |
+        * | | Capture Group Header:                | |
+        * | |  - num_captures = 5                  | |
+        * | |______________________________________| |
+        * | ________________________________________ |
+        * | | Capture1:                            | |
+        * | |  Hdr: GLOBAL, numregs=a              | |
+        * | | ____________________________________ | |
+        * | | | Reglist                          | | |
+        * | | | - reg1, reg2, ... rega           | | |
+        * | | |__________________________________| | |
+        * | |______________________________________| |
+        * | ________________________________________ |
+        * | | Capture2:                            | |
+        * | |  Hdr: CLASS=RENDER/COMPUTE, numregs=b| |
+        * | | ____________________________________ | |
+        * | | | Reglist                          | | |
+        * | | | - reg1, reg2, ... regb           | | |
+        * | | |__________________________________| | |
+        * | |______________________________________| |
+        * | ________________________________________ |
+        * | | Capture3:                            | |
+        * | |  Hdr: INSTANCE=RCS, numregs=c        | |
+        * | | ____________________________________ | |
+        * | | | Reglist                          | | |
+        * | | | - reg1, reg2, ... regc           | | |
+        * | | |__________________________________| | |
+        * | |______________________________________| |
+        * | ________________________________________ |
+        * | | Capture4:                            | |
+        * | |  Hdr: CLASS=RENDER/COMPUTE, numregs=d| |
+        * | | ____________________________________ | |
+        * | | | Reglist                          | | |
+        * | | | - reg1, reg2, ... regd           | | |
+        * | | |__________________________________| | |
+        * | |______________________________________| |
+        * | ________________________________________ |
+        * | | Capture5:                            | |
+        * | |  Hdr: INSTANCE=CCS0, numregs=e       | |
+        * | | ____________________________________ | |
+        * | | | Reglist                          | | |
+        * | | | - reg1, reg2, ... rege           | | |
+        * | | |__________________________________| | |
+        * | |______________________________________| |
+        * |__________________________________________|
+        */
+       is_partial = FIELD_GET(GUC_STATE_CAPTURE_GROUP_HEADER_CAPTURE_GROUP_TYPE, ghdr.info);
+       numlists = FIELD_GET(GUC_STATE_CAPTURE_GROUP_HEADER_NUM_CAPTURES, ghdr.info);
+
+       while (numlists--) {
+               if (guc_capture_log_get_data_hdr(guc, buf, &hdr)) {
+                       ret = -EIO;
+                       break;
+               }
+
+               datatype = FIELD_GET(GUC_STATE_CAPTURE_HEADER_CAPTURE_TYPE, hdr.info);
+               if (datatype > GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
+                       /* unknown capture type - skip over to next capture set */
+                       numregs = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
+                                           hdr.num_mmio_entries);
+                       while (numregs--) {
+                               if (guc_capture_log_get_register(guc, buf, &tmp)) {
+                                       ret = -EIO;
+                                       break;
+                               }
+                       }
+                       continue;
+               } else if (node) {
+                       /*
+                        * Based on the current capture type and what we have so far,
+                        * decide if we should add the current node into the internal
+                        * linked list for match-up when xe_devcoredump calls later
+                        * (and alloc a blank node for the next set of reglists)
+                        * or continue with the same node or clone the current node
+                        * but only retain the global or class registers (such as the
+                        * case of dependent engine resets).
+                        */
+                       if (datatype == GUC_STATE_CAPTURE_TYPE_GLOBAL) {
+                               guc_capture_add_node_to_outlist(guc->capture, node);
+                               node = NULL;
+                       } else if (datatype == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
+                                  node->reginfo[GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS].num_regs) {
+                               /* Add to list, clone node and duplicate global list */
+                               guc_capture_add_node_to_outlist(guc->capture, node);
+                               node = guc_capture_clone_node(guc, node,
+                                                             GCAP_PARSED_REGLIST_INDEX_GLOBAL);
+                       } else if (datatype == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE &&
+                                  node->reginfo[GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE].num_regs) {
+                               /* Add to list, clone node and duplicate global + class lists */
+                               guc_capture_add_node_to_outlist(guc->capture, node);
+                               node = guc_capture_clone_node(guc, node,
+                                                             (GCAP_PARSED_REGLIST_INDEX_GLOBAL |
+                                                             GCAP_PARSED_REGLIST_INDEX_ENGCLASS));
+                       }
+               }
+
+               if (!node) {
+                       node = guc_capture_get_prealloc_node(guc);
+                       if (!node) {
+                               ret = -ENOMEM;
+                               break;
+                       }
+                       if (datatype != GUC_STATE_CAPTURE_TYPE_GLOBAL)
+                               xe_gt_dbg(gt, "Register capture missing global dump: %08x!\n",
+                                         datatype);
+               }
+               node->is_partial = is_partial;
+               node->reginfo[datatype].vfid = FIELD_GET(GUC_STATE_CAPTURE_HEADER_VFID, hdr.owner);
+
+               switch (datatype) {
+               case GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE:
+                       node->eng_class = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS,
+                                                   hdr.info);
+                       node->eng_inst = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_INSTANCE,
+                                                  hdr.info);
+                       node->lrca = hdr.lrca;
+                       node->guc_id = hdr.guc_id;
+                       break;
+               case GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS:
+                       node->eng_class = FIELD_GET(GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS,
+                                                   hdr.info);
+                       break;
+               default:
+                       break;
+               }
+
+               numregs = FIELD_GET(GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES,
+                                   hdr.num_mmio_entries);
+               if (numregs > guc->capture->max_mmio_per_node) {
+                       xe_gt_dbg(gt, "Register capture list extraction clipped by prealloc!\n");
+                       numregs = guc->capture->max_mmio_per_node;
+               }
+               node->reginfo[datatype].num_regs = numregs;
+               regs = node->reginfo[datatype].regs;
+               i = 0;
+               while (numregs--) {
+                       if (guc_capture_log_get_register(guc, buf, &regs[i++])) {
+                               ret = -EIO;
+                               break;
+                       }
+               }
+       }
+
+bailout:
+       if (node) {
+               /* If we have data, add to linked list for match-up when xe_devcoredump calls */
+               for (i = GUC_STATE_CAPTURE_TYPE_GLOBAL; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
+                       if (node->reginfo[i].regs) {
+                               guc_capture_add_node_to_outlist(guc->capture, node);
+                               node = NULL;
+                               break;
+                       }
+               }
+               if (node) /* else return it back to cache list */
+                       guc_capture_add_node_to_cachelist(guc->capture, node);
+       }
+       return ret;
+}
+
+static int __guc_capture_flushlog_complete(struct xe_guc *guc)
+{
+       u32 action[] = {
+               XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE,
+               GUC_LOG_BUFFER_CAPTURE
+       };
+
+       return xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+static void __guc_capture_process_output(struct xe_guc *guc)
+{
+       unsigned int buffer_size, read_offset, write_offset, full_count;
+       struct xe_uc *uc = container_of(guc, typeof(*uc), guc);
+       struct guc_log_buffer_state log_buf_state_local;
+       struct __guc_capture_bufstate buf;
+       bool new_overflow;
+       int ret, tmp;
+       u32 log_buf_state_offset;
+       u32 src_data_offset;
+
+       log_buf_state_offset = sizeof(struct guc_log_buffer_state) * GUC_LOG_BUFFER_CAPTURE;
+       src_data_offset = xe_guc_get_log_buffer_offset(&guc->log, GUC_LOG_BUFFER_CAPTURE);
+
+       /*
+        * Make a copy of the state structure, inside GuC log buffer
+        * (which is uncached mapped), on the stack to avoid reading
+        * from it multiple times.
+        */
+       xe_map_memcpy_from(guc_to_xe(guc), &log_buf_state_local, &guc->log.bo->vmap,
+                          log_buf_state_offset, sizeof(struct guc_log_buffer_state));
+
+       buffer_size = xe_guc_get_log_buffer_size(&guc->log, GUC_LOG_BUFFER_CAPTURE);
+       read_offset = log_buf_state_local.read_ptr;
+       write_offset = log_buf_state_local.sampled_write_ptr;
+       full_count = FIELD_GET(GUC_LOG_BUFFER_STATE_BUFFER_FULL_CNT, log_buf_state_local.flags);
+
+       /* Bookkeeping stuff */
+       tmp = FIELD_GET(GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE, log_buf_state_local.flags);
+       guc->log.stats[GUC_LOG_BUFFER_CAPTURE].flush += tmp;
+       new_overflow = xe_guc_check_log_buf_overflow(&guc->log, GUC_LOG_BUFFER_CAPTURE,
+                                                    full_count);
+
+       /* Now copy the actual logs. */
+       if (unlikely(new_overflow)) {
+               /* copy the whole buffer in case of overflow */
+               read_offset = 0;
+               write_offset = buffer_size;
+       } else if (unlikely((read_offset > buffer_size) ||
+                       (write_offset > buffer_size))) {
+               xe_gt_err(guc_to_gt(guc),
+                         "Register capture buffer in invalid state: read = 0x%X, size = 0x%X!\n",
+                         read_offset, buffer_size);
+               /* copy whole buffer as offsets are unreliable */
+               read_offset = 0;
+               write_offset = buffer_size;
+       }
+
+       buf.size = buffer_size;
+       buf.rd = read_offset;
+       buf.wr = write_offset;
+       buf.data_offset = src_data_offset;
+
+       if (!xe_guc_read_stopped(guc)) {
+               do {
+                       ret = guc_capture_extract_reglists(guc, &buf);
+                       if (ret && ret != -ENODATA)
+                               xe_gt_dbg(guc_to_gt(guc), "Capture extraction failed:%d\n", ret);
+               } while (ret >= 0);
+       }
+
+       /* Update the state of log buffer err-cap state */
+       xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
+                 log_buf_state_offset + offsetof(struct guc_log_buffer_state, read_ptr), u32,
+                 write_offset);
+
+       /*
+        * Clear the flush_to_file from local first, the local was loaded by above
+        * xe_map_memcpy_from, then write out the "updated local" through
+        * xe_map_wr()
+        */
+       log_buf_state_local.flags &= ~GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE;
+       xe_map_wr(guc_to_xe(guc), &guc->log.bo->vmap,
+                 log_buf_state_offset + offsetof(struct guc_log_buffer_state, flags), u32,
+                 log_buf_state_local.flags);
+       __guc_capture_flushlog_complete(guc);
+}
+
+/*
+ * xe_guc_capture_process - Process GuC register capture data
+ * @guc: The GuC object
+ *
+ * GuC notifies the host with XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION once
+ * captured register data is ready. This entry point is called in response
+ * and hands the work over to __guc_capture_process_output(), provided that
+ * guc->capture has been set up.
+ *
+ * Returns: None
+ */
+void xe_guc_capture_process(struct xe_guc *guc)
+{
+       /* Without a capture object there is nothing to process */
+       if (!guc->capture)
+               return;
+
+       __guc_capture_process_output(guc);
+}
+
+/*
+ * guc_capture_alloc_one_node - Allocate one empty capture node
+ * @guc: The GuC object
+ *
+ * Allocates a zeroed node and, for each of the GUC_STATE_CAPTURE_TYPE_MAX
+ * register lists, a zeroed array of guc->capture->max_mmio_per_node
+ * registers. All memory comes from the drmm_* managed allocators. If one of
+ * the array allocations fails, the arrays obtained so far and the node itself
+ * are released again. On success the node is passed through
+ * guc_capture_init_node() before being returned.
+ *
+ * Returns: the new node, or NULL on allocation failure
+ */
+static struct __guc_capture_parsed_output *
+guc_capture_alloc_one_node(struct xe_guc *guc)
+{
+       struct drm_device *drm = guc_to_drm(guc);
+       struct __guc_capture_parsed_output *new;
+       int i;
+
+       new = drmm_kzalloc(drm, sizeof(*new), GFP_KERNEL);
+       if (!new)
+               return NULL;
+
+       for (i = 0; i < GUC_STATE_CAPTURE_TYPE_MAX; ++i) {
+               /*
+                * Use the array allocator so that an overflowing
+                * count * size product fails the allocation instead of
+                * silently yielding an undersized buffer.
+                */
+               new->reginfo[i].regs = drmm_kcalloc(drm, guc->capture->max_mmio_per_node,
+                                                   sizeof(struct guc_mmio_reg), GFP_KERNEL);
+               if (!new->reginfo[i].regs) {
+                       /* Unwind the arrays allocated by earlier iterations */
+                       while (i)
+                               drmm_kfree(drm, new->reginfo[--i].regs);
+                       drmm_kfree(drm, new);
+                       return NULL;
+               }
+       }
+       guc_capture_init_node(guc, new);
+
+       return new;
+}
+
+/*
+ * Fill the capture cache list with up to PREALLOC_NODES_MAX_COUNT empty
+ * nodes. Stops at the first allocation failure and keeps whatever was
+ * already added to the cache list.
+ */
+static void
+__guc_capture_create_prealloc_nodes(struct xe_guc *guc)
+{
+       int count;
+
+       for (count = 0; count < PREALLOC_NODES_MAX_COUNT; ++count) {
+               struct __guc_capture_parsed_output *fresh;
+
+               fresh = guc_capture_alloc_one_node(guc);
+               if (fresh) {
+                       guc_capture_add_node_to_cachelist(guc->capture, fresh);
+                       continue;
+               }
+
+               xe_gt_warn(guc_to_gt(guc), "Register capture pre-alloc-cache failure\n");
+               /* don't free the earlier nodes: use what we got, clean up at shutdown */
+               return;
+       }
+}
+
+/*
+ * Walk every (list index, capture type, capture class) combination and
+ * return the largest combined register count of the matching entries in
+ * guc->capture->reglists and guc->capture->extlists. Falls back to
+ * PREALLOC_NODES_DEFAULT_NUMREGS when no list contributes any register.
+ */
+static int
+guc_get_max_reglist_count(struct xe_guc *guc)
+{
+       int list_idx, cap_type, cap_class, total, highest = 0;
+
+       for (list_idx = 0; list_idx < GUC_CAPTURE_LIST_INDEX_MAX; ++list_idx) {
+               for (cap_type = 0; cap_type < GUC_STATE_CAPTURE_TYPE_MAX; ++cap_type) {
+                       for (cap_class = 0; cap_class < GUC_CAPTURE_LIST_CLASS_MAX; ++cap_class) {
+                               const struct __guc_mmio_reg_descr_group *list;
+
+                               /*
+                                * Only class 0 is looked up for the global
+                                * type; every later class would be skipped
+                                * as well, so leave the class loop here.
+                                */
+                               if (cap_type == GUC_STATE_CAPTURE_TYPE_GLOBAL && cap_class > 0)
+                                       break;
+
+                               list = guc_capture_get_one_list(guc->capture->reglists,
+                                                               list_idx, cap_type, cap_class);
+                               total = list ? list->num_regs : 0;
+
+                               list = guc_capture_get_one_list(guc->capture->extlists,
+                                                               list_idx, cap_type, cap_class);
+                               if (list)
+                                       total += list->num_regs;
+
+                               if (highest < total)
+                                       highest = total;
+                       }
+               }
+       }
+
+       return highest ? highest : PREALLOC_NODES_DEFAULT_NUMREGS;
+}
+
+/*
+ * One-time setup of the pre-allocated node cache: record the per-node
+ * register capacity, then allocate the nodes.
+ */
+static void
+guc_capture_create_prealloc_nodes(struct xe_guc *guc)
+{
+       /* a non-zero capacity means the pre-alloc has already been done */
+       if (!guc->capture->max_mmio_per_node) {
+               guc->capture->max_mmio_per_node = guc_get_max_reglist_count(guc);
+               __guc_capture_create_prealloc_nodes(guc);
+       }
+}
+
 /*
  * xe_guc_capture_steered_list_init - Init steering register list
  * @guc: The GuC object
  *
- * Init steering register list for GuC register capture
+ * Init steering register list for GuC register capture, create pre-alloc node
  */
 void xe_guc_capture_steered_list_init(struct xe_guc *guc)
 {
@@ -765,6 +1479,7 @@ void xe_guc_capture_steered_list_init(struct xe_guc *guc)
         */
        guc_capture_alloc_steered_lists(guc);
        check_guc_capture_size(guc);
+       guc_capture_create_prealloc_nodes(guc);
 }
 
 /*
@@ -783,5 +1498,9 @@ int xe_guc_capture_init(struct xe_guc *guc)
                return -ENOMEM;
 
        guc->capture->reglists = guc_capture_get_device_reglist(guc_to_xe(guc));
+
+       INIT_LIST_HEAD(&guc->capture->outlist);
+       INIT_LIST_HEAD(&guc->capture->cachelist);
+
        return 0;
 }
index 25d263e2ab1d4d6468b66b538893d8ead0d9e34f..4acf44472a63c5715039da083b50cbeee26ea669 100644 (file)
@@ -37,6 +37,7 @@ xe_engine_class_to_guc_capture_class(enum xe_engine_class class)
        return xe_guc_class_to_capture_class(xe_engine_class_to_guc_class(class));
 }
 
+void xe_guc_capture_process(struct xe_guc *guc);
 int xe_guc_capture_getlist(struct xe_guc *guc, u32 owner, u32 type,
                           enum guc_capture_list_class_type capture_class, void **outptr);
 int xe_guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type,
index b7f80449bd83877e9e2caba72b127813a67d4274..c7673f56d4133241abaf8adc4e33ee50cf31006e 100644 (file)
@@ -1254,6 +1254,8 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
                /* Selftest only at the moment */
                break;
        case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+               ret = xe_guc_error_capture_handler(guc, payload, adj_len);
+               break;
        case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
                /* FIXME: Handle this */
                break;
index 5bab5a3948cc393b03bb415e0b28b5173bec45a4..93921f04153f8a21c1856b9d8cd2ad172d17f32e 100644 (file)
@@ -337,3 +337,38 @@ u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_typ
 
        return offset;
 }
+
+/**
+ * xe_guc_check_log_buf_overflow - Check if log buffer overflowed
+ * @log: The log object.
+ * @type: The log buffer type
+ * @full_cnt: The current buffer-full count
+ *
+ * Compare the buffer-full count against the raw count recorded by the
+ * previous call (stats[type].overflow); a mismatch indicates that the buffer
+ * overflowed in between. On a mismatch the raw count is stored and the
+ * difference is added to the running total in stats[type].sampled_overflow.
+ * The buffer-full count is a 4 bit counter, so when it has wrapped (new value
+ * smaller than the previous one) 16 is added to correct the total.
+ *
+ * The comparison must use the raw count rather than the running total: once
+ * the total exceeds what a 4 bit counter can hold it never matches @full_cnt
+ * again, and every subsequent call would report an overflow.
+ *
+ * Return: True if overflowed.
+ */
+bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log, enum guc_log_buffer_type type,
+                                  unsigned int full_cnt)
+{
+       unsigned int prev_full_cnt = log->stats[type].overflow;
+
+       if (full_cnt == prev_full_cnt)
+               return false;
+
+       log->stats[type].overflow = full_cnt;
+       /* unsigned arithmetic: a wrapped delta is corrected just below */
+       log->stats[type].sampled_overflow += full_cnt - prev_full_cnt;
+
+       if (full_cnt < prev_full_cnt) {
+               /* buffer_full_cnt is a 4 bit counter */
+               log->stats[type].sampled_overflow += 16;
+       }
+       xe_gt_notice(log_to_gt(log), "log buffer overflow\n");
+
+       return true;
+}
index 86501643431b93f899a58840324fe0a6a820703e..5b896f5fafafa5c54bfc7913b97cb75d6906a059 100644 (file)
@@ -54,5 +54,8 @@ xe_guc_log_get_level(struct xe_guc_log *log)
 u32 xe_guc_log_section_size_capture(struct xe_guc_log *log);
 u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type);
 u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type);
+bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log,
+                                  enum guc_log_buffer_type type,
+                                  unsigned int full_cnt);
 
 #endif
index 962b9edbd9eb28e3152250e57104416c804776f4..4d57f8322efc7ee84d900846811fb23b5e38a4a0 100644 (file)
@@ -7,6 +7,7 @@
 #define _XE_GUC_LOG_TYPES_H_
 
 #include <linux/types.h>
+#include "abi/guc_log_abi.h"
 
 #include "xe_uc_fw_types.h"
 
@@ -45,6 +46,12 @@ struct xe_guc_log {
        u32 level;
        /** @bo: XE BO for GuC log */
        struct xe_bo *bo;
+       /** @stats: logging related stats */
+       struct {
+               u32 sampled_overflow;
+               u32 overflow;
+               u32 flush;
+       } stats[GUC_LOG_BUFFER_TYPE_MAX];
 };
 
 #endif
index fc4008018d1fb1dd38b5903c3df5e847d4c70d67..4b6e953fb210a7527c31b42241bee1f020975ef6 100644 (file)
@@ -27,6 +27,7 @@
 #include "xe_gt_clock.h"
 #include "xe_gt_printk.h"
 #include "xe_guc.h"
+#include "xe_guc_capture.h"
 #include "xe_guc_ct.h"
 #include "xe_guc_exec_queue_types.h"
 #include "xe_guc_id_mgr.h"
@@ -824,7 +825,7 @@ static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
        xe_sched_job_put(job);
 }
 
-static int guc_read_stopped(struct xe_guc *guc)
+int xe_guc_read_stopped(struct xe_guc *guc)
 {
        return atomic_read(&guc->submission_state.stopped);
 }
@@ -846,7 +847,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
        set_min_preemption_timeout(guc, q);
        smp_rmb();
        ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) ||
-                                guc_read_stopped(guc), HZ * 5);
+                                xe_guc_read_stopped(guc), HZ * 5);
        if (!ret) {
                struct xe_gpu_scheduler *sched = &q->guc->sched;
 
@@ -972,7 +973,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
                 */
                ret = wait_event_timeout(guc->ct.wq,
                                         !exec_queue_pending_disable(q) ||
-                                        guc_read_stopped(guc), HZ * 5);
+                                        xe_guc_read_stopped(guc), HZ * 5);
                if (!ret) {
                        drm_warn(&xe->drm, "Schedule disable failed to respond");
                        xe_sched_submission_start(sched);
@@ -1040,8 +1041,8 @@ static void enable_scheduling(struct xe_exec_queue *q)
 
        ret = wait_event_timeout(guc->ct.wq,
                                 !exec_queue_pending_enable(q) ||
-                                guc_read_stopped(guc), HZ * 5);
-       if (!ret || guc_read_stopped(guc)) {
+                                xe_guc_read_stopped(guc), HZ * 5);
+       if (!ret || xe_guc_read_stopped(guc)) {
                xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
                set_exec_queue_banned(q);
                xe_gt_reset_async(q->gt);
@@ -1146,8 +1147,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
                         */
                        ret = wait_event_timeout(guc->ct.wq,
                                                 !exec_queue_pending_enable(q) ||
-                                                guc_read_stopped(guc), HZ * 5);
-                       if (!ret || guc_read_stopped(guc))
+                                                xe_guc_read_stopped(guc), HZ * 5);
+                       if (!ret || xe_guc_read_stopped(guc))
                                goto trigger_reset;
 
                        /*
@@ -1171,8 +1172,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
                smp_rmb();
                ret = wait_event_timeout(guc->ct.wq,
                                         !exec_queue_pending_disable(q) ||
-                                        guc_read_stopped(guc), HZ * 5);
-               if (!ret || guc_read_stopped(guc)) {
+                                        xe_guc_read_stopped(guc), HZ * 5);
+               if (!ret || xe_guc_read_stopped(guc)) {
 trigger_reset:
                        if (!ret)
                                xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond");
@@ -1361,7 +1362,7 @@ static void suspend_fence_signal(struct xe_exec_queue *q)
        struct xe_device *xe = guc_to_xe(guc);
 
        xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
-                 guc_read_stopped(guc));
+                 xe_guc_read_stopped(guc));
        xe_assert(xe, q->guc->suspend_pending);
 
        __suspend_fence_signal(q);
@@ -1375,9 +1376,9 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
        if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
            exec_queue_enabled(q)) {
                wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
-                          guc_read_stopped(guc));
+                          xe_guc_read_stopped(guc));
 
-               if (!guc_read_stopped(guc)) {
+               if (!xe_guc_read_stopped(guc)) {
                        s64 since_resume_ms =
                                ktime_ms_delta(ktime_get(),
                                               q->guc->resume_time);
@@ -1502,7 +1503,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 
        q->entity = &ge->entity;
 
-       if (guc_read_stopped(guc))
+       if (xe_guc_read_stopped(guc))
                xe_sched_stop(sched);
 
        mutex_unlock(&guc->submission_state.lock);
@@ -1658,7 +1659,7 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
        ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
                                               !READ_ONCE(q->guc->suspend_pending) ||
                                               exec_queue_killed(q) ||
-                                              guc_read_stopped(guc),
+                                              xe_guc_read_stopped(guc),
                                               HZ * 5);
 
        if (!ret) {
@@ -1784,7 +1785,7 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc)
 void xe_guc_submit_reset_wait(struct xe_guc *guc)
 {
        wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
-                  !guc_read_stopped(guc));
+                  !xe_guc_read_stopped(guc));
 }
 
 void xe_guc_submit_stop(struct xe_guc *guc)
@@ -1793,7 +1794,7 @@ void xe_guc_submit_stop(struct xe_guc *guc)
        unsigned long index;
        struct xe_device *xe = guc_to_xe(guc);
 
-       xe_assert(xe, guc_read_stopped(guc) == 1);
+       xe_assert(xe, xe_guc_read_stopped(guc) == 1);
 
        mutex_lock(&guc->submission_state.lock);
 
@@ -1832,7 +1833,7 @@ int xe_guc_submit_start(struct xe_guc *guc)
        unsigned long index;
        struct xe_device *xe = guc_to_xe(guc);
 
-       xe_assert(xe, guc_read_stopped(guc) == 1);
+       xe_assert(xe, xe_guc_read_stopped(guc) == 1);
 
        mutex_lock(&guc->submission_state.lock);
        atomic_dec(&guc->submission_state.stopped);
@@ -2023,6 +2024,36 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
        return 0;
 }
 
+/*
+ * xe_guc_error_capture_handler - Handler of GuC captured message
+ * @guc: The GuC object
+ * @msg: Pointer to the message payload
+ * @len: The message payload length, in dwords
+ *
+ * When GuC captured data is ready, GuC sends the message
+ * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to the host. This function is
+ * called first to validate the length and check the status before the
+ * captured data is processed. A "no space" status is only warned about;
+ * the capture data is processed regardless of the status value.
+ *
+ * Returns: 0 on success, -EPROTO if the message length is not
+ * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN
+ */
+int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+       u32 status;
+
+       if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) {
+               xe_gt_dbg(guc_to_gt(guc), "Invalid length %u\n", len);
+               return -EPROTO;
+       }
+
+       status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
+       if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
+               xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space\n");
+
+       xe_guc_capture_process(guc);
+
+       return 0;
+}
+
 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
                                               u32 len)
 {
index bdf8c9f3d24a2292c88872b382eec4e0e148b521..9b71a986c6ca69ebe1cf81df6ffbbfb2e2c82e6e 100644 (file)
@@ -20,12 +20,14 @@ void xe_guc_submit_stop(struct xe_guc *guc);
 int xe_guc_submit_start(struct xe_guc *guc);
 void xe_guc_submit_wedge(struct xe_guc *guc);
 
+int xe_guc_read_stopped(struct xe_guc *guc);
 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len);
 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
                                               u32 len);
 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len);
 
 struct xe_guc_submit_exec_queue_snapshot *
 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);