www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
drm/xe/pf: Track adverse events notifications from GuC
author Michal Wajdeczko <michal.wajdeczko@intel.com>
Tue, 14 May 2024 19:00:14 +0000 (21:00 +0200)
committer Michal Wajdeczko <michal.wajdeczko@intel.com>
Thu, 16 May 2024 16:04:51 +0000 (18:04 +0200)
When thresholds used to monitor VF activity are configured,
the GuC may send GUC2PF_ADVERSE_EVENT messages informing the
PF driver about exceeded thresholds. Start handling such messages.

Reviewed-by: Piotr Piórkowski <piotr.piorkowski@intel.com>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-8-michal.wajdeczko@intel.com
drivers/gpu/drm/xe/Makefile
drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c [new file with mode: 0644]
drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h [new file with mode: 0644]
drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h [new file with mode: 0644]
drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
drivers/gpu/drm/xe/xe_guc_ct.c

index 6acde66f0827ce21a7d282877040dc5edcbc847b..8fe7bb80501f0f8ca156e9ef587416942591ea06 100644 (file)
@@ -164,6 +164,7 @@ xe-$(CONFIG_PCI_IOV) += \
        xe_gt_sriov_pf_config.o \
        xe_gt_sriov_pf_control.o \
        xe_gt_sriov_pf_debugfs.o \
+       xe_gt_sriov_pf_monitor.o \
        xe_gt_sriov_pf_policy.o \
        xe_gt_sriov_pf_service.o \
        xe_lmtt.o \
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c
new file mode 100644 (file)
index 0000000..7d532bd
--- /dev/null
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include "abi/guc_actions_sriov_abi.h"
+#include "abi/guc_messages_abi.h"
+
+#include "xe_gt_sriov_pf_config.h"
+#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_monitor.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_guc_klv_helpers.h"
+#include "xe_guc_klv_thresholds_set.h"
+
+/**
+ * xe_gt_sriov_pf_monitor_flr - Reset VF monitoring data after a VF FLR.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * Zeroes every adverse event counter kept for the given VF on this GT.
+ * This function is for PF only.
+ */
+void xe_gt_sriov_pf_monitor_flr(struct xe_gt *gt, u32 vfid)
+{
+       unsigned int *counters;
+       int idx = 0;
+
+       xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+       xe_gt_sriov_pf_assert_vfid(gt, vfid);
+
+       /* only look up the per-VF slot once the identifier was checked */
+       counters = gt->sriov.pf.vfs[vfid].monitor.guc.events;
+
+       while (idx < XE_GUC_KLV_NUM_THRESHOLDS)
+               counters[idx++] = 0;
+}
+
+/*
+ * Bump the adverse event counter of VF @vfid for threshold index @e.
+ *
+ * Both the VF identifier and the threshold index are used to index per-VF
+ * arrays, so each is covered by an assertion before the counter is touched
+ * (same checks as done in xe_gt_sriov_pf_monitor_flr()).
+ */
+static void pf_update_event_counter(struct xe_gt *gt, u32 vfid,
+                                   enum xe_guc_klv_threshold_index e)
+{
+       xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+       xe_gt_sriov_pf_assert_vfid(gt, vfid);
+       xe_gt_assert(gt, e < XE_GUC_KLV_NUM_THRESHOLDS);
+
+       gt->sriov.pf.vfs[vfid].monitor.guc.events[e]++;
+}
+
+/*
+ * Account a single "threshold exceeded" notification for VF @vfid.
+ *
+ * @threshold is the threshold key as reported by the GuC. A key that cannot
+ * be translated to a threshold index is logged and rejected with -ENOTCONN,
+ * otherwise the event is reported with xe_gt_sriov_dbg() and counted.
+ *
+ * Return: 0 on success or -ENOTCONN if the key is not recognized.
+ */
+static int pf_handle_vf_threshold_event(struct xe_gt *gt, u32 vfid, u32 threshold)
+{
+       int index = xe_guc_klv_threshold_key_to_index(threshold);
+       char name[8];
+
+       xe_sriov_function_name(vfid, name, sizeof(name));
+
+       /* was there a new KEY added that we missed? */
+       if (unlikely(index < 0)) {
+               xe_gt_sriov_notice(gt, "unknown threshold key %#x reported for %s\n",
+                                  threshold, name);
+               return -ENOTCONN;
+       }
+
+       /* keep the config lookup inside the macro arguments, as before */
+       xe_gt_sriov_dbg(gt, "%s exceeded threshold %u %s\n",
+                       name, xe_gt_sriov_pf_config_get_threshold(gt, vfid, index),
+                       xe_guc_klv_key_to_string(threshold));
+
+       pf_update_event_counter(gt, vfid, index);
+
+       return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_monitor_process_guc2pf - Process a GUC2PF_ADVERSE_EVENT message.
+ * @gt: the &xe_gt
+ * @msg: G2H event message
+ * @len: length of the message
+ *
+ * Validates the message, extracts the VF identifier and the threshold key
+ * from it and accounts the event for that VF.
+ *
+ * This function is intended for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
+{
+       u32 vfid, threshold;
+
+       xe_gt_assert(gt, len >= GUC_HXG_MSG_MIN_LEN);
+       xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
+       xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
+       xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
+                    GUC_ACTION_GUC2PF_ADVERSE_EVENT);
+
+       /* this notification is only expected when running as PF */
+       if (unlikely(!IS_SRIOV_PF(gt_to_xe(gt))))
+               return -EPROTO;
+
+       /* bits covered by the MBZ mask must not be set */
+       if (unlikely(FIELD_GET(GUC2PF_ADVERSE_EVENT_EVENT_MSG_0_MBZ, msg[0])))
+               return -EPFNOSUPPORT;
+
+       /* reject short messages before msg[1] and msg[2] are read */
+       if (unlikely(len < GUC2PF_ADVERSE_EVENT_EVENT_MSG_LEN))
+               return -EPROTO;
+
+       vfid = FIELD_GET(GUC2PF_ADVERSE_EVENT_EVENT_MSG_1_VFID, msg[1]);
+       if (unlikely(vfid > xe_gt_sriov_pf_get_totalvfs(gt)))
+               return -EINVAL;
+
+       threshold = FIELD_GET(GUC2PF_ADVERSE_EVENT_EVENT_MSG_2_THRESHOLD, msg[2]);
+
+       return pf_handle_vf_threshold_event(gt, vfid, threshold);
+}
+
+/**
+ * xe_gt_sriov_pf_monitor_print_events - Print adverse events counters.
+ * @gt: the &xe_gt to print events from
+ * @p: the &drm_printer
+ *
+ * Print adverse events counters for all VFs (identifiers 1..totalvfs;
+ * entry 0 is not printed).
+ * VFs with no events are not printed, unless CONFIG_DRM_XE_DEBUG_SRIOV
+ * is enabled, in which case every VF is printed.
+ *
+ * Each printed VF takes one line: "VF<n>:" and a tab, followed by one
+ * "<name>:<count> " pair per threshold listed by
+ * MAKE_XE_GUC_KLV_THRESHOLDS_SET(). The local __format and __value macros
+ * expand that list into the format string and the matching arguments.
+ *
+ * This function can only be called on PF.
+ */
+void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p)
+{
+       unsigned int n, total_vfs = xe_gt_sriov_pf_get_totalvfs(gt);
+       const struct xe_gt_sriov_monitor *data;
+       int e;
+
+       xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+       for (n = 1; n <= total_vfs; n++) {
+               data = &gt->sriov.pf.vfs[n].monitor;
+
+               for (e = 0; e < XE_GUC_KLV_NUM_THRESHOLDS; e++)
+                       if (data->guc.events[e])
+                               break;
+
+               /*
+                * skip empty unless in debug mode
+                *
+                * The scan above stops at the first non-zero counter, so
+                * e reaching XE_GUC_KLV_NUM_THRESHOLDS means that all
+                * counters of this VF are zero.
+                */
+               if (e >= XE_GUC_KLV_NUM_THRESHOLDS &&
+                   !IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV))
+                       continue;
+
+#define __format(...) "%s:%u "
+#define __value(TAG, NAME, ...) , #NAME, data->guc.events[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)]
+
+               drm_printf(p, "VF%u:\t" MAKE_XE_GUC_KLV_THRESHOLDS_SET(__format) "\n",
+                          n MAKE_XE_GUC_KLV_THRESHOLDS_SET(__value));
+
+#undef __format
+#undef __value
+       }
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h
new file mode 100644 (file)
index 0000000..7ca9351
--- /dev/null
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_MONITOR_H_
+#define _XE_GT_SRIOV_PF_MONITOR_H_
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_gt;
+
+void xe_gt_sriov_pf_monitor_flr(struct xe_gt *gt, u32 vfid);
+void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p);
+
+#ifdef CONFIG_PCI_IOV
+int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len);
+#else /* !CONFIG_PCI_IOV: xe_gt_sriov_pf_monitor.o is not built, use a stub */
+static inline int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
+{
+       return -EPROTO; /* the message is always rejected by the stub */
+}
+#endif /* CONFIG_PCI_IOV */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h
new file mode 100644 (file)
index 0000000..e27c030
--- /dev/null
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_MONITOR_TYPES_H_
+#define _XE_GT_SRIOV_PF_MONITOR_TYPES_H_
+
+#include "xe_guc_klv_thresholds_set_types.h"
+
+/**
+ * struct xe_gt_sriov_monitor - GT level per-VF monitoring data.
+ *
+ * Embedded as the @monitor member of &struct xe_gt_sriov_metadata.
+ */
+struct xe_gt_sriov_monitor {
+       /** @guc: monitoring data related to the GuC. */
+       struct {
+               /**
+                * @guc.events: number of adverse events reported by the GuC.
+                *
+                * One counter per threshold, indexed by
+                * &enum xe_guc_klv_threshold_index. A counter is incremented
+                * for each handled GUC2PF_ADVERSE_EVENT and all of them are
+                * zeroed by xe_gt_sriov_pf_monitor_flr().
+                */
+               unsigned int events[XE_GUC_KLV_NUM_THRESHOLDS];
+       } guc;
+};
+
+#endif
index 880754f3e215d1b34fdbdbf5bab71a7c922d2ae5..40cbaea3ef44ece73f7efe34f21b9b68e43ebd01 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/types.h>
 
 #include "xe_gt_sriov_pf_config_types.h"
+#include "xe_gt_sriov_pf_monitor_types.h"
 #include "xe_gt_sriov_pf_policy_types.h"
 #include "xe_gt_sriov_pf_service_types.h"
 
 struct xe_gt_sriov_metadata {
        /** @config: per-VF provisioning data. */
        struct xe_gt_sriov_config config;
+
+       /** @monitor: per-VF monitoring data (GuC adverse event counters). */
+       struct xe_gt_sriov_monitor monitor;
+
        /** @version: negotiated VF/PF ABI version */
        struct xe_gt_sriov_pf_service_version version;
 };
index 0151d29b3c580e9441e3d90a68f0879b6a4823f9..c1f258348f5c40bcbb15ba37c007d34b20d67723 100644 (file)
@@ -22,6 +22,7 @@
 #include "xe_gt_pagefault.h"
 #include "xe_gt_printk.h"
 #include "xe_gt_sriov_pf_control.h"
+#include "xe_gt_sriov_pf_monitor.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_guc.h"
 #include "xe_guc_relay.h"
@@ -1071,6 +1072,9 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
        case GUC_ACTION_GUC2PF_VF_STATE_NOTIFY:
                ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len);
                break;
+       case GUC_ACTION_GUC2PF_ADVERSE_EVENT:
+               ret = xe_gt_sriov_pf_monitor_process_guc2pf(gt, hxg, hxg_len);
+               break;
        default:
                xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
        }