www.infradead.org Git - users/hch/misc.git/commitdiff
drm/xe/xe_survivability: Refactor survivability mode
author Riana Tauro <riana.tauro@intel.com>
Tue, 26 Aug 2025 06:34:12 +0000 (12:04 +0530)
committer Rodrigo Vivi <rodrigo.vivi@intel.com>
Tue, 26 Aug 2025 14:11:34 +0000 (10:11 -0400)
Refactor survivability mode code to support both boot
and runtime survivability.

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Link: https://lore.kernel.org/r/20250826063419.3022216-6-riana.tauro@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
drivers/gpu/drm/xe/xe_device.c
drivers/gpu/drm/xe/xe_heci_gsc.c
drivers/gpu/drm/xe/xe_pci.c
drivers/gpu/drm/xe/xe_survivability_mode.c
drivers/gpu/drm/xe/xe_survivability_mode.h
drivers/gpu/drm/xe/xe_survivability_mode_types.h

index ab9f3a57dcd9a7e6cf4f7c267e2d2275b8a6b3f8..d6f10ee6730d046e60d9c86957fa61f956080c44 100644 (file)
@@ -743,7 +743,7 @@ int xe_device_probe_early(struct xe_device *xe)
                 * possible, but still return the previous error for error
                 * propagation
                 */
-               err = xe_survivability_mode_enable(xe);
+               err = xe_survivability_mode_boot_enable(xe);
                if (err)
                        return err;
 
index 6d7b627241268be7aab8d5c42810b7315be75fa0..a415ca4887914008a129c80fdcc3748d7d7aa434 100644 (file)
@@ -197,7 +197,7 @@ int xe_heci_gsc_init(struct xe_device *xe)
        if (ret)
                return ret;
 
-       if (!def->use_polling && !xe_survivability_mode_is_enabled(xe)) {
+       if (!def->use_polling && !xe_survivability_mode_is_boot_enabled(xe)) {
                ret = heci_gsc_irq_setup(xe);
                if (ret)
                        return ret;
index a8bab19111c38843bcbb4c7f875b3254960de38c..15a863491cddbb22f21d4acc86b3d622fee5289c 100644 (file)
@@ -784,7 +784,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
        if (IS_SRIOV_PF(xe))
                xe_pci_sriov_configure(pdev, 0);
 
-       if (xe_survivability_mode_is_enabled(xe))
+       if (xe_survivability_mode_is_boot_enabled(xe))
                return;
 
        xe_device_remove(xe);
@@ -866,7 +866,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
         * flashed through mei. Return success, if survivability mode
         * is enabled due to pcode failure or configfs being set
         */
-       if (xe_survivability_mode_is_enabled(xe))
+       if (xe_survivability_mode_is_boot_enabled(xe))
                return 0;
 
        if (err)
@@ -960,7 +960,7 @@ static int xe_pci_suspend(struct device *dev)
        struct xe_device *xe = pdev_to_xe_device(pdev);
        int err;
 
-       if (xe_survivability_mode_is_enabled(xe))
+       if (xe_survivability_mode_is_boot_enabled(xe))
                return -EBUSY;
 
        err = xe_pm_suspend(xe);
index 41705f5d52e3a3ab697396decd68830e2f38e8f0..4a2d1cff65d2b9651de66e73a33ff2e934bf9652 100644 (file)
@@ -121,6 +121,14 @@ static void log_survivability_info(struct pci_dev *pdev)
        }
 }
 
+static int check_boot_failure(struct xe_device *xe)
+{
+       struct xe_survivability *survivability = &xe->survivability;
+
+       return survivability->boot_status == NON_CRITICAL_FAILURE ||
+               survivability->boot_status == CRITICAL_FAILURE;
+}
+
 static ssize_t survivability_mode_show(struct device *dev,
                                       struct device_attribute *attr, char *buff)
 {
@@ -130,6 +138,11 @@ static ssize_t survivability_mode_show(struct device *dev,
        struct xe_survivability_info *info = survivability->info;
        int index = 0, count = 0;
 
+       count += sysfs_emit_at(buff, count, "Survivability mode type: Boot\n");
+
+       if (!check_boot_failure(xe))
+               return count;
+
        for (index = 0; index < MAX_SCRATCH_MMIO; index++) {
                if (info[index].reg)
                        count += sysfs_emit_at(buff, count, "%s: 0x%x - 0x%x\n", info[index].name,
@@ -151,12 +164,11 @@ static void xe_survivability_mode_fini(void *arg)
        sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
 }
 
-static int enable_survivability_mode(struct pci_dev *pdev)
+static int create_survivability_sysfs(struct pci_dev *pdev)
 {
        struct device *dev = &pdev->dev;
        struct xe_device *xe = pdev_to_xe_device(pdev);
-       struct xe_survivability *survivability = &xe->survivability;
-       int ret = 0;
+       int ret;
 
        /* create survivability mode sysfs */
        ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr);
@@ -170,6 +182,20 @@ static int enable_survivability_mode(struct pci_dev *pdev)
        if (ret)
                return ret;
 
+       return 0;
+}
+
+static int enable_boot_survivability_mode(struct pci_dev *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct xe_device *xe = pdev_to_xe_device(pdev);
+       struct xe_survivability *survivability = &xe->survivability;
+       int ret = 0;
+
+       ret = create_survivability_sysfs(pdev);
+       if (ret)
+               return ret;
+
        /* Make sure xe_heci_gsc_init() knows about survivability mode */
        survivability->mode = true;
 
@@ -192,15 +218,36 @@ err:
        return ret;
 }
 
+static int init_survivability_mode(struct xe_device *xe)
+{
+       struct xe_survivability *survivability = &xe->survivability;
+       struct xe_survivability_info *info;
+
+       survivability->size = MAX_SCRATCH_MMIO;
+
+       info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),
+                           GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+
+       survivability->info = info;
+
+       populate_survivability_info(xe);
+
+       return 0;
+}
+
 /**
- * xe_survivability_mode_is_enabled - check if survivability mode is enabled
+ * xe_survivability_mode_is_boot_enabled - check if boot survivability mode is enabled
  * @xe: xe device instance
  *
- * Returns true if in survivability mode, false otherwise
+ * Returns true if survivability mode is enabled and its type is boot, else false
  */
-bool xe_survivability_mode_is_enabled(struct xe_device *xe)
+bool xe_survivability_mode_is_boot_enabled(struct xe_device *xe)
 {
-       return xe->survivability.mode;
+       struct xe_survivability *survivability = &xe->survivability;
+
+       return survivability->mode && survivability->type == XE_SURVIVABILITY_TYPE_BOOT;
 }
 
 /**
@@ -241,44 +288,38 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe)
        data = xe_mmio_read32(mmio, PCODE_SCRATCH(0));
        survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data);
 
-       return survivability->boot_status == NON_CRITICAL_FAILURE ||
-               survivability->boot_status == CRITICAL_FAILURE;
+       return check_boot_failure(xe);
 }
 
 /**
- * xe_survivability_mode_enable - Initialize and enable the survivability mode
+ * xe_survivability_mode_boot_enable - Initialize and enable boot survivability mode
  * @xe: xe device instance
  *
- * Initialize survivability information and enable survivability mode
+ * Initialize survivability information and enable boot survivability mode
  *
- * Return: 0 if survivability mode is enabled or not requested; negative error
+ * Return: 0 if boot survivability mode is enabled or not requested, negative error
  * code otherwise.
  */
-int xe_survivability_mode_enable(struct xe_device *xe)
+int xe_survivability_mode_boot_enable(struct xe_device *xe)
 {
        struct xe_survivability *survivability = &xe->survivability;
-       struct xe_survivability_info *info;
        struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+       int ret;
 
        if (!xe_survivability_mode_is_requested(xe))
                return 0;
 
-       survivability->size = MAX_SCRATCH_MMIO;
-
-       info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),
-                           GFP_KERNEL);
-       if (!info)
-               return -ENOMEM;
-
-       survivability->info = info;
-
-       populate_survivability_info(xe);
+       ret = init_survivability_mode(xe);
+       if (ret)
+               return ret;
 
-       /* Only log debug information and exit if it is a critical failure */
+       /* Log breadcrumbs but do not enter survivability mode for Critical boot errors */
        if (survivability->boot_status == CRITICAL_FAILURE) {
                log_survivability_info(pdev);
                return -ENXIO;
        }
 
-       return enable_survivability_mode(pdev);
+       survivability->type = XE_SURVIVABILITY_TYPE_BOOT;
+
+       return enable_boot_survivability_mode(pdev);
 }
index 02231c2bf0083d0b9afb4e36e2d39a0842c7c445..f6ee283ea5e800f2c7fc7163d07dff19c59501eb 100644 (file)
@@ -10,8 +10,8 @@
 
 struct xe_device;
 
-int xe_survivability_mode_enable(struct xe_device *xe);
-bool xe_survivability_mode_is_enabled(struct xe_device *xe);
+int xe_survivability_mode_boot_enable(struct xe_device *xe);
+bool xe_survivability_mode_is_boot_enabled(struct xe_device *xe);
 bool xe_survivability_mode_is_requested(struct xe_device *xe);
 
 #endif /* _XE_SURVIVABILITY_MODE_H_ */
index 19d433e253dfe91625f27fdf8ab3eab1a6b613ed..5dce393498da5d3f680d84b93aaea356a7b046aa 100644 (file)
@@ -9,6 +9,10 @@
 #include <linux/limits.h>
 #include <linux/types.h>
 
+enum xe_survivability_type {
+       XE_SURVIVABILITY_TYPE_BOOT,
+};
+
 struct xe_survivability_info {
        char name[NAME_MAX];
        u32 reg;
@@ -30,6 +34,9 @@ struct xe_survivability {
 
        /** @mode: boolean to indicate survivability mode */
        bool mode;
+
+       /** @type: survivability type */
+       enum xe_survivability_type type;
 };
 
 #endif /* _XE_SURVIVABILITY_MODE_TYPES_H_ */