www.infradead.org Git - users/dwmw2/linux.git/commitdiff
s390/pci: Report PCI error recovery results via SCLP
author: Niklas Schnelle <schnelle@linux.ibm.com>
Fri, 13 Dec 2024 13:47:28 +0000 (14:47 +0100)
committer: Alexander Gordeev <agordeev@linux.ibm.com>
Mon, 16 Dec 2024 15:14:26 +0000 (16:14 +0100)
Add a mechanism with which the status of PCI error recovery runs
is reported to the platform. Together with the status, supply additional
information that may aid in problem determination.

Reviewed-by: Halil Pasic <pasic@linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
arch/s390/include/asm/sclp.h
arch/s390/pci/Makefile
arch/s390/pci/pci_event.c
arch/s390/pci/pci_report.c [new file with mode: 0644]
arch/s390/pci/pci_report.h [new file with mode: 0644]
drivers/s390/char/sclp.h
drivers/s390/char/sclp_pci.c

index eb00fa1771da07a564dbc35bb9baba24c3ecdd95..3267631b5adc3252bb485fd5ebdc297e410095ae 100644 (file)
 /* 24 + 16 * SCLP_MAX_CORES */
 #define EXT_SCCB_READ_CPU      (3 * PAGE_SIZE)
 
+#define SCLP_ERRNOTIFY_AQ_RESET                        0
+#define SCLP_ERRNOTIFY_AQ_REPAIR               1
+#define SCLP_ERRNOTIFY_AQ_INFO_LOG             2
+#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA          3
+
 #ifndef __ASSEMBLY__
 #include <linux/uio.h>
 #include <asm/chpid.h>
@@ -111,6 +116,34 @@ struct sclp_info {
 };
 extern struct sclp_info sclp;
 
+struct sccb_header {   /* leading member of the *_sccb structs, e.g. err_notify_sccb */
+       u16     length;                 /* presumably the SCCB length in bytes - TODO confirm */
+       u8      function_code;
+       u8      control_mask[3];
+       u16     response_code;          /* presumably filled in by the SCLP - TODO confirm */
+} __packed;    /* 8 bytes, no padding */
+
+struct evbuf_header {  /* leading member of an event buffer, e.g. err_notify_evbuf */
+       u16     length;                 /* presumably the event buffer length - TODO confirm */
+       u8      type;                   /* presumably the event type - TODO confirm */
+       u8      flags;
+       u16     _reserved;
+} __packed;    /* 6 bytes, no padding */
+
+struct err_notify_evbuf {      /* event buffer embedded in struct err_notify_sccb */
+       struct evbuf_header header;
+       u8 action;      /* presumably one of SCLP_ERRNOTIFY_AQ_* - TODO confirm */
+       u8 atype;       /* presumably the adapter type, e.g. SCLP_ATYPE_PCI - TODO confirm */
+       u32 fh;         /* presumably the PCI function handle (cf. zdev->fh) - TODO confirm */
+       u32 fid;        /* presumably the PCI function ID (cf. zdev->fid) - TODO confirm */
+       u8 data[];      /* variable-length payload following the fixed fields */
+} __packed;
+
+struct err_notify_sccb {       /* SCCB header followed by one err_notify_evbuf */
+       struct sccb_header header;
+       struct err_notify_evbuf evbuf;  /* ends in a flexible array, so it must stay last */
+} __packed;
+
 struct zpci_report_error_header {
        u8 version;     /* Interface version byte */
        u8 action;      /* Action qualifier byte
index 2c21f0394c9abe96013ba90c4f263e3f7024f3d5..df73c5182990ad3ae4ed5a785953011feb9a093c 100644 (file)
@@ -5,6 +5,6 @@
 
 obj-$(CONFIG_PCI)      += pci.o pci_irq.o pci_clp.o \
                           pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
-                          pci_bus.o pci_kvm_hook.o
+                          pci_bus.o pci_kvm_hook.o pci_report.o
 obj-$(CONFIG_PCI_IOV)  += pci_iov.o
 obj-$(CONFIG_SYSFS)    += pci_sysfs.o
index 7f7b732b3f3efaecf9fd17420f911414d9d0ef7b..7bd7721c1239a20e13cd3c618cce6679f36b0d06 100644 (file)
@@ -16,6 +16,7 @@
 #include <asm/sclp.h>
 
 #include "pci_bus.h"
+#include "pci_report.h"
 
 /* Content Code Description for PCI Function Error */
 struct zpci_ccdf_err {
@@ -169,6 +170,8 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
 static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
 {
        pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+       struct zpci_dev *zdev = to_zpci(pdev);
+       char *status_str = "success";
        struct pci_driver *driver;
 
        /*
@@ -186,29 +189,37 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
        if (is_passed_through(pdev)) {
                pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
                        pci_name(pdev));
+               status_str = "failed (pass-through)";
                goto out_unlock;
        }
 
        driver = to_pci_driver(pdev->dev.driver);
        if (!is_driver_supported(driver)) {
-               if (!driver)
+               if (!driver) {
                        pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
                                pci_name(pdev));
-               else
+                       status_str = "failed (no driver)";
+               } else {
                        pr_info("%s: The %s driver bound to the device does not support error recovery\n",
                                pci_name(pdev),
                                driver->name);
+                       status_str = "failed (no driver support)";
+               }
                goto out_unlock;
        }
 
        ers_res = zpci_event_notify_error_detected(pdev, driver);
-       if (ers_result_indicates_abort(ers_res))
+       if (ers_result_indicates_abort(ers_res)) {
+               status_str = "failed (abort on detection)";
                goto out_unlock;
+       }
 
        if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
                ers_res = zpci_event_do_error_state_clear(pdev, driver);
-               if (ers_result_indicates_abort(ers_res))
+               if (ers_result_indicates_abort(ers_res)) {
+                       status_str = "failed (abort on MMIO enable)";
                        goto out_unlock;
+               }
        }
 
        if (ers_res == PCI_ERS_RESULT_NEED_RESET)
@@ -217,6 +228,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
        if (ers_res != PCI_ERS_RESULT_RECOVERED) {
                pr_err("%s: Automatic recovery failed; operator intervention is required\n",
                       pci_name(pdev));
+               status_str = "failed (driver can't recover)";
                goto out_unlock;
        }
 
@@ -225,6 +237,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
                driver->err_handler->resume(pdev);
 out_unlock:
        pci_dev_unlock(pdev);
+       zpci_report_status(zdev, "recovery", status_str);
 
        return ers_res;
 }
diff --git a/arch/s390/pci/pci_report.c b/arch/s390/pci/pci_report.c
new file mode 100644 (file)
index 0000000..2754c9c
--- /dev/null
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Author(s):
+ *   Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/sprintf.h>
+#include <linux/pci.h>
+
+#include <asm/sclp.h>
+
+#include "pci_report.h"
+
+#define ZPCI_ERR_LOG_ID_KERNEL_REPORT 0x4714
+
+struct zpci_report_error_data {        /* payload built by zpci_report_status() */
+       u64 timestamp;          /* set from ktime_get_clocktai_seconds() */
+       u64 err_log_id;         /* set to ZPCI_ERR_LOG_ID_KERNEL_REPORT */
+       char log_data[];        /* "key: value\n" text lines, at most ZPCI_REPORT_DATA_SIZE bytes */
+} __packed;
+
+#define ZPCI_REPORT_SIZE       (PAGE_SIZE - sizeof(struct err_notify_sccb))
+#define ZPCI_REPORT_DATA_SIZE  (ZPCI_REPORT_SIZE - sizeof(struct zpci_report_error_data))
+
+struct zpci_report_error {     /* layout of the zeroed page used as report buffer */
+       struct zpci_report_error_header header; /* this is what gets passed to sclp_pci_report() */
+       struct zpci_report_error_data data;     /* ends in a flexible array, so it must stay last */
+} __packed;
+
+static const char *zpci_state_str(pci_channel_state_t state)
+{
+       switch (state) {        /* label printed on the report's "state:" line */
+       case pci_channel_io_normal:
+               return "normal";
+       case pci_channel_io_frozen:
+               return "frozen";
+       case pci_channel_io_perm_failure:
+               return "permanent-failure";
+       default:
+               return "invalid";
+       }
+}
+
+/**
+ * zpci_report_status - Report the status of operations on a PCI device
+ * @zdev:      The PCI device for which to report status
+ * @operation: A string representing the operation reported
+ * @status:    A string representing the status of the operation
+ *
+ * This function creates a human readable report about an operation such as
+ * PCI device recovery and forwards this to the platform using the SCLP Write
+ * Event Data mechanism. Besides the operation and status strings the report
+ * also contains additional information about the device deemed useful for
+ * debug such as the currently bound device driver, if any, and error state.
+ *
+ * Return: 0 on success, an error code < 0 otherwise.
+ */
+int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status)
+{
+       struct zpci_report_error *report;
+       struct pci_driver *driver = NULL;
+       struct pci_dev *pdev = NULL;
+       char *buf, *end;
+       int ret;
+
+       if (!zdev || !zdev->zbus)
+               return -ENODEV;
+
+       /* Protected virtualization hosts get nothing from us */
+       if (prot_virt_guest)
+               return -ENODATA;
+
+       report = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!report)
+               return -ENOMEM;
+       if (zdev->zbus->bus)
+               pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+       if (pdev)
+               driver = to_pci_driver(pdev->dev.driver);
+
+       buf = report->data.log_data;
+       end = report->data.log_data + ZPCI_REPORT_DATA_SIZE;
+       buf += scnprintf(buf, end - buf, "report: %s\n", operation);
+       buf += scnprintf(buf, end - buf, "status: %s\n", status);
+       buf += scnprintf(buf, end - buf, "state: %s\n",
+                        (pdev) ? zpci_state_str(pdev->error_state) : "n/a");
+       buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a");
+
+       report->header.version = 1;
+       report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG;
+       report->header.length = buf - (char *)&report->data;
+       report->data.timestamp = ktime_get_clocktai_seconds();
+       report->data.err_log_id = ZPCI_ERR_LOG_ID_KERNEL_REPORT;
+
+       ret = sclp_pci_report(&report->header, zdev->fh, zdev->fid);
+       if (ret)
+               pr_err("Reporting PCI status failed with code %d\n", ret);
+       else
+               pr_info("Reported PCI device status\n");
+
+       pci_dev_put(pdev);      /* pairs with pci_get_slot(); a NULL pdev is a no-op */
+       free_page((unsigned long)report);
+       return ret;
+}
diff --git a/arch/s390/pci/pci_report.h b/arch/s390/pci/pci_report.h
new file mode 100644 (file)
index 0000000..e08003d
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Author(s):
+ *   Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+#ifndef __S390_PCI_REPORT_H
+#define __S390_PCI_REPORT_H
+
+struct zpci_dev;
+
+int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status);
+
+#endif /* __S390_PCI_REPORT_H */
index 6c91e422927ff5e81694cf15586dcd3a1c4ea83f..73731fa2594e9e9a74234a9f7bc783f3164394dc 100644 (file)
@@ -85,13 +85,6 @@ typedef unsigned int sclp_cmdw_t;
 
 typedef u64 sccb_mask_t;
 
-struct sccb_header {
-       u16     length;
-       u8      function_code;
-       u8      control_mask[3];
-       u16     response_code;
-} __attribute__((packed));
-
 struct init_sccb {
        struct sccb_header header;
        u16 _reserved;
@@ -238,13 +231,6 @@ struct gds_vector {
        u16     gds_id;
 } __attribute__((packed));
 
-struct evbuf_header {
-       u16     length;
-       u8      type;
-       u8      flags;
-       u16     _reserved;
-} __attribute__((packed));
-
 struct sclp_req {
        struct list_head list;          /* list_head for request queueing. */
        sclp_cmdw_t command;            /* sclp command to execute */
index c3466a8c56bb5d1a1c62f5856f8ec152017fba02..56400886f7fca9d0202b27f87d2465976bbc44b9 100644 (file)
 
 #define SCLP_ATYPE_PCI                         2
 
-#define SCLP_ERRNOTIFY_AQ_RESET                        0
-#define SCLP_ERRNOTIFY_AQ_REPAIR               1
-#define SCLP_ERRNOTIFY_AQ_INFO_LOG             2
-#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA          3
-
 static DEFINE_MUTEX(sclp_pci_mutex);
 static struct sclp_register sclp_pci_event = {
        .send_mask = EVTYP_ERRNOTIFY_MASK,
 };
 
-struct err_notify_evbuf {
-       struct evbuf_header header;
-       u8 action;
-       u8 atype;
-       u32 fh;
-       u32 fid;
-       u8 data[];
-} __packed;
-
-struct err_notify_sccb {
-       struct sccb_header header;
-       struct err_notify_evbuf evbuf;
-} __packed;
-
 struct pci_cfg_sccb {
        struct sccb_header header;
        u8 atype;               /* adapter type */