www.infradead.org Git - users/hch/misc.git/commitdiff
eth: fbnic: add OTP health reporter
author Jakub Kicinski <kuba@kernel.org>
Tue, 16 Sep 2025 23:14:20 +0000 (16:14 -0700)
committer Paolo Abeni <pabeni@redhat.com>
Thu, 18 Sep 2025 09:37:23 +0000 (11:37 +0200)
OTP memory ("fuses") is used for secure boot and anti-rollback
protection. The OTP memory is ECC protected. Check for its health
periodically to notice when the chip is starting to go bad.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250916231420.1693955-10-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Documentation/networking/device_drivers/ethernet/meta/fbnic.rst
drivers/net/ethernet/meta/fbnic/fbnic.h
drivers/net/ethernet/meta/fbnic/fbnic_csr.h
drivers/net/ethernet/meta/fbnic/fbnic_devlink.c
drivers/net/ethernet/meta/fbnic/fbnic_pci.c

index 8b7ae9975bf7bf7c51d8d80b5c6ad0a8e70b4290..1e82f90d9ad2f5a12c8bb4f02a8752f10c1a4eb4 100644 (file)
@@ -81,6 +81,13 @@ happened since power cycle - a snapshot of the FW memory. Diagnose callback
 shows FW uptime based on the most recently received heartbeat message
 (the crashes are detected by checking if uptime goes down).
 
+otp reporter
+~~~~~~~~~~~~
+
+OTP memory ("fuses") is used for secure boot and anti-rollback
+protection. The OTP memory is ECC protected; ECC errors indicate
+either a manufacturing defect or the part deteriorating with age.
+
 Statistics
 ----------
 
index 5f99976de0bbce2748dd7d2500316df97371cb39..b03e5a3d51445c5315b88e289b6f0d8b0457838f 100644 (file)
@@ -28,6 +28,7 @@ struct fbnic_dev {
        struct dentry *dbg_fbd;
        struct device *hwmon;
        struct devlink_health_reporter *fw_reporter;
+       struct devlink_health_reporter *otp_reporter;
 
        u32 __iomem *uc_addr0;
        u32 __iomem *uc_addr4;
@@ -166,6 +167,7 @@ void fbnic_devlink_register(struct fbnic_dev *fbd);
 void fbnic_devlink_unregister(struct fbnic_dev *fbd);
 void __printf(2, 3)
 fbnic_devlink_fw_report(struct fbnic_dev *fbd, const char *format, ...);
+void fbnic_devlink_otp_check(struct fbnic_dev *fbd, const char *msg);
 
 int fbnic_fw_request_mbx(struct fbnic_dev *fbd);
 void fbnic_fw_free_mbx(struct fbnic_dev *fbd);
index e2fffe1597e9fa07b42a2ff4df235a83f747cd9b..d3a7ad921f18cf384be1489e5a1bf15bfa6d28cf 100644 (file)
@@ -1178,4 +1178,22 @@ enum {
 #define FBNIC_IPC_MBX_DESC_FW_CMPL     DESC_BIT(1)
 #define FBNIC_IPC_MBX_DESC_HOST_CMPL   DESC_BIT(0)
 
+/* OTP Registers
+ * These registers are accessible via a BAR4 offset and are written by CMRT
+ * on boot. No sub-field masks are defined here for FBNIC_NS_OTP_STATUS.
+ * FBNIC_NS_OTP_WRITE_STATUS is split in half: the OTP Write Data Status
+ * occupies the top 16 bits (FBNIC_NS_OTP_WRITE_DATA_STATUS_MASK) and the ECC
+ * status occupies the bottom 16 bits (FBNIC_NS_OTP_WRITE_ECC_STATUS_MASK).
+ */
+#define FBNIC_NS_OTP_STATUS            0x0021d
+#define FBNIC_NS_OTP_WRITE_STATUS      0x0021e
+
+#define FBNIC_NS_OTP_WRITE_DATA_STATUS_MASK    CSR_GENMASK(31, 16)
+#define FBNIC_NS_OTP_WRITE_ECC_STATUS_MASK     CSR_GENMASK(15, 0)
+
+#define FBNIC_REGS_VERSION                     CSR_GENMASK(31, 16)
+#define FBNIC_REGS_HW_TYPE                     CSR_GENMASK(15, 8)
+enum{
+       FBNIC_CSR_VERSION_V1_0_ASIC = 1,
+};
+
 #endif /* _FBNIC_CSR_H_ */
index fd7df44ae7a4697f82561c23c028563331bc2e82..b62b1d5b1453e7555fa003a33e9992ea303ad85c 100644 (file)
@@ -534,6 +534,60 @@ static const struct devlink_health_reporter_ops fbnic_fw_ops = {
        .diagnose = fbnic_fw_reporter_diagnose,
 };
 
+static u32 fbnic_read_otp_status(struct fbnic_dev *fbd)
+{
+       return fbnic_fw_rd32(fbd, FBNIC_NS_OTP_STATUS); /* raw, unmasked value */
+}
+
+static int
+fbnic_otp_reporter_dump(struct devlink_health_reporter *reporter,
+                       struct devlink_fmsg *fmsg, void *priv_ctx,
+                       struct netlink_ext_ack *extack)
+{
+       struct fbnic_dev *fbd = devlink_health_reporter_priv(reporter);
+       u32 otp_status, otp_write_status, m;
+
+       otp_status = fbnic_read_otp_status(fbd);
+       otp_write_status = fbnic_fw_rd32(fbd, FBNIC_NS_OTP_WRITE_STATUS);
+
+       /* Dump OTP status: emit an "OTP" pair wrapping an object with three
+        * u32 fields - "Status" (the status register as read), "Data" and
+        * "ECC" (the two halves of the write status register). priv_ctx and
+        * extack are not used and the function always returns 0.
+        */
+       devlink_fmsg_pair_nest_start(fmsg, "OTP");
+       devlink_fmsg_obj_nest_start(fmsg);
+
+       devlink_fmsg_u32_pair_put(fmsg, "Status", otp_status);
+
+       /* Extract OTP Write Data status (bits 31:16 of write status) */
+       m = FBNIC_NS_OTP_WRITE_DATA_STATUS_MASK;
+       devlink_fmsg_u32_pair_put(fmsg, "Data",
+                                 FIELD_GET(m, otp_write_status));
+
+       /* Extract OTP Write ECC status (bits 15:0 of write status) */
+       m = FBNIC_NS_OTP_WRITE_ECC_STATUS_MASK;
+       devlink_fmsg_u32_pair_put(fmsg, "ECC",
+                                 FIELD_GET(m, otp_write_status));
+
+       devlink_fmsg_obj_nest_end(fmsg);
+       devlink_fmsg_pair_nest_end(fmsg);
+
+       return 0;
+}
+
+void fbnic_devlink_otp_check(struct fbnic_dev *fbd, const char *msg)
+{
+       /* Check if there is anything to report: a zero OTP status register
+        * means we return without raising a report. Otherwise msg is passed
+        * to the otp health reporter and, when fbnic_fw_log_ready() is true,
+        * is also handed to fbnic_fw_log_write().
+        */
+       if (!fbnic_read_otp_status(fbd))
+               return;
+
+       devlink_health_report(fbd->otp_reporter, msg, fbd);
+       if (fbnic_fw_log_ready(fbd))
+               fbnic_fw_log_write(fbd, 0, fbd->firmware_time, msg);
+}
+
+static const struct devlink_health_reporter_ops fbnic_otp_ops = {
+       .name = "otp",
+       .dump = fbnic_otp_reporter_dump, /* only callback provided */
+};
+
 int fbnic_devlink_health_create(struct fbnic_dev *fbd)
 {
        fbd->fw_reporter = devlink_health_reporter_create(priv_to_devlink(fbd),
@@ -545,11 +599,22 @@ int fbnic_devlink_health_create(struct fbnic_dev *fbd)
                return PTR_ERR(fbd->fw_reporter);
        }
 
+       fbd->otp_reporter = devlink_health_reporter_create(priv_to_devlink(fbd),
+                                                          &fbnic_otp_ops, fbd);
+       if (IS_ERR(fbd->otp_reporter)) {
+               devlink_health_reporter_destroy(fbd->fw_reporter);
+               dev_warn(fbd->dev,
+                        "Failed to create OTP fault reporter: %pe\n",
+                        fbd->otp_reporter);
+               return PTR_ERR(fbd->otp_reporter);
+       }
+
        return 0;
 }
 
 void fbnic_devlink_health_destroy(struct fbnic_dev *fbd)
 {
+       devlink_health_reporter_destroy(fbd->otp_reporter); /* reverse of creation order */
        devlink_health_reporter_destroy(fbd->fw_reporter);
 }
 
index 576fc89f8704929ea875b5fc73cca38fe8549305..a7a6b4db8016f3cdf9e353025c4026069cdc5fa5 100644 (file)
@@ -197,6 +197,7 @@ static void fbnic_health_check(struct fbnic_dev *fbd)
                return;
 
        fbnic_devlink_fw_report(fbd, "Firmware crashed detected!");
+       fbnic_devlink_otp_check(fbd, "error detected after firmware recovery");
 
        if (fbnic_fw_config_after_crash(fbd))
                dev_err(fbd->dev, "Firmware recovery failed after crash\n");
@@ -321,6 +322,7 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                         err);
 
        fbnic_devlink_register(fbd);
+       fbnic_devlink_otp_check(fbd, "error detected during probe");
        fbnic_dbg_fbd_init(fbd);
 
        /* Capture snapshot of hardware stats so netdev can calculate delta */
@@ -474,6 +476,9 @@ static int __fbnic_pm_resume(struct device *dev)
         */
        fbnic_fw_log_enable(fbd, list_empty(&fbd->fw_log.entries));
 
+       /* Since the FW should be up, check if it reported OTP errors */
+       fbnic_devlink_otp_check(fbd, "error detected after PM resume");
+
        /* No netdev means there isn't a network interface to bring up */
        if (fbnic_init_failure(fbd))
                return 0;