www.infradead.org Git - users/hch/misc.git/commitdiff
eth: fbnic: use fw uptime to detect fw crashes
author: Jakub Kicinski <kuba@kernel.org>
Tue, 16 Sep 2025 23:14:13 +0000 (16:14 -0700)
committer: Paolo Abeni <pabeni@redhat.com>
Thu, 18 Sep 2025 09:37:23 +0000 (11:37 +0200)
Currently we only detect FW crashes when it stops responding
to heartbeat messages. FW has a watchdog which will reset it
in case of crashes. Use FW uptime sent in the ownership and
heartbeat messages to detect that the watchdog has fired
(uptime went down).

Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20250916231420.1693955-3-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
drivers/net/ethernet/meta/fbnic/fbnic.h
drivers/net/ethernet/meta/fbnic/fbnic_fw.c
drivers/net/ethernet/meta/fbnic/fbnic_fw.h

index 311c7dda911a28fa80e3a94f1af79ab0c94f30ee..09058d847729980029e3a9a10ea7b9c87d5fcc46 100644 (file)
@@ -84,6 +84,10 @@ struct fbnic_dev {
        /* Local copy of hardware statistics */
        struct fbnic_hw_stats hw_stats;
 
+       /* Firmware time since boot in milliseconds */
+       u64 firmware_time;
+       u64 prev_firmware_time;
+
        struct fbnic_fw_log fw_log;
 };
 
index 6e580654493cf217ba2f7cb6da80e13da7f31e65..9b39a73e4c3590f2693cb06f99785a77a3b2d75c 100644 (file)
@@ -495,6 +495,11 @@ int fbnic_fw_xmit_ownership_msg(struct fbnic_dev *fbd, bool take_ownership)
 
        fbd->last_heartbeat_request = req_time;
 
+       /* Set prev_firmware_time to 0 to avoid triggering firmware crash
+        * detection until we receive the second uptime in a heartbeat resp.
+        */
+       fbd->prev_firmware_time = 0;
+
        /* Set heartbeat detection based on if we are taking ownership */
        fbd->fw_heartbeat_enabled = take_ownership;
 
@@ -660,6 +665,7 @@ static int fbnic_fw_parse_cap_resp(void *opaque, struct fbnic_tlv_msg **results)
 }
 
 static const struct fbnic_tlv_index fbnic_ownership_resp_index[] = {
+       FBNIC_TLV_ATTR_U64(FBNIC_FW_OWNERSHIP_TIME),
        FBNIC_TLV_ATTR_LAST
 };
 
@@ -671,10 +677,14 @@ static int fbnic_fw_parse_ownership_resp(void *opaque,
        /* Count the ownership response as a heartbeat reply */
        fbd->last_heartbeat_response = jiffies;
 
+       /* Capture firmware time for logging and firmware crash check */
+       fbd->firmware_time = fta_get_uint(results, FBNIC_FW_OWNERSHIP_TIME);
+
        return 0;
 }
 
 static const struct fbnic_tlv_index fbnic_heartbeat_resp_index[] = {
+       FBNIC_TLV_ATTR_U64(FBNIC_FW_HEARTBEAT_UPTIME),
        FBNIC_TLV_ATTR_LAST
 };
 
@@ -685,6 +695,9 @@ static int fbnic_fw_parse_heartbeat_resp(void *opaque,
 
        fbd->last_heartbeat_response = jiffies;
 
+       /* Capture firmware time for logging and firmware crash check */
+       fbd->firmware_time = fta_get_uint(results, FBNIC_FW_HEARTBEAT_UPTIME);
+
        return 0;
 }
 
@@ -706,6 +719,7 @@ static int fbnic_fw_xmit_heartbeat_message(struct fbnic_dev *fbd)
                goto free_message;
 
        fbd->last_heartbeat_request = req_time;
+       fbd->prev_firmware_time = fbd->firmware_time;
 
        return err;
 
@@ -766,7 +780,8 @@ void fbnic_fw_check_heartbeat(struct fbnic_dev *fbd)
                return;
 
        /* Was the last heartbeat response long time ago? */
-       if (!fbnic_fw_heartbeat_current(fbd)) {
+       if (!fbnic_fw_heartbeat_current(fbd) ||
+           fbd->firmware_time < fbd->prev_firmware_time) {
                dev_warn(fbd->dev,
                         "Firmware did not respond to heartbeat message\n");
                fbd->fw_heartbeat_enabled = false;
index ec67b80809b0bba74722143b268653cde7a77105..be7f2dc886981358a74d5a86e2169972ddc68209 100644 (file)
@@ -198,9 +198,16 @@ enum {
 
 enum {
        FBNIC_FW_OWNERSHIP_FLAG                 = 0x0,
+       FBNIC_FW_OWNERSHIP_TIME                 = 0x1,
        FBNIC_FW_OWNERSHIP_MSG_MAX
 };
 
+enum {
+       FBNIC_FW_HEARTBEAT_UPTIME               = 0x0,
+       FBNIC_FW_HEARTBEAT_NUMBER_OF_MESSAGES   = 0x1,
+       FBNIC_FW_HEARTBEAT_MSG_MAX
+};
+
 enum {
        FBNIC_FW_START_UPGRADE_ERROR            = 0x0,
        FBNIC_FW_START_UPGRADE_SECTION          = 0x1,