// SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2024, Intel Corporation. */
 
-#include "health.h"
 #include "ice.h"
+#include "ice_adminq_cmd.h" /* for struct ice_aqc_health_status_elem */
+#include "health.h"
 
 #define ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, obj, name) \
        devlink_fmsg_put(fmsg, #name, (obj)->name)
 
+/* Number of internal data values handled per health status element */
+#define ICE_HEALTH_STATUS_DATA_SIZE 2
+
+/**
+ * struct ice_health_status - user-facing text for one health status code
+ * @code: health status code, used as the lookup key
+ * @description: text reported under "Description"
+ * @solution: text reported under "Possible Solution"; may be NULL
+ * @data_label: labels for the internal data values; a NULL entry makes the
+ *             reporter fall back to a generic "Aux Data N" label
+ */
+struct ice_health_status {
+       enum ice_aqc_health_status code;
+       const char *description;
+       const char *solution;
+       const char *data_label[ICE_HEALTH_STATUS_DATA_SIZE];
+};
+
+/*
+ * In addition to the health status codes provided below, the firmware might
+ * generate Health Status Codes that are not pertinent to the end-user.
+ * For instance, Health Code 0x1002 is triggered when the command fails.
+ * Such codes should be disregarded by the end-user.
+ * The lookup table below must be kept sorted by code, as it is searched with bsearch().
+ */
+
+/* Strings shared by several ice_health_status_lookup[] entries */
+static const char *const ice_common_port_solutions =
+       "Check your cable connection. Change or replace the module or cable. Manually set speed and duplex.";
+static const char *const ice_port_number_label = "Port Number";
+static const char *const ice_update_nvm_solution = "Update to the latest NVM image.";
+
+/*
+ * Health status code -> description / possible solution / data labels.
+ * Entries must stay in ascending order of code: ice_get_health_status()
+ * searches this table with bsearch().
+ */
+static const struct ice_health_status ice_health_status_lookup[] = {
+       {ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT, "An unsupported module was detected.",
+               ice_common_port_solutions, {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE, "Module type is not supported.",
+               "Change or replace the module or cable.", {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL, "Module is not qualified.",
+               ice_common_port_solutions, {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM,
+               "Device cannot communicate with the module.",
+               ice_common_port_solutions, {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT, "Unresolved module conflict.",
+               "Manually set speed/duplex or change the port option. If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.",
+               {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT, "Module is not present.",
+               "Check that the module is inserted correctly. If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.",
+               {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED, "Underutilized module.",
+               "Change or replace the module or cable. Change the port option.",
+               {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT, "An unsupported module was detected.",
+               ice_common_port_solutions, {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG, "Invalid link configuration.",
+               NULL, {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS, "Port hardware access error.",
+               ice_update_nvm_solution, {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE, "A port is unreachable.",
+               "Change the port option. Update to the latest NVM image."},
+       {ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED, "Port speed is limited due to module.",
+               "Change the module or configure the port option to match the current module speed. Change the port option.",
+               {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT,
+               "All configured link modes were attempted but failed to establish link. The device will restart the process to establish link.",
+               "Check link partner connection and configuration.",
+               {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED,
+               "Port speed is limited by PHY capabilities.",
+               "Change the module to align to port option.", {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO, "LOM topology netlist is corrupted.",
+               ice_update_nvm_solution, {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_NETLIST, "Unrecoverable netlist error.",
+               ice_update_nvm_solution, {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT, "Port topology conflict.",
+               "Change the port option. Update to the latest NVM image."},
+       {ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS, "Unrecoverable hardware access error.",
+               ice_update_nvm_solution, {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME, "Unrecoverable runtime error.",
+               ice_update_nvm_solution, {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT, "Link management engine failed to initialize.",
+               ice_update_nvm_solution, {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_ERR_PHY_FW_LOAD,
+               "Failed to load the firmware image in the external PHY.",
+               ice_update_nvm_solution, {ice_port_number_label}},
+       {ICE_AQC_HEALTH_STATUS_INFO_RECOVERY, "The device is in firmware recovery mode.",
+               ice_update_nvm_solution, {"Extended Error"}},
+       {ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS, "The flash chip cannot be accessed.",
+               "If issue persists, call customer support.", {"Access Type"}},
+       {ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH, "NVM authentication failed.",
+               ice_update_nvm_solution},
+       {ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH, "Option ROM authentication failed.",
+               ice_update_nvm_solution},
+       {ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH, "DDP package authentication failed.",
+               "Update to latest base driver and DDP package."},
+       {ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT, "NVM image is incompatible.",
+               ice_update_nvm_solution},
+       {ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT, "Option ROM is incompatible.",
+               ice_update_nvm_solution, {"Expected PCI Device ID", "Expected Module ID"}},
+       {ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB,
+               "Supplied MIB file is invalid. DCB reverted to default configuration.",
+               "Disable FW-LLDP and check DCBx system configuration.",
+               {ice_port_number_label, "MIB ID"}},
+};
+
+/**
+ * ice_health_status_lookup_compare - bsearch() comparator on health codes
+ * @a: pointer to a struct ice_health_status (the search key)
+ * @b: pointer to a struct ice_health_status (a table element)
+ *
+ * Return: negative, zero or positive when the code of @a is respectively
+ * lower than, equal to or greater than the code of @b.
+ */
+static int ice_health_status_lookup_compare(const void *a, const void *b)
+{
+       const struct ice_health_status *lhs = a;
+       const struct ice_health_status *rhs = b;
+
+       /* Subtract as int: the enum's underlying type may be unsigned. */
+       return (int)lhs->code - (int)rhs->code;
+}
+
+/**
+ * ice_get_health_status - find the lookup entry for a health status code
+ * @code: health status code in CPU byte order
+ *
+ * Return: the matching ice_health_status_lookup[] entry, or NULL when the
+ * code is not present in the table.
+ */
+static const struct ice_health_status *ice_get_health_status(u16 code)
+{
+       const size_t num_entries = ARRAY_SIZE(ice_health_status_lookup);
+       struct ice_health_status needle = { .code = code };
+
+       return bsearch(&needle, ice_health_status_lookup, num_entries,
+                      sizeof(ice_health_status_lookup[0]),
+                      ice_health_status_lookup_compare);
+}
+
+/**
+ * ice_describe_status_code - put a decoded health status element into an fmsg
+ * @fmsg: devlink formatted message to fill
+ * @hse: health status element to describe
+ *
+ * Always emits the raw code as "Syndrome". For a non-zero code found in
+ * ice_health_status_lookup[], also emits its description, the possible
+ * solution (when one is defined) and every internal data value that differs
+ * from ICE_AQC_HEALTH_STATUS_UNDEFINED_DATA.
+ */
+static void ice_describe_status_code(struct devlink_fmsg *fmsg,
+                                    const struct ice_aqc_health_status_elem *hse)
+{
+       static const char *const aux_label[ICE_HEALTH_STATUS_DATA_SIZE] = {
+               "Aux Data 1", "Aux Data 2"
+       };
+       const struct ice_health_status *health_code;
+       u32 internal_data[ICE_HEALTH_STATUS_DATA_SIZE];
+       u16 status_code;
+
+       status_code = le16_to_cpu(hse->health_status_code);
+
+       devlink_fmsg_put(fmsg, "Syndrome", status_code);
+       if (!status_code)
+               return;
+
+       health_code = ice_get_health_status(status_code);
+       if (!health_code)
+               return;
+
+       internal_data[0] = le32_to_cpu(hse->internal_data1);
+       internal_data[1] = le32_to_cpu(hse->internal_data2);
+
+       devlink_fmsg_string_pair_put(fmsg, "Description", health_code->description);
+       if (health_code->solution)
+               devlink_fmsg_string_pair_put(fmsg, "Possible Solution",
+                                            health_code->solution);
+
+       for (size_t i = 0; i < ICE_HEALTH_STATUS_DATA_SIZE; i++) {
+               const char *label;
+
+               if (internal_data[i] == ICE_AQC_HEALTH_STATUS_UNDEFINED_DATA)
+                       continue;
+
+               /* use the generic label when the table entry has none */
+               label = health_code->data_label[i] ?
+                       health_code->data_label[i] : aux_label[i];
+               devlink_fmsg_u32_pair_put(fmsg, label, internal_data[i]);
+       }
+}
+
+/**
+ * ice_port_reporter_diagnose - "diagnose" callback of the port health reporter
+ * @reporter: devlink health reporter whose private data is the PF
+ * @fmsg: devlink formatted message to fill
+ * @extack: netlink extended ack, not used here
+ *
+ * Return: always 0.
+ */
+static int
+ice_port_reporter_diagnose(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg,
+                          struct netlink_ext_ack *extack)
+{
+       struct ice_pf *pf;
+
+       pf = devlink_health_reporter_priv(reporter);
+       ice_describe_status_code(fmsg, &pf->health_reporters.port_status);
+
+       return 0;
+}
+
+/**
+ * ice_port_reporter_dump - "dump" callback of the port health reporter
+ * @reporter: devlink health reporter whose private data is the PF
+ * @fmsg: devlink formatted message to fill
+ * @priv_ctx: reporter private context, not used here
+ * @extack: netlink extended ack, not used here
+ *
+ * Return: always 0.
+ */
+static int
+ice_port_reporter_dump(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg,
+                      void *priv_ctx, struct netlink_ext_ack __always_unused *extack)
+{
+       struct ice_pf *pf;
+
+       pf = devlink_health_reporter_priv(reporter);
+       ice_describe_status_code(fmsg, &pf->health_reporters.port_status);
+
+       return 0;
+}
+
+/**
+ * ice_fw_reporter_diagnose - "diagnose" callback of the fw health reporter
+ * @reporter: devlink health reporter whose private data is the PF
+ * @fmsg: devlink formatted message to fill
+ * @extack: netlink extended ack, not used here
+ *
+ * Return: always 0.
+ */
+static int
+ice_fw_reporter_diagnose(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg,
+                        struct netlink_ext_ack *extack)
+{
+       struct ice_pf *pf;
+
+       pf = devlink_health_reporter_priv(reporter);
+       ice_describe_status_code(fmsg, &pf->health_reporters.fw_status);
+
+       return 0;
+}
+
+/**
+ * ice_fw_reporter_dump - "dump" callback of the fw health reporter
+ * @reporter: devlink health reporter whose private data is the PF
+ * @fmsg: devlink formatted message to fill
+ * @priv_ctx: reporter private context, not used here
+ * @extack: netlink extended ack, not used here
+ *
+ * Return: always 0.
+ */
+static int
+ice_fw_reporter_dump(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg,
+                    void *priv_ctx, struct netlink_ext_ack *extack)
+{
+       struct ice_pf *pf;
+
+       pf = devlink_health_reporter_priv(reporter);
+       ice_describe_status_code(fmsg, &pf->health_reporters.fw_status);
+
+       return 0;
+}
+
+/**
+ * ice_config_health_events - enable or disable firmware health events
+ * @pf: pointer to the PF structure
+ * @enable: true to request PF-specific and global events, false to request none
+ *
+ * A failure of the admin queue command is logged and otherwise ignored.
+ */
+static void ice_config_health_events(struct ice_pf *pf, bool enable)
+{
+       u8 enable_bits;
+       int ret;
+
+       enable_bits = enable ? ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK |
+                              ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK : 0;
+
+       ret = ice_aq_set_health_status_cfg(&pf->hw, enable_bits);
+       if (!ret)
+               return;
+
+       dev_err(ice_pf_to_dev(pf), "Failed to %s firmware health events, err %d aq_err %s\n",
+               str_enable_disable(enable), ret,
+               ice_aq_str(pf->hw.adminq.sq_last_status));
+}
+
+/**
+ * ice_process_health_status_event - Process the health status event from FW
+ * @pf: pointer to the PF structure
+ * @event: event structure containing the Health Status Event opcode
+ *
+ * Walk the health status elements in the event buffer. An element whose code
+ * is found in ice_health_status_lookup[] is stored in @pf and reported through
+ * the fw or port devlink health reporter, depending on its event source. Any
+ * other code is only logged at debug level.
+ */
+void ice_process_health_status_event(struct ice_pf *pf, struct ice_rq_event_info *event)
+{
+       const struct ice_aqc_health_status_elem *health_info;
+       u16 count;
+
+       health_info = (const struct ice_aqc_health_status_elem *)event->msg_buf;
+       count = le16_to_cpu(event->desc.params.get_health_status.health_status_count);
+
+       /* the element count must fit in the received buffer */
+       if (count > (event->buf_len / sizeof(*health_info))) {
+               dev_err(ice_pf_to_dev(pf), "Received a health status event with invalid element count\n");
+               return;
+       }
+
+       for (size_t i = 0; i < count; i++, health_info++) {
+               u16 status_code = le16_to_cpu(health_info->health_status_code);
+               u16 source = le16_to_cpu(health_info->event_source);
+
+               if (!ice_get_health_status(status_code)) {
+                       u32 data1 = le32_to_cpu(health_info->internal_data1);
+                       u32 data2 = le32_to_cpu(health_info->internal_data2);
+
+                       dev_dbg(ice_pf_to_dev(pf),
+                               "Received internal health status code 0x%08x, source: 0x%08x, data1: 0x%08x, data2: 0x%08x\n",
+                               status_code, source, data1, data2);
+                       continue;
+               }
+
+               switch (source) {
+               case ICE_AQC_HEALTH_STATUS_GLOBAL:
+                       pf->health_reporters.fw_status = *health_info;
+                       devlink_health_report(pf->health_reporters.fw,
+                                             "FW syndrome reported", NULL);
+                       break;
+               case ICE_AQC_HEALTH_STATUS_PF:
+               case ICE_AQC_HEALTH_STATUS_PORT:
+                       pf->health_reporters.port_status = *health_info;
+                       devlink_health_report(pf->health_reporters.port,
+                                             "Port syndrome reported", NULL);
+                       break;
+               default:
+                       dev_err(ice_pf_to_dev(pf), "Health code with unknown source\n");
+                       break;
+               }
+       }
+}
+
 /**
  * ice_devlink_health_report - boilerplate to call given @reporter
  *
        return rep;
 }
 
-#define ICE_DEFINE_HEALTH_REPORTER_OPS(_name) \
-       static const struct devlink_health_reporter_ops ice_ ## _name ## _reporter_ops = { \
+/* Expands to one initializer: ._field = ice_<_name>_reporter_<_field>, */
+#define ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field) \
+       ._field = ice_##_name##_reporter_##_field,
+
+/* Define ice_<_name>_reporter_ops with .name and one callback field */
+#define ICE_DEFINE_HEALTH_REPORTER_OPS_1(_name, _field1) \
+       static const struct devlink_health_reporter_ops ice_##_name##_reporter_ops = { \
        .name = #_name, \
-       .dump = ice_ ## _name ## _reporter_dump, \
-}
+       ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field1) \
+       }
+
+/* Define ice_<_name>_reporter_ops with .name and two callback fields */
+#define ICE_DEFINE_HEALTH_REPORTER_OPS_2(_name, _field1, _field2) \
+       static const struct devlink_health_reporter_ops ice_##_name##_reporter_ops = { \
+       .name = #_name, \
+       ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field1) \
+       ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field2) \
+       }
 
-ICE_DEFINE_HEALTH_REPORTER_OPS(mdd);
-ICE_DEFINE_HEALTH_REPORTER_OPS(tx_hang);
+ICE_DEFINE_HEALTH_REPORTER_OPS_1(mdd, dump);
+ICE_DEFINE_HEALTH_REPORTER_OPS_1(tx_hang, dump);
+ICE_DEFINE_HEALTH_REPORTER_OPS_2(fw, dump, diagnose);
+ICE_DEFINE_HEALTH_REPORTER_OPS_2(port, dump, diagnose);
 
 /**
  * ice_health_init - allocate and init all ice devlink health reporters and
 
        reps->mdd = ice_init_devlink_rep(pf, &ice_mdd_reporter_ops);
        reps->tx_hang = ice_init_devlink_rep(pf, &ice_tx_hang_reporter_ops);
+
+       if (ice_is_fw_health_report_supported(&pf->hw)) {
+               reps->fw = ice_init_devlink_rep(pf, &ice_fw_reporter_ops);
+               reps->port = ice_init_devlink_rep(pf, &ice_port_reporter_ops);
+               ice_config_health_events(pf, true);
+       }
 }
 
 /**
 {
        ice_deinit_devl_reporter(pf->health_reporters.mdd);
        ice_deinit_devl_reporter(pf->health_reporters.tx_hang);
+       if (ice_is_fw_health_report_supported(&pf->hw)) {
+               ice_deinit_devl_reporter(pf->health_reporters.fw);
+               ice_deinit_devl_reporter(pf->health_reporters.port);
+               ice_config_health_events(pf, false);
+       }
 }
 
 static
 
        ICE_AQC_FW_LOG_ID_MAX,
 };
 
+/* Bits the driver ORs together and passes to ice_aq_set_health_status_cfg()
+ * to enable firmware health events; passing 0 disables them.
+ */
+enum ice_aqc_health_status_mask {
+       ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK = BIT(0),
+       ICE_AQC_HEALTH_STATUS_SET_ALL_PF_MASK      = BIT(1),
+       ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK      = BIT(2),
+};
+
+/* Set Health Status (direct 0xFF20) */
+struct ice_aqc_set_health_status_cfg {
+       /* presumably enum ice_aqc_health_status_mask bits - TODO confirm */
+       u8 event_source;
+       u8 reserved[15];
+};
+
+/* Health status codes, as carried (little endian) in the health_status_code
+ * field of struct ice_aqc_health_status_elem.
+ */
+enum ice_aqc_health_status {
+       ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT            = 0x101,
+       ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE                      = 0x102,
+       ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL                      = 0x103,
+       ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM                      = 0x104,
+       ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT                  = 0x105,
+       ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT               = 0x106,
+       ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED            = 0x107,
+       ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT           = 0x108,
+       ICE_AQC_HEALTH_STATUS_ERR_MOD_DIAGNOSTIC_FEATURE        = 0x109,
+       ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG              = 0x10B,
+       ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS                   = 0x10C,
+       ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE              = 0x10D,
+       ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED       = 0x10F,
+       ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT                = 0x110,
+       ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED       = 0x111,
+       ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO                  = 0x112,
+       ICE_AQC_HEALTH_STATUS_ERR_NETLIST                       = 0x113,
+       ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT                 = 0x114,
+       ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS                = 0x115,
+       ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME                  = 0x116,
+       ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT                      = 0x117,
+       ICE_AQC_HEALTH_STATUS_ERR_PHY_NVM_PROG                  = 0x120,
+       ICE_AQC_HEALTH_STATUS_ERR_PHY_FW_LOAD                   = 0x121,
+       ICE_AQC_HEALTH_STATUS_INFO_RECOVERY                     = 0x500,
+       ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS                  = 0x501,
+       ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH                      = 0x502,
+       ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH                     = 0x503,
+       ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH                      = 0x504,
+       ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT                    = 0x505,
+       ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT                   = 0x506,
+       ICE_AQC_HEALTH_STATUS_ERR_NVM_SEC_VIOLATION             = 0x507,
+       ICE_AQC_HEALTH_STATUS_ERR_OROM_SEC_VIOLATION            = 0x508,
+       ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB                       = 0x509,
+       ICE_AQC_HEALTH_STATUS_ERR_MNG_TIMEOUT                   = 0x50A,
+       ICE_AQC_HEALTH_STATUS_ERR_BMC_RESET                     = 0x50B,
+       ICE_AQC_HEALTH_STATUS_ERR_LAST_MNG_FAIL                 = 0x50C,
+       ICE_AQC_HEALTH_STATUS_ERR_RESOURCE_ALLOC_FAIL           = 0x50D,
+       ICE_AQC_HEALTH_STATUS_ERR_FW_LOOP                       = 0x1000,
+       ICE_AQC_HEALTH_STATUS_ERR_FW_PFR_FAIL                   = 0x1001,
+       ICE_AQC_HEALTH_STATUS_ERR_LAST_FAIL_AQ                  = 0x1002,
+};
+
+/* Get Health Status (indirect 0xFF22) */
+struct ice_aqc_get_health_status {
+       /* number of struct ice_aqc_health_status_elem entries in the buffer */
+       __le16 health_status_count;
+       u8 reserved[6];
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+/* Values of the event_source field of struct ice_aqc_health_status_elem */
+enum ice_aqc_health_status_scope {
+       ICE_AQC_HEALTH_STATUS_PF        = 0x1,
+       ICE_AQC_HEALTH_STATUS_PORT      = 0x2,
+       ICE_AQC_HEALTH_STATUS_GLOBAL    = 0x3,
+};
+
+/* internal_data1/internal_data2 values equal to this are not reported */
+#define ICE_AQC_HEALTH_STATUS_UNDEFINED_DATA   0xDEADBEEF
+
+/* Get Health Status event buffer entry (0xFF22),
+ * repeated per reported health status.
+ */
+struct ice_aqc_health_status_elem {
+       __le16 health_status_code;      /* enum ice_aqc_health_status */
+       __le16 event_source;            /* enum ice_aqc_health_status_scope */
+       __le32 internal_data1;
+       __le32 internal_data2;
+};
+
 /* Set FW Logging configuration (indirect 0xFF30)
  * Register for FW Logging (indirect 0xFF31)
  * Query FW Logging (indirect 0xFF32)
                struct ice_aqc_get_link_status get_link_status;
                struct ice_aqc_event_lan_overflow lan_overflow;
                struct ice_aqc_get_link_topo get_link_topo;
+               struct ice_aqc_set_health_status_cfg set_health_status_cfg;
+               struct ice_aqc_get_health_status get_health_status;
                struct ice_aqc_dnl_call_command dnl_call;
                struct ice_aqc_i2c read_write_i2c;
                struct ice_aqc_read_i2c_resp read_i2c_resp;
        /* Standalone Commands/Events */
        ice_aqc_opc_event_lan_overflow                  = 0x1001,
 
+       /* System Diagnostic commands */
+       ice_aqc_opc_set_health_status_cfg               = 0xFF20,
+       ice_aqc_opc_get_health_status                   = 0xFF22,
+
        /* FW Logging Commands */
        ice_aqc_opc_fw_logs_config                      = 0xFF30,
        ice_aqc_opc_fw_logs_register                    = 0xFF31,