www.infradead.org Git - nvme.git/commitdiff
crypto: qat - update PFVF protocol for recovery
author Mun Chun Yep <mun.chun.yep@intel.com>
Fri, 2 Feb 2024 10:53:19 +0000 (18:53 +0800)
committer Herbert Xu <herbert@gondor.apana.org.au>
Fri, 9 Feb 2024 04:57:18 +0000 (12:57 +0800)
Update the PFVF logic to handle restart and recovery. This adds the
following functions:

  * adf_pf2vf_notify_fatal_error(): allows the PF to notify VFs that the
    device detected a fatal error and requires a reset. This sends the
    `ADF_PF2VF_MSGTYPE_FATAL_ERROR` event to the VFs.
  * adf_pf2vf_wait_for_restarting_complete(): allows the PF to wait for
    `ADF_VF2PF_MSGTYPE_RESTARTING_COMPLETE` events from active VFs
    before proceeding with a reset.
  * adf_pf2vf_notify_restarted(): enables the PF to notify VFs with
    an `ADF_PF2VF_MSGTYPE_RESTARTED` event after recovery, indicating that
    the device is back to normal. This prompts VF drivers to switch back
    to using the accelerator for workload processing.

These changes improve the communication and synchronization between PF
and VF drivers during system restart and recovery processes.

Signed-off-by: Mun Chun Yep <mun.chun.yep@intel.com>
Reviewed-by: Ahsan Atta <ahsan.atta@intel.com>
Reviewed-by: Markas Rapoportas <markas.rapoportas@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
drivers/crypto/intel/qat/qat_common/adf_aer.c
drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h
drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c
drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h
drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c
drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c
drivers/crypto/intel/qat/qat_common/adf_sriov.c

index a16c7e6edc65d448c6fdca62665213e41ceaa90c..4a3c36aaa7caf912a88cdd5c8e903e9ced6bd15b 100644 (file)
@@ -332,6 +332,7 @@ struct adf_accel_vf_info {
        struct ratelimit_state vf2pf_ratelimit;
        u32 vf_nr;
        bool init;
+       bool restarting;
        u8 vf_compat_ver;
 };
 
index acbbd32bd81576addf94e8560b02548e1ecf0ffe..ecb114e1b59f2c40e00f2e51208322a3fd7f82db 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/delay.h>
 #include "adf_accel_devices.h"
 #include "adf_common_drv.h"
+#include "adf_pfvf_pf_msg.h"
 
 struct adf_fatal_error_data {
        struct adf_accel_dev *accel_dev;
@@ -189,6 +190,8 @@ static void adf_notify_fatal_error_worker(struct work_struct *work)
                /* Disable arbitration to stop processing of new requests */
                if (hw_device->exit_arb)
                        hw_device->exit_arb(accel_dev);
+               if (accel_dev->pf.vf_info)
+                       adf_pf2vf_notify_fatal_error(accel_dev);
        }
 
        kfree(wq_data);
index 204a42438992645960e99b234b77983cc3e54864..d1b3ef9cadacc02574ccf9c56515cc1c2cabee36 100644 (file)
@@ -99,6 +99,8 @@ enum pf2vf_msgtype {
        ADF_PF2VF_MSGTYPE_RESTARTING            = 0x01,
        ADF_PF2VF_MSGTYPE_VERSION_RESP          = 0x02,
        ADF_PF2VF_MSGTYPE_BLKMSG_RESP           = 0x03,
+       ADF_PF2VF_MSGTYPE_FATAL_ERROR           = 0x04,
+       ADF_PF2VF_MSGTYPE_RESTARTED             = 0x05,
 /* Values from 0x10 are Gen4 specific, message type is only 4 bits in Gen2 devices. */
        ADF_PF2VF_MSGTYPE_RP_RESET_RESP         = 0x10,
 };
@@ -112,6 +114,7 @@ enum vf2pf_msgtype {
        ADF_VF2PF_MSGTYPE_LARGE_BLOCK_REQ       = 0x07,
        ADF_VF2PF_MSGTYPE_MEDIUM_BLOCK_REQ      = 0x08,
        ADF_VF2PF_MSGTYPE_SMALL_BLOCK_REQ       = 0x09,
+       ADF_VF2PF_MSGTYPE_RESTARTING_COMPLETE   = 0x0a,
 /* Values from 0x10 are Gen4 specific, message type is only 4 bits in Gen2 devices. */
        ADF_VF2PF_MSGTYPE_RP_RESET              = 0x10,
 };
@@ -124,8 +127,10 @@ enum pfvf_compatibility_version {
        ADF_PFVF_COMPAT_FAST_ACK                = 0x03,
        /* Ring to service mapping support for non-standard mappings */
        ADF_PFVF_COMPAT_RING_TO_SVC_MAP         = 0x04,
+       /* Fallback compat */
+       ADF_PFVF_COMPAT_FALLBACK                = 0x05,
        /* Reference to the latest version */
-       ADF_PFVF_COMPAT_THIS_VERSION            = 0x04,
+       ADF_PFVF_COMPAT_THIS_VERSION            = 0x05,
 };
 
 /* PF->VF Version Response */
index 14c069f0d71a5b81ce246366f83e492924d663cd..0e31f4b41844e0a8d53de4000c4d574afab989f4 100644 (file)
@@ -1,21 +1,83 @@
 // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
 /* Copyright(c) 2015 - 2021 Intel Corporation */
+#include <linux/delay.h>
 #include <linux/pci.h>
 #include "adf_accel_devices.h"
 #include "adf_pfvf_msg.h"
 #include "adf_pfvf_pf_msg.h"
 #include "adf_pfvf_pf_proto.h"
 
+#define ADF_PF_WAIT_RESTARTING_COMPLETE_DELAY  100
+#define ADF_VF_SHUTDOWN_RETRY                  100
+
 void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev)
 {
        struct adf_accel_vf_info *vf;
        struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_RESTARTING };
        int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
 
+       dev_dbg(&GET_DEV(accel_dev), "pf2vf notify restarting\n");
        for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) {
-               if (vf->init && adf_send_pf2vf_msg(accel_dev, i, msg))
+               vf->restarting = false;
+               if (!vf->init)
+                       continue;
+               if (adf_send_pf2vf_msg(accel_dev, i, msg))
                        dev_err(&GET_DEV(accel_dev),
                                "Failed to send restarting msg to VF%d\n", i);
+               else if (vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK)
+                       vf->restarting = true;
+       }
+}
+
+void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev)
+{
+       int num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
+       int i, retries = ADF_VF_SHUTDOWN_RETRY;
+       struct adf_accel_vf_info *vf;
+       bool vf_running;
+
+       dev_dbg(&GET_DEV(accel_dev), "pf2vf wait for restarting complete\n");
+       do {
+               vf_running = false;
+               for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++)
+                       if (vf->restarting)
+                               vf_running = true;
+               if (!vf_running)
+                       break;
+               msleep(ADF_PF_WAIT_RESTARTING_COMPLETE_DELAY);
+       } while (--retries);
+
+       if (vf_running)
+               dev_warn(&GET_DEV(accel_dev), "Some VFs are still running\n");
+}
+
+void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev)
+{
+       struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_RESTARTED };
+       int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
+       struct adf_accel_vf_info *vf;
+
+       dev_dbg(&GET_DEV(accel_dev), "pf2vf notify restarted\n");
+       for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) {
+               if (vf->init && vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK &&
+                   adf_send_pf2vf_msg(accel_dev, i, msg))
+                       dev_err(&GET_DEV(accel_dev),
+                               "Failed to send restarted msg to VF%d\n", i);
+       }
+}
+
+void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev)
+{
+       struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_FATAL_ERROR };
+       int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
+       struct adf_accel_vf_info *vf;
+
+       dev_dbg(&GET_DEV(accel_dev), "pf2vf notify fatal error\n");
+       for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) {
+               if (vf->init && vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK &&
+                   adf_send_pf2vf_msg(accel_dev, i, msg))
+                       dev_err(&GET_DEV(accel_dev),
+                               "Failed to send fatal error msg to VF%d\n", i);
        }
 }
 
index e8982d1ac8962b3c4ebf3ab0d34f7d3eb65d8df3..f203d88c919c2f06bbbe1d82cf9f5966f0d5706f 100644 (file)
@@ -5,7 +5,28 @@
 
 #include "adf_accel_devices.h"
 
+#if defined(CONFIG_PCI_IOV)
 void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev);
+void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev);
+void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev);
+void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev);
+#else
+static inline void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev)
+{
+}
+
+static inline void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev)
+{
+}
+
+static inline void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev)
+{
+}
+
+static inline void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev)
+{
+}
+#endif
 
 typedef int (*adf_pf2vf_blkmsg_provider)(struct adf_accel_dev *accel_dev,
                                         u8 *buffer, u8 compat);
index 388e58bcbcaf2683228ae30e8aef99f35f3a513b..9ab93fbfefde9408826c44d59eae5c01411179f6 100644 (file)
@@ -291,6 +291,14 @@ static int adf_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u8 vf_nr,
                vf_info->init = false;
                }
                break;
+       case ADF_VF2PF_MSGTYPE_RESTARTING_COMPLETE:
+               {
+               dev_dbg(&GET_DEV(accel_dev),
+                       "Restarting Complete received from VF%d\n", vf_nr);
+               vf_info->restarting = false;
+               vf_info->init = false;
+               }
+               break;
        case ADF_VF2PF_MSGTYPE_LARGE_BLOCK_REQ:
        case ADF_VF2PF_MSGTYPE_MEDIUM_BLOCK_REQ:
        case ADF_VF2PF_MSGTYPE_SMALL_BLOCK_REQ:
index 1015155b637493fb81c9720b72b1324873020457..dc284a089c88954c100bf1a64348c456d6162353 100644 (file)
@@ -308,6 +308,12 @@ static bool adf_handle_pf2vf_msg(struct adf_accel_dev *accel_dev,
 
                adf_pf2vf_handle_pf_restarting(accel_dev);
                return false;
+       case ADF_PF2VF_MSGTYPE_RESTARTED:
+               dev_dbg(&GET_DEV(accel_dev), "Restarted message received from PF\n");
+               return true;
+       case ADF_PF2VF_MSGTYPE_FATAL_ERROR:
+               dev_err(&GET_DEV(accel_dev), "Fatal error received from PF\n");
+               return true;
        case ADF_PF2VF_MSGTYPE_VERSION_RESP:
        case ADF_PF2VF_MSGTYPE_BLKMSG_RESP:
        case ADF_PF2VF_MSGTYPE_RP_RESET_RESP:
index f44025bb6f995d9bdf58bf9d6290fd5566c10192..cb2a9830f192f7cc5e75a03b71c72d9411ebbffe 100644 (file)
@@ -103,6 +103,7 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
                return;
 
        adf_pf2vf_notify_restarting(accel_dev);
+       adf_pf2vf_wait_for_restarting_complete(accel_dev);
        pci_disable_sriov(accel_to_pci_dev(accel_dev));
 
        /* Disable VF to PF interrupts */